In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
from nn_2 import read_data, save_predictions

In [2]:
def check_accuracy(preds, targets):
    """Return the fraction of rows whose highest-scoring column equals the target.

    Args:
        preds: array-like of per-class scores; the class axis is the last one.
        targets: array-like of integer class labels, one per prediction row.

    Returns:
        Mean of the element-wise match between predicted and target labels.
    """
    predicted_labels = np.argmax(preds, axis=-1)
    matches = predicted_labels == targets
    return np.mean(matches)

In [3]:
train_x, train_y, val_x, val_y, test_x = read_data()

# Feature arrays become float tensors.
train_x, val_x, test_x = (
    torch.from_numpy(features).float() for features in (train_x, val_x, test_x)
)

# Labels arrive one-hot encoded; argmax over the last axis turns them into
# class indices before wrapping them as tensors.
train_y, val_y = (
    torch.from_numpy(np.argmax(one_hot, axis=-1)) for one_hot in (train_y, val_y)
)

In [4]:
# Hand unoccupied cached CUDA memory back before starting.
torch.cuda.empty_cache()

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [5]:
class Net(nn.Module):
    """Fully connected network with ``num_layers`` hidden layers of equal width.

    Each hidden layer is Linear -> ReLU -> Dropout. The final Linear layer
    returns its raw outputs with no activation applied.

    Args:
        num_layers: number of hidden layers.
        num_units: width of every hidden layer.
        input_dim: size of each input vector.
        output_dim: size of each output vector.
        drop_prob: dropout probability applied after every hidden activation.
    """

    def __init__(self, num_layers, num_units, input_dim, output_dim, drop_prob=0.5):
        super().__init__()
        widths = [input_dim] + [num_units] * num_layers + [output_dim]

        # Despite the name, this list also holds the final output layer
        # (its last entry); forward() treats that entry separately.
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(widths[idx], widths[idx + 1], bias=True)
             for idx in range(num_layers + 1)]
        )

        # Registered on the module but not applied anywhere in forward().
        self.batchNorm1d = nn.BatchNorm1d(num_units)
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, x):
        """Run ``x`` through every hidden layer, then the output layer."""
        output_layer = self.hidden_layers[-1]
        for layer in self.hidden_layers[:-1]:
            activated = F.relu(layer(x))
            x = self.dropout(activated)
        return output_layer(x)

In [6]:
def train(model, optimizer, criterion, train_x, train_y, val_x, val_y,
          epochs, device=None, verbose=True, batch_size=None):
    """Train ``model`` with shuffled mini-batches and return the final validation loss.

    Args:
        model: the ``nn.Module`` to optimise; updated in place.
        optimizer: optimizer already bound to ``model``'s parameters.
        criterion: loss callable ``criterion(outputs, labels)`` returning a scalar tensor.
        train_x, train_y: training inputs and labels (tensors).
        val_x, val_y: validation inputs and labels (tensors).
        epochs: number of full passes over the training data.
        device: device that batches are moved to. When ``None`` it is taken
            from the model's first parameter (CPU if the model has none).
        verbose: when true, print train/validation loss and accuracy every
            10th epoch.
        batch_size: mini-batch size. When ``None`` the notebook-level
            ``batch_size`` variable is used if it exists, otherwise 128.

    Returns:
        Validation loss as a float, measured with the model in eval mode.

    Note:
        The model is left in eval mode when this function returns.
    """
    if device is None:
        # Batches must live wherever the model's parameters do.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    if batch_size is None:
        # Backward compatibility: earlier versions read the notebook global.
        batch_size = globals().get("batch_size", 128)

    trainloader = DataLoader(TensorDataset(train_x, train_y),
                             shuffle=True, batch_size=batch_size)

    def _evaluate(features, targets):
        """Return (loss as float, model outputs) for one split, without autograd."""
        with torch.no_grad():
            outputs = model(features.to(device))
            loss_value = criterion(outputs, targets.to(device)).item()
        return loss_value, outputs

    def _to_numpy(tensor):
        # .cpu() is required before .numpy() when the tensor lives on a GPU.
        return tensor.detach().cpu().numpy()

    for epoch in range(epochs):  # loop over the dataset multiple times
        model.train()
        for inputs, labels in trainloader:
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs.to(device))
            loss = criterion(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

        if verbose and (1 + epoch) % 10 == 0:
            model.eval()  # disable dropout while measuring
            train_loss, train_preds = _evaluate(train_x, train_y)
            train_acc = check_accuracy(_to_numpy(train_preds), _to_numpy(train_y))
            val_loss, val_preds = _evaluate(val_x, val_y)
            val_acc = check_accuracy(_to_numpy(val_preds), _to_numpy(val_y))
            print("Epoch = {}, train_loss = {:.3f}, val_loss = {:.3f}, train_acc={:.3f}, val_acc={:.3f}"
                  .format(epoch+1, train_loss,val_loss,train_acc,val_acc))

    # Always score in eval mode; otherwise dropout would still be active
    # whenever the last epoch did not go through the logging branch above.
    model.eval()
    final_val_loss, _ = _evaluate(val_x, val_y)
    return final_val_loss

In [7]:
# Hyperparameters for this run.
num_layers = 1
num_units = 300
epochs = 500
batch_size = 128
learning_rate = 0.01
num_classes = 26  # width of the network's output layer
seed = 0

# Seed torch's RNG so weight initialisation, batch shuffling and dropout
# repeat from one Restart & Run All to the next.
torch.manual_seed(seed)

model = Net(num_layers, num_units, train_x.shape[1], num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=learning_rate)

# The returned validation loss is the cell's displayed value.
train(model, optimizer, criterion, train_x, train_y, val_x, val_y,
      epochs, device=device)

Epoch = 10, train_loss = 1.106, val_loss = 1.109, train_acc=0.730, val_acc=0.727
Epoch = 20, train_loss = 0.909, val_loss = 0.918, train_acc=0.776, val_acc=0.773
Epoch = 30, train_loss = 0.807, val_loss = 0.819, train_acc=0.790, val_acc=0.789
Epoch = 40, train_loss = 0.730, val_loss = 0.746, train_acc=0.812, val_acc=0.807
Epoch = 50, train_loss = 0.676, val_loss = 0.694, train_acc=0.822, val_acc=0.821
Epoch = 60, train_loss = 0.632, val_loss = 0.656, train_acc=0.832, val_acc=0.828
Epoch = 70, train_loss = 0.593, val_loss = 0.615, train_acc=0.844, val_acc=0.836
Epoch = 80, train_loss = 0.561, val_loss = 0.587, train_acc=0.852, val_acc=0.839
Epoch = 90, train_loss = 0.536, val_loss = 0.561, train_acc=0.859, val_acc=0.849
Epoch = 100, train_loss = 0.510, val_loss = 0.539, train_acc=0.864, val_acc=0.852
Epoch = 110, train_loss = 0.489, val_loss = 0.520, train_acc=0.871, val_acc=0.857
Epoch = 120, train_loss = 0.470, val_loss = 0.501, train_acc=0.876, val_acc=0.861
Epoch = 130, train_loss =

0.268623948097229

In [11]:
# NOTE(review): this cell's execution count (11) is higher than that of the
# "extra training" cell that follows it (10), so it was last run after that
# cell even though it appears first.
model.eval()

# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    # Predict on test_x, which is loaded with the other splits but was never
    # used; the previous version passed train_x here, which looks like a slip.
    # TODO confirm against what save_predictions expects.
    test_preds = model(test_x.to(device)).cpu().numpy()

save_predictions(test_preds)

In [10]:
# Extra training: 150 more epochs on the same model and optimizer objects.
# The returned validation loss is the cell's displayed value.
train(
    model,
    optimizer,
    criterion,
    train_x,
    train_y,
    val_x,
    val_y,
    epochs=150,
    device=device,
)

Epoch = 10, train_loss = 0.222, val_loss = 0.267, train_acc=0.940, val_acc=0.925
Epoch = 20, train_loss = 0.219, val_loss = 0.267, train_acc=0.938, val_acc=0.926
Epoch = 30, train_loss = 0.217, val_loss = 0.261, train_acc=0.940, val_acc=0.925
Epoch = 40, train_loss = 0.217, val_loss = 0.266, train_acc=0.940, val_acc=0.922
Epoch = 50, train_loss = 0.213, val_loss = 0.260, train_acc=0.941, val_acc=0.926
Epoch = 60, train_loss = 0.211, val_loss = 0.259, train_acc=0.941, val_acc=0.927
Epoch = 70, train_loss = 0.208, val_loss = 0.256, train_acc=0.943, val_acc=0.925
Epoch = 80, train_loss = 0.206, val_loss = 0.254, train_acc=0.942, val_acc=0.926
Epoch = 90, train_loss = 0.207, val_loss = 0.256, train_acc=0.942, val_acc=0.925
Epoch = 100, train_loss = 0.204, val_loss = 0.253, train_acc=0.944, val_acc=0.927
Epoch = 110, train_loss = 0.201, val_loss = 0.248, train_acc=0.945, val_acc=0.928
Epoch = 120, train_loss = 0.199, val_loss = 0.248, train_acc=0.946, val_acc=0.927
Epoch = 130, train_loss =

0.2464468777179718