In [72]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from nn_1 import read_data

In [73]:
def check_accuracy(preds, targets):
    """Return the fraction of rows whose arg-max prediction equals the target.

    Args:
        preds: array-like of per-class scores; the arg-max is taken over the
            last axis.
        targets: array-like of labels compared element-wise with the arg-max
            indices.

    Returns:
        The mean of the element-wise equality between predicted and target
        labels.
    """
    predicted_labels = np.argmax(preds, axis=-1)
    hits = predicted_labels == targets
    return np.mean(hits)

In [67]:
# Fetch the train / validation / test splits from the project helper.
train_x, train_y, val_x, val_y, test_x = read_data()

# Features: NumPy arrays -> float tensors.
train_x, val_x, test_x = (
    torch.from_numpy(features).float() for features in (train_x, val_x, test_x)
)

# Labels: collapse the last axis with arg-max (removes the one-hot encoding)
# so they hold class indices.
train_y, val_y = (
    torch.from_numpy(np.argmax(one_hot, axis=-1)) for one_hot in (train_y, val_y)
)

In [68]:
# Release any cached CUDA memory left over from earlier runs in this kernel.
torch.cuda.empty_cache()
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [69]:
class Net(nn.Module):
    """Fully connected network: hidden Linear -> ReLU -> Dropout stages, then a Linear output.

    Args:
        num_layers: number of hidden layers, each ``num_units`` wide.
        num_units: width of every hidden layer.
        input_dim: size of the input feature vector.
        output_dim: size of the output produced by the final linear layer.
        drop_prob: dropout probability applied after each hidden activation.
    """

    def __init__(self, num_layers, num_units, input_dim, output_dim, drop_prob=0.5):
        super().__init__()
        widths = [input_dim, *([num_units] * num_layers), output_dim]
        # One Linear per consecutive pair of widths; the last entry is the
        # output layer, even though the container is named ``hidden_layers``.
        self.hidden_layers = nn.ModuleList(
            [
                nn.Linear(fan_in, fan_out, bias=True)
                for fan_in, fan_out in zip(widths[:-1], widths[1:])
            ]
        )
        # Registered on the module but not applied in forward().
        self.batchNorm1d = nn.BatchNorm1d(num_units)
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, x):
        """Run ``x`` through every hidden stage and return the raw output of the last layer."""
        *hidden, head = self.hidden_layers
        for layer in hidden:
            x = F.relu(layer(x))
            x = self.dropout(x)
        return head(x)

In [86]:
def _loss_and_predictions(model, criterion, inputs, targets, device):
    """Evaluate ``model`` on a whole split in eval mode, without tracking gradients.

    Returns:
        ``(loss, preds)`` where ``loss`` is a Python float and ``preds`` is the
        model output as a NumPy array (copied to the CPU first).
    """
    model.eval()
    with torch.no_grad():
        preds = model(inputs.to(device))
        loss = criterion(preds, targets.to(device)).item()
    # .cpu() is required before .numpy() when the model lives on a GPU.
    return loss, preds.cpu().numpy()


def train(model, optimizer, criterion, train_x, train_y, val_x, val_y,
          epochs, device=None, verbose=True, batch_size=None):
    """Train ``model`` with shuffled mini-batches and return the final validation loss.

    Args:
        model: the ``nn.Module`` to optimise (already placed on its device).
        optimizer: optimizer built over ``model``'s parameters.
        criterion: loss called as ``criterion(outputs, labels)``.
        train_x, train_y: training inputs and labels (tensors).
        val_x, val_y: validation inputs and labels (tensors).
        epochs: number of passes over the training set.
        device: device batches are moved to. ``None`` (default) uses the device
            of the model's first parameter, falling back to the CPU for a
            parameter-free model.
        verbose: when true, print train/validation loss and accuracy every
            10th epoch.
        batch_size: mini-batch size. ``None`` (default) keeps the legacy
            behaviour of reading the notebook-level ``batch_size`` variable,
            and uses 128 if that variable is not defined.

    Returns:
        The validation loss as a float, always computed in eval mode (dropout
        disabled) and without gradient tracking. The model is left in eval
        mode on return.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    if batch_size is None:
        # Legacy fallback: earlier versions read this straight from the notebook globals.
        batch_size = globals().get("batch_size", 128)

    trainloader = DataLoader(TensorDataset(train_x, train_y),
                             shuffle=True, batch_size=batch_size)

    for epoch in range(epochs):  # loop over the dataset multiple times
        # Evaluation below switches to eval mode, so re-enable training mode each epoch.
        model.train()
        for inputs, labels in trainloader:
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs.to(device))
            loss = criterion(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

        if verbose and (epoch + 1) % 10 == 0:
            train_loss, train_preds = _loss_and_predictions(
                model, criterion, train_x, train_y, device)
            val_loss, val_preds = _loss_and_predictions(
                model, criterion, val_x, val_y, device)
            train_acc = check_accuracy(train_preds, train_y.detach().cpu().numpy())
            val_acc = check_accuracy(val_preds, val_y.detach().cpu().numpy())
            print("Epoch = {}, train_loss = {:.3f}, val_loss = {:.3f}, train_acc={:.3f}, val_acc={:.3f}"
                  .format(epoch+1, train_loss,val_loss,train_acc,val_acc))

    # Always score the validation set in eval mode so the returned loss does not
    # depend on whether the last epoch happened to trigger the verbose branch.
    final_val_loss, _ = _loss_and_predictions(model, criterion, val_x, val_y, device)
    return final_val_loss

In [None]:
# Architecture hyperparameters.
num_layers, num_units = 1, 64
# Optimisation hyperparameters (train() reads the notebook-level batch_size).
epochs, batch_size = 100, 128
learning_rate = 0.01

# NOTE(review): output_dim is hard-coded to 26 -- presumably the number of
# label classes returned by read_data(); confirm.
model = Net(
    num_layers=num_layers,
    num_units=num_units,
    input_dim=train_x.shape[1],
    output_dim=26,
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = SGD(params=model.parameters(), lr=learning_rate)

# The cell's value is the validation loss returned by train().
train(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    train_x=train_x,
    train_y=train_y,
    val_x=val_x,
    val_y=val_y,
    epochs=epochs,
    device=device,
)