In [135]:
import numpy as np
import pandas as pd
import pickle as pkl
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import itertools
from sklearn import metrics
import datetime
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
import random

manualSeed = 1 # fix seed
print("Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)

Seed:  1


<torch._C.Generator at 0x7f2a3d2eb7c8>

Hyper Parameters of NN

In [145]:
batchSize = 64
nhidden = 300 # Hidden Layer Size
nepochs = 10
lr = .0001
beta1 = .5 # Adam beta1 parameter
noise = .1
dropout = .65
adam = True
workers = 1 # Number of cores for loading data
inputSize = 217
log_interval = 1000

Load Datasets And create Loaders

In [146]:
path = 'data/'

def loaderize(data_X, data_Y, balance):
    # We want to bal
    tensor_data_set = torch.utils.data.TensorDataset(torch.from_numpy(data_X).float(), torch.from_numpy(data_Y))
    if balance:
        # We increase probability of minority class, and decrease probability of dominant class so in average
        # We sample the same amount of 1s and 0s even though classes are not balanced.
        proba_1 = data_Y.mean()
        weights = np.where(data_Y == 1., .5/proba_1, .5/(1.-proba_1))
        sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, data_Y.shape[0])
        return torch.utils.data.DataLoader(tensor_data_set, batch_size=batchSize, sampler=sampler, num_workers=int(workers))
    return torch.utils.data.DataLoader(tensor_data_set, batch_size=batchSize, shuffle=True, num_workers=int(workers))

In [138]:
trainloader = loaderize(pkl.load(open(path+'train_X.pkl','rb')), pkl.load(open(path+'train_y.pkl','rb')).values, True)
valloader = loaderize(pkl.load(open(path+'val_X.pkl','rb')), pkl.load(open(path+'val_y.pkl','rb')).values, False)
testloader = loaderize(pkl.load(open(path+'test_X.pkl','rb')), pkl.load(open(path+'test_y.pkl','rb')).values, False)

In [147]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(inputSize, nhidden)
        self.fc2 = nn.Linear(nhidden, 2)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input):
        input = F.relu(self.fc1(input))
        input = self.dropout(input)
        input = F.sigmoid(self.fc2(input))
        return input

In [148]:
model = Net()

if adam:
    optimizer = optim.Adam(model.parameters(), lr = lr, betas = (beta1, 0.999))
else:
    optimizer = optim.RMSprop(model.parameters(), lr = lr)
    
input = torch.FloatTensor(batchSize, inputSize)
label = torch.LongTensor(batchSize)
criterion = nn.CrossEntropyLoss()

input = Variable(input)
label = Variable(label)

In [149]:
def train(trainloader, epoch):
    model.train()
    
    for i, (data, target) in enumerate(trainloader, 0):
        input.data.resize_(data.size()).copy_(data)
        label.data.resize_(target.size()).copy_(target)
        model.zero_grad()
        output = model(input)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        
        if i % log_interval == 0:
            print('[%d/%d] [%d/%d] Train Loss : %.4f' % 
                  (epoch, nepochs, 
                   i, len(trainloader), 
                    loss.data[0]))

def test(testloader, epoch, isVal):
    model.eval()
    test_loss = 0
    correct = 0
    
    all_labels = 0
    all_preds = 0
    
    for i, (data, target) in enumerate(testloader, 0):
        input.data.resize_(data.size()).copy_(data)
        label.data.resize_(target.size()).copy_(target)
        output = model(input)
        test_loss += criterion(output, label)
        pred = output.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(label.data).cpu().sum()
        if not torch.is_tensor(all_labels):
            all_labels = target
            all_preds = output.data[:,1]
        else:
            all_labels = torch.cat((all_labels, target), 0)
            all_preds = torch.cat((all_preds, output.data[:,1]), 0)
        
    test_loss /= len(testloader)
    
    auc = metrics.roc_auc_score(all_labels.numpy(), all_preds.numpy())
    if isVal:
        print('\n [%d/%d] ||VAL|| Average loss: %.4f, Accuracy: %d / %d (%.1f) AUC : %.6f \n' % (
                epoch, nepochs,
                test_loss.data[0],
                correct, len(testloader.dataset), 100. * correct / len(testloader.dataset), auc)
             )
    else:
        print('\n [%d/%d] ||TEST|| Average loss: %.4f, Accuracy: %d / %d (%.1f) AUC : %.6f \n' % (
                epoch, nepochs,
                test_loss.data[0],
                correct, len(testloader.dataset), 100. * correct / len(testloader.dataset), auc)
             )
    return test_loss

In [None]:
val_loss_stored = np.inf

for epoch in range(1, nepochs + 1):
    train(trainloader, epoch)
    val_loss = test(valloader, epoch, True)
    if val_loss > val_loss_stored:
        lr /= 2
    val_loss_stored = val_loss
test(testloader, epoch, False)

[1/10] [0/8678] Train Loss : 0.6988
[1/10] [1000/8678] Train Loss : 0.5897
[1/10] [2000/8678] Train Loss : 0.5249
[1/10] [3000/8678] Train Loss : 0.6046
[1/10] [4000/8678] Train Loss : 0.5770
[1/10] [5000/8678] Train Loss : 0.6476
[1/10] [6000/8678] Train Loss : 0.5515
[1/10] [7000/8678] Train Loss : 0.6581
[1/10] [8000/8678] Train Loss : 0.5525

 [1/10] ||VAL|| Average loss: 0.5576, Accuracy: 125002 / 158523 (78.9) AUC : 0.7655 

[2/10] [0/8678] Train Loss : 0.5935
[2/10] [1000/8678] Train Loss : 0.5290
[2/10] [2000/8678] Train Loss : 0.6046
[2/10] [3000/8678] Train Loss : 0.5317
[2/10] [4000/8678] Train Loss : 0.5285
[2/10] [5000/8678] Train Loss : 0.5881
[2/10] [6000/8678] Train Loss : 0.5581
[2/10] [7000/8678] Train Loss : 0.4926
[2/10] [8000/8678] Train Loss : 0.5376

 [2/10] ||VAL|| Average loss: 0.5675, Accuracy: 123522 / 158523 (77.9) AUC : 0.7669 

[3/10] [0/8678] Train Loss : 0.5154
[3/10] [1000/8678] Train Loss : 0.5603
