In [1]:
import numpy as np
import pandas as pd
import pickle as pkl
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import itertools
from sklearn import metrics
import datetime
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
import random

manualSeed = 1 # fix seed
print("Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)

Seed:  1


<torch._C.Generator at 0x7fa77aef77c8>

Hyper Parameters of NN

In [2]:
batchSize = 10
nhidden = 1000 # Hidden Layer Size
nepochs = 10
lr = .0001
beta1 = .5 # Adam beta1 parameter
noise = .1
dropout = .65
adam = True
workers = 1 # Number of cores for loading data
inputSize = 217
log_interval = 1000

Load Datasets And create Loaders

In [3]:
path = 'data/'

def loaderize(data_X, data_Y, balance):
    # We want to bal
    tensor_data_set = torch.utils.data.TensorDataset(torch.from_numpy(data_X).float(), torch.from_numpy(data_Y))
    if balance:
        # We increase probability of minority class, and decrease probability of dominant class so in average
        # We sample the same amount of 1s and 0s even though classes are not balanced.
        proba_1 = data_Y.mean()
        weights = np.where(data_Y == 1., .5/proba_1, .5/(1.-proba_1))
        sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, data_Y.shape[0])
        return torch.utils.data.DataLoader(tensor_data_set, batch_size=batchSize, sampler=sampler, num_workers=int(workers))
    return torch.utils.data.DataLoader(tensor_data_set, batch_size=batchSize, shuffle=True, num_workers=int(workers))

In [4]:
trainloader = loaderize(pkl.load(open(path+'train_X.pkl','rb')), pkl.load(open(path+'train_y.pkl','rb')).values, True)
valloader = loaderize(pkl.load(open(path+'val_X.pkl','rb')), pkl.load(open(path+'val_y.pkl','rb')).values, False)
testloader = loaderize(pkl.load(open(path+'test_X.pkl','rb')), pkl.load(open(path+'test_y.pkl','rb')).values, False)

In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(inputSize, nhidden)
        self.fc2 = nn.Linear(nhidden, nhidden)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input):
        input = F.relu(self.fc1(input))
        input = self.dropout(input)
        input = self.fc2(input)
        return input

In [6]:
model = Net()

if adam:
    optimizer = optim.Adam(model.parameters(), lr = lr, betas = (beta1, 0.999))
else:
    optimizer = optim.RMSprop(model.parameters(), lr = lr)
    
input = torch.FloatTensor(batchSize, inputSize)
label = torch.LongTensor(batchSize)
criterion = nn.CrossEntropyLoss()

input = Variable(input)
label = Variable(label)

In [7]:
def train(trainloader, epoch):
    model.train()
    
    for i, (data, target) in enumerate(trainloader, 0):
        input.data.resize_(data.size()).copy_(data)
        label.data.resize_(target.size()).copy_(target)
        model.zero_grad()
        output = model(input)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        
        if i % log_interval == 0:
            print('[%d/%d] [%d/%d] Train Loss : %.4f' % 
                  (epoch, nepochs, 
                   i, len(trainloader), 
                    loss.data[0]))

def test(testloader, epoch, isVal):
    model.eval()
    test_loss = 0
    correct = 0
    
    all_labels = 0
    all_preds = 0
    
    for i, (data, target) in enumerate(testloader, 0):
        input.data.resize_(data.size()).copy_(data)
        label.data.resize_(target.size()).copy_(target)
        output = model(input)
        test_loss += criterion(output, label)
        pred = output.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(label.data).cpu().sum()
        if not torch.is_tensor(all_labels):
            all_labels = target
            all_preds = output.data[:,1]
        else:
            all_labels = torch.cat((all_labels, target), 0)
            all_preds = torch.cat((all_preds, output.data[:,1]), 0)
        
    test_loss /= len(testloader)
    
    auc = metrics.roc_auc_score(all_labels.numpy(), all_preds.numpy())
    if isVal:
        print('\n[%d/%d] ||VAL|| Average loss: %.4f, Accuracy: %d / %d (%.1f) AUC : %.6f \n' % (
                epoch, nepochs,
                test_loss.data[0],
                correct, len(testloader.dataset), 100. * correct / len(testloader.dataset), auc)
             )
    else:
        print('\n[%d/%d] ||TEST|| Average loss: %.4f, Accuracy: %d / %d (%.1f) AUC : %.6f \n' % (
                epoch, nepochs,
                test_loss.data[0],
                correct, len(testloader.dataset), 100. * correct / len(testloader.dataset), auc)
             )
    return test_loss

In [8]:
val_loss_stored = np.inf

for epoch in range(1, nepochs + 1):
    train(trainloader, epoch)
    val_loss = test(valloader, epoch, True)
    if val_loss > val_loss_stored:
        lr /= 2
    val_loss_stored = val_loss
test(testloader, epoch, False)

[1/10] [0/55534] Train Loss : 7.0826
[1/10] [1000/55534] Train Loss : 0.5726
[1/10] [2000/55534] Train Loss : 0.4926
[1/10] [3000/55534] Train Loss : 0.6295
[1/10] [4000/55534] Train Loss : 0.4638
[1/10] [5000/55534] Train Loss : 0.8354
[1/10] [6000/55534] Train Loss : 0.7817
[1/10] [7000/55534] Train Loss : 0.4424
[1/10] [8000/55534] Train Loss : 0.5802
[1/10] [9000/55534] Train Loss : 0.6458
[1/10] [10000/55534] Train Loss : 1.0711
[1/10] [11000/55534] Train Loss : 0.7285
[1/10] [12000/55534] Train Loss : 0.6142
[1/10] [13000/55534] Train Loss : 0.3797
[1/10] [14000/55534] Train Loss : 0.4925
[1/10] [15000/55534] Train Loss : 0.6473
[1/10] [16000/55534] Train Loss : 0.7183
[1/10] [17000/55534] Train Loss : 1.0548
[1/10] [18000/55534] Train Loss : 0.3965
[1/10] [19000/55534] Train Loss : 0.4711
[1/10] [20000/55534] Train Loss : 0.4842
[1/10] [21000/55534] Train Loss : 0.4378
[1/10] [22000/55534] Train Loss : 0.9077
[1/10] [23000/55534] Train Loss : 0.7519
[1/10] [24000/55534] Train Lo

OSError: [Errno 12] Cannot allocate memory