In [1]:
import numpy as np
import pickle

In [2]:
from sklearn.metrics import confusion_matrix
def accuracy( C ):
    ''' Compute accuracy given Numpy array confusion matrix C. Returns a floating point value '''
    a = 0
    for i in range(4):
      a += C[i][i]
    
    return a/np.sum(C)

In [3]:
# get batches for each epoch
def get_batches(inputs, targets, batch_size, shuffle=True):
    """Divide a dataset into mini-batches of a given size. This is a'generator'."""
    
    if inputs.shape[0] % batch_size != 0:
        raise RuntimeError('The number of data points must be a multiple of the batch size.')
    num_batches = inputs.shape[0] // batch_size

    if shuffle:
        idxs = np.random.permutation(inputs.shape[0])
        inputs = inputs[idxs, :]
        targets = targets[idxs]

    for m in range(num_batches):
        yield inputs[m*batch_size:(m+1)*batch_size, :], \
              targets[m*batch_size:(m+1)*batch_size]

In [4]:
def train_test_split(inputs, targets, ratio=0.2, shuffle=True):
    num_train = int(inputs.shape[0]*(1-ratio))
    indices = np.arange(inputs.shape[0])
    if shuffle:
        np.random.shuffle(indices)
    X_train = inputs[indices[:num_train]]
    Y_train = targets[indices[:num_train]]
    X_test = inputs[indices[num_train:]]
    Y_test = targets[indices[num_train:]]
    return X_train, Y_train, X_test, Y_test

In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class MLP(nn.Module):

    def __init__(self):
        super(MLP, self).__init__()
        self.h1 = nn.Sequential(
                    nn.Linear(164, 32),
                    nn.ReLU())
        self.h2 = nn.Sequential(
                    nn.Linear(32, 164),
                    nn.ReLU())
        self.o = nn.Sequential(
                    nn.Linear(196, 4),
                    nn.Softmax())


    def forward(self, x):
        h1 = self.h1(x)
        h2 = self.h2(h1)
        y = self.o(torch.cat((h2,h1), 1))
        return y


mlp = MLP()
print(mlp)
loss_f = F.cross_entropy




MLP(
  (h1): Sequential(
    (0): Linear(in_features=164, out_features=32, bias=True)
    (1): ReLU()
  )
  (h2): Sequential(
    (0): Linear(in_features=32, out_features=164, bias=True)
    (1): ReLU()
  )
  (o): Sequential(
    (0): Linear(in_features=196, out_features=4, bias=True)
    (1): Softmax()
  )
)


In [7]:
# load data and labels
with open("extracted/dataSmaller.pk", "rb") as f:
    X, Y = pickle.load(f)

In [8]:
X_train, Y_train, X_test, Y_test = train_test_split(X, Y)

In [9]:
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

((80000, 164), (80000,), (20000, 164), (20000,))

In [10]:
from torch.autograd import Variable
X_train = Variable(torch.from_numpy(X_train).float())
Y_train = Variable(torch.from_numpy(Y_train).long())
X_test = Variable(torch.from_numpy(X_test).float())
Y_test = Variable(torch.from_numpy(Y_test).long())

## train

In [22]:
epoches = 10
lr = 0.0005
optimizer = optim.Adam(mlp.parameters(), lr=lr)
batch_size = 100
loss_report = 200

for epoch in range(epoches):
    print("Epoch:", epoch+1)
    for i, (input_batch, target_batch) in enumerate(get_batches(X_train, Y_train, batch_size)):
        optimizer.zero_grad()   # zero the gradient buffers
        output = mlp(input_batch)
        loss = loss_f(output, target_batch)
        loss.backward()
        optimizer.step()    # Does the update
        if (i+1)%loss_report == 0:
            print("Batch:", i+1, "loss:",loss.item())
    # validation error
    with torch.no_grad():
        y_predict = mlp(X_test)
        loss_t = loss_f(y_predict, Y_test)
#         if loss_t < lbest:
#             lbest = loss_t
#             bestp = mlp.state_dict()
        y_p = torch.argmax(y_predict, dim = 1).numpy()
        c2 = confusion_matrix(Y_test, y_p)
        print("test loss:",loss_t.item(), "acc :", accuracy(c2))

Epoch: 1


  input = module(input)


Batch: 200 loss: 1.2624366283416748
Batch: 400 loss: 1.2252612113952637
Batch: 600 loss: 1.3314286470413208
Batch: 800 loss: 1.215613842010498
test loss: 1.3210723400115967 acc : 0.3899
Epoch: 2
Batch: 200 loss: 1.257117748260498
Batch: 400 loss: 1.2614922523498535
Batch: 600 loss: 1.3160804510116577
Batch: 800 loss: 1.2621575593948364
test loss: 1.3223047256469727 acc : 0.3871
Epoch: 3
Batch: 200 loss: 1.2208399772644043
Batch: 400 loss: 1.3214794397354126
Batch: 600 loss: 1.2622885704040527
Batch: 800 loss: 1.222031593322754
test loss: 1.3224413394927979 acc : 0.38805
Epoch: 4
Batch: 200 loss: 1.2287784814834595
Batch: 400 loss: 1.2196550369262695
Batch: 600 loss: 1.242885947227478
Batch: 800 loss: 1.2609449625015259
test loss: 1.321339726448059 acc : 0.3884
Epoch: 5
Batch: 200 loss: 1.2397960424423218
Batch: 400 loss: 1.191847562789917
Batch: 600 loss: 1.3163020610809326
Batch: 800 loss: 1.266919493675232
test loss: 1.3229745626449585 acc : 0.38855
Epoch: 6
Batch: 200 loss: 1.213465