In [66]:
import utils
import predict
import time as tm
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.datasets import make_classification
import scipy
import pandas as pd
from sklearn.model_selection import train_test_split

In [67]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.utils.data as data_utils
import torch.nn.functional as F

In [68]:
# Load the training data and hold out a third of it for evaluation.
dictSize = 225
(X, y) = utils.loadData( "train", dictSize = dictSize )
# Densify through the instance method. The previous unbound call
# `scipy.sparse.csr_matrix.toarray(X)` only resolved because some other import had
# already loaded the `scipy.sparse` submodule (a bare `import scipy` need not).
# NOTE(review): utils.loadData is not visible here; X is presumably a scipy sparse matrix.
X = X.toarray()
# Fixed random_state keeps the train/test partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [69]:
# https://discuss.pytorch.org/t/multi-class-classification/47565
class NeuralNet(nn.Module):
    """Fully connected classifier with one hidden layer: Linear -> ReLU -> Linear.

    ``forward`` returns raw class scores (logits), which is the input
    ``nn.CrossEntropyLoss`` expects. ``predict`` turns those scores into
    probabilities for inference.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer.
        num_classes: Number of output classes (size of the last axis of the output).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return unnormalised class scores for ``x`` (last axis = ``input_size``)."""
        out = self.layer1(x)
        out = self.relu(out)
        out = self.layer2(out)
        return out

    def predict(self, x):
        """Return class probabilities for ``x`` without tracking gradients.

        Softmax is taken over the last axis, i.e. the class axis produced by
        ``layer2``. The axis is passed explicitly: calling ``F.softmax`` without
        ``dim`` is deprecated, emits a warning, and normalises over axis 0 for
        3-D input. For 1-D and 2-D input the result is the same as before.
        """
        with torch.no_grad():
            outp = self.forward(x)
            return F.softmax(outp, dim=-1)
# add softmax layer: used for multiclass classification

In [81]:
# model declared
# Seed torch so the random weight initialisation (and therefore the reported
# metrics) can be reproduced; 42 matches the random_state used for the data split.
torch.manual_seed(42)
# The input width is read from the training matrix instead of repeating the
# literal 225, so the first layer always matches the feature dimension.
# 10000 = hidden layer width, 51 = number of output classes.
model = NeuralNet(X_train.shape[1], 10000, 51)
# earlier hidden layers- 1000
# 1 - poor, 2- ok   (NOTE(review): presumably results of experiments 1 and 2 — confirm)

In [82]:
# Cross-entropy on raw logits, optimised with plain SGD.
loss_function = nn.CrossEntropyLoss()
model_opt = optim.SGD(model.parameters(), lr = 0.02)

# Convert the NumPy splits to tensors. The deprecated torch.autograd.Variable
# wrapper is dropped: tensors carry autograd state themselves. Features become
# float32 and labels int64 (the target dtype nn.CrossEntropyLoss expects for
# class indices).
train_set_X = torch.from_numpy(X_train).float()
train_set_y = torch.from_numpy(y_train).long()
test_set_X = torch.from_numpy(X_test).float()
test_set_y = torch.from_numpy(y_test).long()

In [83]:
# 1
# Full-batch gradient descent: each step uses the entire training split at once.
epochs = 50
# The loop variable must not be called `epochs`: doing so overwrote the epoch
# count (leaving it at 49), so every re-run of this cell trained one epoch fewer.
for epoch in range(epochs):
    model_opt.zero_grad()
    out = model(train_set_X)
    loss = loss_function(out, train_set_y)
    loss.backward()
    model_opt.step()

In [84]:
# Class probabilities for the test split; NeuralNet.predict applies softmax
# to the model output inside torch.no_grad().
ans = model.predict(test_set_X)
# Disabled debugging snippet: prints (true label - arg-max prediction) per test row.
# The bound 3299 is hard-coded — presumably the size of the test split; confirm before re-enabling.
# for i in range(3299):
#     ans_max,ind = torch.max(ans[i],0)
#     print (test_set_y[i].numpy() - ind.numpy())
    



In [85]:
# Number of ranked labels to keep per test sample.
k = 5
y_pr = ans.numpy()
# Negating the scores makes the ascending argsort rank classes from most to
# least probable; the first k columns are the top-k class indices.
y_pred = (-y_pr).argsort(axis=1)[:, :k]
print (y_pred)
print (y_test)

[[ 3  4  1  7  2]
 [ 2  1  4  3  9]
 [ 3  1  4  2 10]
 ...
 [ 1  2  4  9  3]
 [ 2  1  4  3  9]
 [ 3  4  1  2 10]]
[3. 2. 3. ... 9. 4. 3.]


In [86]:
# Evaluation of the neural network's top-k predictions.
preck = utils.getPrecAtK(y_test, y_pred, k)
# Original author's note: the macro-precision helper is slower because it loops over labels.
mpreck = utils.getMPrecAtK(y_test, y_pred, k)

# Sanity check from the assignment description: by its definitions both
# prec@k and mprec@k should increase with k (prec@i > prec@j and
# mprec@i > mprec@j whenever i > j).

print(
    "prec@1: %0.3f" % preck[0],
    "prec@3: %0.3f" % preck[2],
    "prec@5: %0.3f" % preck[4],
)
# A small mprec is expected -- rare error classes are hard to get right.
print(
    "mprec@1: %0.3e" % mpreck[0],
    "mprec@3: %0.3e" % mpreck[2],
    "mprec@5: %0.3e" % mpreck[4],
)
print("prec matrix", preck)
print("mprec matrix", mpreck)

prec@1: 0.454 prec@3: 0.669 prec@5: 0.765
mprec@1: 1.509e-02 mprec@3: 6.147e-02 mprec@5: 1.019e-01
prec matrix [0.45363636 0.59242424 0.66939394 0.71393939 0.76454545]
mprec matrix [0.01508809 0.04888622 0.06147312 0.08406356 0.1019489 ]


In [None]:
# 2
# same model, different train file

In [87]:
# Pair features with labels so a loader yields (data, label) tuples.
tr_latent_X = data_utils.TensorDataset(train_set_X, train_set_y)
te_latent_X = data_utils.TensorDataset(test_set_X,  test_set_y)
# DataLoader defaults written out: one sample per batch, in dataset order.
train_loader_X = data_utils.DataLoader(dataset=tr_latent_X, batch_size=1, shuffle=False)
test_loader_X = data_utils.DataLoader(dataset=te_latent_X, batch_size=1, shuffle=False)

In [88]:
def train(epoch):
    """Run one optimisation pass over ``train_loader_X`` and print progress.

    Relies on the notebook-level ``model``, ``model_opt``, ``loss_function``
    and ``train_loader_X``. Prints a progress line every 100 batches and the
    mean per-sample loss at the end of the pass.

    ``loss_function`` is assumed to return the mean loss of a batch (the
    default reduction of ``nn.CrossEntropyLoss``). Each batch loss is therefore
    weighted by its batch size before accumulating, so the reported average is
    correct for any batch size rather than only for batches of one sample.

    Args:
        epoch: Epoch number; used only in the printed messages.
    """
    model.train()

    num_samples = len(train_loader_X.dataset)
    num_batches = len(train_loader_X)
    # Running sum of per-sample losses over the whole pass.
    train_loss = 0.0

    for batch_idx, (data, label) in enumerate(train_loader_X):

        model_opt.zero_grad()

        out = model(data)
        loss = loss_function(out, label)

        loss.backward()
        model_opt.step()

        # Batch mean -> batch sum, so the final division by num_samples is a true mean.
        batch_loss = loss.item()
        train_loss += batch_loss * len(data)

        if batch_idx % 100 == 0:
            # batch_loss is already a per-sample mean; dividing by len(data)
            # again would under-report it for batches larger than one.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), num_samples,
                100. * batch_idx / num_batches, batch_loss))
    # max(..., 1) avoids a ZeroDivisionError when the dataset is empty.
    print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / max(num_samples, 1)))

In [89]:
# 2
# Nine passes of per-sample training, numbered 1 through 9.
first_epoch, last_epoch = 1, 9
for epoch in range(first_epoch, last_epoch + 1):
    train(epoch)

====> Epoch: 1 Average loss: 1.5913
====> Epoch: 2 Average loss: 1.0147


====> Epoch: 3 Average loss: 0.8058
====> Epoch: 4 Average loss: 0.6828
====> Epoch: 5 Average loss: 0.5900


====> Epoch: 6 Average loss: 0.5348
====> Epoch: 7 Average loss: 0.4954


====> Epoch: 8 Average loss: 0.4503
====> Epoch: 9 Average loss: 0.4598


In [79]:
# Class probabilities for the test split from the model's current state.
ans = model.predict(test_set_X)
# Number of ranked labels to keep per test sample.
k = 5
y_pr = ans.numpy()
# Sorting the negated scores ascending ranks classes from most to least
# probable; keep the first k column indices.
y_pred = (-y_pr).argsort(axis=1)[:, :k]
print (y_pred)
print (y_test)

[[ 3 12 22  4  7]
 [ 2  4  1  9 21]
 [ 3 12  4  1 39]
 ...
 [ 9  2 45 15 16]
 [ 4 37  5 18  2]
 [ 3 12 37 22 10]]
[3. 2. 3. ... 9. 4. 3.]




In [80]:
# Evaluation of the neural network's top-k predictions.
preck = utils.getPrecAtK(y_test, y_pred, k)
# Original author's note: the macro-precision helper is slower because it loops over labels.
mpreck = utils.getMPrecAtK(y_test, y_pred, k)

# Sanity check from the assignment description: by its definitions both
# prec@k and mprec@k should increase with k (prec@i > prec@j and
# mprec@i > mprec@j whenever i > j).

print(
    "prec@1: %0.3f" % preck[0],
    "prec@3: %0.3f" % preck[2],
    "prec@5: %0.3f" % preck[4],
)
# A small mprec is expected -- rare error classes are hard to get right.
print(
    "mprec@1: %0.3e" % mpreck[0],
    "mprec@3: %0.3e" % mpreck[2],
    "mprec@5: %0.3e" % mpreck[4],
)
print("prec matrix", preck)
print("mprec matrix", mpreck)

prec@1: 0.762 prec@3: 0.918 prec@5: 0.955
mprec@1: 8.627e-03 mprec@3: 6.489e-02 mprec@5: 1.113e-01
prec matrix [0.76212121 0.88       0.91757576 0.94030303 0.95545455]
mprec matrix [0.00862691 0.0407148  0.06489299 0.09325235 0.11130134]
