In [None]:
import numpy as np
import sklearn
import matplotlib.pyplot as plt
%matplotlib inline
import os

import sklearn.metrics

In [None]:
# Build the category-name <-> integer-id lookup tables from the files in ./train/.
# os.listdir() returns entries in arbitrary, platform-dependent order, so the
# listing is sorted to keep every category id identical across runs and machines.
trfiles = sorted(os.listdir("./train/"))
# The category name is the part of the file name before the first ".".
cats = [c.split(".")[0] for c in trfiles]
cat_to_cid = {cat: idx for idx, cat in enumerate(cats)}
cid_to_cat = {idx: cat for idx, cat in enumerate(cats)}

In [None]:
# Load every training file and append its category id as an extra last column.
# The per-file arrays are collected in a list and concatenated once at the end;
# concatenating inside the loop re-copies all previously loaded rows each time.
labelled_parts = []
for file in trfiles:
    d = np.load(os.path.join("train", file))
    myidx = cat_to_cid[file.split(".")[0]]
    # Float column holding the category id, one entry per row of this file.
    idarr = np.full((d.shape[0], 1), myidx, dtype=float)
    labelled_parts.append(np.concatenate((d, idarr), axis=1))
# Stays None when ./train/ is empty, as it did before.
known_data = np.concatenate(labelled_parts, axis=0) if labelled_parts else None

In [None]:
# Shuffle the rows in place so the train/validation split below is random.
# A dedicated, fixed-seed generator makes the split reproducible without
# touching numpy's global random state.
np.random.RandomState(0).shuffle(known_data)

In [None]:
# 80/20 split of the shuffled rows: the first 80% train, the remainder validates.
n_train = int(0.8 * known_data.shape[0])
train_data, val_data = known_data[:n_train], known_data[n_train:]

In [None]:
# Columns 0..783 are the features (viewed as 28x28 images further down);
# column 784 is the integer category id appended at load time.
train_x = train_data[:,:784]
train_y = train_data[:,784].astype(int)
val_x = val_data[:,:784]
val_y = val_data[:,784].astype(int)
# Standardise BOTH splits with statistics estimated on the training split only,
# so validation samples are mapped into the same feature space the models are
# fitted in (normalising validation with its own mean/std shifts its features
# relative to training and uses held-out data for preprocessing).
feat_mean = train_x.mean(axis=0)
feat_std = train_x.std(axis=0)
# A constant feature has std 0; dividing by it would produce NaN/inf.
feat_std[feat_std == 0] = 1.0
train_x = (train_x - feat_mean) / feat_std
val_x = (val_x - feat_mean) / feat_std

In [None]:
# Features and labels of the complete labelled set (train + validation rows).
full_x = known_data[:,:784]
full_y = known_data[:,784].astype(int)
# Per-feature standardisation; a constant feature has std 0, so its divisor is
# replaced by 1 to avoid NaN/inf in the result.
full_std = full_x.std(axis=0)
full_std[full_std == 0] = 1.0
full_x = (full_x - full_x.mean(axis=0)) / full_std

In [None]:
# Sanity check: rows = total number of labelled samples, columns = feature count.
full_x.shape

## Clustering

In [None]:
from  sklearn.cluster import KMeans 
from sklearn.cluster import AgglomerativeClustering 

In [None]:
# 20 clusters, 40 random restarts, at most 50 iterations per restart.
# `precompute_distances` and `n_jobs` are no longer accepted by KMeans
# (removed in scikit-learn 1.0), so they are not passed.
km = KMeans(n_clusters=20,n_init=40,max_iter=50)

In [None]:
# Fit k-means on the standardised training features (labels are not used here).
km.fit(train_x)

In [None]:
# Nested tally: cluster id -> {true category id -> number of training samples}.
clus_to_pred_dic = {}
for clus, truel in zip(km.labels_, train_y):
    label_counts = clus_to_pred_dic.setdefault(clus, {})
    label_counts[truel] = label_counts.get(truel, 0) + 1

In [None]:
# Each cluster is assigned the category that occurs most often among its members.
clusid_to_cid = {
    clus: max(counts, key=counts.get)
    for clus, counts in clus_to_pred_dic.items()
}

In [None]:
# Nearest-centroid cluster for every sample, then cluster id -> category id.
pred_train_c = km.predict(train_x)
pred_val_c = km.predict(val_x)
pred_train = [clusid_to_cid[c] for c in pred_train_c]
pred_val = [clusid_to_cid[c] for c in pred_val_c]

In [None]:
# Training accuracy of the majority-label-per-cluster classifier.
sklearn.metrics.accuracy_score(train_y,pred_train)

In [None]:
# Validation accuracy of the majority-label-per-cluster classifier.
sklearn.metrics.accuracy_score(val_y,pred_val)

In [None]:
# Sweep the k-means iteration cap and report the accuracy of the
# majority-label-per-cluster classifier for each setting.
for maxit in [10,20,30,40,50]:
    # The swept value must actually be passed on; with a hard-coded max_iter
    # every pass of this loop would train the same model. `precompute_distances`
    # and `n_jobs` were removed from KMeans in scikit-learn 1.0 and are omitted.
    km = KMeans(n_clusters=20,n_init=40,max_iter=maxit)
    km.fit(train_x)
    # cluster id -> {true category id -> count of training samples}
    clus_to_pred_dic = {}
    for clus, truel in zip(km.labels_, train_y):
        label_counts = clus_to_pred_dic.setdefault(clus, {})
        label_counts[truel] = label_counts.get(truel, 0) + 1
    # Built once per model: each cluster takes its most frequent category.
    clusid_to_cid = {
        clus: max(counts, key=counts.get)
        for clus, counts in clus_to_pred_dic.items()
    }
    pred_train_c = km.predict(train_x)
    pred_val_c = km.predict(val_x)
    pred_train = [clusid_to_cid[c] for c in pred_train_c]
    pred_val = [clusid_to_cid[c] for c in pred_val_c]
    print("------------------------------")
    print ("Max Iter ",maxit)
    print ("Train Accuracy ",sklearn.metrics.accuracy_score(train_y,pred_train))
    # Note: this figure is computed on the validation split (val_x / val_y).
    print ("Test Accuracy ",sklearn.metrics.accuracy_score(val_y,pred_val))

## PCA + SVM

In [None]:
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

In [None]:
# Project onto the first 50 principal components (tunable), fitted on the
# training split only, then train a linear SVM on the projected features.
pca = PCA(n_components=50).fit(train_x)
train_x_trans, val_x_trans = (pca.transform(split) for split in (train_x, val_x))
clf = LinearSVC()  # swap for SVC() to try a kernel SVM instead
clf.fit(train_x_trans, train_y)

In [None]:
# Predicted category ids for the PCA-projected validation samples.
preds = clf.predict(val_x_trans)

In [None]:
# Validation accuracy of the PCA + SVM classifier.
sklearn.metrics.accuracy_score(val_y,preds)

In [None]:
# Per-category precision / recall / F1 on the validation split (20 categories).
class_ids = list(range(20))
class_names = [cid_to_cat[ii] for ii in class_ids]
print(sklearn.metrics.classification_report(val_y, preds, labels=class_ids, target_names=class_names))

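Best result so far: C = 1.5 (found in the wider sweep over C that is commented out in the next cell; the active loop only tries C = 1 and C = 2).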

In [None]:
# Re-fit the 50-component PCA (tunable) on the training split, then compare
# linear-kernel, one-vs-one SVCs for a few values of the regularisation constant C.
pca = PCA(n_components=50).fit(train_x)
train_x_trans = pca.transform(train_x)
val_x_trans = pca.transform(val_x)
# Wider sweep used earlier: [0.1,0.5,0.9,1.5,10]
for c in (1, 2):
    clf = SVC(C=c, kernel="linear", decision_function_shape="ovo")
    clf.fit(train_x_trans, train_y)
    preds = clf.predict(val_x_trans)
    val_acc = sklearn.metrics.accuracy_score(val_y, preds)
    print("C value = ",c," Accuracy = ",val_acc)

In [None]:
# Display the category-id -> category-name mapping for reference.
cid_to_cat

In [None]:
# Spot-check validation samples 422..432 with the most recently fitted SVM:
# print the category for hits, and predicted vs. actual category for misses.
for i in range(422, 433):
    # Slice (instead of index + newaxis) to keep the (1, n_features) shape.
    f = clf.predict(val_x_trans[i:i + 1, :])
    predicted_name = cid_to_cat[f[0]]
    if f[0] != val_y[i]:
        print("I - ",i,"Predicted ",predicted_name, " but was ",cid_to_cat[val_y[i]])
    else:
        print ("corr ",predicted_name)


In [None]:
# Show validation sample 377 as a 28x28 image. val_x is standardised, so the
# plotted intensities are normalised values rather than the raw inputs.
plt.imshow(val_x[377,:].reshape(28,28))

## Fully Connected

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim


In [None]:
class fcmodel(nn.Module):
    """Fully connected classifier with one hidden layer.

    Args:
        num_hidden: width of the hidden layer.
        inp_dim: number of input features per sample.
        out_dim: number of output classes.

    ``forward`` returns log-probabilities, which is the input NLLLoss expects.
    """

    def __init__(self,num_hidden=100,inp_dim=784,out_dim=20):
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept as-is.
        self.lay = nn.Linear(inp_dim,num_hidden)
        self.outl = nn.Linear(num_hidden,out_dim)

    def forward(self,X):
        """Map a (batch, inp_dim) tensor to (batch, out_dim) log-probabilities."""
        hidden_logits = self.lay(X)
        # NOTE(review): the hidden activation is a softmax over the hidden
        # units, not an element-wise non-linearity -- confirm this is intended.
        hidden = F.softmax(hidden_logits, dim=1)
        class_logits = self.outl(hidden)
        return F.log_softmax(class_logits, dim=1)

In [None]:
# Use the GPU only when one is actually present; a hard-coded True makes
# mod.cuda() raise on CPU-only machines.
CUDA = torch.cuda.is_available()
# NLLLoss pairs with the log-softmax output of the model.
criterion = nn.NLLLoss()
mod = fcmodel(num_hidden=900)
if CUDA:
    mod.cuda()
optimizer = optim.Adam(mod.parameters(), lr=0.001)
BATCH_SIZE = 100

In [None]:
# Mini-batch training with a simple early stop: training ends the first time
# the summed epoch loss goes up. Starting from +inf (rather than an arbitrary
# large number) guarantees the first epoch can never trigger the stop.
total_loss_prev = float("inf")
for epoch in range(800):
    total_loss = 0
    # Only full batches are used; a trailing partial batch is skipped.
    for i in range(int(train_x.shape[0]/BATCH_SIZE)):
        X_B = train_x[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:]
        Y_B = train_y[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
        # Convert the numpy batches to tensors on both paths; previously the
        # CPU path handed raw numpy arrays to Variable, which fails.
        X_P = torch.Tensor(X_B)
        Y_P = torch.LongTensor(Y_B)
        if CUDA:
            X_P = X_P.cuda()
            Y_P = Y_P.cuda()
        bout = mod(X_P)
        optimizer.zero_grad()
        loss = criterion(bout, Y_P)
        loss.backward()
        # The loss is a 0-dim tensor: .item() extracts the Python float
        # (indexing it with [0] raises on PyTorch >= 0.4).
        batch_loss = loss.item()
        total_loss += batch_loss
        optimizer.step()
        if i%600 == 0:
            print("Epoch ",epoch," Iter - ",i, "Loss ",batch_loss)
    if total_loss > total_loss_prev:
        break
    else:
        total_loss_prev = total_loss

In [None]:
# Summed batch loss of the last epoch that ran.
total_loss

In [None]:
# Score the trained network on the chosen split (swap the comment to use train).
vvx,vvy = val_x,val_y
# vvx,vvy = train_x,train_y
inp = torch.Tensor(vvx)
# Follow the CUDA flag instead of always moving the data to the GPU.
if CUDA:
    inp = inp.cuda()
# Inference only: skip gradient tracking to save memory.
with torch.no_grad():
    outs = mod(inp)
# The arg-max over the class dimension is the predicted category id.
_, predicted = torch.max(outs, 1)
tem = predicted.cpu().numpy()
print(sklearn.metrics.accuracy_score(vvy,tem))

In [None]:
# Per-category precision / recall / F1 of the network on the validation split.
class_ids = list(range(20))
class_names = [cid_to_cat[ii] for ii in class_ids]
print(sklearn.metrics.classification_report(val_y, tem, labels=class_ids, target_names=class_names))

Validation: sweep over the number of hidden units

In [None]:
# Train one fully connected model per hidden-layer width and report its
# validation accuracy. Training of a model stops once the summed epoch loss
# has increased while more than 4 consecutive increases were already counted.
for nh in [10,50,100,250,1000]:
    # Use the GPU only when one is available.
    CUDA = torch.cuda.is_available()
    criterion = nn.NLLLoss()
    mod = fcmodel(num_hidden=nh)
    if CUDA:
        mod.cuda()
    optimizer = optim.Adam(mod.parameters(), lr=0.01)
    BATCH_SIZE = 200
    total_loss_prev = float("inf")
    # The patience counter is reset for every model; carrying it over from the
    # previous width would let one model's history cut the next one short.
    num_to_stop = 0
    for epoch in range(400):
        total_loss = 0
        # Only full batches are used; a trailing partial batch is skipped.
        for i in range(int(train_x.shape[0]/BATCH_SIZE)):
            X_B = train_x[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:]
            Y_B = train_y[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
            # Convert to tensors on both the CPU and the GPU path.
            X_P = torch.Tensor(X_B)
            Y_P = torch.LongTensor(Y_B)
            if CUDA:
                X_P = X_P.cuda()
                Y_P = Y_P.cuda()
            bout = mod(X_P)
            optimizer.zero_grad()
            loss = criterion(bout, Y_P)
            loss.backward()
            # 0-dim loss tensor -> Python float ([0] indexing fails on PyTorch >= 0.4).
            total_loss += loss.item()
            optimizer.step()
        if total_loss > total_loss_prev:
            if num_to_stop > 4:
                print("Terminated at epoch = ",epoch)
                break
            num_to_stop += 1
        else:
            num_to_stop = 0
        total_loss_prev = total_loss
    vvx,vvy = val_x,val_y
    inp = torch.Tensor(vvx)
    if CUDA:
        inp = inp.cuda()
    # Inference only: no gradient tracking needed.
    with torch.no_grad():
        outs = mod(inp)
    _, predicted = torch.max(outs, 1)
    tem = predicted.cpu().numpy()
    print("Num Hidden =",nh," Accuracy = ",sklearn.metrics.accuracy_score(vvy,tem))

#  CNN One layer

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim

In [None]:
class onecnnmodel(nn.Module):
    """Single-convolution CNN: conv -> ReLU -> 2x2 max-pool -> hidden -> output.

    Args:
        num_filters: number of output channels of the convolution.
        filter_dim: side length of the square convolution kernel.
        num_hidden: width of the fully connected hidden layer.
        out_dim: number of output classes.

    ``forward`` takes flattened 28x28 single-channel images and returns
    log-probabilities of shape (batch, out_dim), suitable for NLLLoss.
    """

    def __init__(self,num_filters=80,filter_dim=5,num_hidden=100,out_dim=20):
        super().__init__()
        self.conv1 = nn.Conv2d(1, num_filters, filter_dim)
        self.inp_hw = 28
        self.pool = nn.MaxPool2d(2, 2)
        # Spatial size after the un-padded, stride-1 convolution.
        conv_hw = self.inp_hw - filter_dim + 1
        # Spatial size after 2x2 / stride-2 pooling. MaxPool2d floors odd
        # sizes, so this must be integer division: with float division an even
        # filter_dim (odd conv_hw) gave a Linear layer whose input size did not
        # match the flattened feature map.
        pooled_hw = (conv_hw - 2) // 2 + 1
        self.lay = nn.Linear(num_filters * pooled_hw * pooled_hw, num_hidden)
        self.outl = nn.Linear(num_hidden,out_dim)

    def forward(self,x):
        """Return (batch, out_dim) log-probabilities for flattened images ``x``."""
        bs = x.shape[0]
        # Restore the (batch, channel, height, width) layout the conv expects.
        x = x.view(bs,1,self.inp_hw,self.inp_hw)
        x = self.pool(F.relu(self.conv1(x)))
        x = x.view(bs,-1)
        # NOTE(review): the hidden activation is a softmax over the hidden
        # units, not an element-wise non-linearity -- confirm this is intended.
        mid = F.softmax(self.lay(x),dim=1)
        return F.log_softmax(self.outl(mid),dim=1)

In [None]:
# Use the GPU only when one is actually present; a hard-coded True makes
# mod.cuda() raise on CPU-only machines.
CUDA = torch.cuda.is_available()
# NLLLoss pairs with the log-softmax output of the model.
criterion = nn.NLLLoss()
mod = onecnnmodel()
if CUDA:
    mod.cuda()
optimizer = optim.Adam(mod.parameters(), lr=0.005)
BATCH_SIZE = 200

In [None]:
# Mini-batch training of the CNN for a fixed 500 epochs (no early stopping).
for epoch in range(500):
    # Only full batches are used; a trailing partial batch is skipped.
    for i in range(int(train_x.shape[0]/BATCH_SIZE)):
        X_B = train_x[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:]
        Y_B = train_y[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
        # Convert the numpy batches to tensors on both paths; previously the
        # CPU path handed raw numpy arrays to Variable, which fails.
        X_P = torch.Tensor(X_B)
        Y_P = torch.LongTensor(Y_B)
        if CUDA:
            X_P = X_P.cuda()
            Y_P = Y_P.cuda()
        bout = mod(X_P)
        optimizer.zero_grad()
        loss = criterion(bout, Y_P)
        loss.backward()
        optimizer.step()
        if i%400 == 0:
            # 0-dim loss tensor -> Python float ([0] indexing fails on PyTorch >= 0.4).
            print("Epoch ",epoch," Iter - ",i, "Loss ",loss.item())
#             av_loss = 0

In [None]:
# Choose the split to evaluate on; swap the comment to score the training set.
vvx,vvy = val_x,val_y
# vvx,vvy = train_x,train_y

In [None]:
# Sanity check: (number of evaluation samples, number of features).
vvx.shape

In [None]:
# Predict in mini-batches so the whole evaluation set never has to sit on the
# device at once. Stepping by BATCH_SIZE over the row range also covers the
# final, shorter batch; dropping it would leave `preds` shorter than `vvy`
# whenever the sample count is not a multiple of BATCH_SIZE.
preds = []
for start in range(0, vvx.shape[0], BATCH_SIZE):
    X_P = torch.Tensor(vvx[start:start+BATCH_SIZE,:])
    if CUDA:
        X_P = X_P.cuda()
    # Inference only: no gradient tracking needed.
    with torch.no_grad():
        bout = mod(X_P)
    # The arg-max over the class dimension is the predicted category id.
    _, predicted = torch.max(bout, 1)
    preds += predicted.cpu().numpy().tolist()

In [None]:
# Number of evaluation labels; should match the number of predictions collected.
vvy.shape

In [None]:
# Accuracy of the CNN on the evaluation split selected above.
print(sklearn.metrics.accuracy_score(vvy,preds))

In [None]:
# Per-category precision / recall / F1 of the CNN on the evaluation split.
class_ids = list(range(20))
class_names = [cid_to_cat[ii] for ii in class_ids]
print(sklearn.metrics.classification_report(vvy, preds, labels=class_ids, target_names=class_names))

## Validation

In [None]:
# Train one CNN per filter count and report its validation accuracy.
# Training of a model stops once the summed epoch loss has increased while
# more than 4 consecutive increases were already counted.
for fil in [2,3,5,7]:
    # Use the GPU only when one is available.
    CUDA = torch.cuda.is_available()
    criterion = nn.NLLLoss()
    # The swept value is the number of convolution filters (first parameter).
    mod = onecnnmodel(num_filters=fil)
    if CUDA:
        mod.cuda()
    optimizer = optim.Adam(mod.parameters(), lr=0.001)
    BATCH_SIZE = 200
    total_loss_prev = float("inf")
    # The patience counter is reset for every model; carrying it over from the
    # previous configuration would let one model's history cut the next short.
    num_to_stop = 0
    for epoch in range(400):
        total_loss = 0
        # Only full batches are used; a trailing partial batch is skipped.
        for i in range(int(train_x.shape[0]/BATCH_SIZE)):
            X_B = train_x[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:]
            Y_B = train_y[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
            # Convert to tensors on both the CPU and the GPU path.
            X_P = torch.Tensor(X_B)
            Y_P = torch.LongTensor(Y_B)
            if CUDA:
                X_P = X_P.cuda()
                Y_P = Y_P.cuda()
            bout = mod(X_P)
            optimizer.zero_grad()
            loss = criterion(bout, Y_P)
            loss.backward()
            # 0-dim loss tensor -> Python float ([0] indexing fails on PyTorch >= 0.4).
            total_loss += loss.item()
            optimizer.step()
        if total_loss > total_loss_prev:
            if num_to_stop > 4:
                print("Terminated at epoch = ",epoch)
                break
            num_to_stop += 1
        else:
            num_to_stop = 0
        total_loss_prev = total_loss
    vvx,vvy = val_x,val_y
    inp = torch.Tensor(vvx)
    if CUDA:
        inp = inp.cuda()
    # Inference only: no gradient tracking needed.
    with torch.no_grad():
        outs = mod(inp)
    _, predicted = torch.max(outs, 1)
    tem = predicted.cpu().numpy()
    # Report the value swept by THIS loop; the old message printed `nh`, a
    # stale variable left over from the hidden-size sweep.
    print("Num Filters =",fil," Accuracy = ",sklearn.metrics.accuracy_score(vvy,tem))

In [1]:
# Show validation sample 332 as a 28x28 image. Needs `plt` and `val_x` from the
# earlier cells; the recorded NameError indicates this cell was executed before
# the import cell had been run in that kernel session.
plt.imshow(val_x[332,:].reshape(28,28))

NameError: name 'plt' is not defined

## More complex model tries

In [None]:
# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# from torch.autograd import Variable
# import torch.optim as optim

In [None]:
class exmodel(nn.Module):
    """Three parallel convolution branches feeding one linear classifier.

    Each branch applies a convolution with a different kernel size (2, 3 and
    5) to the same 28x28 single-channel image, followed by ReLU and 2x2
    max-pooling. The flattened branch outputs are concatenated and mapped
    straight to class log-probabilities.

    Args:
        num_hidden: accepted for signature compatibility; not used by the
            current architecture.
        inp_dim: accepted for signature compatibility; not used by the
            current architecture.
        out_dim: number of output classes.
    """

    def __init__(self,num_hidden=10,inp_dim=784,out_dim=20):
        super().__init__()
        self.conv1_1 = nn.Conv2d(1, 10, 2)
        self.conv1_2 = nn.Conv2d(1, 10, 3)
        self.conv1_3 = nn.Conv2d(1, 1, 5)
        self.pool = nn.MaxPool2d(2, 2)
        # Flattened sizes after pooling: 10 channels of 13x13 for the kernel-2
        # and kernel-3 branches, 1 channel of 12x12 for the kernel-5 branch.
        flat_features = 10*13*13 + 10*13*13 + 1*12*12
        self.outl = nn.Linear(flat_features, out_dim)

    def forward(self,x):
        """Return (batch, out_dim) log-probabilities for flattened images ``x``."""
        bs = x.shape[0]
        images = x.view(bs, 1, 28, 28)
        # Run every branch on the same input and flatten each result per sample.
        branch_features = [
            self.pool(F.relu(conv(images))).view(bs, -1)
            for conv in (self.conv1_1, self.conv1_2, self.conv1_3)
        ]
        merged = torch.cat(branch_features, dim=1)
        return F.log_softmax(self.outl(merged), dim=1)

In [None]:
# CUDA = True
# criterion = nn.NLLLoss()
# mod = exmodel()
# if CUDA:
#     mod.cuda()
# optimizer = optim.Adam(mod.parameters(), lr=0.001)
# BATCH_SIZE = 200

In [None]:
# for epoch in range(800):
# #     av_loss = 0
#     for i in range(int(train_x.shape[0]/BATCH_SIZE)):
#         X_B = train_x[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:]
#         Y_B = train_y[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
#         if CUDA:
#             X_P = Variable(torch.Tensor(X_B)).cuda()
#             Y_P = Variable(torch.LongTensor(Y_B)).cuda()
#         else:
#             X_P = Variable(X_B)
#             Y_P = Variable(Y_B)
#         bout = mod.forward(X_P)
#         optimizer.zero_grad()
#         loss = criterion(bout, Y_P)
#         loss.backward()
#         optimizer.step()
# #         av_loss +=  loss.data[0]
#         if i%50 == 0:
#             print("Epoch ",epoch," Iter - ",i, "Loss ",loss.data[0])
# #             av_loss = 0

# vvx,vvy = val_x,val_y
# # vvx,vvy = train_x,train_y

# vvx.shape

# preds = []
# for i in range(int(vvx.shape[0]/BATCH_SIZE)):
#     X_B = vvx[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:]
#     if CUDA:
#         X_P = Variable(torch.Tensor(X_B)).cuda()
#     else:
#         X_P = Variable(X_B)
#     bout = mod.forward(X_P)
#     _, predicted = torch.max(bout.data, 1)
#     tem = list(predicted.cpu().numpy())
#     preds += tem

# vvy.shape

# print(sklearn.metrics.accuracy_score(vvy,preds))

# print(sklearn.metrics.classification_report(vvy,preds,labels=list(range(20)),target_names=[cid_to_cat[ii] for ii in range(20)]))

In [None]:
# for i in range(322,333):
#     f = clf.predict(val_x_trans[i,:][np.newaxis,])
#     if f == val_y[i]:
#         print ("corr ",cid_to_cat[f[0]])
#     else:
#         print("I - ",i,"Predicted ",cid_to_cat[f[0]], " but was ",cid_to_cat[val_y[i]])

In [None]:
# plt.imshow(val_x[332,:].reshape(28,28))

In [None]:
# print ('score', clf.score(val_x_trans, val_y))
# print ('pred label', clf.predict(val_x_trans)[:20])