In [48]:
import xgboost as xbg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import pickle
import joblib

%matplotlib inline

def load_p(fname):
    return np.array(pickle.load(open(fname,"rb")))

In [49]:
X_train_cls = load_p("/scratch/pbanerj6/sml-dataset/X_train_cls.p")
X_val_cls = load_p("/scratch/pbanerj6/sml-dataset/X_val_cls.p")
X_test_cls = load_p("/scratch/pbanerj6/sml-dataset/X_test_cls.p")
X_train_all = load_p("/scratch/pbanerj6/sml-dataset/X_train_all_90.pkl")
X_val_all = load_p("/scratch/pbanerj6/sml-dataset/X_val_all_90.pkl")
X_test_all = load_p("/scratch/pbanerj6/sml-dataset/X_test_all_90.pkl")

X_train = np.concatenate([X_train_cls,X_train_all],axis=1)
X_val = np.concatenate([X_val_cls,X_val_all],axis=1)
X_test = np.concatenate([X_test_cls,X_test_all],axis=1)

In [50]:
y_train = load_p("/scratch/pbanerj6/sml-dataset/y_train.p")
y_val = load_p("/scratch/pbanerj6/sml-dataset/y_val.p")
y_test = load_p("/scratch/pbanerj6/sml-dataset/y_test.p")

In [51]:
import torch
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from tqdm import tqdm
import os

In [52]:
class MyDataset(Dataset):
    def __init__(self, data, target, transform=None):
        self.data = torch.from_numpy(data).float()
        self.target = torch.from_numpy(target).long()
        self.transform = transform
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.target[index]
        
        if self.transform:
            x = self.transform(x)
        
        return x, y
    
    def __len__(self):
        return len(self.data)

In [53]:
train_dataset = MyDataset(X_train,y_train)
val_dataset = MyDataset(X_val,y_val)
test_dataset = MyDataset(X_test,y_test)

In [54]:
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=1,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=1,
    pin_memory=True
)

In [68]:
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
# N, D_in, H, D_out = 64, 768, 200, 2
N , D_in, H1, H2, D_out = 32, 6768, 500, 100, 2

In [69]:
# model = torch.nn.Sequential(
#     torch.nn.Linear(D_in, H),
#     torch.nn.ReLU(),
#     torch.nn.Linear(H, D_out),
# )

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H1),
    torch.nn.ReLU(),
    torch.nn.Linear(H1, H2),
    torch.nn.ReLU(),
    torch.nn.Linear(H2, D_out),
)

model.to('cuda')
model = torch.nn.DataParallel(model) 

loss_fn = nn.CrossEntropyLoss()


In [70]:
def accuracy(out, labels):
    outputs = np.argmax(out, axis=1)
    return np.sum(outputs == labels)

In [71]:
learning_rate = 1e-6
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

patience = 20

max_val_acc = 0.0
for t in tqdm(range(100),desc="Epoch"):
    model.train()
    total_loss = 0.0
    tmp_acc = 0.0
    steps = 0
    nb_eval_examples = 0
    for batch_idx, (x, y) in enumerate(train_loader):
        steps+=1
        x=x.to('cuda')
        y=y.to('cuda')
        logits = model(x)
        loss = loss_fn(logits, y)
        logits = logits.detach().cpu().numpy()
        label_ids = y.to('cpu').numpy()
        tmp_acc += accuracy(logits, label_ids)
        # Compute and print loss.
        
        total_loss += loss.mean()
        # Before the backward pass, use the optimizer object to zero all of the
        # gradients for the variables it will update (which are the learnable
        # weights of the model). This is because by default, gradients are
        # accumulated in buffers( i.e, not overwritten) whenever .backward()
        # is called. Checkout docs of torch.autograd.backward for more details.
        optimizer.zero_grad()

        # Backward pass: compute gradient of the loss with respect to model
        # parameters
        loss.backward()

        # Calling the step function on an Optimizer makes an update to its
        # parameters
        optimizer.step()
        nb_eval_examples += x.size(0)
    
    model.eval()
    val_acc = 0.0
    steps = 0
    nb_val_examples = 0
    for batch_idx, (x, y) in enumerate(val_loader):
        steps+=1
        logits = model(x)
        x=x.to('cuda')
        y=y.to('cuda')
        logits = logits.detach().cpu().numpy()
        label_ids = y.to('cpu').numpy()
        val_acc += accuracy(logits, label_ids)
        nb_val_examples += x.size(0)
    
    if max_val_acc < val_acc/nb_val_examples:
        patience = 20
        print("Saving Best Model!!")
        max_val_acc = val_acc/nb_val_examples
        model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
        output_model_file = os.path.join("/scratch/pbanerj6/sml-nn-v1", "best_model.bin")
        torch.save(model_to_save.state_dict(), output_model_file)
    else:
        patience-=1
        
    if patience == 0:
        print("Validation Accuracy Did Not Improve After", patience, "Epochs. Stopping.")
        break
    
    
    print("Epoch:",t,"Loss:",total_loss.item()/steps,"Train Accuracy:",(tmp_acc/nb_eval_examples),"Val Accuracy:",(val_acc/nb_val_examples))







Epoch:   0%|          | 0/100 [00:00<?, ?it/s][A[A[A[A[A




Epoch:   1%|          | 1/100 [00:09<15:54,  9.64s/it][A[A[A[A[A

Saving Best Model!!
Epoch: 0 Loss: 3.3673906042220745 Train Accuracy: 0.59124 Val Accuracy: 0.5886666666666667







Epoch:   2%|▏         | 2/100 [00:19<15:42,  9.62s/it][A[A[A[A[A

Saving Best Model!!
Epoch: 1 Loss: 3.206304282330452 Train Accuracy: 0.6668533333333333 Val Accuracy: 0.6149333333333333







Epoch:   3%|▎         | 3/100 [00:28<15:33,  9.62s/it][A[A[A[A[A

Saving Best Model!!
Epoch: 2 Loss: 3.0506264544547874 Train Accuracy: 0.6996266666666666 Val Accuracy: 0.6272







Epoch:   4%|▍         | 4/100 [00:38<15:27,  9.66s/it][A[A[A[A[A

Saving Best Model!!
Epoch: 3 Loss: 2.904498940325798 Train Accuracy: 0.72408 Val Accuracy: 0.6356







Epoch:   5%|▌         | 5/100 [00:48<15:17,  9.65s/it][A[A[A[A[A

Saving Best Model!!
Epoch: 4 Loss: 2.7719053877160906 Train Accuracy: 0.7410266666666666 Val Accuracy: 0.6425333333333333







Epoch:   6%|▌         | 6/100 [00:57<15:06,  9.64s/it][A[A[A[A[A

Saving Best Model!!
Epoch: 5 Loss: 2.653440824468085 Train Accuracy: 0.7554933333333334 Val Accuracy: 0.6452666666666667







Epoch:   7%|▋         | 7/100 [01:07<14:57,  9.65s/it][A[A[A[A[A

Saving Best Model!!
Epoch: 6 Loss: 2.548600087267287 Train Accuracy: 0.7685466666666667 Val Accuracy: 0.6505333333333333







Epoch:   8%|▊         | 8/100 [01:17<14:48,  9.65s/it][A[A[A[A[A

Saving Best Model!!
Epoch: 7 Loss: 2.4535364029255318 Train Accuracy: 0.7804933333333334 Val Accuracy: 0.6528







Epoch:   9%|▉         | 9/100 [01:26<14:37,  9.65s/it][A[A[A[A[A

Saving Best Model!!
Epoch: 8 Loss: 2.365719279837101 Train Accuracy: 0.79164 Val Accuracy: 0.6544
Saving Best Model!!







Epoch:  10%|█         | 10/100 [01:45<18:28, 12.32s/it][A[A[A[A[A

Epoch: 9 Loss: 2.283501288231383 Train Accuracy: 0.8024533333333334 Val Accuracy: 0.6552







Epoch:  11%|█         | 11/100 [01:55<17:05, 11.52s/it][A[A[A[A[A

Saving Best Model!!
Epoch: 10 Loss: 2.205965602144282 Train Accuracy: 0.8129333333333333 Val Accuracy: 0.6612666666666667







Epoch:  12%|█▏        | 12/100 [02:04<16:09, 11.01s/it][A[A[A[A[A

Epoch: 11 Loss: 2.1319657995345747 Train Accuracy: 0.8223066666666666 Val Accuracy: 0.6586666666666666







Epoch:  13%|█▎        | 13/100 [02:14<15:23, 10.61s/it][A[A[A[A[A

Epoch: 12 Loss: 2.0607481611535903 Train Accuracy: 0.832 Val Accuracy: 0.6598







Epoch:  14%|█▍        | 14/100 [02:23<14:43, 10.27s/it][A[A[A[A[A

Epoch: 13 Loss: 1.992646172706117 Train Accuracy: 0.8398533333333333 Val Accuracy: 0.6609333333333334







Epoch:  15%|█▌        | 15/100 [02:34<14:40, 10.36s/it][A[A[A[A[A

Epoch: 14 Loss: 1.92705532642121 Train Accuracy: 0.8471066666666667 Val Accuracy: 0.6609333333333334







Epoch:  16%|█▌        | 16/100 [02:44<14:26, 10.32s/it][A[A[A[A[A

Epoch: 15 Loss: 1.863314494680851 Train Accuracy: 0.8546533333333334 Val Accuracy: 0.6548







Epoch:  17%|█▋        | 17/100 [02:55<14:15, 10.31s/it][A[A[A[A[A

Epoch: 16 Loss: 1.8017122309258644 Train Accuracy: 0.8619466666666666 Val Accuracy: 0.6556666666666666







Epoch:  18%|█▊        | 18/100 [03:05<14:07, 10.34s/it][A[A[A[A[A

Epoch: 17 Loss: 1.7421497101479388 Train Accuracy: 0.8679066666666667 Val Accuracy: 0.6512666666666667







Epoch:  19%|█▉        | 19/100 [03:16<14:05, 10.44s/it][A[A[A[A[A

Epoch: 18 Loss: 1.6838941208859708 Train Accuracy: 0.87376 Val Accuracy: 0.6500666666666667







Epoch:  20%|██        | 20/100 [03:26<13:57, 10.47s/it][A[A[A[A[A

Epoch: 19 Loss: 1.6272846627742685 Train Accuracy: 0.8802533333333333 Val Accuracy: 0.6488666666666667







Epoch:  21%|██        | 21/100 [03:37<13:51, 10.52s/it][A[A[A[A[A

Epoch: 20 Loss: 1.5721344643450799 Train Accuracy: 0.8858266666666667 Val Accuracy: 0.6440666666666667







Epoch:  22%|██▏       | 22/100 [03:48<13:52, 10.67s/it][A[A[A[A[A

Epoch: 21 Loss: 1.5180016050947474 Train Accuracy: 0.8920533333333334 Val Accuracy: 0.6454666666666666







Epoch:  23%|██▎       | 23/100 [03:58<13:29, 10.52s/it][A[A[A[A[A

Epoch: 22 Loss: 1.4654466994265292 Train Accuracy: 0.8967333333333334 Val Accuracy: 0.6464666666666666







Epoch:  24%|██▍       | 24/100 [04:09<13:22, 10.56s/it][A[A[A[A[A

Epoch: 23 Loss: 1.4141362616356383 Train Accuracy: 0.9025333333333333 Val Accuracy: 0.6418







Epoch:  25%|██▌       | 25/100 [04:19<13:10, 10.54s/it][A[A[A[A[A

Epoch: 24 Loss: 1.363835760887633 Train Accuracy: 0.9077333333333333 Val Accuracy: 0.6407333333333334







Epoch:  26%|██▌       | 26/100 [04:30<13:02, 10.58s/it][A[A[A[A[A

Epoch: 25 Loss: 1.3142022315492021 Train Accuracy: 0.9128933333333333 Val Accuracy: 0.6397333333333334







Epoch:  27%|██▋       | 27/100 [04:41<12:56, 10.64s/it][A[A[A[A[A

Epoch: 26 Loss: 1.2650773718001995 Train Accuracy: 0.9184533333333333 Val Accuracy: 0.64







Epoch:  28%|██▊       | 28/100 [04:51<12:48, 10.67s/it][A[A[A[A[A

Epoch: 27 Loss: 1.2171439962184176 Train Accuracy: 0.9241333333333334 Val Accuracy: 0.6370666666666667







Epoch:  29%|██▉       | 29/100 [05:02<12:39, 10.70s/it][A[A[A[A[A

Epoch: 28 Loss: 1.1698026616522605 Train Accuracy: 0.9285466666666666 Val Accuracy: 0.6310666666666667







Epoch:  30%|███       | 30/100 [05:13<12:28, 10.70s/it][A[A[A[A[A

Epoch: 29 Loss: 1.1232398499833776 Train Accuracy: 0.9333466666666667 Val Accuracy: 0.6308
Validation Accuracy Did Not Improve After 0 Epochs. Stopping.


In [72]:
# model = torch.nn.Sequential(
#     torch.nn.Linear(D_in, H),
#     torch.nn.ReLU(),
#     torch.nn.Linear(H, D_out),
# )

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H1),
    torch.nn.ReLU(),
    torch.nn.Linear(H1, H2),
    torch.nn.ReLU(),
    torch.nn.Linear(H2, D_out),
)
output_model_file = os.path.join("/scratch/pbanerj6/sml-nn-v1", "best_model.bin")
model.load_state_dict(torch.load(output_model_file))
model.eval()

Sequential(
  (0): Linear(in_features=6768, out_features=500, bias=True)
  (1): ReLU()
  (2): Linear(in_features=500, out_features=100, bias=True)
  (3): ReLU()
  (4): Linear(in_features=100, out_features=2, bias=True)
)

In [73]:
from scipy.stats import rankdata
from scipy.special import softmax

def mrrs(out, labels):
#     print(out,labels)
    outputs = np.argmax(out,axis=1)
    mrr = 0.0 
    for label,ranks in zip(labels,out):
        ranks = rankdata(ranks*-1)
        rank = ranks[label]
#         print(rank,ranks)
        mrr+=1/rank
    return mrr/len(labels)

def precision_at(out,labels,prank=1):
    count = 0
    for label,ranks in zip(labels,out):
        ranks = rankdata(ranks*-1)
        rank = ranks[label]
#         print(rank,ranks)
        if rank <= prank:
            count+=1
    return count/len(labels)

def mrrwrapper(qid2c,qid2indexmap,preds_prob):
    labels = []
    out = []
    for qid in qid2c.keys():
        scores = []
        for ix in qid2indexmap[qid]:
            if len(scores) < 6:
                scores.append(preds_prob[ix][1])
        if len(scores) < 6:
            continue
        out.append(scores)
        labels.append(int(qid2c[qid]))
    return mrrs(np.array(out),labels),precision_at(np.array(out),labels,1),precision_at(np.array(out),labels,3)

def load_ranking(fname):
    return pickle.load(open("/scratch/pbanerj6/sml-dataset/ranking_"+fname+".p","rb"))

qid2c,qid2indexmap = load_ranking("test")

model.to('cuda')
model = torch.nn.DataParallel(model) 
model.eval()
val_acc = 0.0
steps = 0
nb_val_examples = 0

test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=1,
    pin_memory=1
)

all_preds = []

for batch_idx, (x, y) in enumerate(test_loader):
    x=x.to('cuda')
    y=y.to('cuda')
    logits = model(x)
    logits = logits.detach().cpu().numpy()
    
    for pred in logits:
        all_preds.append(softmax(pred))
    
    label_ids = y.to('cpu').numpy()
    val_acc += accuracy(logits, label_ids)
    nb_val_examples += x.size(0)
    
mrr = mrrwrapper(qid2c,qid2indexmap,all_preds)
print("Test Accuracy:",(val_acc/nb_val_examples),"MRR:",(mrr))

Test Accuracy: 0.6488 MRR: (0.6115571428571458, 0.3873333333333333, 0.8013333333333333)


In [40]:
# 100 : Test Accuracy: 0.6601333333333333 MRR: (0.6016777777777803, 0.38, 0.7846666666666666)
# 200 : Test Accuracy: 0.6572 MRR: (0.6074333333333362, 0.38666666666666666, 0.7813333333333333)
# 200-100 : Test Accuracy: 0.6604666666666666 MRR: (0.6049000000000025, 0.38333333333333336, 0.7853333333333333)
# 300-100 : Test Accuracy: 0.6583333333333333 MRR: (0.6061682539682567, 0.38666666666666666, 0.772)

# Both
# 300-100 , Test Accuracy: 0.6492666666666667 MRR: (0.6176015873015903, 0.398, 0.8046666666666666)
# 500-100, Test Accuracy: 0.6488 MRR: (0.6115571428571458, 0.3873333333333333, 0.8013333333333333) 

In [41]:
X_train.shape

(75000, 768)

In [2]:
import torch

In [5]:
a = torch.ones([100,100])
b = torch.ones([100,100])

In [11]:
torch.cat([a,b],1).size()

torch.Size([100, 200])