# **Read Me**

**Files:** I have used the **hw1p2_float16** dataset from Fall 2019 because it is smaller in size. The file paths need to be changed to match your own setup.

**Dataloader:** In the dataloader, I have padded the feature vectors and stacked both the features and the labels into one large 2D array each in the **init** part. The concatenation of frames is done in the **get item** part. Depending on the system, it might take a long time to load the training data into the train loader.

**Model:** I have used k=13 (input_size=1080) and a batch size of 256, initialized the model with Xavier initialization, applied batch norm after the activations, and used GELU as the activation function (torch 1.4 required). I have used the Adam optimizer with the default learning rate, halved the learning rate (multiplied it by 0.5) after every 5 epochs, and ran training for around 30 epochs (I do not remember exactly how many epochs I ran; 30 is a conservative estimate).




In [0]:
# Mount Google Drive at /content/drive; the .npy files loaded later are read
# from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Pin torch 1.4.0 / torchvision 0.5.0 (the README notes torch 1.4 is required for GELU).
!pip3 install torch===1.4.0 torchvision===0.5.0 -f https://download.pytorch.org/whl/torch_stable.html

In [0]:
import numpy as np
import torch
import sys
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils import data
from torchvision import transforms
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt
import time

# True when a CUDA device is usable; later cells read this flag to choose the
# torch device and the number of DataLoader workers.
cuda = torch.cuda.is_available()
cuda
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [0]:
# Show which torch version is actually in use after the pinned install.
print(torch.__version__)

In [0]:
# Load every split from Google Drive in one pass (same order as before:
# train features, train labels, dev features, dev labels, test features).
# NOTE(review): allow_pickle=True unpickles the file contents, so only point
# these paths at .npy files from a trusted source.
train, train_labels, dev, dev_labels, test = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        '/content/drive/My Drive/train1.npy',
        '/content/drive/My Drive/train_labels.npy',
        '/content/drive/My Drive/dev1.npy',
        '/content/drive/My Drive/dev_labels.npy',
        '/content/drive/My Drive/test1.npy',
    )
)

In [0]:
class MyDataset(data.Dataset):
    """Frame-level dataset serving each frame with ``k`` context frames per side.

    Every utterance in ``X`` is zero-padded with ``k`` rows at both ends and all
    padded utterances are stacked into a single 2-D array, so the context
    window of any frame is one contiguous slice of ``2 * k + 1`` rows.

    Args:
        X: sequence of per-utterance feature arrays, each 2-D and indexed as
            ``(frame, feature)``.
        Y: sequence of per-utterance label arrays; ``Y[i]`` holds one label
            per frame of ``X[i]``.
        k: number of context frames taken on each side of the centre frame.

    Attributes:
        samples: 2-D array of all zero-padded utterances stacked row-wise.
        labels: 1-D array of all labels, in frame order.
        length: cumulative (unpadded) frame count after each utterance.
        ind: for every frame, the row of ``samples`` holding that frame.

    Raises:
        ValueError: if an utterance has a different number of frames and labels.
    """

    def __init__(self, X, Y, k):
        self.X = X
        self.Y = Y
        self.k = k
        self.samples = []
        self.labels = []
        self.length = []
        self._init_dataset()

        # Frames per utterance, recovered from the cumulative totals.
        frames_per_utt = np.diff(np.asarray([0] + self.length, dtype=np.int64))
        # Utterance b is preceded by its own k leading pad rows plus 2*k pad
        # rows for each earlier utterance, i.e. k * (2*b + 1) rows in total.
        pad_shift = self.k * (2 * np.arange(len(frames_per_utt), dtype=np.int64) + 1)
        # np.repeat assigns the shift per utterance, so zero-length utterances
        # are skipped correctly instead of misaligning the frames after them.
        self.ind = (np.arange(frames_per_utt.sum(), dtype=np.int64)
                    + np.repeat(pad_shift, frames_per_utt))

    def __len__(self):
        """Return the number of labelled frames (no output is printed)."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return ``(features, label)`` for frame ``index``.

        ``features`` is a 1-D float32 tensor made of the ``2 * k + 1`` rows
        centred on the frame, flattened; ``label`` is a 0-d int64 tensor.
        """
        centre = self.ind[index]
        window = self.samples[centre - self.k:centre + self.k + 1]
        # torch.tensor copies, so the returned tensor never aliases self.samples.
        features = torch.tensor(window.reshape(-1), dtype=torch.float32)
        label = torch.tensor(self.labels[index]).long()
        return features, label

    def _init_dataset(self):
        """Pad and stack all utterances; fill ``samples``, ``labels``, ``length``.

        Returns:
            The ``(samples, labels)`` arrays that were stored on the instance.
        """
        padded_utts = []
        label_utts = []
        running_total = 0
        for i in range(len(self.X)):
            utterance = self.X[i]
            utt_labels = np.asarray(self.Y[i])
            if len(utt_labels) != len(utterance):
                raise ValueError(
                    'utterance %d has %d frames but %d labels'
                    % (i, len(utterance), len(utt_labels)))
            padded_utts.append(
                np.pad(utterance, ((self.k, self.k), (0, 0)),
                       'constant', constant_values=0))
            label_utts.append(utt_labels)
            running_total += len(utterance)
            self.length.append(running_total)

        if padded_utts:
            # Stack once at the end instead of growing a Python list of
            # per-frame arrays utterance by utterance.
            self.samples = np.concatenate(padded_utts, axis=0)
            self.labels = np.concatenate(label_utts, axis=0)
        else:
            self.samples = np.zeros((0, 0), dtype=np.float32)
            self.labels = np.zeros(0, dtype=np.int64)
        return self.samples, self.labels

In [0]:
class TestDataset(data.Dataset):
    """Unlabelled frame-level dataset serving each frame with ``k`` context frames per side.

    Every utterance in ``X`` is zero-padded with ``k`` rows at both ends and all
    padded utterances are stacked into a single 2-D array, so the context
    window of any frame is one contiguous slice of ``2 * k + 1`` rows.

    Args:
        X: sequence of per-utterance feature arrays, each 2-D and indexed as
            ``(frame, feature)``.
        k: number of context frames taken on each side of the centre frame.

    Attributes:
        samples: 2-D array of all zero-padded utterances stacked row-wise.
        length: cumulative (unpadded) frame count after each utterance.
        ind: for every frame, the row of ``samples`` holding that frame.
    """

    def __init__(self, X, k):
        self.X = X
        self.k = k
        self.samples = []
        self.length = []
        self._init_dataset()

        # Frames per utterance, recovered from the cumulative totals.
        frames_per_utt = np.diff(np.asarray([0] + self.length, dtype=np.int64))
        # Utterance b is preceded by its own k leading pad rows plus 2*k pad
        # rows for each earlier utterance, i.e. k * (2*b + 1) rows in total.
        pad_shift = self.k * (2 * np.arange(len(frames_per_utt), dtype=np.int64) + 1)
        # np.repeat assigns the shift per utterance, so zero-length utterances
        # are skipped correctly instead of misaligning the frames after them.
        self.ind = (np.arange(frames_per_utt.sum(), dtype=np.int64)
                    + np.repeat(pad_shift, frames_per_utt))

    def __len__(self):
        """Return the total number of (unpadded) frames (no output is printed)."""
        return len(self.ind)

    def __getitem__(self, index):
        """Return the flattened ``2 * k + 1`` row window of frame ``index`` as float32."""
        centre = self.ind[index]
        window = self.samples[centre - self.k:centre + self.k + 1]
        # torch.tensor copies, so the returned tensor never aliases self.samples.
        return torch.tensor(window.reshape(-1), dtype=torch.float32)

    def _init_dataset(self):
        """Pad and stack all utterances; fill ``samples`` and ``length``.

        Returns:
            The ``samples`` array that was stored on the instance.
        """
        padded_utts = []
        running_total = 0
        for i in range(len(self.X)):
            utterance = self.X[i]
            padded_utts.append(
                np.pad(utterance, ((self.k, self.k), (0, 0)),
                       'constant', constant_values=0))
            running_total += len(utterance)
            self.length.append(running_total)

        if padded_utts:
            # Stack once at the end instead of growing a Python list of
            # per-frame arrays utterance by utterance.
            self.samples = np.concatenate(padded_utts, axis=0)
        else:
            self.samples = np.zeros((0, 0), dtype=np.float32)
        return self.samples

In [0]:
# Worker processes for data loading: 8 when CUDA is available, otherwise
# everything is loaded in the main process.
num_workers = 0
if cuda:
    num_workers = 8

# Training: context of 13 frames per side, reshuffled every epoch.
train_dataset = MyDataset(train, train_labels, 13)

train_loader_args = {
    'shuffle': True,
    'batch_size': 256,
    'num_workers': num_workers,
    'pin_memory': True,
}

train_loader = data.DataLoader(train_dataset, **train_loader_args)



In [0]:
# Validation: same context size and batch size as training, fixed order.
# Use the same CUDA-aware worker count as the training loader instead of
# forcing 8 worker processes on machines without a GPU.
num_workers = 8 if cuda else 0
val_dataset = MyDataset(dev, dev_labels,13)
val_loader_args = dict(shuffle=False, batch_size=256, num_workers=num_workers, pin_memory=True)
val_loader = data.DataLoader(val_dataset, **val_loader_args)

In [0]:
# Testing: one frame per batch, served in dataset order (no shuffling) so the
# predictions line up with the frame ids.
test_dataset = TestDataset(test, 13)
test_loader_args = {
    'shuffle': False,
    'batch_size': 1,
    'num_workers': num_workers,
    'pin_memory': True,
}
test_loader = data.DataLoader(test_dataset, **test_loader_args)

In [0]:
def init_xavier(m):
  """Xavier (Glorot) normal initialisation for ``nn.Linear`` weights.

  Intended for ``model.apply(init_xavier)``. Weights of linear layers are
  redrawn from N(0, std^2) with ``std = sqrt(2 / (fan_in + fan_out))``;
  biases and every other module type are left untouched.

  Args:
      m: any ``nn.Module``; only ``nn.Linear`` instances are modified.
  """
  if isinstance(m, nn.Linear):
    fan_out, fan_in = m.weight.size()
    # Glorot & Bengio: Var(W) = 2 / (fan_in + fan_out). The numerator was
    # previously 1.0, which made the weights sqrt(2) times too small.
    std = float(np.sqrt(2.0 / (fan_in + fan_out)))
    with torch.no_grad():
      m.weight.normal_(0.0, std)

In [0]:
def init_hey(m):
  """He (Kaiming) normal initialisation for ``nn.Linear`` weights.

  Intended for ``model.apply(init_hey)``. Weights of linear layers are
  redrawn from N(0, std^2) with ``std = sqrt(2 / fan_in)``; biases and every
  other module type are left untouched.

  NOTE(review): the name is taken to mean "He" initialisation. The earlier
  formula, sqrt(2 / (fan_in + fan_out)), is the Xavier/Glorot one.

  Args:
      m: any ``nn.Module``; only ``nn.Linear`` instances are modified.
  """
  if isinstance(m, nn.Linear):
    fan_in = m.weight.size()[1]
    # He et al.: Var(W) = 2 / fan_in (fan_out does not enter the formula).
    std = float(np.sqrt(2.0 / fan_in))
    with torch.no_grad():
      m.weight.normal_(0.0, std)

In [0]:
# SIMPLE MODEL DEFINITION
class Simple_MLP(nn.Module):
    """Plain feed-forward network built from a list of layer widths.

    ``size_list[0]`` is the input width and ``size_list[-1]`` the output
    width. Every hidden layer is ``Linear -> GELU -> BatchNorm1d``; the final
    layer is a bare ``Linear`` with no activation or normalisation.
    """

    def __init__(self, size_list):
        super().__init__()
        self.size_list = size_list

        stack = []
        # Consecutive (in, out) width pairs for all layers except the last one.
        for n_in, n_out in zip(size_list[:-2], size_list[1:-1]):
            stack += [
                nn.Linear(n_in, n_out),
                nn.GELU(),
                nn.BatchNorm1d(n_out),
            ]
        # Output projection.
        stack.append(nn.Linear(size_list[-2], size_list[-1]))

        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.net(x)


In [0]:
# Network: 1080 inputs, eight hidden layers, 138 outputs.
model = Simple_MLP(
    [1080, 2048, 2048, 1024, 1024, 1024, 512, 512, 256, 138]
)
model.apply(init_xavier)

criterion = nn.CrossEntropyLoss()

# Adam with its default settings; the learning rate is halved every 5 scheduler steps.
optimizer = optim.Adam(params=model.parameters())
scheduler = StepLR(optimizer=optimizer, step_size=5, gamma=0.5)
# Alternative that was tried:
# scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True)

device = torch.device("cuda") if cuda else torch.device("cpu")
model.to(device)
print(model)

In [0]:
def train_epoch(model, train_loader, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Relies on the module-level ``device`` (where model and batches are moved)
    and the module-level ``scheduler`` (stepped once after the pass).

    Returns:
        ``(mean_loss, accuracy)``: the loss averaged over batches and the
        training accuracy in percent. Both are also printed.
    """
    model.train()
    model.to(device)

    loss_sum = 0.0
    n_seen = 0.0
    n_correct = 0.0

    tic = time.time()
    for features, targets in train_loader:
        # Gradients accumulate across .backward() calls, so clear them first.
        optimizer.zero_grad()
        features, targets = features.to(device), targets.to(device)

        logits = model(features)
        guesses = logits.data.max(1)[1]
        n_seen += targets.size(0)
        n_correct += (guesses == targets).sum().item()

        batch_loss = criterion(logits, targets)
        loss_sum += batch_loss.item()

        batch_loss.backward()
        optimizer.step()
    scheduler.step()
    toc = time.time()

    mean_loss = loss_sum / len(train_loader)
    accuracy = (n_correct / n_seen) * 100.0
    print('Training Loss: ', mean_loss, 'Time: ', toc - tic, 's')
    print('Training Accuracy: ', accuracy, '%')
    return mean_loss, accuracy

In [0]:
def val_model(model, val_loader, criterion):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Relies on the module-level ``device``, where model and batches are moved.

    Returns:
        ``(mean_loss, accuracy)``: the loss averaged over batches and the
        accuracy in percent. Both are also printed.
    """
    model.eval()
    model.to(device)

    loss_sum = 0.0
    n_seen = 0.0
    n_correct = 0.0

    with torch.no_grad():
        for features, targets in val_loader:
            features, targets = features.to(device), targets.to(device)

            logits = model(features)
            guesses = logits.data.max(1)[1]
            n_seen += targets.size(0)
            n_correct += (guesses == targets).sum().item()

            loss_sum += criterion(logits, targets).item()

    mean_loss = loss_sum / len(val_loader)
    accuracy = (n_correct / n_seen) * 100.0
    print('Testing Loss: ', mean_loss)
    print('Testing Accuracy: ', accuracy, '%')
    return mean_loss, accuracy


In [0]:
def test_model(model, test_loader):
    """Predict a class index for every sample yielded by ``test_loader``.

    Relies on the module-level ``device``, where each batch is moved before
    the forward pass.

    Args:
        model: network whose output has one score per class along dim 1.
        test_loader: iterable of feature batches (no labels).

    Returns:
        1-D numpy array holding one predicted class index per sample, in
        loader order. Empty (int64) when the loader yields nothing.
    """
    with torch.no_grad():
        model.eval()
        batch_preds = []

        for batch_idx, (data) in enumerate(test_loader):
            data = data.to(device)
            outputs = model(data)

            _, predicted = torch.max(outputs.data, 1)
            # Keep every prediction in the batch. Taking only element [0]
            # silently dropped samples whenever batch_size was above 1.
            batch_preds.append(predicted.cpu().numpy())

        if not batch_preds:
            return np.array([], dtype=np.int64)
        return np.concatenate(batch_preds)


In [0]:
# Train for n_epochs, validating and checkpointing after every epoch.
n_epochs = 30
# Per-epoch history, used by the plotting cells.
Train_acc = []
Train_loss = []
Val_loss = []
Val_acc = []

for i in range(n_epochs):
    print('Epoch: ',i+1)
    # get_last_lr() reports the learning rate currently set on the optimizer.
    # get_lr() is only meant to be called from inside scheduler.step(); called
    # here it applies gamma a second time on StepLR boundary epochs.
    print('LR: ', scheduler.get_last_lr())
    train_loss,acc = train_epoch(model, train_loader, criterion, optimizer)
    test_loss, test_acc = val_model(model, val_loader, criterion)
    Train_loss.append(train_loss)
    Train_acc.append(acc)
    Val_loss.append(test_loss)
    Val_acc.append(test_acc)
    print('='*20)
    #scheduler.step(test_acc)
    # Overwrite the checkpoint so the latest weights survive a disconnect.
    torch.save(model.state_dict(), '/content/drive/My Drive/model2.pt')

In [0]:
# Run inference over the test loader; the result is written to CSV later.
pred = test_model(model=model, test_loader=test_loader)

In [0]:
# Write the submission file: a header row, then one "<row id>,<label>" line
# per prediction, with ids counting up from 0.
with open('/content/drive/My Drive/mbarman7.csv', 'w') as w:
    w.write('id,label\n')
    w.writelines(
        '{},{}\n'.format(row_id, label) for row_id, label in enumerate(pred)
    )

In [0]:
# Training loss per epoch (one point for each entry appended to Train_loss).
plt.title('Training Loss')
plt.xlabel('Epoch Number')
plt.ylabel('Loss')
plt.plot(Train_loss)

In [0]:
# Validation loss per epoch (one point for each entry appended to Val_loss).
plt.title('Val Loss')
plt.xlabel('Epoch Number')
plt.ylabel('Loss')
plt.plot(Val_loss)

In [0]:
# Validation accuracy in percent per epoch (one point for each entry appended to Val_acc).
plt.title('Val Accuracy')
plt.xlabel('Epoch Number')
plt.ylabel('Accuracy (%)')
plt.plot(Val_acc)