# PyTorch Simple Neural Network

    1. "Restart and Run All" should work fine. 
    2. Just make sure to change the paths properly. 
    3. The code takes a long time even on GPUs (about 3 to 4 hrs). Nonetheless, the code is written in such a way that, even if it stops partway through, it continues from where it left off instead of starting from the beginning. 

In [None]:
import torch 
import torch.nn as nn 
import torch.optim as optim 
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F

import copy

import numpy as np
from tqdm import tqdm

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is read as a global by the tensor/data-loader code further down.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

from sklearn.model_selection import KFold, train_test_split
import matplotlib.pyplot as plt
import pickle
import os

    1. The network initializes weights for 4 layers, but the forward and backward passes use only the first `num_layers` of them. So, we can use the same network for 1-, 2-, 3-, and 4-layer neural networks. 
    2. Later, I found a better way to initialize a NN with a variable number of hidden units. 

In [None]:
class Simple_NN(nn.Module):
    """Fully connected classifier with one to four hidden layers and 2 output logits.

    Weights for four hidden layers are always created; ``num_layers`` only
    controls how many of them ``forward`` applies, so one class covers the
    1-, 2-, 3- and 4-layer variants.

    Each hidden layer is ``Linear -> Dropout -> activation`` (dropout is
    applied before the non-linearity, as in the original design).

    Args:
        input_dim: Number of input features.
        hidd_dim: Width of every hidden layer.
        dropout_rate: Drop probability passed to ``nn.Dropout``.
        num_layers: Number of hidden layers used in ``forward``. Values
            below 1 behave like 1 and values above 4 behave like 4.
        activation: ``'relu'`` or ``'leakyrelu'``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """

    _ACTIVATIONS = {'relu': nn.ReLU, 'leakyrelu': nn.LeakyReLU}

    def __init__(self, input_dim, hidd_dim, dropout_rate=0.5, num_layers=3, 
                activation='relu'):
        super(Simple_NN, self).__init__()

        # Fail early with a clear message instead of an UnboundLocalError
        # when an unknown activation name is passed.
        if activation not in self._ACTIVATIONS:
            raise ValueError(
                "Unsupported activation {!r}; expected one of {}".format(
                    activation, sorted(self._ACTIVATIONS)))
        act_fn = self._ACTIVATIONS[activation]()

        def hidden_block(in_dim):
            # The activation module holds no parameters, so a single instance
            # is shared by all blocks.
            return nn.Sequential(
                nn.Linear(in_dim, hidd_dim),
                nn.Dropout(p=dropout_rate),
                act_fn)

        # Attribute names and creation order are kept so that state_dict keys
        # and seeded initialization stay the same.
        self.linear1 = hidden_block(input_dim)
        self.linear2 = hidden_block(hidd_dim)
        self.linear3 = hidden_block(hidd_dim)
        self.linear4 = hidden_block(hidd_dim)

        self.linear_final = nn.Linear(hidd_dim, 2)

        self.num_layers = num_layers

    def forward(self, x ): 
        """Apply the first ``num_layers`` hidden layers, then the output layer."""
        # The first hidden layer is always applied.
        x = self.linear1(x)
        optional_layers = (self.linear2, self.linear3, self.linear4)
        for depth, layer in enumerate(optional_layers, start=1):
            # Layer ``depth + 1`` is used only when num_layers > depth.
            if not self.num_layers > depth:
                break
            x = layer(x)

        x = self.linear_final(x)
        return x

In [None]:
from data_processing import *

# Locations of the coupon dataset files, relative to the notebook.
train_path = '../data/coupons/train.csv'
test_path = '../data/coupons/test.csv'
sample_path = '../data/coupons/sample_submission1.csv'

# Read the training frame first, then the test frame.
data, test_data = pd.read_csv(train_path), pd.read_csv(test_path)

In [None]:
# Cross-entropy loss shared by training and evaluation; eval_model and the
# training loop read `criterion` as a global.
criterion = nn.CrossEntropyLoss()
# Move the loss module to the selected device (also echoed as the cell output).
criterion.to(device)

In [None]:
# Number of cross-validation folds; also read by the training loop.
n_splits = 10

# Shuffled K-fold splitter with a fixed seed so the folds are reproducible
# across runs (the resume logic relies on the folds being identical).
# The seed is written as the integer 1 rather than the boolean True:
# a bool is coerced to the same integer seed, so the splits are unchanged,
# but `random_state` is a seed, not an on/off flag.
kf = KFold(n_splits=n_splits, random_state=1, shuffle=True)


In [None]:
# Options forwarded to clean_all (defined in data_processing); the same two
# flags are reused later when the test set is cleaned.
drop_first, prune = False, False

# Clean the training frame and convert features / labels to numpy arrays.
X_df, y_df = clean_all(data, drop_first, prune)
X, y = np.array(X_df), np.array(y_df)

# Materialize every (train, validation) split once so that all
# hyperparameter settings are evaluated on exactly the same folds.
# Each entry is (X_train, X_val, y_train, y_val).
data_folds = []
for train_index, val_index in kf.split(X):
    X_train_np, y_train_np = X[train_index], y[train_index]
    X_val_np, y_val_np = X[val_index], y[val_index]
    data_folds.append((X_train_np, X_val_np, y_train_np, y_val_np))
    

In [None]:
# Display the dimensions of the feature array as the cell output.
X.shape

In [None]:
def eval_model(eval_dataloader, model):
    """Evaluate ``model`` on a labelled data loader.

    The model is switched to eval mode for the pass and is always put back
    into train mode afterwards, even if the forward pass raises. The loss is
    computed with the module-level ``criterion``.

    Args:
        eval_dataloader: Iterable yielding ``(x, y)`` batches, where ``y``
            holds one class index per sample.
        model: Module mapping ``x`` to per-class scores of shape
            ``(batch, num_classes)``.

    Returns:
        Tuple ``(acc, eval_loss)``: ``acc`` is the fraction of samples whose
        arg-max prediction equals the label; ``eval_loss`` is the mean of the
        per-batch losses (batches are weighted equally), in whatever type
        ``criterion`` returns divided by the batch count.

    Raises:
        ValueError: If ``eval_dataloader`` yields no batches.
    """
    pred_batches = []
    true_batches = []
    eval_loss = 0
    num_batches = 0

    model.eval()
    try:
        with torch.no_grad():
            for x, y in eval_dataloader:
                out = model(x)
                eval_loss += criterion(out, y)
                pred_batches.append(torch.argmax(out, dim=1))
                true_batches.append(y)
                num_batches += 1
    finally:
        # Restore training mode no matter how the loop ended.
        model.train()

    if num_batches == 0:
        # Previously this surfaced as an UnboundLocalError on the loop index.
        raise ValueError('eval_dataloader yielded no batches')

    # One concatenation per list instead of converting element by element.
    pred = torch.cat(pred_batches).cpu().numpy()
    true = torch.cat(true_batches).cpu().numpy()

    eval_loss = eval_loss / num_batches
    acc = np.mean(pred == true)
    return acc, eval_loss
    


In [None]:
def test_model(test_dataloader, model):
    pred = []
    pred_probabilities = torch.tensor([]).to(device)
    
    model.eval()
    for i, (x, y) in enumerate(test_dataloader): 
        
        with torch.no_grad(): 
            out = model(x) 
            pred_labels = torch.argmax(out, axis=1)
            pred_prob = torch.softmax(out, dim=1)
            
            pred.extend(pred_labels)
            pred_probabilities = torch.cat( [pred_probabilities, pred_prob], axis=0)
        
    pred = torch.tensor(pred).cpu().numpy()
    pred_probabilities = torch.tensor(pred_probabilities).cpu().numpy()
    
    model.train()
    return pred, pred_probabilities
    


In [None]:
def get_dataloaders(X_train_np, X_val_np, y_train_np, y_val_np, batch_size=64): 
    """Wrap numpy train/validation arrays in PyTorch data loaders.

    Features are converted to float32 tensors and labels keep the dtype
    ``torch.tensor`` infers for them; everything is moved to the global
    ``device`` up front.

    Args:
        X_train_np, X_val_np: Feature arrays for the two splits.
        y_train_np, y_val_np: Label arrays for the two splits.
        batch_size: Batch size of the training loader only.

    Returns:
        ``(train_dataloader, eval_dataloader)``. The training loader
        reshuffles each epoch; the evaluation loader keeps the input order
        and always uses batches of 512.
    """
    def to_features(array):
        return torch.tensor(array).float().to(device)

    def to_labels(array):
        return torch.tensor(array).to(device)

    train_loader = DataLoader(
        dataset=TensorDataset(to_features(X_train_np), to_labels(y_train_np)),
        batch_size=batch_size,
        shuffle=True,
    )
    val_loader = DataLoader(
        dataset=TensorDataset(to_features(X_val_np), to_labels(y_val_np)),
        batch_size=512,
        shuffle=False,
    )
    return train_loader, val_loader

    

In [None]:
# Clean the test frame with the same options as the training data; the second
# value returned by clean_all is not used here.
X_test_df, _ = clean_all(test_data, drop_first, prune)
X_test_np = np.array(X_test_df)

# Float32 features on the selected device.
X_test = torch.tensor(X_test_np).float().to(device)
# Placeholder labels (all zeros) so the test set fits the same (x, y)
# TensorDataset layout as the train/validation loaders.
Y_test = torch.zeros(X_test_np.shape[0], dtype=torch.int64).to(device)

test_dataset = TensorDataset(X_test, Y_test)
# Fixed order and batch size, so predictions line up with the test rows.
test_dataloader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=512)


### Selected List of Hyperparameters after basic exploration

In [None]:
# Hyperparameter settings to evaluate. Each entry is a tuple
#   (hidd_dim, dropout_rate, num_layers, batch_size, activation)
# in the order the training loop unpacks it.
#
# For each activation the list contains:
#   * hidden size 1000 with 4, 3, 2 and 1 layers, each tried with the five
#     (dropout_rate, batch_size) pairs below, followed by
#   * hidden size 5000, dropout 0.85, a single layer, three batch sizes.
# All 'relu' settings come first, then the same settings with 'leakyrelu'.
hyper_pram_list = [
    setting
    for act_name in ('relu', 'leakyrelu')
    for setting in (
        [
            (1000, rate, depth, batch, act_name)
            for depth in (4, 3, 2, 1)
            for rate, batch in (
                (0.8, 512),
                (0.75, 512),
                (0.85, 512),
                (0.8, 64),
                (0.8, 32),
            )
        ]
        + [(5000, 0.85, 1, batch, act_name) for batch in (512, 64, 32)]
    )
]

print('HYP Len:', len(hyper_pram_list))

In [None]:
# Resumable hyperparameter sweep with k-fold cross-validation.
#
# For every setting in hyper_pram_list and every fold in data_folds, a fresh
# Simple_NN is trained for 100 epochs. The epoch with the best validation
# accuracy is kept, and that model's test-set probabilities and accuracy are
# stored under the key (hidd_dim, drate, num_layers, bs, activation, fold).
# Both result dicts are pickled after each setting finishes all its folds, so
# an interrupted run skips the settings that were already saved.

output_dir = '../data/coupons/all_outputs/'
predictions_path = os.path.join(output_dir, 'simple_nn_predictions_prob.pk')
accs_path = os.path.join(output_dir, 'simple_nn_accs_prob.pk')

# Create the output folder up front so that the first save cannot fail after
# a full setting has been trained.
os.makedirs(output_dir, exist_ok=True)

# Load earlier results if there are any.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# this notebook wrote itself.
try: 
    with open(predictions_path, 'rb') as f:
        all_best_predictions = pickle.load(f)
    with open(accs_path, 'rb') as f:
        all_best_acc = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError,
        AttributeError, ImportError, IndexError) as err:
    # Missing or unreadable result files: start from scratch, but say so
    # instead of swallowing every exception silently.
    print('No usable saved results ({!r}); starting from scratch.'.format(err))
    all_best_predictions = {}
    all_best_acc = {}


for i, (hidd_dim, drate, num_layers, bs, activation) in enumerate(hyper_pram_list): 

    # Results reach the disk only after all folds of a setting are done, so
    # finding fold 0 means the whole setting was completed in an earlier run.
    if (hidd_dim, drate, num_layers, bs, activation, 0) in all_best_acc: 
        continue 

    print('Hyperparameter {} of {}'.format(i+1, len(hyper_pram_list)))

    avg_acc = 0 

    for num_val in range(n_splits): 

        X_train_np, X_val_np, y_train_np, y_val_np = data_folds[num_val]
        train_dataloader, eval_dataloader = get_dataloaders(
            X_train_np, X_val_np, y_train_np, y_val_np, batch_size=bs)

        # Take the input width from the data instead of hard-coding it.
        model = Simple_NN(input_dim=X_train_np.shape[1], hidd_dim=hidd_dim,
                          dropout_rate=drate, num_layers=num_layers,
                          activation=activation)
        model.to(device)

        optimizer = torch.optim.Adam(model.parameters())

        train_losses = []
        eval_losses = []
        eval_acc = []

        best_eval_acc = 0 
        best_model = None 

        for epoch in tqdm(range(100)): 
            model.train()
            total_loss = 0 
            # A separate counter name keeps the outer loop index `i` intact.
            for num_batches, (x, y) in enumerate(train_dataloader, start=1): 

                optimizer.zero_grad()

                out = model(x) 
                loss = criterion(out, y)

                loss.backward()
                optimizer.step()

                total_loss += loss.item()

            total_loss = total_loss / num_batches
            acc, eval_loss = eval_model(eval_dataloader, model)

            train_losses.append(total_loss)
            eval_losses.append(eval_loss)
            eval_acc.append(acc)

            # Snapshot the model whenever validation accuracy improves. The
            # `is None` check guarantees a snapshot exists even if accuracy
            # never rises above 0.
            if best_model is None or acc > best_eval_acc: 
                best_eval_acc = acc 
                best_model = copy.deepcopy(model)

        avg_acc += best_eval_acc

        fold_key = (hidd_dim, drate, num_layers, bs, activation, num_val)
        pred, pred_prob = test_model(test_dataloader, best_model)
        all_best_predictions[fold_key] = pred_prob
        all_best_acc[fold_key] = best_eval_acc

        print('Split: ', num_val , '\t Max Eval Acc: ', np.max(eval_acc))

    avg_acc = avg_acc/n_splits
    print('AVG: ', num_val , '\t Eval Acc: ', avg_acc)

    # Checkpoint after every completed setting; `with` makes sure the files
    # are flushed and closed.
    with open(predictions_path, 'wb') as f:
        pickle.dump(all_best_predictions, f)
    with open(accs_path, 'wb') as f:
        pickle.dump(all_best_acc, f)