## Imports

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import random
import os
import matplotlib.pyplot as plt
from sklearn import metrics

## Loading Datasets

- Replace the dataset paths below with the location of your own copies of the CSV files before running this notebook.


In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Folder that holds the six CSV files; edit this one line to point at your own copy.
DATA_DIR = "/content/drive/MyDrive/CS6910/FoDL/task1"

# Width of every one-hot label matrix. It is fixed (rather than inferred with
# num_classes=-1) so that a split which happens to lack the highest class id
# still gets the same number of columns as the 5-unit output layer of MLFFNN.
NUM_CLASSES = 5

# Raw comma-separated features and integer class ids for each split.
train_data = np.genfromtxt(f"{DATA_DIR}/train_data.csv", delimiter=',')
train_label = np.genfromtxt(f"{DATA_DIR}/train_label.csv", delimiter=',')
val_data = np.genfromtxt(f"{DATA_DIR}/val_data.csv", delimiter=',')
val_label = np.genfromtxt(f"{DATA_DIR}/val_label.csv", delimiter=',')
test_data = np.genfromtxt(f"{DATA_DIR}/test_data.csv", delimiter=',')
test_label = np.genfromtxt(f"{DATA_DIR}/test_label.csv", delimiter=',')

# Features become float32 tensors; labels are cast to int64 and one-hot encoded.
train_dataset = torch.tensor(train_data, dtype=torch.float32)
train_label = torch.nn.functional.one_hot(torch.tensor(train_label, dtype=torch.long), num_classes=NUM_CLASSES)
val_dataset = torch.tensor(val_data, dtype=torch.float32)
val_label = torch.nn.functional.one_hot(torch.tensor(val_label, dtype=torch.long), num_classes=NUM_CLASSES)
test_dataset = torch.tensor(test_data, dtype=torch.float32)
test_label = torch.nn.functional.one_hot(torch.tensor(test_label, dtype=torch.long), num_classes=NUM_CLASSES)

## Model definitions and helper functions

In [None]:
class dataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each sample with the label at the same index."""

    def __init__(self, data, label):
        # Both containers are kept exactly as passed in; nothing is copied or converted.
        self.label = label
        self.data = data

    def __getitem__(self, idx):
        sample = self.data[idx]
        target = self.label[idx]
        return sample, target

    def __len__(self):
        # The sample container alone determines the reported size.
        return len(self.data)



class MLFFNN(nn.Module):
    """Multi-layer feed-forward classifier: 36 inputs -> 60 -> 30 -> 5 class scores.

    Both hidden layers use a Tanh activation. When ``batch_norm`` is true each
    activation is followed by a ``BatchNorm1d`` layer of matching width.

    ``forward`` returns raw, unnormalised logits. ``torch.nn.CrossEntropyLoss``
    applies log-softmax internally, so putting a Softmax layer in front of it
    would normalise twice and flatten the gradients. Arg-max predictions are
    unaffected; apply ``torch.softmax(logits, dim=1)`` explicitly if class
    probabilities are needed.

    Args:
        batch_norm: Insert ``BatchNorm1d`` after each hidden activation.
    """

    def __init__(self, batch_norm = True):
        super().__init__()
        layers = []
        # Layer order (Linear, Tanh, [BatchNorm1d]) fixes the Sequential indices
        # and therefore the parameter names in state_dict().
        for fan_in, fan_out in ((36, 60), (60, 30)):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.Tanh())
            if batch_norm:
                layers.append(nn.BatchNorm1d(fan_out))
        layers.append(nn.Linear(30, 5))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the 5 class logits for a batch ``x`` with 36 features per row."""
        return self.model(x)


def set_seed(seed: int = 42):
    """Seed the NumPy, Python ``random`` and PyTorch generators with ``seed``.

    The value is also written, as a string, to the ``PYTHONHASHSEED``
    environment variable.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (np.random.seed, random.seed, torch.manual_seed):
        seeder(seed)


def init_weights(m):
    """Initialise one module in place; intended for ``model.apply(init_weights)``.

    * ``nn.Linear``: Xavier-uniform weights, bias filled with 0.01.
    * ``nn.BatchNorm1d``: weights drawn from N(1.0, 0.02), bias filled with 0.01.
    * Any other module is left untouched.

    Parameters that are absent (``nn.Linear(..., bias=False)`` or
    ``nn.BatchNorm1d(..., affine=False)``) are skipped instead of raising.
    """
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
    elif isinstance(m, nn.BatchNorm1d):
        if m.weight is not None:
            nn.init.normal_(m.weight, mean=1.0, std=0.02)
    else:
        return

    # Both supported layer types share the same constant bias initialisation.
    if m.bias is not None:
        nn.init.constant_(m.bias, 0.01)

def get_accuracy(true, pred):
    """Count the rows where ``true`` and ``pred`` share the same arg-max column.

    The result is a 0-dim tensor holding the number of matching rows (a count,
    not a fraction of the batch).
    """
    target_idx = true.argmax(dim=1)
    predicted_idx = pred.argmax(dim=1)
    return (target_idx == predicted_idx).sum()




# Maps every accepted (lower-cased) optimizer name to a factory taking (params, lr).
_OPTIMIZER_FACTORIES = {
    "delta rule": lambda params, lr: torch.optim.SGD(params, lr=lr),
    "generalized delta rule": lambda params, lr: torch.optim.SGD(params, lr=lr, momentum=0.9),
    "adagrad": lambda params, lr: torch.optim.Adagrad(params, lr=lr),
    "rmsprop": lambda params, lr: torch.optim.RMSprop(params, lr=lr),
    "adam": lambda params, lr: torch.optim.Adam(params, lr=lr),
}


def _plot_history(train_acc, val_acc, train_loss, val_loss):
    """Draw per-epoch accuracy (top panel) and loss (bottom panel) for both splits."""
    plt.figure(figsize=(10, 14))
    panels = (
        ("Epoch vs Accuracy", train_acc, val_acc),
        ("Epoch vs Loss", train_loss, val_loss),
    )
    for position, (title, train_curve, val_curve) in enumerate(panels, start=1):
        plt.subplot(2, 1, position)
        plt.plot(np.arange(len(train_curve)), train_curve)
        plt.plot(np.arange(len(val_curve)), val_curve)
        plt.legend(["Train", "Validation"])
        plt.title(title)
    plt.show()


def train(train_data, val_data, optim : str, seed: int = 42, batch_norm :bool = False, batch_size = 1, lr = 0.0003, threshold= 1e-5, max_epochs = None):
    """Train a fresh ``MLFFNN`` until the training loss stops changing.

    An epoch is one pass over ``train_data`` followed by an evaluation on
    ``val_data``. Training stops once the absolute change in mean training loss
    between two consecutive epochs is at most ``threshold`` (or after
    ``max_epochs`` epochs, when given). Accuracy/loss curves are plotted at the
    end and one progress line is printed per epoch.

    Args:
        train_data: Map-style dataset yielding ``(features, one_hot_label)``.
        val_data: Validation dataset of the same form; evaluated as one batch.
        optim: Case-insensitive optimizer name: "delta rule" (SGD),
            "generalized delta rule" (SGD with momentum 0.9), "adagrad",
            "rmsprop" or "adam".
        seed: Seed applied right before weight initialisation.
        batch_norm: Forwarded to ``MLFFNN``. BatchNorm1d cannot normalise a
            single-sample batch in training mode, so use ``batch_size > 1``
            together with this flag.
        batch_size: Mini-batch size of the training loader.
        lr: Learning rate handed to the optimizer.
        threshold: Convergence tolerance on the epoch-to-epoch loss change.
        max_epochs: Optional upper bound on the number of epochs; ``None``
            (default) keeps the original "run until converged" behaviour.

    Returns:
        ``(model, train_loss, val_loss, train_acc, val_acc)`` where the four
        lists hold one value per epoch (accuracies in percent). The model is
        returned in evaluation mode.

    Raises:
        ValueError: If ``optim`` is not one of the supported names.
    """
    optim = optim.lower()
    if optim not in _OPTIMIZER_FACTORIES:
        raise ValueError(
            "optim must be one of {}, got {!r}".format(sorted(_OPTIMIZER_FACTORIES), optim)
        )

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=len(val_data), shuffle=True)

    # CrossEntropyLoss applies log-softmax itself and accepts float one-hot
    # targets as class probabilities, so it is meant to be fed raw logits.
    criterion = torch.nn.CrossEntropyLoss()

    model = MLFFNN(batch_norm=batch_norm)
    # Seed after construction, so the caller's seed governs the weight
    # initialisation below and the shuffling order of the loaders.
    set_seed(seed)
    model.apply(init_weights)
    optimizer = _OPTIMIZER_FACTORIES[optim](model.parameters(), lr)

    train_acc = []
    train_loss = []
    val_loss = []
    val_acc = []
    err = np.inf
    epoch = 0

    while err > threshold and (max_epochs is None or epoch < max_epochs):
        # ---- training pass -------------------------------------------------
        model.train()
        correct = 0
        loss_sum = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            out = model(inputs.float())
            loss = criterion(out, targets.float())
            loss.backward()
            optimizer.step()
            correct += (out.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
            # .item() detaches the value so no autograd history piles up over the epoch.
            loss_sum += loss.item() * len(inputs)

        train_acc.append(correct / len(train_data) * 100)
        train_loss.append(loss_sum / len(train_data))
        if len(train_loss) >= 2:
            err = abs(train_loss[-1] - train_loss[-2])

        # ---- validation pass -----------------------------------------------
        # eval() makes BatchNorm use its running statistics and stops it from
        # updating them on validation data.
        model.eval()
        val_correct = 0
        val_loss_sum = 0.0
        val_seen = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                out = model(inputs.float())
                n_batch = len(targets)
                val_correct += (out.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
                val_loss_sum += criterion(out, targets.float()).item() * n_batch
                val_seen += n_batch
        val_acc.append(val_correct / val_seen * 100)
        val_loss.append(val_loss_sum / val_seen)

        print("Epoch{} Train Acc : {:.4f} Train loss : {:.4f} Val Acc : {:.4f} Val Loss : {:.4f}".format(
            epoch + 1, train_acc[-1], train_loss[-1], val_acc[-1], val_loss[-1]))
        epoch += 1

    # Hand the model back ready for inference even if no epoch was run.
    model.eval()

    _plot_history(train_acc, val_acc, train_loss, val_loss)

    return model, train_loss, val_loss, train_acc, val_acc



def plot_confusion_matrix(model, data, label):
    """Run ``model`` on ``data`` and display the resulting confusion matrix.

    Args:
        model: Callable returning one row of class scores per input row. If it
            is a module currently in training mode it is switched to
            evaluation mode for the forward pass (so layers such as BatchNorm
            use their running statistics) and switched back afterwards.
        data: Batch of inputs passed to ``model`` in a single call.
        label: One-hot targets, one row per input row.

    The matrix always covers every class index up to the wider of ``label``
    and the model output, so classes that do not occur in this particular
    split still get a row and a column.
    """
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()
    try:
        with torch.no_grad():
            out = model(data)
    finally:
        if was_training:
            model.train()

    true_idx = torch.argmax(label, dim=1).cpu().numpy()
    pred_idx = torch.argmax(out, dim=1).cpu().numpy()
    class_ids = list(range(max(label.shape[1], out.shape[1])))

    matrix = metrics.confusion_matrix(true_idx, pred_idx, labels=class_ids)

    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=matrix)

    cm_display.plot()
    plt.show()

# NOTE(review): this call needs a global `model` to exist already. Reading the
# notebook top to bottom, `model` is first assigned in a later cell, so running
# the cells in order would raise NameError here — confirm the intended order.
plot_confusion_matrix(model, train_dataset, train_label)

## Initializing datasets and models

In [None]:
# Wrap the prepared float32 feature tensors rather than the raw NumPy arrays.
# The right-hand sides no longer mention the names being rebound, so this cell
# can be re-run without wrapping a dataset inside another dataset.
train_data = dataset(train_dataset, train_label)
val_data = dataset(val_dataset, val_label)
test_data = dataset(test_dataset, test_label)

# Freshly initialised model with the default architecture and a fixed seed.
model = MLFFNN()
seed = 42
set_seed(seed)
model.apply(init_weights)

## Delta Rule

In [None]:
# Plain SGD without momentum ("delta rule"), no batch normalisation.
model, train_loss, val_loss, train_acc, val_acc = train(
    train_data,
    val_data,
    optim="Delta rule",
    batch_norm=False,
)
# Show both splits, as every other optimizer cell in this notebook does.
plot_confusion_matrix(model, train_dataset, train_label)
plot_confusion_matrix(model, test_dataset, test_label)

## Generalized Delta Rule

In [None]:
# SGD with momentum 0.9 ("generalized delta rule"), no batch normalisation.
model, train_loss, val_loss, train_acc, val_acc = train(
    train_data,
    val_data,
    optim="Generalized Delta rule",
    batch_norm=False,
)
# Confusion matrices: training split first, then the test split.
plot_confusion_matrix(model, train_dataset, train_label)
plot_confusion_matrix(model, test_dataset, test_label)

## AdaGrad

In [None]:
# AdaGrad with an explicit learning rate of 0.0003, no batch normalisation.
model, train_loss, val_loss, train_acc, val_acc = train(
    train_data,
    val_data,
    optim="AdaGrad",
    lr=0.0003,
    batch_norm=False,
)
# Confusion matrices: training split first, then the test split.
plot_confusion_matrix(model, train_dataset, train_label)
plot_confusion_matrix(model, test_dataset, test_label)

## RMSProp

In [None]:
# RMSProp with the default learning rate, no batch normalisation.
model, train_loss, val_loss, train_acc, val_acc = train(
    train_data,
    val_data,
    optim="RMSProp",
    batch_norm=False,
)
# Confusion matrices: training split first, then the test split.
plot_confusion_matrix(model, train_dataset, train_label)
plot_confusion_matrix(model, test_dataset, test_label)

## Adam

In [None]:
# Adam with the default learning rate, no batch normalisation.
model, train_loss, val_loss, train_acc, val_acc = train(
    train_data,
    val_data,
    optim="Adam",
    batch_norm=False,
)
# Confusion matrices: training split first, then the test split.
plot_confusion_matrix(model, train_dataset, train_label)
plot_confusion_matrix(model, test_dataset, test_label)