## Imports

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import random
import os
import matplotlib.pyplot as plt
from sklearn import metrics

## Loading Datasets

- Replace the dataset paths below with the location of your own copies before running the code.



In [None]:
# Mount Google Drive so the CSV files under /content/drive can be read.
from google.colab import drive

drive.mount('/content/drive')

# Read the unlabeled samples straight into a float32 tensor.
unlabeled = torch.tensor(
    np.genfromtxt(
        "/content/drive/MyDrive/CS6910/Assignment1/task3/training_data_set_17_unlabeled.csv",
        delimiter=',',
    ),
    dtype=torch.float32,
)

# The same tensor is handed over as both arguments, i.e. each sample is its
# own target. `dataset` is defined outside this section of the notebook.
unlabeled_train = dataset(unlabeled, unlabeled)

# Shuffled batches, each holding a tenth of the unlabeled samples.
unlabeled_loader = DataLoader(
    unlabeled_train,
    batch_size=int(len(unlabeled) * 0.1),
    shuffle=True,
)



def load_dataset(path1, path2, num_classes=-1):
    """Load one labeled split from CSV and wrap it in a shuffled DataLoader.

    Args:
        path1: Path to the comma-separated feature file.
        path2: Path to the comma-separated file of integer class labels.
        num_classes: Width of the one-hot label encoding. The default ``-1``
            lets ``one_hot`` infer it from the largest label in this file;
            pass the real class count to get the same width for every split,
            even one that happens to lack the highest class.

    Returns:
        Tuple ``(labeled_loader, labeled, label)``: the DataLoader built over
        ``dataset(labeled, one_hot_labels)``, and the feature and label arrays
        exactly as ``np.genfromtxt`` returned them.
    """
    labeled = np.genfromtxt(path1, delimiter=',')
    label = np.genfromtxt(path2, delimiter=',')
    labeled_label = torch.nn.functional.one_hot(
        torch.tensor(label, dtype=torch.long), num_classes=num_classes
    )

    labeled_train = dataset(labeled, labeled_label)

    # A tenth of the split per batch, but never 0: DataLoader rejects a
    # non-positive batch_size, which is what fewer than 10 rows would produce.
    batch_size = max(1, int(len(labeled) * 0.1))
    labeled_loader = torch.utils.data.DataLoader(
        labeled_train, batch_size=batch_size, shuffle=True
    )
    return labeled_loader, labeled, label




# Load the train / validation / test splits. Every call returns the tuple
# (loader, feature array, label array) for one pair of CSV files.
train_labeled_loader, train_labeled, train_label = load_dataset(
    path1="/content/drive/MyDrive/CS6910/Assignment1/task3/training_data_set_17_labeled_data.csv",
    path2='/content/drive/MyDrive/CS6910/Assignment1/task3/training_data_set_17_labeled_labels.csv',
)
val_labeled_loader, val_labeled, val_label = load_dataset(
    path1="/content/drive/MyDrive/CS6910/Assignment1/task3/validation_data_set_17_data.csv",
    path2='/content/drive/MyDrive/CS6910/Assignment1/task3/validation_data_set_17_labels.csv',
)
test_labeled_loader, test_labeled, test_label = load_dataset(
    path1="/content/drive/MyDrive/CS6910/Assignment1/task3/testing_data_set_17_data.csv",
    path2='/content/drive/MyDrive/CS6910/Assignment1/task3/testing_data_set_17_labels.csv',
)




# --- Labeled training split -------------------------------------------------
X_train_labeled = '/content/drive/MyDrive/CS6910/Assignment1/task3/training_data_set_17_labeled_data.csv'
y_train_labeled = '/content/drive/MyDrive/CS6910/Assignment1/task3/training_data_set_17_labeled_labels.csv'

# Header-less CSVs: every row is data.
X_train_labeled_df = pd.read_csv(X_train_labeled, header=None)
y_train_labeled_df = pd.read_csv(y_train_labeled, header=None)

# Features as float32; labels as float32 first, then one-hot rows over 5 classes.
X_train_labeled_tensor = torch.tensor(X_train_labeled_df.values, dtype=torch.float32)
y_train_labeled_tensor = torch.tensor(y_train_labeled_df.values, dtype=torch.float32)
y_train_labeled_tensor = torch.nn.functional.one_hot(
    y_train_labeled_tensor.squeeze().long(),
    num_classes=5,
).float()

dataset_labeled = dataset(X_train_labeled_tensor, y_train_labeled_tensor)

# Raw NumPy copy of the features, read from the same file as the DataFrame.
train_labeled = np.genfromtxt(X_train_labeled, delimiter=',')

# Shuffled batches of one tenth of the training samples.
train_labeled_loader = DataLoader(
    dataset_labeled,
    batch_size=len(dataset_labeled) // 10,
    shuffle=True,
)




# --- Validation split -------------------------------------------------------
X_val_labeled = '/content/drive/MyDrive/CS6910/Assignment1/task3/validation_data_set_17_data.csv'
y_val_labeled = '/content/drive/MyDrive/CS6910/Assignment1/task3/validation_data_set_17_labels.csv'

# Header-less CSVs: every row is data.
X_val_labeled_df = pd.read_csv(X_val_labeled, header=None)
y_val_labeled_df = pd.read_csv(y_val_labeled, header=None)

# Features as float32; labels converted to one-hot rows over 5 classes.
X_val_labeled_tensor = torch.tensor(X_val_labeled_df.values, dtype=torch.float32)
y_val_labeled_tensor = torch.tensor(y_val_labeled_df.values, dtype=torch.float32)
y_val_labeled_tensor = torch.nn.functional.one_hot(y_val_labeled_tensor.squeeze().long(), num_classes=5).float()

# Raw NumPy copy of the features, read from the same file as the DataFrame.
val_labeled = np.genfromtxt(X_val_labeled, delimiter=',')
dataset_val = dataset(X_val_labeled_tensor, y_val_labeled_tensor)

# Validation is for measurement only, so serve the split as one deterministic
# batch: metrics computed from any single batch then cover every validation
# sample, instead of one random sample as with the DataLoader default
# batch_size=1 combined with shuffling.
val_labeled_loader = torch.utils.data.DataLoader(
    dataset_val,
    batch_size=max(1, len(dataset_val)),
    shuffle=False,
)





# --- Test split -------------------------------------------------------------
X_test_labeled = '/content/drive/MyDrive/CS6910/Assignment1/task3/testing_data_set_17_data.csv'
y_test_labeled = '/content/drive/MyDrive/CS6910/Assignment1/task3/testing_data_set_17_labels.csv'

# Header-less CSVs: every row is data.
X_test_labeled_df = pd.read_csv(X_test_labeled, header=None)
y_test_labeled_df = pd.read_csv(y_test_labeled, header=None)

# Features as float32; labels as float32 first, then one-hot rows over 5 classes.
X_test_labeled_tensor = torch.tensor(X_test_labeled_df.values, dtype=torch.float32)
y_test_labeled_tensor = torch.tensor(y_test_labeled_df.values, dtype=torch.float32)
y_test_labeled_tensor = torch.nn.functional.one_hot(
    y_test_labeled_tensor.squeeze().long(),
    num_classes=5,
).float()

# Raw NumPy copy of the features, read from the same file as the DataFrame.
test_labeled = np.genfromtxt(X_test_labeled, delimiter=',')

dataset_test = dataset(X_test_labeled_tensor, y_test_labeled_tensor)

# Shuffled batches of one tenth of the test samples.
test_labeled_loader = DataLoader(
    dataset_test,
    batch_size=len(dataset_test) // 10,
    shuffle=True,
)


## Autoencoder Definition

In [None]:
class AANN(nn.Module):
    """Auto-associative network: an encoder and a mirrored decoder.

    Both halves are Linear -> Tanh -> Linear. The encoder maps
    ``inputnodes -> hlnodes -> compressednodes`` and the decoder maps
    ``compressednodes -> hlnodes -> inputnodes``.

    Args:
        inputnodes: Width of the input (and of the reconstruction).
        hlnodes: Width of the hidden layer in each half.
        compressednodes: Width of the bottleneck code.
    """

    def __init__(self, inputnodes, hlnodes, compressednodes):
        super().__init__()
        # Encoder layers are created first, then the decoder's.
        shrink = [
            nn.Linear(inputnodes, hlnodes),
            nn.Tanh(),
            nn.Linear(hlnodes, compressednodes),
        ]
        expand = [
            nn.Linear(compressednodes, hlnodes),
            nn.Tanh(),
            nn.Linear(hlnodes, inputnodes),
        ]
        self.encoder = nn.Sequential(*shrink)
        self.decoder = nn.Sequential(*expand)

    def forward(self, x):
        """Return ``(reconstruction, code)`` for the batch ``x``."""
        code = self.encoder(x)
        reconstruction = self.decoder(code)
        return reconstruction, code


class StackedAutoEncoders(nn.Module):
    """Classifier built from three chained autoencoders plus a linear head.

    Each ``aannN`` must return a pair whose second element is its compressed
    code; that code is what gets passed on to the next stage.

    Args:
        aann1, aann2, aann3: The three stages, applied in this order.
        compressed_size: Width of the code produced by ``aann3``.
        outnodes: Number of output classes.
    """

    def __init__(self, aann1, aann2, aann3, compressed_size, outnodes):
        super().__init__()
        self.aann1, self.aann2, self.aann3 = aann1, aann2, aann3
        self.outlayer = nn.Linear(compressed_size, outnodes)

    def forward(self, x):
        """Return class probabilities (softmax over dim 1) for the batch ``x``."""
        code = x
        for stage in (self.aann1, self.aann2, self.aann3):
            # Only the compressed code travels on; reconstructions are dropped.
            _, code = stage(code)
        scores = self.outlayer(code)
        return torch.softmax(scores, dim=1)

## AutoEncoder training Helper functions

In [None]:
def _pretrain_aann(aann, frozen_encoders, loader, n_samples, optimizer, criterion, threshold, banner):
    """Train one AANN until its epoch loss changes by no more than ``threshold``.

    Args:
        aann: The autoencoder being trained; called as ``aann(x)`` and expected
            to return ``(reconstruction, code)``.
        frozen_encoders: Already-trained autoencoders applied (without
            gradients) to each batch to produce this stage's input. Empty for
            the first stage.
        loader: Iterable of ``(input, target)`` batches.
        n_samples: Sample count used to average the summed batch losses.
        optimizer: Optimizer over ``aann``'s parameters.
        criterion: Reconstruction loss.
        threshold: Training stops once the epoch-to-epoch loss change is not
            greater than this.
        banner: Text printed once before training starts.
    """
    print(banner)
    err = np.inf
    previous_loss = None
    epoch = 0
    while err > threshold:
        running = 0.0
        for input1, output1 in loader:
            optimizer.zero_grad()
            if frozen_encoders:
                # Earlier stages only supply features; they must not be updated.
                with torch.no_grad():
                    features = input1
                    for encoder in frozen_encoders:
                        _, features = encoder(features)
                target = features
            else:
                # First stage: reconstruct the target delivered by the loader.
                features = input1
                target = output1.float()

            reconstruction, _ = aann(features)
            batch_loss = criterion(reconstruction, target)
            batch_loss.backward()
            optimizer.step()
            # .item() keeps the running sum a plain float, so no autograd
            # graph is kept alive across batches.
            running += batch_loss.item() * len(input1)

        epoch += 1
        epoch_loss = running / n_samples
        if previous_loss is not None:
            err = abs(epoch_loss - previous_loss)
        previous_loss = epoch_loss
        if not (epoch % 50) or err < threshold:
            print("Epoch{} Train loss : {:.4f} ".format(epoch, epoch_loss))


def train_AE(Aann1, Aann2, Aann3, unlabeled_loader, unlabeled, threshold = 1e-5,learning_rate = 0.0003):
    """Greedily pre-train three stacked autoencoders on unlabeled data.

    Stage 1 reconstructs the loader's targets from its inputs. Stage 2
    reconstructs the code of stage 1, and stage 3 the code of stage 2; the
    earlier stages are evaluated under ``torch.no_grad()`` and are not updated
    while a later stage trains. Every stage uses MSE loss and its own Adam
    optimizer, and runs until the sample-averaged epoch loss changes by no
    more than ``threshold`` between consecutive epochs (at least two epochs).

    Args:
        Aann1, Aann2, Aann3: Autoencoders returning ``(reconstruction, code)``.
        unlabeled_loader: Iterable of ``(input, target)`` batches.
        unlabeled: The unlabeled samples; only ``len(unlabeled)`` is used, to
            average the loss.
        threshold: Convergence tolerance on the epoch-loss change.
        learning_rate: Adam learning rate shared by all three stages.

    Returns:
        ``(Aann1, Aann2, Aann3)`` — the same objects, trained in place.
    """
    criterion = nn.MSELoss()
    n_samples = len(unlabeled)
    stages = (
        (Aann1, (), "1st"),
        (Aann2, (Aann1,), "2nd"),
        (Aann3, (Aann1, Aann2), "3rd"),
    )
    for aann, frozen_encoders, ordinal in stages:
        optimizer = torch.optim.Adam(aann.parameters(), lr=learning_rate)
        banner = "-"*30 + "\n Traning " + ordinal + " AANN\n" + "-"*30
        _pretrain_aann(aann, frozen_encoders, unlabeled_loader, n_samples,
                       optimizer, criterion, threshold, banner)

    return Aann1, Aann2, Aann3

def train_stacked_encoders(StackedAE, train_labeled_loader, train_labeled, val_labeled_loader, val_labeled, learning_rate = 0.0003,threshold = 1e-6):
    """Fine-tune the stacked classifier on labeled data until the loss settles.

    Training runs whole epochs with Adam until the sample-averaged training
    loss changes by no more than ``threshold`` between consecutive epochs (at
    least two epochs). After every epoch the model is scored on *all*
    validation batches; progress is printed every 50th epoch and on
    convergence.

    Args:
        StackedAE: Model returning class probabilities of shape
            ``(batch, classes)``.
        train_labeled_loader: Iterable of ``(features, one_hot_label)`` batches.
        train_labeled: Training samples; only ``len(train_labeled)`` is used,
            to average loss and accuracy.
        val_labeled_loader: Iterable of ``(features, one_hot_label)`` batches.
        val_labeled: Unused; kept so existing callers keep working.
        learning_rate: Adam learning rate.
        threshold: Convergence tolerance on the training-loss change.

    Returns:
        ``StackedAE`` — the same object, trained in place.
    """
    optimizer_stacked = torch.optim.Adam(StackedAE.parameters(), lr=learning_rate)

    def nll_from_probs(probs, target):
        # The model already applies softmax, so feeding its output to
        # CrossEntropyLoss would apply softmax twice. Take the negative
        # log-likelihood of the probabilities instead; the clamp avoids log(0).
        return nn.functional.nll_loss(torch.log(probs.clamp_min(1e-12)), target)

    err = np.inf
    train_loss = []
    train_acc = []
    print("-"*50+"\nFine Tuning Stacked Auto Encoder\n"+"-"*50)
    while err > threshold:
        total_loss = 0.0
        total_correct = 0
        for img, label in train_labeled_loader:
            optimizer_stacked.zero_grad()
            target = torch.argmax(label.float(), dim=1)
            pred = StackedAE(img.float())
            loss = nll_from_probs(pred, target)
            loss.backward()
            optimizer_stacked.step()
            # Plain Python numbers: no autograd graph survives the batch.
            total_loss += loss.item() * len(img)
            total_correct += (torch.argmax(pred, dim=1) == target).sum().item()

        train_acc.append(total_correct / len(train_labeled) * 100)
        train_loss.append(total_loss / len(train_labeled))
        if len(train_loss) > 1:
            err = abs(train_loss[-1] - train_loss[-2])

        # Score every validation batch, weighting each by its size.
        val_loss_sum = 0.0
        val_correct = 0
        val_seen = 0
        with torch.no_grad():
            for img, label in val_labeled_loader:
                target = torch.argmax(label.float(), dim=1)
                out = StackedAE(img.float())
                val_loss_sum += nll_from_probs(out, target).item() * len(label)
                val_correct += (torch.argmax(out, dim=1) == target).sum().item()
                val_seen += len(label)
        val_acc = val_correct / val_seen * 100 if val_seen else float("nan")
        val_loss = val_loss_sum / val_seen if val_seen else float("nan")

        if not (len(train_loss) % 50) or err < threshold:
            print("Epoch{} Train Acc : {:.4f} Train loss : {:.4f} Val Acc : {:.4f} Val Loss : {:.4f}".format(
                len(train_loss), train_acc[-1], train_loss[-1], val_acc, val_loss))

    return StackedAE

def update_dataset(StackedAE, unlabeled, train_labeled,train_label, prob_threshold = 0.5):
    """Move confidently predicted unlabeled samples into the training set.

    A sample counts as confident when the mean gap between its top class
    probability and the other class probabilities exceeds ``prob_threshold``.
    Confident samples are appended to the training data with their predicted
    class as label; the rest stay unlabeled, in their original order.

    Args:
        StackedAE: Callable mapping ``unlabeled`` to class probabilities of
            shape ``(n_samples, n_classes)``.
        unlabeled: Tensor of unlabeled samples, one per row.
        train_labeled: Array of labeled training samples, one per row.
        train_label: Array of class labels for ``train_labeled``.
        prob_threshold: Minimum mean probability gap for a pseudo-label.

    Returns:
        Tuple ``(no_unlabeled_dataset, updated_unlabeled,
        updated_train_labeled, updated_train_label)``: a bool that is True
        when no unlabeled sample is left, the remaining unlabeled tensor, the
        extended training array and the extended 1-D label array.
    """
    with torch.no_grad():
        pred = StackedAE(unlabeled)
    max_values, unlabeled_classes = torch.max(pred, dim=1)

    # Average the gap over the *other* classes. The top class contributes 0
    # to the sum, so the divisor is n_classes - 1 (guarded for one class).
    n_other = max(pred.shape[1] - 1, 1)
    margin = torch.sum(max_values.unsqueeze(dim=1) - pred, dim=1) / n_other
    confident = margin > prob_threshold
    indices = torch.where(confident)[0]

    updated_train_labeled = np.vstack([train_labeled, unlabeled[confident].cpu().numpy()])
    updated_train_label = np.vstack([
        train_label.reshape(-1, 1),
        unlabeled_classes[confident].unsqueeze(dim=1).cpu().numpy(),
    ])

    # A boolean mask keeps the row order and still works when nothing is left
    # (an empty index array would be float-typed and rejected as an index).
    updated_unlabeled = unlabeled[~confident]
    no_unlabeled_dataset = len(updated_unlabeled) == 0

    print("-"*60+f"\nSucessfully labeled {len(indices)} and added to training data\n"+"-"*60)
    return no_unlabeled_dataset, updated_unlabeled, updated_train_labeled , updated_train_label.reshape(-1)


def train_stacked(unlabeled_loader, unlabeled, train_labeled_loader, train_labeled,train_label, val_labeled_loader, val_labeled,val_label):
    """Build, pre-train and fine-tune the three-stage stacked autoencoder.

    The three AANNs are created, seeded and initialised, then pre-trained on
    the unlabeled data with ``train_AE``. The classifier wrapping them is
    then fine-tuned on the labeled data with ``train_stacked_encoders``.

    Args:
        unlabeled_loader: Batches of unlabeled samples for pre-training.
        unlabeled: Unlabeled samples, one per row; its second dimension sets
            the input width of the first autoencoder.
        train_labeled_loader: Batches of labeled training samples.
        train_labeled: Labeled training samples.
        train_label: Training class labels; the number of distinct values
            sets the number of output nodes.
        val_labeled_loader: Batches of validation samples.
        val_labeled: Validation samples.
        val_label: Unused; kept so existing callers keep working.

    Returns:
        The trained ``StackedAutoEncoders`` model.
    """
    # Take the input width from the data rather than a fixed 36, so the stack
    # matches whatever feature count the unlabeled set has.
    input_size = unlabeled.shape[1]
    compressed_size = 6
    outnodes = len(np.unique(train_label))

    Aann1 = AANN(input_size, 30, 26)
    Aann2 = AANN(26, 20, 16)
    Aann3 = AANN(16, 10, compressed_size)

    set_seed()
    init_weights(Aann1)
    init_weights(Aann2)
    init_weights(Aann3)

    # The classifier holds the same AANN objects, so the pre-training below
    # updates the encoders it will later fine-tune.
    StackedAE = StackedAutoEncoders(Aann1, Aann2, Aann3, compressed_size, outnodes)

    Aann1, Aann2, Aann3 = train_AE(Aann1, Aann2, Aann3, unlabeled_loader, unlabeled)
    StackedAE = train_stacked_encoders(StackedAE, train_labeled_loader, train_labeled, val_labeled_loader, val_labeled)

    return StackedAE

def plot_confusion_matrix(model, data, label):
    """Plot the confusion matrix of ``model`` on ``data``.

    Args:
        model: Callable returning per-class scores of shape
            ``(n_samples, n_classes)``; the predicted class is the argmax.
        data: Input batch passed to ``model``.
        label: True labels, either class ids (1-D, or a single column) or
            one-hot rows of shape ``(n_samples, n_classes)``.
    """
    with torch.no_grad():
        out = model(data)

    # as_tensor accepts arrays and tensors alike without a copy warning.
    y_true = torch.as_tensor(label)
    if y_true.dim() == 2 and y_true.shape[1] > 1:
        # One-hot rows -> class ids. sklearn cannot compare a 2-D indicator
        # matrix against 1-D predictions.
        y_true = torch.argmax(y_true, dim=1)
    else:
        y_true = y_true.reshape(-1).long()
    y_pred = torch.argmax(out, dim=1)

    confusion_matrix = metrics.confusion_matrix(y_true.cpu().numpy(), y_pred.cpu().numpy())

    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix)

    cm_display.plot()
    plt.show()

## Deep FeedForward Network definition and train function


In [None]:
class DFNN(nn.Module):
    """Feed-forward classifier: input -> 26 -> 16 -> 6 -> out_nodes.

    The three hidden layers use Tanh; the output layer is followed by a
    softmax over dim 1, so ``forward`` returns class probabilities.

    Args:
        input_size: Number of input features.
        out_nodes: Number of output classes.
    """

    def __init__(self, input_size, out_nodes):
        super().__init__()
        widths = (input_size, 26, 16, 6)
        stack = []
        # One Linear + Tanh pair per consecutive pair of widths.
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.Tanh())
        stack.append(nn.Linear(6, out_nodes))
        stack.append(nn.Softmax(dim=1))
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return class probabilities for the batch ``x``."""
        probabilities = self.layers(x)
        return probabilities


def train_dfnn(dfnn, learning_rate = 0.0003,  threshold = 1e-6, train_loader = None, train_data = None, val_loader = None):
    """Train a probability-output classifier until the training loss settles.

    Training runs whole epochs with Adam until the sample-averaged training
    loss changes by no more than ``threshold`` between consecutive epochs (at
    least two epochs). After every epoch the model is scored on *all*
    validation batches and one progress line is printed.

    Args:
        dfnn: Model returning class probabilities of shape
            ``(batch, classes)``. It is not re-initialised here, so a
            pre-trained model keeps its weights.
        learning_rate: Adam learning rate.
        threshold: Convergence tolerance on the training-loss change.
        train_loader: Iterable of ``(features, one_hot_label)`` batches.
            Defaults to the module-level ``train_labeled_loader``.
        train_data: Training samples; only its length is used, to average
            loss and accuracy. Defaults to the module-level ``train_labeled``.
        val_loader: Iterable of ``(features, one_hot_label)`` batches.
            Defaults to the module-level ``val_labeled_loader``.

    Returns:
        ``dfnn`` — the same object, trained in place.
    """
    # Fall back to the notebook globals so existing calls keep working.
    if train_loader is None:
        train_loader = train_labeled_loader
    if train_data is None:
        train_data = train_labeled
    if val_loader is None:
        val_loader = val_labeled_loader

    optimizer = torch.optim.Adam(dfnn.parameters(), lr=learning_rate)

    def nll_from_probs(probs, target):
        # The model already applies softmax, so feeding its output to
        # CrossEntropyLoss would apply softmax twice. Take the negative
        # log-likelihood of the probabilities instead; the clamp avoids log(0).
        return nn.functional.nll_loss(torch.log(probs.clamp_min(1e-12)), target)

    n_train = len(train_data)
    err = np.inf
    train_loss = []
    train_acc = []
    while err > threshold:
        total_loss = 0.0
        total_correct = 0
        for img, label in train_loader:
            optimizer.zero_grad()
            target = torch.argmax(label.float(), dim=1)
            pred = dfnn(img.float())
            loss = nll_from_probs(pred, target)
            loss.backward()
            optimizer.step()
            # Plain Python numbers: no autograd graph survives the batch.
            total_loss += loss.item() * len(img)
            total_correct += (torch.argmax(pred, dim=1) == target).sum().item()

        train_acc.append(total_correct / n_train * 100)
        train_loss.append(total_loss / n_train)
        if len(train_loss) > 1:
            err = abs(train_loss[-1] - train_loss[-2])

        # Score every validation batch, weighting each by its size.
        val_loss_sum = 0.0
        val_correct = 0
        val_seen = 0
        with torch.no_grad():
            for img, label in val_loader:
                target = torch.argmax(label.float(), dim=1)
                out = dfnn(img.float())
                val_loss_sum += nll_from_probs(out, target).item() * len(label)
                val_correct += (torch.argmax(out, dim=1) == target).sum().item()
                val_seen += len(label)
        val_acc = val_correct / val_seen * 100 if val_seen else float("nan")
        val_loss = val_loss_sum / val_seen if val_seen else float("nan")

        print("Epoch{} Train Acc : {:.4f} Train loss : {:.4f} Val Acc : {:.4f} Val Loss : {:.4f}".format(
            len(train_loss), train_acc[-1], train_loss[-1], val_acc, val_loss))

    return dfnn







## Training AANN

In [None]:
# Pre-train the autoencoders on the unlabeled data, then fine-tune the
# stacked classifier on the labeled data.
StackedAE = train_stacked(
    unlabeled_loader, unlabeled,
    train_labeled_loader, train_labeled, train_label,
    val_labeled_loader, val_labeled, val_label,
)

# Confusion matrix on the training split ...
plot_confusion_matrix(
    StackedAE,
    torch.tensor(train_labeled, dtype = torch.float32),
    torch.nn.functional.one_hot(torch.tensor(train_label,dtype=torch.long),num_classes=-1),
)
# ... and on the held-out test split.
plot_confusion_matrix(
    StackedAE,
    torch.tensor(test_labeled, dtype = torch.float32),
    torch.nn.functional.one_hot(torch.tensor(test_label,dtype=torch.long),num_classes=-1),
)

## Training DFNN without Pretraining

In [None]:
# One output node per distinct training label; input width taken from the data.
outnodes = len(np.unique(train_label))
dfnn = DFNN(unlabeled.shape[1], outnodes)

# Trained with the default learning rate and threshold.
dfnn = train_dfnn(dfnn)

# Confusion matrix on the held-out test split.
plot_confusion_matrix(
    dfnn,
    torch.tensor(test_labeled, dtype = torch.float32),
    torch.nn.functional.one_hot(torch.tensor(test_label,dtype=torch.long),num_classes=-1),
)

## Training DFNN with Pretraining

In [None]:
class p_DFNN(nn.Module):
    """Classifier that reuses the three stages of a stacked autoencoder.

    The stages are taken by reference from ``stacked_autoencoder`` (its
    ``aann1``, ``aann2`` and ``aann3`` attributes), so training this model
    also updates the modules held by the object they came from. A new linear
    head maps the 6-wide code of the last stage to ``outnodes`` classes.

    Args:
        stacked_autoencoder: Object exposing ``aann1``, ``aann2``, ``aann3``;
            each must return a pair whose second element is its code.
        outnodes: Number of output classes.
    """

    def __init__(self, stacked_autoencoder, outnodes):
        super().__init__()
        # Pretrained stages, shared rather than copied.
        self.encoder1, self.encoder2, self.encoder3 = (
            stacked_autoencoder.aann1,
            stacked_autoencoder.aann2,
            stacked_autoencoder.aann3,
        )
        # Classification head on top of the last code.
        self.fc = nn.Linear(6, outnodes)

    def forward(self, x):
        """Return class probabilities (softmax over dim 1) for the batch ``x``."""
        code = x
        for stage in (self.encoder1, self.encoder2, self.encoder3):
            # Only the compressed code travels on; reconstructions are dropped.
            _, code = stage(code)
        return torch.softmax(self.fc(code), dim=1)



# One output node per distinct training label.
outnodes = len(np.unique(train_label))

# p_DFNN takes its three stages from StackedAE by reference, so the training
# below also updates the modules held by StackedAE.
p_dfnn = p_DFNN(StackedAE, outnodes)
p_dfnn = train_dfnn(p_dfnn)

# Confusion matrix on the training split ...
plot_confusion_matrix(
    p_dfnn,
    torch.tensor(train_labeled, dtype = torch.float32),
    torch.nn.functional.one_hot(torch.tensor(train_label,dtype=torch.long),num_classes=-1),
)
# ... and on the held-out test split.
plot_confusion_matrix(
    p_dfnn,
    torch.tensor(test_labeled, dtype = torch.float32),
    torch.nn.functional.one_hot(torch.tensor(test_label,dtype=torch.long),num_classes=-1),
)