In [1]:
#  Task2: Comparison of normalization methods for classification on Image dataset 2
# • Model: MLFFNN with 2 hidden layers and tanh activation function
# • Loss function: Cross-entropy
# • Mode of learning: Mini-batch mode
# • Stopping criterion: Change in average error below a threshold
# • Weight update rule: AdaM
# • Normalization method: (1) No normalization, (2) Batch normalization with post-activation normalization 
# • Use the same value of learning rate parameter
# • Use the same initial random values of weights
# • For each normalization method, the report should include the following: (a) Plot of
# average error on training data vs. epoch, (b) Confusion matrices for training data and
# test data
# • Compare number of epochs taken for convergence for normalization methods

# Dataset:
# It is located in the ./task2 folder,
# which contains 6 files:
# test_data.csv
# test_label.csv
# train_data.csv
# train_label.csv
# val_data.csv
# val_label.csv

# Train data shape -> (2000, 36)
# Train label shape -> (2000, 1)
# Test data shape -> (500, 36)
# Test label shape -> (500, 1)
# Val data shape -> (500, 36)
# Val label shape -> (500, 1)

# You can use Pytorch as well. No need to implement neural network from scratch.
# You can use any library for plotting the graphs.


In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")


In [3]:
# Directory that the plotting helpers prefix to every saved figure path.
folder = ""


def seed_all(seed=59+87+122+143):
    """Seed the global NumPy and PyTorch random number generators.

    CUDA-specific seeding and cuDNN determinism flags are not touched here.

    Args:
        seed: Integer handed to both libraries; the default expression
            evaluates to 411.
    """
    # NumPy first, then torch: the same order on every call.
    for seeder in (np.random.seed, torch.manual_seed):
        seeder(seed)


In [4]:
class DataLoader:
    """Load the train / validation / test CSV files into NumPy arrays.

    After construction the instance exposes ``train_data``, ``train_labels``,
    ``val_data``, ``val_labels``, ``test_data`` and ``test_labels``.
    """

    def __init__(self, train_data_path, train_label_path, val_data_path, val_label_path, test_data_path, test_label_path):
        split_sources = {
            "train": (train_data_path, train_label_path),
            "val": (val_data_path, val_label_path),
            "test": (test_data_path, test_label_path),
        }
        # Splits are read in train -> val -> test order.
        for split, (features_csv, labels_csv) in split_sources.items():
            features, targets = self.load_data(features_csv, labels_csv)
            setattr(self, f"{split}_data", features)
            setattr(self, f"{split}_labels", targets)

    def load_data(self, data_path, label_path):
        """Read one header-less feature CSV and its header-less label CSV.

        Returns:
            A ``(data, labels)`` tuple: ``data`` is a float32 array of the CSV
            contents, ``labels`` is a flattened int64 array.
        """
        feature_frame = pd.read_csv(data_path, header=None)
        label_frame = pd.read_csv(label_path, header=None)
        features = feature_frame.values.astype(np.float32)
        targets = label_frame.values.ravel().astype(np.int64)
        return features, targets

    def normalize_data(self):
        """Standardise all three splits with statistics fitted on the training split."""
        fitted_scaler = StandardScaler().fit(self.train_data)
        for attribute in ("train_data", "val_data", "test_data"):
            setattr(self, attribute, fitted_scaler.transform(getattr(self, attribute)))


In [5]:
class MLFFNN(nn.Module):
    """Multi-layer feed-forward network with two tanh hidden layers.

    The network maps ``input_size`` features to ``output_size`` raw class
    scores (logits). No softmax is applied to the output.

    When ``batch_norm`` is true, a ``BatchNorm1d`` layer follows each hidden
    activation, i.e. post-activation normalization as requested in the task
    description at the top of the notebook.

    Args:
        input_size: Number of input features.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        output_size: Number of classes (size of the logit vector).
        batch_norm: Whether to insert batch normalization after each hidden
            activation.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, batch_norm):
        super(MLFFNN, self).__init__()
        # The linear layers are created first and in a fixed order, so a given
        # seed yields the same initial weights with and without batch norm.
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)
        self.activation = nn.Tanh()
        self.batch_norm = batch_norm
        if self.batch_norm:
            self.bn1 = nn.BatchNorm1d(hidden_size1)
            self.bn2 = nn.BatchNorm1d(hidden_size2)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for a batch of inputs.

        Args:
            x: Batch of input rows; the last dimension must equal ``input_size``.

        Returns:
            Tensor whose last dimension is ``output_size``.
        """
        x = self.activation(self.fc1(x))
        if self.batch_norm:
            x = self.bn1(x)  # normalize after the activation
        x = self.activation(self.fc2(x))
        if self.batch_norm:
            x = self.bn2(x)
        # Output layer: previously skipped, which made the model return the
        # second hidden activation instead of per-class scores.
        return self.fc3(x)


In [6]:
class NeuralNetworkClassifier:
    """Train, evaluate and visualise a PyTorch classification model.

    Args:
        model: The ``nn.Module`` to train.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Maximum number of training epochs.
        threshold: Training stops once the absolute change in the average
            training error between two consecutive epochs drops below this.
        batch_size: Number of samples per mini-batch.
        optimizer_name: Label used in plot titles and output file names.
            Defaults to the optimizer's class name; callers may also overwrite
            the ``optimizer_name`` attribute after construction.
    """

    def __init__(self, model, criterion, optimizer, epochs, threshold, batch_size, optimizer_name=None):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.epochs = epochs
        self.threshold = threshold
        self.batch_size = batch_size
        # Always define the label so the plotting helpers cannot fail with an
        # AttributeError when the caller did not set it.
        if optimizer_name is None:
            optimizer_name = type(optimizer).__name__
        self.optimizer_name = optimizer_name

    def train(self, train_data, train_labels):
        """Run mini-batch training until convergence or ``epochs`` is reached.

        Args:
            train_data: Tensor of training inputs, one sample per row.
            train_labels: Tensor of integer class labels, aligned with ``train_data``.

        Returns:
            List with the average training error of each completed epoch.

        Raises:
            ValueError: If ``train_data`` contains no samples.
        """
        num_samples = train_data.shape[0]
        if num_samples == 0:
            raise ValueError("train_data is empty; nothing to train on")

        train_errors = []
        for epoch in tqdm(range(self.epochs), desc="Training", unit="epoch"):
            self.model.train()
            running_loss = 0.0
            for start in range(0, num_samples, self.batch_size):
                data = train_data[start:start + self.batch_size]
                labels = train_labels[start:start + self.batch_size]
                self.optimizer.zero_grad()
                outputs = self.model(data)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                # Weight by batch length so a shorter final batch does not skew
                # the epoch average (assumes the criterion returns a per-batch
                # mean, the default reduction of nn.CrossEntropyLoss).
                running_loss += loss.item() * data.shape[0]
            # Record the epoch average rather than the last mini-batch loss.
            train_errors.append(running_loss / num_samples)
            if epoch > 0 and abs(train_errors[-1] - train_errors[-2]) < self.threshold:
                print(f"Converged at epoch {epoch}")
                break
        return train_errors

    def test(self, test_data, test_labels):
        """Evaluate the model on a labelled data set.

        Returns:
            Tuple ``(loss, accuracy, predicted)``: the criterion value as a
            float, the fraction of correct predictions, and the tensor of
            predicted class indices.
        """
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(test_data)
            loss = self.criterion(outputs, test_labels)
            predicted = torch.argmax(outputs, dim=1)
            accuracy = (predicted == test_labels).sum().item() / test_labels.shape[0]
        return loss.item(), accuracy, predicted

    def _plot_confusion_matrix(self, data, labels, split):
        """Compute, draw and save the confusion matrix for one data split.

        ``split`` is the display name ("Train" or "Test"); its lower-case form
        is used in the saved file name.
        """
        self.model.eval()
        with torch.no_grad():  # inference only, no autograd graph needed
            predictions = torch.argmax(self.model(data), dim=1)
        true_labels = torch.as_tensor(labels).cpu().numpy()
        cm = confusion_matrix(true_labels, predictions.cpu().numpy())
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
        plt.xlabel("Predicted")
        plt.ylabel("True")
        plt.title(f"{self.optimizer_name} Confusion Matrix ({split})")
        # os.path.join keeps the path relative when ``folder`` is empty.
        plt.savefig(os.path.join(folder, f"{self.optimizer_name}_confusion_matrix_{split.lower()}.png"))
        return cm

    def confusion_matrix_train(self, train_data, train_labels):
        """Plot, save and return the confusion matrix on the training data."""
        return self._plot_confusion_matrix(train_data, train_labels, "Train")

    def confusion_matrix_test(self, test_data, test_labels):
        """Plot, save and return the confusion matrix on the test data."""
        return self._plot_confusion_matrix(test_data, test_labels, "Test")

    def plot_avg_train_error(self, train_errors):
        """Plot the per-epoch average training error, save it and show it."""
        plt.figure()  # start a fresh figure instead of drawing on a stale one
        plt.plot(train_errors)
        plt.xlabel("Epoch")
        plt.ylabel("Average Error")
        plt.title(f"{self.optimizer_name} Average Error on Training Data")
        plt.savefig(os.path.join(folder, f"{self.optimizer_name}_average_error.png"))
        plt.show()
        


In [7]:
# Locations of the six CSV files of the task-2 dataset (features + labels per split).
train_data_path, train_label_path = "./task2/train_data.csv", "./task2/train_label.csv"
val_data_path, val_label_path = "./task2/val_data.csv", "./task2/val_label.csv"
test_data_path, test_label_path = "./task2/test_data.csv", "./task2/test_label.csv"

# Read every split into NumPy arrays. The optional standardisation step
# (data_loader.normalize_data()) is not invoked here.
data_loader = DataLoader(
    train_data_path,
    train_label_path,
    val_data_path,
    val_label_path,
    test_data_path,
    test_label_path,
)

# Wrap each array in a torch tensor so it can be fed to the model.
train_data, train_labels = torch.tensor(data_loader.train_data), torch.tensor(data_loader.train_labels)
val_data, val_labels = torch.tensor(data_loader.val_data), torch.tensor(data_loader.val_labels)
test_data, test_labels = torch.tensor(data_loader.test_data), torch.tensor(data_loader.test_labels)


In [12]:
# Define hyperparameters
input_size = data_loader.train_data.shape[1]
hidden_size1 = 60
hidden_size2 = 30
output_size = len(np.unique(data_loader.train_labels))  # Number of unique classes
epochs = 1000
threshold = 1e-5
# NOTE(review): the mini-batch size is derived from the number of input
# features, not from the number of samples. Value kept as-is; confirm intended.
batch_size = input_size//10
learning_rate = 0.1
momentum = 0.9  # not passed to Adam in the experiment cells visible in this notebook
# The experiment cells pass `weight_decay` to Adam, so the name must exist on a
# fresh kernel. 0.0 is Adam's default and disables L2 regularisation; the
# previously commented-out value was 0.0001.
weight_decay = 0.0
betas = (0.9, 0.999)

# Output directory for figures, keyed by the main run settings.
folder = f'output/task2/{learning_rate}_{threshold}_{epochs}'
os.makedirs(folder, exist_ok=True)


In [13]:
# Experiment 1: network without any normalization layers.
seed_all()  # reseed first so the initial weights are reproducible
model = MLFFNN(
    input_size,
    hidden_size1,
    hidden_size2,
    output_size,
    batch_norm=False,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    betas=betas,
    weight_decay=weight_decay,
)
classifier = NeuralNetworkClassifier(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
    threshold=threshold,
    batch_size=batch_size,
)
# Label used in the plot titles and in the names of the saved figures.
classifier.optimizer_name = "No_Normalization"

# Train, then report loss and accuracy on the test split.
train_errors = classifier.train(train_data, train_labels)
loss, accuracy, predicted = classifier.test(test_data, test_labels)
print(f"Loss: {loss}, Accuracy: {accuracy}")

# Figures: training-error curve, then confusion matrices for train and test.
classifier.plot_avg_train_error(train_errors)
cm_train = classifier.confusion_matrix_train(train_data, train_labels)
cm_test = classifier.confusion_matrix_test(test_data, test_labels)



Training:   8%|▊         | 85/1000 [02:37<28:16,  1.85s/epoch]


KeyboardInterrupt: 

In [None]:
# Experiment 2: same network with batch normalization enabled.
seed_all()  # reseed first so the initial weights are reproducible
model = MLFFNN(
    input_size,
    hidden_size1,
    hidden_size2,
    output_size,
    batch_norm=True,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    betas=betas,
    weight_decay=weight_decay,
)
classifier = NeuralNetworkClassifier(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
    threshold=threshold,
    batch_size=batch_size,
)
# Label used in the plot titles and in the names of the saved figures.
classifier.optimizer_name = "Batch_Normalization"

# Train, then report loss and accuracy on the test split.
train_errors = classifier.train(train_data, train_labels)
loss, accuracy, predicted = classifier.test(test_data, test_labels)
print(f"Loss: {loss}, Accuracy: {accuracy}")

# Figures: training-error curve, then confusion matrices for train and test.
classifier.plot_avg_train_error(train_errors)
cm_train = classifier.confusion_matrix_train(train_data, train_labels)
cm_test = classifier.confusion_matrix_test(test_data, test_labels)
