In [82]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tabulate import tabulate
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import wandb
import pickle

In [83]:
class DataExploration:
    def __init__(self,df):
        self.df = df

    def describe_dataset(self, print_all):
        if print_all:
            print(self.df.describe())
        # print(self.df.head())
        # print(self.df.info())

    def correlation_list(self, printer):
        correlationdict = {}        
        for feature in self.df.columns:
            correlation = self.df[feature].corr(self.df['quality'])
            correlationdict[feature] = correlation

        average_correlation = {}

        for feature, correlation_value in correlationdict.items():
            average_correlation[feature] = abs(correlation_value)

        sorted_table = sorted(average_correlation.items(), key=lambda item: item[1], reverse=True)

        headers = ["Feature", "Absolute Correlation"]
        if printer:
            print(tabulate(sorted_table, headers, tablefmt="grid"))

In [84]:
class EDA_Plots(DataExploration):        
    def histogram_of_features(self, plotter):
        if plotter:
            self.df.hist(figsize=(15, 10), bins=1140, edgecolor = "blue")
            plt.suptitle('How the Numerical Features are Distributed')
            plt.show()

    def correlation_matrix(self, plotter):
        if plotter:
            correlation_matrix = self.df.corr()
            plt.figure(figsize=(12, 8))
            sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
            plt.title('Correlation Matrix')
            plt.show()

    def pairplot_top_features(self, plotter):
        if plotter:
            selected_features = ['alcohol', 'volatile acidity', 'sulphates', 'citric acid']
            sns.pairplot(self.df, vars=selected_features, hue='quality', palette='tab20', plot_kws={'alpha': 0.5})
            plt.suptitle('Pairplot of Selected Features Colored by Genre', y=1.02)
            plt.show()    
    
    def pairplot_all_features(self, plotter):
        if plotter:
            sns.pairplot(self.df, hue='quality', palette='tab20', plot_kws={'alpha': 0.5})
            plt.suptitle('Pairplot of Selected Features Colored by Genre', y=1.02)
            plt.show()

In [85]:
class DataSplit(EDA_Plots):

    def normalize_data(self):
        features = self.df.drop(columns=['quality', 'Id'])
        labels = self.df['quality']
    
        ss = StandardScaler()
        features_standardized = ss.fit_transform(features)
        
        normz = MinMaxScaler()
        features_normalized = normz.fit_transform(features_standardized)

        df_normalized = pd.DataFrame(features_normalized, columns=features.columns)

        df_normalized['quality'] = labels.values
        df_normalized['Id'] = self.df['Id']

        path = 'C:/Users/raaga/OneDrive/Desktop/IIIT-H/3-1/SMAI/smai-m24-assignments-rraagav/data/interim/3/WineQT_Normalized.csv'
        df_normalized.to_csv(path, index=False)  

        return df_normalized

    def split_data(self, data):
        features = data.drop(columns=['quality', 'Id'])
        # features = data[['alcohol', 'volatile acidity', 'sulphates', 'citric acid']]      
        labels = pd.Categorical(data['quality']).codes

        # print("Original labels:")
        # print(data['quality'].unique())
        
        # print("\nEncoded labels:")
        # print(np.unique(labels))
        
        # # Display the distribution of labels
        # print('\nDistribution of the original labels:')
        # print(data['quality'].value_counts())
        
        np.random.seed(42)
        indices = np.arange(len(features))
        np.random.shuffle(indices)

        train_size = 0.8
        train_index = int(len(features) * train_size)

        val_size = 0.1
        val_index = int(len(features) * val_size)
        
        train_indices = indices[:train_index]
        val_indices = indices[train_index:train_index+val_index]
        test_indices = indices[train_index+val_index:]
    
        X_train = features.iloc[train_indices].values
        y_train = labels[train_indices]

        X_eval = features.iloc[val_indices].values
        y_eval = labels[val_indices]

        X_test = features.iloc[test_indices].  values
        y_test = labels[test_indices]

        return X_train, y_train, X_eval, y_eval, X_test, y_test

In [86]:
df = pd.read_csv(r'C:\Users\raaga\OneDrive\Desktop\IIIT-H\3-1\SMAI\smai-m24-assignments-rraagav\data\interim\3\WineQT.csv')

In [87]:
DataExplorer = DataExploration(df)
DataExplorer.describe_dataset(print_all=False)
DataExplorer.correlation_list(printer=True)

+----------------------+------------------------+
| Feature              |   Absolute Correlation |
| quality              |              1         |
+----------------------+------------------------+
| alcohol              |              0.484866  |
+----------------------+------------------------+
| volatile acidity     |              0.407394  |
+----------------------+------------------------+
| sulphates            |              0.25771   |
+----------------------+------------------------+
| citric acid          |              0.240821  |
+----------------------+------------------------+
| total sulfur dioxide |              0.183339  |
+----------------------+------------------------+
| density              |              0.175208  |
+----------------------+------------------------+
| chlorides            |              0.124085  |
+----------------------+------------------------+
| fixed acidity        |              0.12197   |
+----------------------+------------------------+


In [88]:
Explorer_plots = EDA_Plots(df)
Explorer_plots.histogram_of_features(plotter=False)
Explorer_plots.correlation_matrix(plotter=False)
Explorer_plots.pairplot_top_features(plotter=False)
Explorer_plots.pairplot_all_features(plotter=False)

In [None]:
splitter = DataSplit(df)
df_normalized = splitter.normalize_data()

X_train, y_train, X_eval, y_eval, X_test, y_test = splitter.split_data(df_normalized)

In [None]:
class MLP_Classifier:
    def __init__(self, input_size, output_size):
        self.alpha = None
        self.activation_function = None
        self.activation_derivative = None
        self.optimizers = None
        self.hidden_layers = None
        self.neurons_per_layer = None
        self.batch_size = None
        self.epochs = None
        self.weights = []
        self.biases = []
        self.losses = []
        self.accuracies = []
        self.input_size = input_size
        self.output_size = output_size
        
    def set_params(self, alpha, activation_function, optimizer, hidden_layers, neurons_per_layer, batch_size, epochs):
        self.alpha = alpha
        self.set_activation_function(activation_function)
        self.optimizers = optimizer
        self.hidden_layers = hidden_layers
        self.neurons_per_layer = neurons_per_layer
        self.epochs = epochs
        self.batch_size = batch_size
        
    def initialize_weights(self):
        layer_sizes = [self.input_size] + self.neurons_per_layer + [self.output_size]
        self.weights = []
        self.biases = []
        for i in range(len(layer_sizes)-1):
            w = np.random.randn(layer_sizes[i], layer_sizes[i+1]) * 0.01 # Xavier initialization, or He 
            b = np.zeros((1, layer_sizes[i+1]))
            self.weights.append(w)
            self.biases.append(b)
        
    def forward(self, X):
        activations = [X]
        Zs = []
        for i in range(len(self.weights)):
            w = self.weights[i]
            b = self.biases[i]
            Z = np.dot(activations[-1], w) + b
            Zs.append(Z)
            if i == len(self.weights) - 1: 
                # Output/last layer in nn. Do softmax. 
                A = self.softmax(Z)
            else:
                # hidden layer present, use the activation function 
                A = self.activation_function(Z)
            activations.append(A)
        return activations, Zs
        
    def backward(self, activations, Zs, y):
        gradients_w = []
        gradients_b = []
        m = y.shape[0]
        
        # One-hot encode y
        y_one_hot = np.zeros_like(activations[-1])
        y_one_hot[np.arange(m), y] = 1
        
        # Output layer error
        delta = activations[-1] - y_one_hot 
        
        for i in reversed(range(len(self.weights))):
            dW = np.dot(activations[i].T, delta) / m  
            dB = np.sum(delta, axis=0, keepdims=True) / m
            gradients_w.insert(0, dW)
            gradients_b.insert(0, dB)
            if i > 0:
                delta = np.dot(delta, self.weights[i].T)
                delta *= self.activation_derivative(Zs[i-1])
        return gradients_w, gradients_b

    def sigmoid(self, Z):
        return 1 / (1 + np.exp(-Z))
    
    def sigmoid_derivative(self, Z):
        sig = self.sigmoid(Z)
        return sig * (1 - sig)
    
    def tanh(self, Z):
        return np.tanh(Z)
    
    def tanh_derivative(self, Z):
        return 1 - np.tanh(Z) ** 2
    
    def relu(self, Z):
        return np.maximum(0, Z)
    
    def relu_derivative(self, Z):
        return np.where(Z > 0, 1, 0)
    
    def linear(self, Z):
        return Z
    
    def linear_derivative(self, Z):
        return np.ones_like(Z)
    
    def softmax(self, Z):
        exp_scores = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    
    def set_activation_function(self, func_name):
        if func_name == "sigmoid":
            self.activation_function = self.sigmoid
            self.activation_derivative = self.sigmoid_derivative
        elif func_name == "tanh":
            self.activation_function = self.tanh
            self.activation_derivative = self.tanh_derivative
        elif func_name == "relu":
            self.activation_function = self.relu
            self.activation_derivative = self.relu_derivative
        elif func_name == "linear":
            self.activation_function = self.linear
            self.activation_derivative = self.linear_derivative
    
    def compute_loss(self, y_pred, y_true):
        m = y_true.shape[0]
        loss = -np.sum(np.log(y_pred[np.arange(m), y_true] + 1e-8)) / m
        return loss

    def compute_accuracy(self, y_pred, y_true):
        y_pred_classes = np.argmax(y_pred, axis=1)
        accuracy = np.mean(y_pred_classes == y_true)
        return accuracy
        
    def compute_classification_metrics(self, y_true, y_pred):
        precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
        recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
        return precision, recall, f1

    def update_parameters(self, gradients_w, gradients_b):
        for i in range(len(self.weights)):
            self.weights[i] -= self.alpha * gradients_w[i]
            self.biases[i] -= self.alpha * gradients_b[i]
    
    def bgd(self, X_train, y_train):
        activations, Zs = self.forward(X_train)
        gradients_w, gradients_b = self.backward(activations, Zs, y_train)
        self.update_parameters(gradients_w, gradients_b)
        loss = self.compute_loss(activations[-1], y_train)
        accuracy = self.compute_accuracy(activations[-1], y_train)
        return loss, accuracy
    
    def mini_bgd(self, X_train, y_train):
        m = X_train.shape[0]
        perm = np.random.permutation(m)
        X_shuffled = X_train[perm]
        y_shuffled = y_train[perm]
        num_batches = int(np.ceil(m / self.batch_size))
        for i in range(num_batches):
            start = i * self.batch_size
            end = min(start + self.batch_size, m)
            X_batch = X_shuffled[start:end]
            y_batch = y_shuffled[start:end]
            activations, Zs = self.forward(X_batch)
            gradients_w, gradients_b = self.backward(activations, Zs, y_batch)
            self.update_parameters(gradients_w, gradients_b)
        activations, _ = self.forward(X_train)
        loss = self.compute_loss(activations[-1], y_train)
        accuracy = self.compute_accuracy(activations[-1], y_train)
        return loss, accuracy
    
    def sgd(self, X_train, y_train):
        m = X_train.shape[0]
        perm = np.random.permutation(m)
        X_shuffled = X_train[perm]
        y_shuffled = y_train[perm]
        for i in range(m):
            X_sample = X_shuffled[i:i+1]
            y_sample = y_shuffled[i:i+1]
            activations, Zs = self.forward(X_sample)
            gradients_w, gradients_b = self.backward(activations, Zs, y_sample)
            
            self.update_parameters(gradients_w, gradients_b)
        activations, _ = self.forward(X_train)
        loss = self.compute_loss(activations[-1], y_train)
        accuracy = self.compute_accuracy(activations[-1], y_train)
        return loss, accuracy
    
    def fit(self, X_train, y_train, early_stopping, patience=5):
        m = X_train.shape[0]
        best_loss = float('inf')
        patience_counter = 0
        self.initialize_weights()
        
        for epoch in range(self.epochs):
            if self.optimizers == 'bgd':
                epoch_loss, epoch_accuracy = self.bgd(X_train, y_train)
            elif self.optimizers == 'mini_bgd':
                epoch_loss, epoch_accuracy = self.mini_bgd(X_train, y_train)
            elif self.optimizers == 'sgd':
                epoch_loss, epoch_accuracy = self.sgd(X_train, y_train)
            else:
                raise ValueError("Invalid optimizer name")
            
            self.losses.append(epoch_loss)
            self.accuracies.append(epoch_accuracy)

            # Early stopping part
            if early_stopping:
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    patience_counter = 0
                else:
                    patience_counter += 1
                if patience_counter > patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
            print(f"Epoch {epoch+1}/{self.epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")

    def predict(self, X):
        activations, Zs = self.forward(X)
        y_pred = np.argmax(activations[-1], axis=1)
        return y_pred
    
    def gradient_check(self, X, y, epsilon=1e-7):
        activations, Zs = self.forward(X)
        gradients_w, gradients_b = self.backward(activations, Zs, y)
        
        params = []
        grads = []
        for w, b in zip(self.weights, self.biases):
            params.extend([w, b])
        for gw, gb in zip(gradients_w, gradients_b):
            grads.extend([gw, gb])
        
        num_grads = []
        for param in params:
            num_grad = np.zeros_like(param)
            iterator = np.nditer(param, flags=['multi_index'], op_flags=['readwrite'])
            while not iterator.finished:
                idx = iterator.multi_index
                original_value = param[idx]
                
                param[idx] = original_value + epsilon
                activations_plus, _ = self.forward(X)
                loss_plus = self.compute_loss(activations_plus[-1], y)
                
                param[idx] = original_value - epsilon
                activations_minus, _ = self.forward(X)
                loss_minus = self.compute_loss(activations_minus[-1], y)
                
                param[idx] = original_value
                
                num_grad[idx] = (loss_plus - loss_minus) / (2 * epsilon)
                iterator.iternext()
            num_grads.append(num_grad)
        
        for i in range(len(grads)):
            grad = grads[i]
            num_grad = num_grads[i]
            numerator = np.linalg.norm(grad - num_grad)
            denominator = np.linalg.norm(grad) + np.linalg.norm(num_grad) + 1e-8
            relative_error = numerator / denominator
            print(f"Parameter {i+1} - Relative Error: {relative_error:.10e}")
            if relative_error > 1e-5:
                print("Gradients dont match!!!vfgsdgbhjxndwgtbeiurowefnipwmod.")
                return
        print("It worksssssssssssssssss")    

In [91]:
output_txt_file = open("C:/Users/raaga/OneDrive/Desktop/IIIT-H/3-1/SMAI/smai-m24-assignments-rraagav/assignments/3/figures/best_accuracy.txt", "w")

input_size = X_train.shape[1]
output_size = len(np.unique(y_train))

mlp_activ = MLP_Classifier(input_size, output_size)

mlp_activ.set_params(alpha=0.01, activation_function='tanh', optimizer='sgd', hidden_layers= 3, neurons_per_layer=[128, 64, 32], batch_size=32, epochs=50)
mlp_activ.fit(X_train, y_train, early_stopping=True, patience=5)

test_activations, _ = mlp_activ.forward(X_test)
test_loss = mlp_activ.compute_loss(test_activations[-1], y_test)
test_accuracy = mlp_activ.compute_accuracy(test_activations[-1], y_test)
test_precision, test_recall, test_f1 = mlp_activ.compute_classification_metrics(y_test, mlp_activ.predict(X_test))
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")
print(f"Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}, Test F1: {test_f1:.4f}")

train_activations, _ = mlp_activ.forward(X_train)
train_loss = mlp_activ.compute_loss(train_activations[-1], y_train)
train_accuracy = mlp_activ.compute_accuracy(train_activations[-1], y_train)
train_precision, train_recall, train_f1 = mlp_activ.compute_classification_metrics(y_train, mlp_activ.predict(X_train))
print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
print(f"Train Precision: {train_precision:.4f}, Train Recall: {train_recall:.4f}, Train F1: {train_f1:.4f}")

eval_activations, _ = mlp_activ.forward(X_eval)
eval_loss = mlp_activ.compute_loss(eval_activations[-1], y_eval)
eval_accuracy = mlp_activ.compute_accuracy(eval_activations[-1], y_eval)
eval_precision, eval_recall, eval_f1 = mlp_activ.compute_classification_metrics(y_eval, mlp_activ.predict(X_eval))
print(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.4f}")
print(f"Eval Precision: {eval_precision:.4f}, Eval Recall: {eval_recall:.4f}, Eval F1: {eval_f1:.4f}")

output_txt_file.write(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}\n")
output_txt_file.write(f"Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}, Test F1: {test_f1:.4f}\n\n")

output_txt_file.write(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}\n")
output_txt_file.write(f"Train Precision: {train_precision:.4f}, Train Recall: {train_recall:.4f}, Train F1: {train_f1:.4f}\n\n")

output_txt_file.write(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.4f}\n")
output_txt_file.write(f"Eval Precision: {eval_precision:.4f}, Eval Recall: {eval_recall:.4f}, Eval F1: {eval_f1:.4f}")

Epoch 1/50, Loss: 1.2297, Accuracy: 0.3950
Epoch 2/50, Loss: 1.1871, Accuracy: 0.4256
Epoch 3/50, Loss: 1.1812, Accuracy: 0.4311
Epoch 4/50, Loss: 1.1793, Accuracy: 0.4792
Epoch 5/50, Loss: 1.2052, Accuracy: 0.4322
Epoch 6/50, Loss: 1.1607, Accuracy: 0.4956
Epoch 7/50, Loss: 1.1447, Accuracy: 0.5011
Epoch 8/50, Loss: 1.1430, Accuracy: 0.5066
Epoch 9/50, Loss: 1.1439, Accuracy: 0.5077
Epoch 10/50, Loss: 1.1513, Accuracy: 0.4639
Epoch 11/50, Loss: 1.1366, Accuracy: 0.5000
Epoch 12/50, Loss: 1.1379, Accuracy: 0.5000
Epoch 13/50, Loss: 1.1348, Accuracy: 0.5120
Epoch 14/50, Loss: 1.1436, Accuracy: 0.5011
Epoch 15/50, Loss: 1.1370, Accuracy: 0.5098
Epoch 16/50, Loss: 1.1460, Accuracy: 0.4956
Epoch 17/50, Loss: 1.1407, Accuracy: 0.5131
Epoch 18/50, Loss: 1.1472, Accuracy: 0.4989
Early stopping at epoch 19
Test Loss: 1.0831, Test Accuracy: 0.5391
Test Precision: 0.4606, Test Recall: 0.5391, Test F1: 0.4964
Train Loss: 1.1399, Train Accuracy: 0.5033
Train Precision: 0.4259, Train Recall: 0.5033

60

In [92]:
# # Effect of non linearity
# activ_list = ['sigmoid', 'tanh', 'relu', 'linear']
# output_txt_file = open("C:/Users/raaga/OneDrive/Desktop/IIIT-H/3-1/SMAI/smai-m24-assignments-rraagav/assignments/3/figures/activation_functions.txt", "w")

# input_size = X_train.shape[1]
# output_size = len(np.unique(y_train))

# mlp_activ = MLP_Classifier(input_size, output_size)

# for activ in activ_list:
#     mlp_activ.set_params(alpha=0.01, activation_function=activ, optimizer='sgd', hidden_layers= 3, neurons_per_layer=[128, 64, 32], batch_size=32, epochs=50)
#     mlp_activ.fit(X_train, y_train, early_stopping=True, patience=5)

#     test_activations, _ = mlp_activ.forward(X_test)
#     test_loss = mlp_activ.compute_loss(test_activations[-1], y_test)
#     test_accuracy = mlp_activ.compute_accuracy(test_activations[-1], y_test)
#     test_precision, test_recall, test_f1 = mlp_activ.compute_classification_metrics(y_test, mlp_activ.predict(X_test))
#     print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")
#     print(f"Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}, Test F1: {test_f1:.4f}")

#     train_activations, _ = mlp_activ.forward(X_train)
#     train_loss = mlp_activ.compute_loss(train_activations[-1], y_train)
#     train_accuracy = mlp_activ.compute_accuracy(train_activations[-1], y_train)
#     train_precision, train_recall, train_f1 = mlp_activ.compute_classification_metrics(y_train, mlp_activ.predict(X_train))
#     print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
#     print(f"Train Precision: {train_precision:.4f}, Train Recall: {train_recall:.4f}, Train F1: {train_f1:.4f}")

#     eval_activations, _ = mlp_activ.forward(X_eval)
#     eval_loss = mlp_activ.compute_loss(eval_activations[-1], y_eval)
#     eval_accuracy = mlp_activ.compute_accuracy(eval_activations[-1], y_eval)
#     eval_precision, eval_recall, eval_f1 = mlp_activ.compute_classification_metrics(y_eval, mlp_activ.predict(X_eval))
#     print(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.4f}")
#     print(f"Eval Precision: {eval_precision:.4f}, Eval Recall: {eval_recall:.4f}, Eval F1: {eval_f1:.4f}")

#     output_txt_file.write(f"Activation Function: {activ}\n")
#     output_txt_file.write(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}\n")
#     output_txt_file.write(f"Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}, Test F1: {test_f1:.4f}\n\n")

#     output_txt_file.write(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}\n")
#     output_txt_file.write(f"Train Precision: {train_precision:.4f}, Train Recall: {train_recall:.4f}, Train F1: {train_f1:.4f}\n\n")

#     output_txt_file.write(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.4f}\n")
#     output_txt_file.write(f"Eval Precision: {eval_precision:.4f}, Eval Recall: {eval_recall:.4f}, Eval F1: {eval_f1:.4f}\n\n")


In [93]:
# # Effect of Learning Rate:
# lr_list = [0.001, 0.01, 0.1, 0.5]
# output_txt_file = open("C:/Users/raaga/OneDrive/Desktop/IIIT-H/3-1/SMAI/smai-m24-assignments-rraagav/assignments/3/figures/vary_alpha.txt", "w")

# input_size = X_train.shape[1]
# output_size = len(np.unique(y_train))

# mlp_a = MLP_Classifier(input_size, output_size)

# for a in lr_list:
#     mlp_a.set_params(alpha=a, activation_function='tanh', optimizer='sgd', hidden_layers= 3, neurons_per_layer=[128, 64, 32], batch_size=32, epochs=50)
#     mlp_a.fit(X_train, y_train, early_stopping=True, patience=5)

#     test_activations, _ = mlp_a.forward(X_test)
#     test_loss = mlp_a.compute_loss(test_activations[-1], y_test)
#     test_accuracy = mlp_a.compute_accuracy(test_activations[-1], y_test)
#     test_precision, test_recall, test_f1 = mlp_a.compute_classification_metrics(y_test, mlp_a.predict(X_test))
#     print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")
#     print(f"Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}, Test F1: {test_f1:.4f}")

#     train_activations, _ = mlp_a.forward(X_train)
#     train_loss = mlp_a.compute_loss(train_activations[-1], y_train)
#     train_accuracy = mlp_a.compute_accuracy(train_activations[-1], y_train)
#     train_precision, train_recall, train_f1 = mlp_a.compute_classification_metrics(y_train, mlp_a.predict(X_train))
#     print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
#     print(f"Train Precision: {train_precision:.4f}, Train Recall: {train_recall:.4f}, Train F1: {train_f1:.4f}")

#     eval_activations, _ = mlp_a.forward(X_eval)
#     eval_loss = mlp_a.compute_loss(eval_activations[-1], y_eval)
#     eval_accuracy = mlp_a.compute_accuracy(eval_activations[-1], y_eval)
#     eval_precision, eval_recall, eval_f1 = mlp_a.compute_classification_metrics(y_eval, mlp_a.predict(X_eval))
#     print(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.4f}")
#     print(f"Eval Precision: {eval_precision:.4f}, Eval Recall: {eval_recall:.4f}, Eval F1: {eval_f1:.4f}")

#     output_txt_file.write(f"Learning Rate: {a}\n")
#     output_txt_file.write(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}\n")
#     output_txt_file.write(f"Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}, Test F1: {test_f1:.4f}\n\n")

#     output_txt_file.write(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}\n")
#     output_txt_file.write(f"Train Precision: {train_precision:.4f}, Train Recall: {train_recall:.4f}, Train F1: {train_f1:.4f}\n\n")

#     output_txt_file.write(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.4f}\n")
#     output_txt_file.write(f"Eval Precision: {eval_precision:.4f}, Eval Recall: {eval_recall:.4f}, Eval F1: {eval_f1:.4f}\n\n")

In [94]:
# # Effect of Batch Size

# batch_size = [16, 32, 64, 128]
# output_txt_file = open("C:/Users/raaga/OneDrive/Desktop/IIIT-H/3-1/SMAI/smai-m24-assignments-rraagav/assignments/3/figures/vary_batch_size.txt", "w")

# input_size = X_train.shape[1]
# output_size = len(np.unique(y_train))

# mlp_bs = MLP_Classifier(input_size, output_size)

# for bs in batch_size:
#     mlp_bs.set_params(alpha=0.01, activation_function='tanh', optimizer='sgd', hidden_layers= 3, neurons_per_layer=[128, 64, 32], batch_size=bs, epochs=50)
#     mlp_bs.fit(X_train, y_train, early_stopping=True, patience=5)

#     test_activations, _ = mlp_bs.forward(X_test)
#     test_loss = mlp_bs.compute_loss(test_activations[-1], y_test)
#     test_accuracy = mlp_bs.compute_accuracy(test_activations[-1], y_test)
#     test_precision, test_recall, test_f1 = mlp_bs.compute_classification_metrics(y_test, mlp_bs.predict(X_test))
#     print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")
#     print(f"Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}, Test F1: {test_f1:.4f}")

#     train_activations, _ = mlp_bs.forward(X_train)
#     train_loss = mlp_bs.compute_loss(train_activations[-1], y_train)
#     train_accuracy = mlp_bs.compute_accuracy(train_activations[-1], y_train)
#     train_precision, train_recall, train_f1 = mlp_bs.compute_classification_metrics(y_train, mlp_bs.predict(X_train))
#     print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
#     print(f"Train Precision: {train_precision:.4f}, Train Recall: {train_recall:.4f}, Train F1: {train_f1:.4f}")

#     eval_activations, _ = mlp_bs.forward(X_eval)
#     eval_loss = mlp_bs.compute_loss(eval_activations[-1], y_eval)
#     eval_accuracy = mlp_bs.compute_accuracy(eval_activations[-1], y_eval)
#     eval_precision, eval_recall, eval_f1 = mlp_bs.compute_classification_metrics(y_eval, mlp_bs.predict(X_eval))
#     print(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.4f}")
#     print(f"Eval Precision: {eval_precision:.4f}, Eval Recall: {eval_recall:.4f}, Eval F1: {eval_f1:.4f}")

#     output_txt_file.write(f"Batch Size: {bs}\n")
#     output_txt_file.write(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}\n")
#     output_txt_file.write(f"Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}, Test F1: {test_f1:.4f}\n\n")

#     output_txt_file.write(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}\n")
#     output_txt_file.write(f"Train Precision: {train_precision:.4f}, Train Recall: {train_recall:.4f}, Train F1: {train_f1:.4f}\n\n")

#     output_txt_file.write(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.4f}\n")
#     output_txt_file.write(f"Eval Precision: {eval_precision:.4f}, Eval Recall: {eval_recall:.4f}, Eval F1: {eval_f1:.4f}\n\n")