In [10]:
import pickle
import struct
import numpy as np
import idx2numpy
import numpy as np  # For numerical operations
import idx2numpy  # For loading MNIST dataset files
from sklearn.model_selection import train_test_split  # For splitting the dataset
import matplotlib.pyplot as plt
import numpy as np  # if you haven't already imported numpy
import os
import pickle

np.set_printoptions() 

In [11]:
class Neural_Network:
    def __init__(self, N, A, lr, Activation_Function, Weight_init, Num_epochs, Batch_size, alpha_for_LeReLu):
        self.N = N  
        self.A = A 
        self.lr = lr  
        self.Activation_Function = Activation_Function  
        self.Weight_init = Weight_init 
        self.Num_epochs = Num_epochs  
        self.Batch_size = Batch_size  
        self.alpha_for_LeReLu=alpha_for_LeReLu
        
        # Additional initialization, like setting weights, could be added here
        print("Neural Network Initialized with parameters:")
        print(f"N: {self.N}, Architecture: {self.A}, Learning Rate: {self.lr}")
        print(f"Activation Function: {self.Activation_Function}, Weight Init: {self.Weight_init}")
        print(f"Epochs: {self.Num_epochs}, Batch Size: {self.Batch_size}")
        self.Weightsnn=[]
        self.biasnn=[]
        for i in range(N):
            tempw,tempb=self.wts_for_init(A[i+1],A[i])
            self.Weightsnn.append(tempw)
            self.biasnn.append(tempb)

    def activation_func(self, z):
        if self.Activation_Function == "sigmoid":
            return 1 / (1 + np.exp(-z))
        elif self.Activation_Function == "ReLu":
            return np.maximum(0, z)
        elif self.Activation_Function == "tanh":
            return np.tanh(z)
        elif self.Activation_Function == "softmax":
            z_stable = z - np.max(z, axis=0, keepdims=True)
            exp_temp = np.exp(z_stable)
            return exp_temp / np.sum(exp_temp, axis=0, keepdims=True)
        elif self.Activation_Function == "LeakyReLu":
            return np.where(z > 0, z, self.alpha_for_LeReLu * z)
        



    def activation_grad(self, z):
        if self.Activation_Function=="sigmoid":
            return self.activation_func(z)*(1-self.activation_func(z))
        elif self.Activation_Function=="ReLu":
            return z>0
        elif self.Activation_Function=="tanh":
            return 1-np.power(self.activation_func(z),2)
        elif self.Activation_Function=="LeakyReLu":
            return np.where(z>0,1,self.alpha_for_LeReLu)
        
        

    def activation_func_out_layer(self, z, Activation_function_last_layer):
        if Activation_function_last_layer == "softmax":
            z_stable = z - np.max(z, axis=0, keepdims=True)
            exp_temp = np.exp(z_stable)
            
            return exp_temp / np.sum(exp_temp, axis=0, keepdims=True)
        elif Activation_function_last_layer == "sigmoid":
            return 1 / (1 + np.exp(z))
        elif Activation_function_last_layer == "ReLu":
            return np.maximum(0, z)
            



    def activation_grad_out_layer(self, z, Activation_function_last_layer):
        if Activation_function_last_layer=="sigmoid":
            return (np.exp(-z))/((np.exp(-z)+1)**2)
        elif Activation_function_last_layer=="ReLu":
            return np.where(z>0,1,0)
        



    def wts_for_init(self, output_dim, input_dim):
        if self.Weight_init=="zero":
            return np.zeros((output_dim,input_dim)),np.zeros((output_dim,1))
        elif self.Weight_init == "random":
            stddev = np.sqrt(2 / input_dim)
            return np.random.rand(output_dim,input_dim)*stddev,np.random.rand(output_dim,1)
        elif self.Weight_init == "normal":
            stddev = np.sqrt(1 / input_dim)
            return np.random.randn(output_dim, input_dim) * stddev, np.random.randn(output_dim, 1) * stddev
    def forward_pass(self, x_in):
        self.Activation_out=[]
        self.Z_out=[]
        x_temp=x_in
        self.Activation_out.append(x_in)
        for i in range(self.N-1):
            z=np.dot(self.Weightsnn[i],x_temp)+self.biasnn[i]
            # print(z)
            self.Z_out.append(z)
            x_temp=self.activation_func(z)
            # print(x_temp)
            self.Activation_out.append(x_temp)
        temp_1=np.dot(self.Weightsnn[-1],x_temp)+self.biasnn[-1]
        self.Z_out.append(temp_1)
        temp_2=self.activation_func_out_layer(temp_1,"softmax")
        self.Activation_out.append(temp_2)
        return self.Activation_out[-1]
    def backward_pass(self,y_pred, y_actual):



        change_w, change_b = [], []
        err = -y_actual + y_pred
        dw_out = (1 / self.Batch_size) * np.dot(err, self.Activation_out[-2].T)
        db_out = (1 / self.Batch_size) * np.sum(err, axis=1, keepdims=True)
        change_w.append(dw_out)
        change_b.append(db_out)

        for i in range(len(self.Weightsnn) - 2, -1, -1):
            err = np.dot(self.Weightsnn[i + 1].T, err)* self.activation_grad(self.Z_out[i])
            dw = (1 / self.Batch_size) * np.dot(err, self.Activation_out[i].T)
            db = (1 / self.Batch_size) * np.sum(err, axis=1, keepdims=True)
            change_w.append(dw)
            change_b.append(db)

        change_w.reverse()
        change_b.reverse()
        return change_w, change_b
    def compute_cost(self, Y_pred, Y_actual):
        m = Y_actual.shape[1]  # Number of samples
        cost = -np.sum(Y_actual * np.log(Y_pred )) / m
        return np.squeeze(cost)
    
    
    def fit(self, X_in, Y_act,X_val,Y_val, patience=10, min_delta=1e-6):
        num_samples = X_in.shape[1]
        num_batches = num_samples // self.Batch_size
        if num_samples % self.Batch_size != 0:
            num_batches += 1
        costs = []
        costs_val=[]

        # Split into training and validation sets
        X_train=X_in
        Y_train=Y_act
        train_samples = X_train.shape[1]
        
        best_val_cost = float('inf')
        patience_counter = 0

        for epoch in range(self.Num_epochs):
            # Shuffle training data
            indices = np.random.permutation(train_samples)
            X_train_shuffled = X_train[:, indices]
            Y_train_shuffled = Y_train[:, indices]
            
            for j in range(num_batches):
                start_ind = j * self.Batch_size
                end_ind = min(train_samples, (j + 1) * self.Batch_size)
                
                # Get the batch of data
                X_batch = X_train_shuffled[:, start_ind:end_ind]
                Y_batch = Y_train_shuffled[:, start_ind:end_ind]
                
                # Perform forward and backward pass
                out_put = self.forward_pass(X_batch)
                changes_w, changes_b = self.backward_pass(out_put, Y_batch)
                for k in range(len(changes_w)):
                    self.Weightsnn[k] -= self.lr * changes_w[k]
                    self.biasnn[k] -= self.lr * changes_b[k]
            
            # Calculate training cost
            Y_pred_train = self.forward_pass(X_train)
            train_cost = self.compute_cost(Y_pred_train, Y_train)
            costs.append(train_cost)

            Y_pred_val=self.forward_pass(X_val)
            val_cost=self.compute_cost(Y_pred_val,Y_val)
            costs_val.append(val_cost)

            # Calculate validation cost
            Y_pred_val = self.forward_pass(X_val)
            val_cost = self.compute_cost(Y_pred_val, Y_val)

            print(f"Epoch {epoch + 1}, Training Cost: {train_cost:.4f}, Validation Cost: {val_cost:.4f}")

            # Early stopping check
            if val_cost < best_val_cost - min_delta:
                best_val_cost = val_cost
                patience_counter = 0  # Reset patience counter if improvement is seen
            else:
                patience_counter += 1

            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch + 1}")
                break

        return costs,costs_val
    
    def compute_cost(self, Y_pred, Y_actual):
        """
        Calculate cross-entropy cost.
        """
        m = Y_actual.shape[1] 
        epsilon = 1e-10 
        cost = -np.sum(Y_actual * np.log(Y_pred + epsilon)) / m
        return np.squeeze(cost)
    
    
   
    def predict(self, X_in):
        Y_proba = self.forward_pass(X_in)
        with np.printoptions(threshold=np.inf):
            print(Y_proba)
        return np.argmax(Y_proba, axis=0)
    def predict_proba(self, X_in):
        Y_prob=self.forward_pass(X_in)
        return Y_prob.T

    def score(self, X_in, Y_act):
        predictions = self.predict(X_in)
        accuracy = np.mean(predictions == Y_act)
        return accuracy

In [12]:
activation_functions = ['sigmoid', 'tanh', 'ReLu', 'LeakyReLu']  # Customize with your actual activation functions
weight_inits = ['zero', 'random', 'normal']  # Customize with your actual initialization methods

# Dictionary to hold loaded models with tuple keys for (activation_function, init_method)
loaded_models = {}

# Loop through activation functions and weight initializations
for activation_function in activation_functions:
    for init_method in weight_inits:
        # Construct file path based on activation function and initialization method
        model_file_name = f"./Saved_models_Section-B/model_{activation_function}_{init_method}.pkl"
        
        # Load the model from the .pkl file
        with open(model_file_name, 'rb') as file:
            loaded_models[(activation_function, init_method)] = pickle.load(file)
        print(f"Loaded model for Activation Function: {activation_function}, Weight Init: {init_method}")

Loaded model for Activation Function: sigmoid, Weight Init: zero
Loaded model for Activation Function: sigmoid, Weight Init: random
Loaded model for Activation Function: sigmoid, Weight Init: normal
Loaded model for Activation Function: tanh, Weight Init: zero
Loaded model for Activation Function: tanh, Weight Init: random
Loaded model for Activation Function: tanh, Weight Init: normal
Loaded model for Activation Function: ReLu, Weight Init: zero
Loaded model for Activation Function: ReLu, Weight Init: random
Loaded model for Activation Function: ReLu, Weight Init: normal
Loaded model for Activation Function: LeakyReLu, Weight Init: zero
Loaded model for Activation Function: LeakyReLu, Weight Init: random
Loaded model for Activation Function: LeakyReLu, Weight Init: normal


In [13]:
train_images_path = './MNIST-Handwritting/train-images.idx3-ubyte'
train_labels_path = './MNIST-Handwritting/train-labels.idx1-ubyte'
test_images_path = './MNIST-Handwritting/t10k-images.idx3-ubyte'
test_labels_path = './MNIST-Handwritting/t10k-labels.idx1-ubyte'

In [14]:
X_train=idx2numpy.convert_from_file(train_images_path)
Y_train=idx2numpy.convert_from_file(train_labels_path)

print(X_train.shape)    

X_train_flat = X_train.reshape(X_train.shape[0] , -1)/255; 

print(X_train_flat.shape) ;  



Y_train_one_hot = np.eye(10)[Y_train] ; 
print(Y_train_one_hot.shape)
X_train_split, X_rem_split, Y_train_split, Y_rem_split = train_test_split(X_train_flat, Y_train_one_hot, test_size=0.2, random_state=42)


X_val_split, X_test_split, Y_val_split, Y_test_split = train_test_split(X_rem_split, Y_rem_split, test_size=0.5, random_state=42)

(60000, 28, 28)
(60000, 784)
(60000, 10)


In [15]:
final_accuracy = loaded_models[('ReLu','random')].score(X_test_split.T, np.where(Y_test_split == 1)[1])
print(final_accuracy)

[[0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.09882241 0.09882241
  0.09882241 0.09882241 0.09882241 0.09882241 0.