In [63]:
import numpy as np
import pandas as pd
from math import exp

#class
class NeuralNetwork:
    def __init__(self, hidden_nodes, num_classes, learning_rate, num_epochs):
        #initializing the paramaters
        self.hidden_nodes = hidden_nodes
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.W1 = None
        self.b1 = None
        self.W2 = None
        self.b2 = None
    #relu function which is used as activation function
    @staticmethod
    def relu_func(x):
        return np.maximum(0, x)
    #derivative of relu function
    @staticmethod
    def relu_drv(x):
        return np.where(x > 0, 1, 0)
    #softmax function used to convert the output of NN to categorical data
    @staticmethod
    def softmax_func(x):
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)
    
    @staticmethod
    def one_hot_enc(arr):
        argmax_labels = np.argmax(arr, axis=1)
        encoded_arr = pd.get_dummies(argmax_labels)
        return encoded_arr.values


    #loss function to determine the error
    @staticmethod
    def cross_ent(original_label, predict_label):
        m = original_label.shape[0]  # Number of training examples
        cost = - np.sum(np.multiply(original_label, np.log(predict_label))) / m
        cost = np.squeeze(cost)
        return cost

    #updating the weight after performing back propagation and getting gradients
    def update_weights(self, weights, costs, train_data):
        updated_weights = []
        self.features = train_data.shape[1]
        self.samples = len(train_data)

        for weight, cost in zip(weights, costs):
            #checking whether the dimension match and perfoming the updation.
            # w = w - n*dw
            if cost.shape == (self.features, self.hidden_nodes):
                updated_weight = weight - self.learning_rate * cost
            elif cost.shape == (self.hidden_nodes,):
                updated_weight = weight - self.learning_rate * cost
            elif cost.shape == (self.samples, self.hidden_nodes):
                updated_weight = weight - self.learning_rate * cost.sum(axis=0)
            elif cost.shape == (self.samples, self.num_classes):
                updated_weight = weight - self.learning_rate * cost.sum(axis=0)
            else:
                updated_weight = weight
            #updated weights
            updated_weights.append(updated_weight)

        return updated_weights


    #intializing random values for weights and bias
    def initialize_weights(self, train_data):
        np.random.seed(0)
        self.features = train_data.shape[1]
        self.W1 = np.random.randn(self.features, self.hidden_nodes)
        self.b1 = np.random.randn(self.hidden_nodes)
        self.W2 = np.random.randn(self.hidden_nodes, self.num_classes)
        self.b2 = np.random.randn(self.num_classes)
    #performing forward propagation
    def forward_prop(self, inp_data):
        #z1=W1*x+b1
        Z1 = np.dot(inp_data, self.W1) + self.b1
        #A1 = relu(Z1)
        A1 = self.relu_func(Z1)
        #z2=W2*x+b2
        Z2 = np.dot(A1, self.W2) + self.b2
        #A2 = relu(Z2)
        A2 = self.softmax_func(Z2)
        return A2, A1, Z1
    #Backpropagation
    def backward_prop(self, X_train, y_train, net_hidden, act_hidden, weight_output, act_output):
        m = X_train.shape[0]  # Number of training examples
        #finding gradient descent values 
        dZ2 = act_output - y_train
        dW2 = (1 / m) * np.dot(act_hidden.T, dZ2)
        db2 = dZ2
        dZ1 = np.multiply(np.dot(dZ2, weight_output.T), self.relu_drv(net_hidden))
        dW1 = np.dot(X_train.T, self.relu_drv(net_hidden) * dZ1)
        db1 = (dZ1 * self.relu_drv(net_hidden))
        return dW2, db2, dW1, db1


    #accuracy 
    def accuracy(self, y_true, y_pred):
        if len(y_true) != len(y_pred):
            print('Size does not match.')
            return 0
    
        num_samples = len(y_true)
        num_correct = sum(np.all(true_label == pred_label) for true_label, pred_label in zip(y_true, y_pred))#comparing the values and adding of the labels
        accuracy = num_correct / num_samples
        #the number of correct predictions divided by the total number of predictions made by the model
        return accuracy


    def fit(self, train_data, train_labels, val_data, val_labels):
        #initializing the weights and bias
        self.initialize_weights(train_data)

        #looping untill we the better accuracy
        for epoch in range(1, self.num_epochs + 1):
            #performing forward propagation and getting ouptut values of activation function
            act_output, act_hidden, net_hidden = self.forward_prop(train_data)
            #performing the backward propagation and getting the gradient values
            wto_grad, bo_grad, wth_grad, bh_grad = self.backward_prop(train_data, train_labels, net_hidden, act_hidden,
                                                 self.W2, act_output)
            #updating the weight using gradient descent
            self.W1, self.b1, self.W2, self.b2 = self.update_weights([self.W1, self.b1, self.W2, self.b2],
                                                         [wth_grad, bh_grad, wto_grad, bo_grad],
                                                         train_data)

            #calculating loss values
            Loss = self.cross_ent(train_labels, act_output)
            
            #predicting the labels for train and val data
            self.y_pred, _, _ = self.forward_prop(val_data)
            # One hot encoding the prediction
            self.y_pred_enc = self.one_hot_enc(self.y_pred)
            # Calculating the accuracy
            val_acc = self.accuracy(val_labels, self.y_pred_enc)
            train_acc = self.accuracy(train_labels, self.one_hot_enc(act_output))

            if epoch % 10 == 0:
                print('epoch =', epoch, 'Loss function value:', Loss, 'Training Accuracy:', train_acc, 'Validation Accuracy:', val_acc)

        


#spliting the data into train and Validation
def training_data(df, encoded_labels, train_ratio, val_ratio):
    np.random.seed(42)

    indices = np.arange(len(df))
    np.random.shuffle(indices)

    shuffled_df = df.iloc[indices]
    shuffled_labels = encoded_labels.iloc[indices]

    num_examples = len(df)
    num_train = int(train_ratio * num_examples)
    num_val = int(val_ratio * num_examples)

    train_data = shuffled_df[:num_train]
    val_data = shuffled_df[num_train:num_train + num_val]
    train_labels = shuffled_labels[:num_train]
    val_labels = shuffled_labels[num_train:num_train + num_val]

    return train_data, train_labels, val_data, val_labels

# Usage example:
np.random.seed(0)
values = []
for i in range(784):
    values.append(i)
df = pd.read_csv('C:/Users/saiko/OneDrive/Desktop/657/Assign-1/train_data.csv', names=values)
label = pd.read_csv('C:/Users/saiko/OneDrive/Desktop/657/Assign-1/train_labels.csv',
                    names=[i for i in range(0, 4)])
encoded_labels = pd.get_dummies(label)
train_data, train_labels, val_data, val_labels = training_data(df, encoded_labels, 0.8, 0.2)
train_labels = train_labels.to_numpy()
val_labels = val_labels.to_numpy()

hiddenlyr_nodes = 35
num_classes = 4
learning_rate = 0.0001
num_epoch = 200
#calling the model

nn = NeuralNetwork(hiddenlyr_nodes, num_classes, learning_rate, num_epoch)
nn.fit(train_data, train_labels, val_data, val_labels)


  num_correct = sum(np.all(true_label == pred_label) for true_label, pred_label in zip(y_true, y_pred))


epoch = 10 Loss function value: 0.7731534068008766 Training Accuracy: 0.8425491087209008 Validation Accuracy: 0.8381818181818181
epoch = 20 Loss function value: 0.5267145567140168 Training Accuracy: 0.8736050093420189 Validation Accuracy: 0.8660606060606061
epoch = 30 Loss function value: 0.43510915631697317 Training Accuracy: 0.8906731303337878 Validation Accuracy: 0.8840404040404041
epoch = 40 Loss function value: 0.38282893444267585 Training Accuracy: 0.9004191284148866 Validation Accuracy: 0.8923232323232323
epoch = 50 Loss function value: 0.34627712769381036 Training Accuracy: 0.911225571883048 Validation Accuracy: 0.8985858585858586
epoch = 60 Loss function value: 0.3184621987069905 Training Accuracy: 0.9189011765894056 Validation Accuracy: 0.9066666666666666
epoch = 70 Loss function value: 0.2988521216466954 Training Accuracy: 0.9229914659395041 Validation Accuracy: 0.9109090909090909
epoch = 80 Loss function value: 0.2843451272495522 Training Accuracy: 0.9261223047013079 Valida

# Test data should be given here

In [64]:
#test data prediction
test_data = pd.read_csv("test data location goes here")
def predict_labels(test_data, neural_network):
    A2, _, _ = neural_network.forward_prop(test_data)
    labels = np.argmax(A2, axis=1)  # Get the index of the maximum activation for each sample
    encoded_labels = np.eye(neural_network.num_classes)[labels]  # One-hot encode the labels
    return encoded_labels


In [71]:
# Assuming you have loaded the test dataset into 'test_data'
predicted_labels = predict_labels(test_data, nn)
predicted_labels = pd.DataFrame(predicted_labels)
predicted_labels

Unnamed: 0,0,1,2,3
0,0.0,1.0,0.0,0.0
1,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0
3,1.0,0.0,0.0,0.0
4,0.0,0.0,1.0,0.0
...,...,...,...,...
4945,0.0,0.0,1.0,0.0
4946,1.0,0.0,0.0,0.0
4947,0.0,0.0,1.0,0.0
4948,0.0,0.0,0.0,1.0
