## Machine Learning assignment 4 - Neural Network with BackPropagation
**Name**: Manoj Boganadham \
**Roll no**: 197121 \
**Section**: A

In [2]:
# import statements
import numpy as np 
import pandas as pd 

In [3]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [4]:
def predict(X, y, w1, w2, verbose=True, bias=True, show_all=False):
    y_pred = [] 
    
    for x in X:
        h = sigmoid(x.dot(w1.T))
        if (bias):
            temp_h = np.append(np.ones(1), h)
        else:
            temp_h = h
        o = sigmoid(temp_h.dot(w2.T))
        
        y_pred.append(o)
        
    y = np.argmax(y, axis=1)
    y_pred = np.argmax(y_pred, axis=1)
                
    if show_all:
        for idx,x in enumerate(X):
            print(f"Predicted => {y_pred[idx]}, Actual => {y[idx]}")
    
    accuracy = np.mean(y_pred==y)
    if (verbose):
        print(f"Accuracy => {accuracy*100}")
    return accuracy

In [5]:
def backprop(X, Y, n_hid, lr = 0.2,w_init=None,w_scale=1,n_iters=1, batch=False,bias=True):
    if bias:
        ones_col = np.ones((X.shape[0],1))
        X = np.concatenate((ones_col,X), axis=1)
    
    bias_int = 1 if bias else 0
    
    if w_init is not None:
        w1 = np.ones((n_hid,X.shape[1])) * w_init
        w2 = np.ones((Y.shape[1],n_hid+bias_int)) * w_init
    else:
        w1 = np.random.uniform(size=(n_hid,X.shape[1])) / w_scale
        w2 = np.random.uniform(size=(Y.shape[1],n_hid+bias_int)) / w_scale
            
    history_w1 = [] 
    history_w2 = []
    accuracies = []
    
    for i in range(n_iters):
        
        for idx, (x,y) in enumerate(zip(X,Y)):
            h = sigmoid(x.dot(w1.T))
            if bias:
                temp_h = np.append(np.ones(1), h) 
            else:
                temp_h = h
            o = sigmoid(temp_h.dot(w2.T))
            
            do = o*(1-o)*(y-o)
            dh = h * (1-h) * do.dot(w2[:,bias_int:]) 
            
            dw2 = lr * do.reshape(-1,1) * temp_h
            dw1 = lr * dh.reshape(-1,1) *(x) 

            if batch == True:
                history_w1.append(dw1)
                history_w2.append(dw2)
            
            else:
                w2 += dw2
                w1 += dw1
                
        if batch is True:
            w2 += sum(history_w2)
            w1 += sum(history_w1)

        accuracies.append(predict(X,Y,w1,w2,verbose=False,bias=bias))
        
    return w1, w2, accuracies

In [6]:
train = pd.read_csv("emnist-letters-train.csv").to_numpy()
test = pd.read_csv("emnist-letters-test.csv").to_numpy()

Y_train = train[:,0]
X_train = train[:,1:]
Y_test = test[:,0]
X_test = test[:,1:]

num_train = 5000
num_test = 500

X_train = X_train[:num_train]
Y_train = Y_train[:num_train]
X_test = X_test[:num_test]
Y_test = Y_test[:num_test]

Y_train_OH = np.zeros((Y_train.size, Y_train.max()+1))
Y_train_OH[np.arange(Y_train.size),Y_train] = 1

Y_test_OH = np.zeros((Y_test.size, Y_test.max()+1))
Y_test_OH[np.arange(Y_test.size),Y_test] = 1

X_train = X_train.reshape(X_train.shape[0],-1) / 255
X_test = X_test.reshape(X_test.shape[0],-1) / 255


w_init = None
w_scale = 10
n_hid = 100
lr = 0.01
n_iters = 100
batch = False
bias = False

w1,w2,accuracies = backprop(X_train,Y_train_OH,n_hid,lr,w_init,w_scale,n_iters,batch,bias)


epochs = np.arange(1, len(accuracies)+1)

print(f"Highest Train accuracy => {max(accuracies)}")


test_acc = predict(X_test,Y_test_OH,w1,w2,bias=False,verbose=False,show_all=False)
print("Training accuracy is less because the number of epochs is less due to the hardware constraint")

Highest Train accuracy => 0.04311626523936961
Training accuracy is less because the number of epochs is less due to the hardware constraint
