In [48]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

In [49]:
# Load the training CSV (path is relative to the notebook's working directory)
# into a DataFrame used by the cells below.
dataset = pd.read_csv(filepath_or_buffer='train.csv')

In [51]:
# Splitting the data into a held-out dev set and a training set.
# NOTE(review): assumes train.csv stores the class id in a column named "label"
# and that every other column is an input feature — confirm against the file.
LABEL_COLUMN = "label"
DEV_SIZE = 1001  # number of leading rows held out for the dev set

dev_rows = dataset.iloc[:DEV_SIZE]
# Every remaining row goes to training, so the two sets can neither overlap
# nor leave a gap, whatever the number of rows in the file.
train_rows = dataset.iloc[DEV_SIZE:]

# Features and labels live in separate NumPy arrays:
#   X_* has shape (n_samples, n_features), Y_* has shape (n_samples,).
# Plain arrays make positional indexing (X[index], Y[index]) select one sample.
# forward_propagation computes np.dot(W1, X), so pass X.T to it.
X_dev = dev_rows.drop(columns=LABEL_COLUMN).to_numpy()
Y_dev = dev_rows[LABEL_COLUMN].to_numpy()

X_train = train_rows.drop(columns=LABEL_COLUMN).to_numpy()
Y_train = train_rows[LABEL_COLUMN].to_numpy()


# Part 1: Neural Network Implementation:

In [35]:
def init_params(layer1,layer2):
    """Create random parameters for one fully connected layer.

    Args:
        layer1: number of units feeding into the layer (columns of ``w``).
        layer2: number of units in the layer (rows of ``w`` and of ``b``).

    Returns:
        dict with ``"w"`` of shape (layer2, layer1) and ``"b"`` of shape
        (layer2, 1), both drawn uniformly from [0, 1) with ``np.random.rand``.
    """
    # The weight matrix is drawn before the bias column.
    weight_matrix = np.random.rand(layer2, layer1)
    bias_column = np.random.rand(layer2, 1)
    return {"w": weight_matrix, "b": bias_column}
# Units per layer, input layer first.
layers = [784,120,45,10]

weights = {}   # maps "W1", "W2", ... to the weight matrix of each layer
bias = {}      # maps "B1", "B2", ... to the bias column of each layer

# Walk over consecutive (incoming units, layer units) pairs; layers are numbered from 1.
for i, (units_in, units_out) in enumerate(zip(layers[:-1], layers[1:]), start=1):
    # Weight matrix of shape (units_out, units_in), values uniform in [0, 1)
    weights[f'W{i}'] = np.random.rand(units_out, units_in)
    # Bias column of shape (units_out, 1), values uniform in [0, 1)
    bias[f'B{i}'] = np.random.rand(units_out, 1)


In [36]:
def ReLU(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    lower_bound = 0
    rectified = np.maximum(lower_bound, Z)
    return rectified

In [37]:
def Softmax(Z):
    """Column-wise softmax.

    Args:
        Z: scores of shape (n_classes,) or (n_classes, n_samples), one sample
           per column.

    Returns:
        Array of the same shape whose entries are positive and sum to 1 along
        axis 0, i.e. every sample gets its own probability distribution.
    """
    Z = np.asarray(Z, dtype=float)
    # A 0-d input has no axis 0; reduce over everything in that case.
    axis = 0 if Z.ndim > 0 else None
    # Subtracting the per-column maximum does not change the result but keeps
    # np.exp from overflowing to inf (and the ratio from becoming nan).
    shifted = Z - np.max(Z, axis=axis, keepdims=True)
    exp_Z = np.exp(shifted)
    # Normalise per column rather than over the whole batch.
    return exp_Z / np.sum(exp_Z, axis=axis, keepdims=True)

In [38]:
def forward_propagation(X,weights,bias):
    """Run the network forward and return every intermediate value.

    Args:
        X: input of shape (n_features, n_samples), one sample per column
           (the first step is ``np.dot(W1, X)``).
        weights: dict holding exactly the keys "W1" .. "Wn"; ``Wi`` has shape
           (units_i, units_{i-1}).
        bias: dict holding "B1" .. "Bn"; ``Bi`` has shape (units_i, 1).

    Returns:
        dict with the pre-activation "Z{i}" and the activation "A{i}" of every
        layer i = 1 .. n. Hidden layers use ReLU and the last layer uses
        Softmax, matching the ``A3 - Y`` output gradient used by
        backward_propagation.
    """
    n_layers = len(weights)
    result = {}
    activation = X
    # Build the values layer by layer: each one needs the previous activation,
    # so they cannot all be written inside a single dict literal.
    for i in range(1, n_layers + 1):
        Z = np.dot(weights[f'W{i}'], activation) + bias[f'B{i}']
        activation = Softmax(Z) if i == n_layers else ReLU(Z)
        result[f'Z{i}'] = Z
        result[f'A{i}'] = activation
    return result

In [39]:
def one_hot(Y, num_classes=None):
    """One-hot encode integer class labels.

    Args:
        Y: 1-D collection of non-negative integer labels (list, ndarray or
           pandas Series; a Series is read by position, not by index label).
        num_classes: number of columns in the result. Defaults to
           ``max(Y) + 1``; pass it explicitly when a batch may not contain
           the highest class.

    Returns:
        Integer array of shape (len(Y), num_classes) with one row per sample.
        Transpose it when a (classes, samples) layout is needed.

    Raises:
        ValueError: if ``Y`` is empty and ``num_classes`` is not given, or if
            a label lies outside ``[0, num_classes)``.
    """
    labels = np.asarray(Y).astype(int).ravel()
    if num_classes is None:
        if labels.size == 0:
            raise ValueError("cannot infer the number of classes from empty labels")
        num_classes = int(labels.max()) + 1
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError("labels must lie in the range [0, num_classes)")

    encoded = np.zeros((labels.size, num_classes), dtype=int)
    # Row r gets a 1 in column labels[r]; one vectorised assignment.
    encoded[np.arange(labels.size), labels] = 1
    return encoded
        

# Part 2: Backward Propagation and Model Training:

In [40]:
def backward_propagation(Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, X, Y):
    """Compute the gradients of a three-layer network.

    All arrays hold one sample per column.

    Args:
        Z1, Z2, Z3: pre-activations of the three layers (Z3 is accepted for
            symmetry but not needed).
        A1, A2, A3: activations of the three layers; A3 is the network output.
        W1, W2, W3: weight matrices (W1 is accepted for symmetry but not needed).
        X: input of shape (n_features, n_samples).
        Y: targets with the same shape as A3 (one-hot, classes x samples).

    Returns:
        dict with keys "dZ3", "dW3", "db3", "dA2", "dZ2", "dW2", "db2",
        "dA1", "dZ1", "dW1", "db1".
    """
    # Number of samples = number of columns (dW1 is built from dot(dZ1, X.T)).
    m = X.shape[1]

    # Output layer.
    dZ3 = A3 - Y
    dW3 = (1 / m) * np.dot(dZ3, A2.T)
    db3 = (1 / m) * np.sum(dZ3, axis=1, keepdims=True)

    # Second hidden layer. The hidden activations are ReLU, whose derivative
    # is 1 where the pre-activation is positive and 0 elsewhere.
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = dA2 * (Z2 > 0)
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    # First hidden layer.
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * (Z1 > 0)
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    result = {
        "dZ3": dZ3,
        "dW3": dW3,
        "db3": db3,
        "dA2": dA2,
        "dZ2": dZ2,
        "dW2": dW2,
        "db2": db2,
        "dA1": dA1,
        "dZ1": dZ1,
        "dW1": dW1,
        "db1": db1,
    }

    return result


In [41]:
def update_params(W1, b1, W2, b2,W3,b3,dW1, db1, dW2, db2, dW3, db3,alpha):
    """Apply one gradient-descent step to every weight and bias.

    Args:
        W1, b1, W2, b2, W3, b3: current parameters of the three layers.
        dW1, db1, dW2, db2, dW3, db3: gradients matching those parameters.
        alpha: learning rate (step size).

    Returns:
        dict with keys "W1", "W2", "W3", "b1", "b2", "b3" holding
        ``parameter - alpha * gradient``. The inputs are not modified.
    """
    result={
        "W1": W1 -alpha*dW1,
        "W2": W2 -alpha*dW2,
        "W3": W3 -alpha*dW3,
        "b1": b1 -alpha*db1,
        "b2": b2 -alpha*db2,
        "b3": b3 -alpha*db3
    }
    return result

In [42]:
def get_prediction(A3):
    """Return, for every column of ``A3``, the row index of its largest entry."""
    return np.argmax(A3, axis=0)

def get_accuracy(predicted_labels,Y):
    """Percentage (0-100) of entries where ``predicted_labels`` equals ``Y``.

    The count of matching entries is divided by ``Y.size``.
    """
    matches = (predicted_labels == Y)
    hit_count = np.sum(matches)
    return (hit_count / Y.size) * 100

def print_labels(predicted_labels, Y):
    """Print the predicted labels and the true labels, one line each.

    Each value follows its caption directly, with no separator in between.
    """
    print("predicted_labels:", predicted_labels, sep="")
    print("Y", Y, sep="")


In [43]:
# NOTE: Softmax is also defined in an earlier cell; this later definition is the
# one in effect once the notebook has run top to bottom, so keep the two in
# sync (or remove one of them).
def Softmax(Z):
    """Column-wise softmax.

    Args:
        Z: scores of shape (n_classes,) or (n_classes, n_samples), one sample
           per column.

    Returns:
        Array of the same shape whose entries are positive and sum to 1 along
        axis 0, i.e. every sample gets its own probability distribution.
    """
    Z = np.asarray(Z, dtype=float)
    # A 0-d input has no axis 0; reduce over everything in that case.
    axis = 0 if Z.ndim > 0 else None
    # Subtracting the per-column maximum does not change the result but keeps
    # np.exp from overflowing to inf (and the ratio from becoming nan).
    shifted = Z - np.max(Z, axis=axis, keepdims=True)
    exp_Z = np.exp(shifted)
    # Normalise per column rather than over the whole batch.
    return exp_Z / np.sum(exp_Z, axis=axis, keepdims=True)

def _softmax_rows(Z):
    """Softmax along axis 1 (one sample per row), shifted for numerical stability."""
    exp_Z = np.exp(Z - np.max(Z, axis=1, keepdims=True))
    return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)


def gradient_descent(X_train,Y_train,alpha,iterations ):
    """Fit a single-layer softmax classifier with batch gradient descent.

    Samples are stored one per row here, which is the layout implied by the
    gradient ``np.dot(X_train.T, dZ)`` and by ``m = X_train.shape[0]``.

    Args:
        X_train: features of shape (n_samples, n_features).
        Y_train: one-hot targets of shape (n_samples, n_classes).
        alpha: learning rate.
        iterations: number of gradient steps.

    Returns:
        (W, b): weights of shape (n_features, n_classes) and bias of shape
        (1, n_classes).

    Raises:
        ValueError: if X_train is not 2-D or Y_train is not a 2-D array with
            one row per sample.
    """
    X = np.asarray(X_train, dtype=float)
    Y = np.asarray(Y_train, dtype=float)
    if X.ndim != 2:
        raise ValueError("X_train must have shape (n_samples, n_features)")
    m, n_features = X.shape
    if Y.ndim != 2 or Y.shape[0] != m:
        raise ValueError("Y_train must be one-hot with shape (n_samples, n_classes)")
    n_classes = Y.shape[1]

    # Take random weight and bias term, sized by features and classes
    # (not by the number of samples).
    W = np.random.rand(n_features, n_classes)
    b = np.random.rand(1, n_classes)

    for i in range(iterations):
        # Forward propagation (its output is reused by the backward step)
        Z = np.dot(X, W) + b
        a = _softmax_rows(Z)

        # Backward propagation calculation
        dZ = a - Y
        dW = np.dot(X.T, dZ) / m
        db = np.sum(dZ, axis=0, keepdims=True) / m

        # Update
        W = W - alpha * dW
        b = b - alpha * db

        if i % 10 == 0:
            # Compare predicted classes with true classes; the probabilities
            # themselves never equal the one-hot targets exactly.
            accuracy = np.mean(np.argmax(a, axis=1) == np.argmax(Y, axis=1))
            print(f"The Accuracy after {i}th itration is {accuracy}")

    return W,b
    
    

# Part 3: Model Evaluation:

In [46]:
def make_predictions(X,W1, b1, W2, b2, W3=None, b3=None):
    """Predict class labels for ``X`` with the given layer parameters.

    Args:
        X: either one sample as a 1-D vector of n_features values, or a batch
           of shape (n_features, n_samples) with one sample per column.
        W1, b1, W2, b2: parameters of the first two layers.
        W3, b3: optional parameters of a third layer; omit both for a
           two-layer network.

    Returns:
        Array with the predicted class index of every sample (a single-element
        array for a 1-D input).
    """
    weights = {
        "W1" : W1 ,
        "W2" : W2
    }
    bias = {
        "B1" : b1 ,
        "B2" : b2
    }
    if W3 is not None:
        weights["W3"] = W3
        bias["B3"] = b3

    X = np.asarray(X)
    if X.ndim == 1:
        # A flat sample would broadcast wrongly against the (units, 1) biases;
        # turn it into a single column first.
        X = X.reshape(-1, 1)

    activations = forward_propagation(X,weights,bias)
    # The activation of the last layer is the network output.
    output = activations[f"A{len(weights)}"]
    predictions = get_prediction(output)
    return predictions

In [47]:
def test_prediction(index, X, Y, W1, b1, W2, b2, W3=None, b3=None):
    """Predict one sample, display it as a 28x28 image and return the result.

    Args:
        index: position of the sample inside ``X`` and ``Y``.
        X: samples stored one per row; each row must hold 28*28 values.
        Y: true labels, one per sample.
        W1, b1, W2, b2: parameters of the first two layers.
        W3, b3: optional parameters of a third layer.

    Returns:
        (prediction, true_label), where ``prediction`` is whatever
        make_predictions returns for the sample.
    """
    # np.asarray makes the lookup positional for DataFrame / Series inputs too.
    sample = np.asarray(X)[index]
    true_label = np.asarray(Y)[index]

    # The network works on columns, so hand the sample over as (n_features, 1).
    # The third layer is forwarded only when it was supplied.
    extra_layer = () if W3 is None else (W3, b3)
    prediction = make_predictions(sample.reshape(-1, 1), W1, b1, W2, b2, *extra_layer)

    plt.imshow(sample.reshape(28, 28), cmap='gray')
    plt.axis('off')
    plt.title(f"Prediction: {prediction}, True Label: {true_label}")
    plt.show()

    return prediction, true_label