# PA_4: Feedforward Neural Network

## Aim
Train and test a Feedforward Neural Network for MNIST digit classification.

## Procedure
* Download `mnist_file.rar` which contains mnist data as a *pickle* file and read `mnist.py` for loading partial mnist data.
* Run read `mnist.py` file which will give 1000 train and 500 test images per each class.
* x train,y train gives the image $784\times1$ and corresponding label for training data. Similarly, for test data.
* Write
1. Neural network model using library functions.
2. Your own neural network model and train with Back propagation
    1. On the training data and report accuracy.
    2. Train with Five fold cross validation (4 fold training and 1 fold testing. Repeating this for 5 times changing the test fold each time) and report the average accuracy as train accuracy.
* Test both models with the test data.
* Find the confusion matrix and report the accuracy.

In [None]:
import numpy as np
from utils import visualise
from read_mnist import load_data
import random

y_train,x_train,y_test,x_test=load_data()

print("Train data label dim: {}".format(y_train.shape))
print("Train data features dim: {}".format(x_train.shape))
print("Test data label dim: {}".format(y_test.shape))
print("Test data features dim:{}".format(x_test.shape))

visualise(x_train)

In [None]:
def sigmoid(x):
    return 1/(1+ np.exp(-x))

def sigmoid_grad(x):
    return sigmoid(x) * ( 1 - sigmoid(x) )

# def tanh():


# def ReLU():
        
class NN:
    def __init__(self, num_layers, depth, act):
        self.num_layers = num_layers
        self.depth = depth
        self.act = act

    def forward(self,x_train):
        w = np.random.randn(x_train.shape[1], self.depth) * 0.01
        b = np.random.randn(1, self.depth) * 0.01
        print(w.shape)
        print(b.shape)
        score = np.dot(x_train, w) + b
        print(score.shape)
        
    def backward(self):
        pass

# def preprocess(X):
#     # zero center the data
#     X -= np.mean(X, axis = 0)
#     return X

# preprocessing the image
# x_train=preprocess(x_train)
# x_test=preprocess(x_test)

model = NN(num_layers=5,depth=256,act="sigmoid")
model.forward(x_train)
    
    

In [3]:
import numpy as np
from utils import visualise
from read_mnist import load_data
import random

y_train,x_train,y_test,x_test=load_data()
print("Train data label dim: {}".format(y_train.shape))
print("Train data features dim: {}".format(x_train.shape))
print("Test data label dim: {}".format(y_test.shape))
print("Test data features dim:{}".format(x_test.shape))

# uncomment to visualise dataset
# visualise(x_train)

def sigmoid(x):
    return 1/(1+ np.exp(-x))

def sigmoid_grad(x):
    return sigmoid(x).T @ (1 - sigmoid(x))

# def softmax(x):
#     for i,f in enumerate(x):
#         f -= np.max(f) # for numerical stabiluty
#         p = np.exp(f) / np.sum(np.exp(f))
#         x[i,:]=p
#     return x

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def cross_entropy(y_,y):
    n = y.shape[0]
    nll = -np.log(y_[range(n),y])
    return np.mean(nll)

def delta_cross_entropy(y_,y):
    n = y.shape[0]
    y_[range(n),y] -= 1
    return y_/n

class NN:
    def __init__(self, hidden_layers, hidden_neurons, hidden_activation):
        self.hidden_layers = hidden_layers
        self.hidden_neurons = hidden_neurons
        self.hidden_activation = hidden_activation
        self.step_size=0.01

        self.W1 = 0.01* np.random.randn(x_train.shape[1],self.hidden_neurons)
        self.b1 = np.zeros((1,self.hidden_neurons))
        self.W2 = 0.01* np.random.randn(self.hidden_neurons,10)
        self.b2 = np.zeros((1,10))

    def forward(self,x_train):
        s1=np.dot(x_train, self.W1)
        a1 = (sigmoid(s1))
        s2 = np.dot(a1, self.W2)
        a2 = softmax(s2)
        loss=cross_entropy(a2,y_train)
        return(loss,s1,a1,s2,a2)


    def backward(self, s1, a1, s2, a2):
        ds2=delta_cross_entropy(a2,y_train)
        dW2 = np.dot(a1.T, ds2)
#         db2 = np.sum(ds2, axis=0)
#         self.b2 += -self.step_size * db2
        dW1 = np.dot(x_train.T, np.dot(np.dot(ds2,self.W2.T),sigmoid_grad(s1)))
#         db1 = np.sum(np.dot(np.dot(ds2,self.W2.T),sigmoid_grad(s1)),axis=0)
#         db1.reshape(1,-1)

        self.W1 += -self.step_size * dW1
        self.W2 += -self.step_size * dW2

#         self.b1 += -self.step_size * db1
        print("dW1:", dW1, "dW2:", dW2)

model=NN(5,512,"sigmoid")
epochs=10
for epoch in range(epochs):
    loss,s1,a1,s2,a2 = model.forward(x_train)
    print("Loss: {} in {}/{}".format(loss,epoch,epochs))
    model.backward(s1, a1, s2, a2)

print(a2.shape)
preds= np.argmax(a2, axis=1)
print(preds.shape, y_train.shape)
print('training accuracy: {}'.format(np.mean(preds == y_train)) )

Train data label dim: (10000,)
Train data features dim: (10000, 784)
Test data label dim: (5000,)
Test data features dim:(5000, 784)
Loss: 11.518933256651728 in 0/10
dW1: [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]] dW2: [[-0.04838838 -0.04963675 -0.04760876 ... -0.04764656 -0.04817444
  -0.0483646 ]
 [-0.05114034 -0.04875452 -0.04969969 ... -0.04938899 -0.05008567
  -0.05016968]
 [-0.05180031 -0.05161987 -0.05204767 ... -0.05119931 -0.05121662
  -0.05113887]
 ...
 [-0.05063255 -0.0515161  -0.05053076 ... -0.04944979 -0.05002934
  -0.04915459]
 [-0.05289349 -0.05041991 -0.05018145 ... -0.05005996 -0.05023035
  -0.05026941]
 [-0.05653962 -0.04941076 -0.05315421 ... -0.05230298 -0.0524291
  -0.0529183 ]]
Loss: 11.536884406029197 in 1/10
dW1: [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0

KeyboardInterrupt: 

In [None]:
print(preds.tolist())

In [None]:
print(y_train.tolist())