### Importing the necessary libraries

In [1]:
import sys #this is used only for printing accuracy
import csv #this is used only for reading the data 
import numpy as np

### Reading the Data

In [2]:
import numpy as np
f = open('mnist_train.csv', 'r')
reader = csv.reader(f)
train=np.array([np.array(row) for row in reader])
f.close()
f = open('mnist_test.csv', 'r')
reader = csv.reader(f)
test=np.array([np.array(row) for row in reader])
f.close()

In [3]:
str2int = lambda t: int(t)
str2int = np.vectorize(str2int)

In [4]:
train_label=str2int(train[:,0])
test_label=str2int(test[:,0])
train_x=str2int(train[:,1:])
test_x=str2int(test[:,1:])

In [5]:
inp=train_x.transpose()
inp.shape

(784, 60000)

In [6]:
test_inp=test_x.transpose()
test_inp.shape

(784, 10000)

In [7]:
target=np.zeros((10,inp.shape[1]))

for i in range(inp.shape[1]):
    target[train_label[i]][i]=1

In [None]:
#normalize inputs
#did not do it here as MNIST is a simple DataSet

In [8]:
np.random.seed(10)

### The Model

In [5]:
class model:
    """
    This is the class which must be instantiated to create a neural network model
    """
    def __init__(self,input_dim):
        """
        The constructor needs the input dimension of the neural network when created
        
        input_dim:the dimension of the input
        """
        self.input_dim=input_dim#dimension of input
        self.weights={0:np.zeros((input_dim,input_dim))}#dictionary for weights for each layer
        self.bias={0:np.zeros((input_dim,1))}#dictionary for bias for each layer
        self.layers=0#variable to keep track of the number of layers
        self.activations=[None] #list to keep track of the activation of each layer
        
    def add(self,nodes=10,activation='ReLu'):#adds a new layer to the network
        """
        This function adds a new layer to the neural network, you can specify the number of nodes(default 10) and
        the type of activation i.e ReLu or SoftMax(only for final layer), default is ReLu
        nodes: the number of nodes to be added in the new layer
        activation: the activation of the new layer
        """
        self.layers+=1#incrementing the layer count
        self.weights[self.layers]=np.random.normal(0,1.0/np.sqrt(784),size=(nodes,self.weights[self.layers-1].shape[0]))#initializing weights randomly from normal distribution
        self.bias[self.layers]=np.zeros((nodes,1)) #initializing bias as zero
        self.activations.append(activation)#storing the new activation
        return
    
    def apply_activation(self,x,activation):
        """
        applies the given activation(2nd argument) function to the given input(1st argument)
        x: ususally the w.x+b
        activation: the activation to be applied
        
        returns:the output after applying the activation function 
        """
        if activation=='ReLu':#reLu activation
            x[x<0]=0
            return x
        if activation=='Sigmoid':#sigmoid activation
            return 1/(1+np.exp(-x))
        if activation=='SoftMax':# SoftMax activation
            x-=np.max(x,axis=0)[np.newaxis,:]
            #y=np.apply_along_axis(lambda x: np.exp(x)/np.sum(np.exp(x)),0,x)#bp()#return y
            return np.exp(x)/np.sum(np.exp(x),axis=0,keepdims=True)
    
    def forwardprop(self,inputs):
        """
        This functions gives the output of the forward pass of the network in current state(i.e using the latest weights an biases) given an input
        inputs: The inputs in the format [input_dimension x Number of samples]
        
        returns: The output of the last layer
        """
        output=[0]*len(self.weights.keys())
        output[0]=inputs
        for i in range(1,len(self.weights.keys())):
            output[i]=self.apply_activation(np.dot(self.weights[i],output[i-1])+self.bias[i],self.activations[i])
        return output[-1]
    
    def performance(self,predictions,targets,test_inputs,test_labels):
        """
        This function calculates and prints the accuracy
        predictions:predicted vectors
        targets: target vectors(one hot encoded)
        test_inputs:test_inputs for test case
        test_labels:the labels(not 1 hot encodings) corresponding to the test_inputs
        """
        train_accuracy=100*(np.sum(np.argmax(predictions,axis=0)==np.argmax(targets,axis=0))/targets.shape[1])
        test_accuracy=100*(np.sum(np.argmax(self.forwardprop(test_inputs),axis=0)==test_labels)/test_inputs.shape[1])
        sys.stdout.write("training progress: %d/%d training accuracy: %0.2f testing accuracy: %0.2f \r" % (1,1,train_accuracy,test_accuracy))
        sys.stdout.flush()
        return #train_accuracy,test_accuracy
    
    def train(self,inputs,targets,test_inputs,test_labels,iters=20,learning_rate=0.00005):
        """
        This is the core function
        inputs:The inputs in the format [input_dimension x Number of samples]
        targets: The correspondin one hot encodings in the format [number of categories x Number of samples]
        test_inputs:test_inputs for test case
        test_labels:the labels(not 1 hot encodings) corresponding to the test_inputs
        iters: number of iterations , default-20
        learning_rate:step size, default-0.00005
        """
        for iteration in range(iters):
            step_size=learning_rate/(targets.shape[1])   #dividing the learning rate by batch size      
            deltas=[0]*len(self.weights.keys())     #list to store the deltas in the backward pass
            output=[0]*len(self.weights.keys())  #list to store the output(activations) of each layer
            output[0]=inputs
            weighted_outputs=[0]*len(self.weights.keys())#list to store the weighted outputs(before applying the activation function)
            #forward prop
            for i in range(1,len(self.weights.keys())):
                weighted_outputs[i]=np.dot(self.weights[i],output[i-1])+self.bias[i]
                output[i]=self.apply_activation(weighted_outputs[i],self.activations[i])
            #backprop
            #deltas, weight, bias update
            for i in range(len(self.weights.keys())-1,0,-1):
                relu_grad = lambda t: 1 if t>0 else 0
                vec_relu_grad = np.vectorize(relu_grad)
                if self.activations[i]=='SoftMax' and i==len(self.weights.keys())-1:
                    deltas[i]=output[i]-targets
                    self.weights[i]=self.weights[i]-step_size*(np.dot(deltas[i],output[i-1].transpose()))
                    #bp()
                    self.bias[i]=self.bias[i]-step_size*(np.sum(deltas[i],axis=1).reshape((-1,1)))
                elif self.activations[i]=='ReLu':
                    deltas[i]=np.multiply(np.dot(self.weights[i+1].transpose(),deltas[i+1]),vec_relu_grad(weighted_outputs[i])) 
                    self.weights[i]=self.weights[i]-step_size*(np.dot(deltas[i],output[i-1].transpose()))
                    #bp()
                    self.bias[i]=self.bias[i]-step_size*(np.sum(deltas[i],axis=1).reshape((-1,1)))
                else:
                    print('here')
            #accuracy,test_accuracy=self.performance(output[-1],targets,test_inputs,test_labels)
            

In [40]:
nn=model(input_dim=784)#sample network

In [41]:
nn.add(200,'ReLu') #first hidden layer with 200 nodes
nn.add(180,'ReLu') #second hidden layer with 180  nodes

In [42]:
nn.add(10,'SoftMax')#output Softmax layer

In [43]:
for t in range(300):#trainig for 300 epochs
    print('epoch ',t)
    nn.performance(nn.forwardprop(inp),target,test_inp,test_label)
    for i in range(600):#trainig on minibatches of size 100
        nn.train(inputs=inp[:,i*100:(i*100)+100],targets=target[:,i*100:(i*100)+100],test_inputs=test_inp,test_labels=test_label,iters=1,learning_rate=0.0001)
#nn.forwardprop(inp)

epoch  0
epoch  1 progress: 1/1 training accuracy: 10.84 testing accuracy: 10.81 
epoch  2 progress: 1/1 training accuracy: 79.05 testing accuracy: 79.80 
epoch  3 progress: 1/1 training accuracy: 83.95 testing accuracy: 84.41 
epoch  4 progress: 1/1 training accuracy: 86.37 testing accuracy: 86.49 
epoch  5 progress: 1/1 training accuracy: 87.80 testing accuracy: 87.67 
epoch  6 progress: 1/1 training accuracy: 88.78 testing accuracy: 88.53 
epoch  7 progress: 1/1 training accuracy: 89.54 testing accuracy: 89.38 
epoch  8 progress: 1/1 training accuracy: 90.17 testing accuracy: 89.89 
epoch  9 progress: 1/1 training accuracy: 90.71 testing accuracy: 90.42 
epoch  10progress: 1/1 training accuracy: 91.14 testing accuracy: 90.84 
epoch  11progress: 1/1 training accuracy: 91.55 testing accuracy: 91.14 
epoch  12progress: 1/1 training accuracy: 91.89 testing accuracy: 91.36 
epoch  13progress: 1/1 training accuracy: 92.22 testing accuracy: 91.61 
epoch  14progress: 1/1 training accuracy: 

epoch  225rogress: 1/1 training accuracy: 99.43 testing accuracy: 96.25 
epoch  226rogress: 1/1 training accuracy: 99.43 testing accuracy: 96.23 
epoch  227rogress: 1/1 training accuracy: 99.44 testing accuracy: 96.23 
epoch  228rogress: 1/1 training accuracy: 99.45 testing accuracy: 96.23 
epoch  229rogress: 1/1 training accuracy: 99.45 testing accuracy: 96.26 
epoch  230rogress: 1/1 training accuracy: 99.46 testing accuracy: 96.24 
epoch  231rogress: 1/1 training accuracy: 99.47 testing accuracy: 96.25 
epoch  232rogress: 1/1 training accuracy: 99.48 testing accuracy: 96.25 
epoch  233rogress: 1/1 training accuracy: 99.49 testing accuracy: 96.25 
epoch  234rogress: 1/1 training accuracy: 99.50 testing accuracy: 96.25 
epoch  235rogress: 1/1 training accuracy: 99.50 testing accuracy: 96.25 
epoch  236rogress: 1/1 training accuracy: 99.51 testing accuracy: 96.24 
epoch  237rogress: 1/1 training accuracy: 99.52 testing accuracy: 96.24 
epoch  238rogress: 1/1 training accuracy: 99.53 tes

### Unit Testing

In [22]:
import unittest

class ModelTestCase(unittest.TestCase):
    """Tests for Model class."""
    nn=model(10)
    def test_forwardprop(self,nn=model(10)):
        """test forward prop"""
        nn.add(10,'SoftMax')
        self.assertTrue((nn.forwardprop(np.zeros((10,1)))==0.1*np.ones((10,1))).all())
        #self.assertEqual(nn.forwardprop(np.zeros((10,1))),0.1*np.ones((10,1)))
    def test_activation_SoftMax(self,nn=model(10)):
        """test ReLu and SoftMax"""
        
        self.assertTrue((nn.apply_activation(np.zeros((10,1)),'SoftMax')==0.1*np.ones((10,1))).all())
    def test_activation_ReLu(self,nn=model(10)):
        """test ReLu and ReLu"""
       
        self.assertTrue((nn.apply_activation(np.array([[1,-1],[2,-4]]),'ReLu')==np.array([[1,0],[2,0]])).all())
    def test_add(self,nn=model(10)):
        nn.add(10,'SoftMax')
        c=nn.layers
        
        self.assertTrue(nn.activations[-1]=='SoftMax'and nn.weights[c].shape[0]==10 and nn.bias[c].shape[0]==10)
if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

....
----------------------------------------------------------------------
Ran 4 tests in 0.004s

OK
