In [None]:
# loading libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder


from mlxtend.data import loadlocal_mnist
import platform

In [None]:
# Load the MNIST training and test sets from the IDX files in the working directory.
# This notebook expects the files to be named with a '.' before "idx" on Windows
# and with a '-' on every other platform, so the separator is chosen once and
# reused for all four paths.
idx_sep = '.' if platform.system() == 'Windows' else '-'

X_train, y_train = loadlocal_mnist(
    images_path='train-images' + idx_sep + 'idx3-ubyte',
    labels_path='train-labels' + idx_sep + 'idx1-ubyte',
)

# Fix: the non-Windows branch previously pointed at the 'train-*' files here,
# so the "test" set was silently a copy of the training set on Linux/macOS.
X_test, y_test = loadlocal_mnist(
    images_path='t10k-images' + idx_sep + 'idx3-ubyte',
    labels_path='t10k-labels' + idx_sep + 'idx1-ubyte',
)

print("train shape: ",X_train.shape)
print("test shape: ",X_test.shape)

train shape:  (60000, 784)
test shape:  (10000, 784)


In [None]:
# Put one sample per column (features x samples) and scale the pixel values.
# The divisor 256 is kept from the original notebook.
# NOTE: this cell rebinds X_train / X_test, so re-running it without reloading
# the data transposes and rescales them a second time.
X_train = np.transpose(X_train) / 256
X_test = np.transpose(X_test) / 256

# Labels become a single row of shape (1, n_samples). reshape(1, -1) infers the
# sample count instead of hard-coding 60000 / 10000, so a subset of the data
# works too.
y_train = y_train.reshape(1, -1)
y_test = y_test.reshape(1, -1)

In [None]:
# One-hot encode the labels. The encoder takes samples as rows, so the (1, n)
# label rows are transposed on the way in, and the encoded result is transposed
# back so that each column is one sample.
enc = OneHotEncoder()
y_train_transformed = np.transpose(
    enc.fit_transform(np.transpose(y_train)).toarray()
)

# Fix: reuse the encoder fitted on the training labels (transform, not
# fit_transform). Re-fitting on the test labels could produce a different
# column layout whenever the test set does not contain the same set of classes.
y_test_transformed = np.transpose(
    enc.transform(np.transpose(y_test)).toarray()
)

In [None]:
# initializing parameters for an L-layer neural network
def initialize_parameters(layers_neurons_list):
    """Create randomly initialised weights and biases for a fully connected net.

    Parameters
    ----------
    layers_neurons_list : sequence of int
        Number of units in each layer, input layer first.

    Returns
    -------
    dict
        Maps 'w<i>' to an array of shape (units[i], units[i-1]) and 'b<i>' to an
        array of shape (units[i], 1), for i = 1 .. len(layers_neurons_list) - 1.
        Both are drawn from np.random.randn and divided by sqrt(units[i-1]).
    """
    parameters = {}

    # Walk over consecutive (previous layer, current layer) width pairs.
    width_pairs = zip(layers_neurons_list, layers_neurons_list[1:])
    for layer, (fan_in, fan_out) in enumerate(width_pairs, start=1):
        scale = np.sqrt(fan_in)
        # Weights are drawn before biases so the RNG stream is consumed in a
        # fixed order.
        parameters['w' + str(layer)] = np.random.randn(fan_out, fan_in) / scale
        parameters['b' + str(layer)] = np.random.randn(fan_out, 1) / scale

    return parameters

In [None]:
# feed forward  { hidden layer activation sigmoid and output layer activation softmax }

def feed_forward(input_data,parameters):
    """Run a forward pass through the network.

    Hidden layers use a sigmoid activation; the last layer uses a softmax taken
    over axis 0 (one column per sample).

    Parameters
    ----------
    input_data : ndarray
        Input activations with one sample per column.
    parameters : dict
        Holds 'w<i>' and 'b<i>' for each layer i = 1 .. L; the number of layers
        is taken to be len(parameters) // 2.

    Returns
    -------
    cache : dict
        'a0' (the input) plus 'z<i>' (pre-activation) and 'a<i>' (activation)
        for every layer.
    output : ndarray
        Softmax output of the last layer (the same array as cache['a<L>']).
    """
    num_layers = len(parameters) // 2

    cache = {'a0': input_data}
    activation = input_data

    # Hidden layers: affine transform followed by the sigmoid.
    for i in range(1, num_layers):
        z = np.dot(parameters['w'+str(i)], activation) + parameters['b'+str(i)]
        cache['z'+str(i)] = z
        activation = 1/(1+np.exp(-z))
        cache['a'+str(i)] = activation

    # Output layer: affine transform followed by the softmax.
    logits = np.dot(parameters['w'+str(num_layers)], activation) + parameters['b'+str(num_layers)]
    cache['z'+str(num_layers)] = logits

    # Subtracting each column's maximum leaves the softmax unchanged
    # mathematically but keeps np.exp from overflowing to inf (which previously
    # turned the whole column into nan via inf/inf).
    shifted = logits - np.max(logits, axis=0, keepdims=True)
    exp_shifted = np.exp(shifted)
    output = exp_shifted / np.sum(exp_shifted, axis=0, keepdims=True)
    cache['a'+str(num_layers)] = output

    return cache,output

In [None]:
def cost_cross_entropy(output,labels,eps=1e-12):
    """Average cross-entropy between predicted probabilities and labels.

    Parameters
    ----------
    output : ndarray
        Predicted probabilities, same shape as `labels`.
    labels : ndarray
        Target weights (one-hot in this notebook) with one sample per column;
        labels.shape[1] is used as the number of samples.
    eps : float, optional
        Lower bound applied to `output` before taking the logarithm.

    Returns
    -------
    float
        sum(-log(max(output, eps)) * labels) / labels.shape[1]
    """
    # Without the floor, a probability of exactly 0 gives -log(0) = inf, and
    # inf * 0 (for a label of 0) is nan, which poisons the whole sum.
    safe_output = np.maximum(output, eps)
    return np.sum(-np.log(safe_output)*labels)/labels.shape[1]
    

In [None]:
def accuracy(x,y,para):
    """Percentage of samples whose highest-probability class matches the label.

    Parameters
    ----------
    x : ndarray
        Inputs passed to feed_forward (one sample per column).
    y : ndarray
        Label matrix with one column per sample (one-hot in this notebook).
    para : dict
        Network parameters passed to feed_forward.

    Returns
    -------
    float
        Sum over samples of y[predicted_class, sample], divided by y.shape[1]
        and multiplied by 100.
    """
    tuned_prob = feed_forward(x,para)[1]
    predicted = np.argmax(tuned_prob,axis=0)

    # Read the label entry at each sample's predicted class directly. This gives
    # the same total as building a dense one-hot prediction matrix and
    # multiplying it by y, without a Python loop over every sample.
    sample_index = np.arange(tuned_prob.shape[1])
    hits = y[predicted, sample_index]
    return (np.sum(hits)/y.shape[1])*100

In [None]:
def back_prop(cache,y,output,parameters):
    """Compute gradients of the average cross-entropy cost by backpropagation.

    Matches the forward pass in this notebook: sigmoid hidden layers and a
    softmax output layer, for which the output-layer error is output - y.

    Parameters
    ----------
    cache : dict
        Activations from the forward pass; reads 'a<i>' for i = 0 .. L-1
        (and for hidden layers i = 1 .. L-1).
    y : ndarray
        Label matrix with one column per sample; y.shape[1] is the sample count.
    output : ndarray
        Softmax output of the network, same shape as `y`.
    parameters : dict
        Holds 'w<i>' / 'b<i>' per layer; reads 'w<i+1>' to propagate the error.

    Returns
    -------
    dict
        'dz<i>', 'dw<i>', 'db<i>' for every layer and 'da<i>' for hidden layers.
    """
    grad = {}
    # Take the layer count from the parameters (two entries per layer), as the
    # forward pass and the update step do, rather than from the cache size.
    total_layers = len(parameters) // 2
    m = y.shape[1]

    dz = output - y
    for i in range(total_layers, 0, -1):
        if i < total_layers:
            # Push the error of layer i+1 back through its weights, then through
            # the sigmoid derivative a * (1 - a) of layer i.
            da = np.dot(np.transpose(parameters['w'+str(i+1)]), dz)
            grad['da'+str(i)] = da
            a = cache['a'+str(i)]
            dz = da * a * (1 - a)

        grad['dz'+str(i)] = dz
        grad['dw'+str(i)] = (1/m)*np.dot(dz, np.transpose(cache['a'+str(i-1)]))
        # keepdims keeps the bias gradient as a column, matching 'b<i>'.
        grad['db'+str(i)] = (1/m)*np.sum(dz, axis=1, keepdims=True)

    return grad
    

In [None]:
def update_parameters(parameters,grad,learning_rate=0.01):
    """Apply one gradient-descent step to every weight and bias.

    The `parameters` dict is modified in place: each 'w<i>' / 'b<i>' entry is
    rebound to a new array equal to the old value minus
    learning_rate * grad['dw<i>'] (or grad['db<i>']). Nothing is returned.
    """
    num_layers = int(len(parameters)/2)
    for layer in range(1, num_layers + 1):
        for kind in ('w', 'b'):
            key = kind + str(layer)
            step = learning_rate*grad['d' + key]
            # Rebind instead of using -= so the previous array object is left
            # untouched.
            parameters[key] = parameters[key] - step
    




In [None]:
# Seed NumPy's global RNG (the one initialize_parameters draws from) so that
# Restart & Run All reproduces the same starting weights and training curve.
SEED = 42
# Layer widths, input layer first: 784 input features (the width reported for
# the loaded images), one hidden layer of 128 units, 10 output classes.
LAYER_SIZES = [784, 128, 10]

np.random.seed(SEED)
parameters=initialize_parameters(LAYER_SIZES)


In [None]:
# Full-batch gradient descent on the whole training set.
NUM_EPOCHS = 449      # same number of iterations as the original range(1, 450)
LEARNING_RATE = 0.8

for epochs in range(1, NUM_EPOCHS + 1):
    # Accuracies are measured with the parameters as they are *before* this
    # epoch's update.
    accu_train=accuracy(X_train,y_train_transformed,parameters)
    accu_test=accuracy(X_test,y_test_transformed,parameters)

    # One forward pass supplies the cache, the output and the cost. Previously
    # feed_forward was called three separate times here on the same inputs and
    # parameters, producing identical results each time.
    cache, output = feed_forward(X_train,parameters)
    grad=back_prop(cache,y_train_transformed,output,parameters)
    cost=cost_cross_entropy(output,y_train_transformed)

    update_parameters(parameters,grad,learning_rate=LEARNING_RATE)
    print("epoch: "+str(epochs)+"-----"+"cost: "+str(cost)+"-----"+"accu_train: "+str(accu_train)[:4]+"-----accu_test: "+str(accu_test)[:4]) 

epoch: 1-----cost: 2.5830331683472085-----accu_train: 9.93-----accu_test: 10.3
epoch: 2-----cost: 2.7971122673802262-----accu_train: 15.6-----accu_test: 16.1
epoch: 3-----cost: 2.4647074982145156-----accu_train: 12.8-----accu_test: 13.0
epoch: 4-----cost: 2.4569399905710427-----accu_train: 19.0-----accu_test: 19.4
epoch: 5-----cost: 2.237219417339316-----accu_train: 26.5-----accu_test: 26.0
epoch: 6-----cost: 2.1716324246643497-----accu_train: 24.2-----accu_test: 24.2
epoch: 7-----cost: 2.0692252088740997-----accu_train: 28.6-----accu_test: 28.4
epoch: 8-----cost: 2.00014635553658-----accu_train: 27.8-----accu_test: 28.0
epoch: 9-----cost: 1.942968408187343-----accu_train: 36.9-----accu_test: 37.8
epoch: 10-----cost: 1.861492864433213-----accu_train: 34.8-----accu_test: 34.9
epoch: 11-----cost: 1.8147006299175499-----accu_train: 46.3-----accu_test: 47.0
epoch: 12-----cost: 1.7293972122998107-----accu_train: 41.7-----accu_test: 42.5
epoch: 13-----cost: 1.6895735515794676-----accu_train:

epoch: 104-----cost: 0.421386997608091-----accu_train: 88.8-----accu_test: 89.4
epoch: 105-----cost: 0.41972960443405116-----accu_train: 88.8-----accu_test: 89.4
epoch: 106-----cost: 0.41810529790563106-----accu_train: 88.8-----accu_test: 89.5
epoch: 107-----cost: 0.41651291708612276-----accu_train: 88.9-----accu_test: 89.5
epoch: 108-----cost: 0.4149512555122493-----accu_train: 88.9-----accu_test: 89.5
epoch: 109-----cost: 0.41341928477683715-----accu_train: 88.9-----accu_test: 89.5
epoch: 110-----cost: 0.41191601240970693-----accu_train: 89.0-----accu_test: 89.5
epoch: 111-----cost: 0.41044053282251286-----accu_train: 89.0-----accu_test: 89.6
epoch: 112-----cost: 0.4089919879367097-----accu_train: 89.0-----accu_test: 89.6
epoch: 113-----cost: 0.40756957055243287-----accu_train: 89.0-----accu_test: 89.6
epoch: 114-----cost: 0.40617251511103536-----accu_train: 89.1-----accu_test: 89.6
epoch: 115-----cost: 0.40480009143808504-----accu_train: 89.1-----accu_test: 89.7
epoch: 116-----cost:

epoch: 205-----cost: 0.3348605906277631-----accu_train: 90.5-----accu_test: 90.8
epoch: 206-----cost: 0.33440130100373067-----accu_train: 90.5-----accu_test: 90.8
epoch: 207-----cost: 0.33394575991288783-----accu_train: 90.5-----accu_test: 90.8
epoch: 208-----cost: 0.3334939111130853-----accu_train: 90.5-----accu_test: 90.8
epoch: 209-----cost: 0.33304569948195917-----accu_train: 90.5-----accu_test: 90.8
epoch: 210-----cost: 0.33260107098885755-----accu_train: 90.6-----accu_test: 90.8
epoch: 211-----cost: 0.3321599726676066-----accu_train: 90.6-----accu_test: 90.8
epoch: 212-----cost: 0.3317223525900904-----accu_train: 90.6-----accu_test: 90.8
epoch: 213-----cost: 0.3312881598406132-----accu_train: 90.6-----accu_test: 90.9
epoch: 214-----cost: 0.33085734449101695-----accu_train: 90.6-----accu_test: 90.9
epoch: 215-----cost: 0.33042985757653204-----accu_train: 90.6-----accu_test: 90.9
epoch: 216-----cost: 0.3300056510723337-----accu_train: 90.6-----accu_test: 90.9
epoch: 217-----cost: 0

epoch: 306-----cost: 0.3011017312740523-----accu_train: 91.3-----accu_test: 91.8
epoch: 307-----cost: 0.30085165221189003-----accu_train: 91.3-----accu_test: 91.8
epoch: 308-----cost: 0.3006026471041999-----accu_train: 91.3-----accu_test: 91.8
epoch: 309-----cost: 0.30035470485494226-----accu_train: 91.4-----accu_test: 91.8
epoch: 310-----cost: 0.30010781451165897-----accu_train: 91.4-----accu_test: 91.8
epoch: 311-----cost: 0.29986196526315667-----accu_train: 91.4-----accu_test: 91.8
epoch: 312-----cost: 0.29961714643723525-----accu_train: 91.4-----accu_test: 91.8
epoch: 313-----cost: 0.29937334749846256-----accu_train: 91.4-----accu_test: 91.8
epoch: 314-----cost: 0.29913055804599264-----accu_train: 91.4-----accu_test: 91.8
epoch: 315-----cost: 0.29888876781142576-----accu_train: 91.4-----accu_test: 91.8
epoch: 316-----cost: 0.2986479666567137-----accu_train: 91.4-----accu_test: 91.8
epoch: 317-----cost: 0.29840814457210296-----accu_train: 91.4-----accu_test: 91.8
epoch: 318-----cost

epoch: 407-----cost: 0.279865214123617-----accu_train: 91.9-----accu_test: 92.2
epoch: 408-----cost: 0.2796846956485229-----accu_train: 91.9-----accu_test: 92.2
epoch: 409-----cost: 0.27950459843195186-----accu_train: 91.9-----accu_test: 92.2
epoch: 410-----cost: 0.2793249190035626-----accu_train: 91.9-----accu_test: 92.2
epoch: 411-----cost: 0.2791456539293488-----accu_train: 91.9-----accu_test: 92.2
epoch: 412-----cost: 0.27896679981122524-----accu_train: 91.9-----accu_test: 92.2
epoch: 413-----cost: 0.27878835328662227-----accu_train: 91.9-----accu_test: 92.2
epoch: 414-----cost: 0.2786103110280832-----accu_train: 91.9-----accu_test: 92.2
epoch: 415-----cost: 0.2784326697428707-----accu_train: 92.0-----accu_test: 92.2
epoch: 416-----cost: 0.2782554261725752-----accu_train: 92.0-----accu_test: 92.2
epoch: 417-----cost: 0.2780785770927316-----accu_train: 92.0-----accu_test: 92.2
epoch: 418-----cost: 0.27790211931243874-----accu_train: 92.0-----accu_test: 92.2
epoch: 419-----cost: 0.27

# Highest accuracy on the test set

## epoch: 449-----cost: 0.2726100460281659-----accu_train: 92.1-----accu_test: 92.4


