In [1]:
#Making all necessary imports
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
#Fetching the Dataset 
X,y = fetch_openml("mnist_784", version = 1, return_X_y = True)

X = np.array(X)
y = np.int_(y)

In [3]:
#defining one hot encoding on y to simplify error checking 
def one_hot_encoding(y):
    one_hot_y = np.zeros((np.amax(y) +1, y.size))
    one_hot_y[y, np.arange(y.size)] = 1
    return one_hot_y.T

In [4]:
#performing one hot encoding
y = one_hot_encoding(y)

In [5]:
#defining the activiation sigmoid function and its derivative
def sigmoid(x):
    return 1/(1+np.exp(-x))

def deriv_sigmoid(x):
    return sigmoid(x)*(1 - sigmoid(x))

In [6]:
#defining the functions necessary fer training the network

#initializong weights and biases
def init_par():
    w1 = np.random.randn(25, 784)
    b1 = np.random.randn(25, 1)
    w2 = np.random.randn(10, 25)
    b2 = np.random.randn(10, 1)
    
    return w1, b1, w2, b2

#defining the forward prop a0 -> a1 -> a2
def fw_prop(w1, b1, w2, b2, X):
    a0 = X.T
    z1 = w1.dot(a0) + b1
    a1 = sigmoid(z1)
    z2 = w2.dot(a1) + b2
    a2 = sigmoid(z2)
    
    return z1, a1, z2, a2


#defining backward prop to obtain parameters for updation
def bk_prop(z1, a1, z2, a2, w2, X, y):
    m = X.shape[0]
    p = 1/m
    
    da2 = (a2 - y.T)*deriv_sigmoid(z2)
    dw2 = p*da2.dot(a1.T)
    db2 = np.array([p*np.sum(da2)]).T
    
    da1 = w2.T.dot(da2)*deriv_sigmoid(z1) 
    dw1 = p*da1.dot(X)
    db1 = np.array([p*np.sum(da1)]).T
    
    return dw1, db1, dw2, db2


#function to update parameters
def upd_par(w1, b1, w2, b2, dw1, db1, dw2, db2, lr):
    w1 -= lr * dw1
    b1 -= lr * db1
    w2 -= lr * dw2
    b2 -= lr * db2
    
    return w1, b1, w2, b2

In [7]:
#Geadient descent function to train the neural net
def gradient_descent(x, Y, epochs, alpha):
    W1, b1 , W2, b2 = init_par()
    
    for i in range(epochs):
        Z1, A1, Z2, A2 = fw_prop(W1, b1 , W2, b2, x)
        dW1, db1, dW2, db2 = bk_prop(Z1, A1, Z2, A2, W2, x, Y)
        W1, b1 , W2, b2 = upd_par(W1, b1 , W2, b2, dW1, db1, dW2, db2, alpha)
        
    return W1, b1, W2, b2, A2

In [8]:
#function to test the accuracy fot the NN for the test set
def testNN(X, W1, b1 , W2, b2):
    a0 = X.T
    a1 = sigmoid(W1.dot(a0) + b1)
    a2 = sigmoid(W2.dot(a1) + b2)
    
    return a2    

In [9]:
#function to determine the score of the neural net
def scoring(a2 ,y):
    print(y)
    print(a2)
    error = np.mean(np.abs(y - a2))
    score = 1 - error
    return score 

In [10]:
#Dividing dataset into test and train
from sklearn.model_selection import train_test_split
print(X.shape)
print(y.shape)

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = (1/7), random_state = 42)
print(len(X_train), len(X_test))

alpha = 0.1
epochs = 1000

(70000, 784)
(70000, 10)
60000 10000


In [11]:
#training the network to get necessary parameters
W1, b1, W2, b2, A2 = gradient_descent(X_train, y_train, epochs, alpha)

  return 1/(1+np.exp(-x))


In [12]:
#determining the train score
score = scoring(A2.T, y_train)
print('training score:', score)

[[0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 1. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[[1.01208309e-04 1.05408424e-01 2.52551610e-01 ... 4.93542947e-02
  1.22723186e-04 6.20149248e-02]
 [5.52023932e-03 4.09838923e-02 2.53198528e-02 ... 1.84364945e-01
  7.08530305e-05 9.64350899e-02]
 [5.42781235e-03 2.08597502e-01 5.02082415e-04 ... 1.34980499e-03
  5.14450869e-02 2.70113956e-02]
 ...
 [2.97571632e-02 1.48094182e-01 5.33737095e-04 ... 2.21412098e-02
  5.70365404e-04 3.92480991e-03]
 [1.98995321e-04 6.83783087e-03 7.38866832e-02 ... 1.31095861e-02
  4.34528204e-04 2.67014635e-02]
 [5.26937065e-04 3.43367494e-01 3.12904618e-03 ... 1.57305462e-03
  4.54893962e-03 8.78443858e-02]]
training score: 0.8783338390506237


In [13]:
#determining the test score
a2_test = testNN(X_test, W1, b1, W2, b2)
score = scoring(a2_test.T, y_test)
print('test score:', score)

[[0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[0.00140263 0.02351121 0.00077194 ... 0.00914311 0.00061805 0.10159876]
 [0.00790896 0.04308933 0.00048213 ... 0.02262927 0.00014331 0.03649703]
 [0.03326418 0.02432448 0.0047906  ... 0.00558903 0.00172284 0.0189213 ]
 ...
 [0.00061282 0.00926022 0.00033816 ... 0.00774748 0.0008373  0.04528028]
 [0.00425201 0.01119353 0.00211313 ... 0.00325804 0.00256842 0.03922223]
 [0.00187138 0.0230825  0.00034139 ... 0.00704677 0.00349285 0.09409049]]
test score: 0.877754164845185


  return 1/(1+np.exp(-x))
