### Importing mnist dataset 

In [355]:
from tensorflow.keras.datasets import mnist

In [437]:
mnist_data=mnist.load_data()

In [438]:
(x_train,y_train),(x_test,y_test)=mnist_data

In [439]:
x_train.shape,y_train.shape

((60000, 28, 28), (60000,))

In [440]:
from sklearn.model_selection import train_test_split
import numpy as np

In [441]:
# x_train,x_test,y_train,y_test=train_test_split(x_train,y_train,train_size=5000,test_size=1000)

In [442]:
x_train=np.reshape(x_train,(60000,784))

In [443]:
x_train=x_train/255

In [444]:
x_train.shape

(60000, 784)

### convert output to one-hot encoding

In [445]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

In [446]:
def hot_one_encoding(y):
    encoder = LabelEncoder()
    encoder.fit(y)
    encoded_Y = encoder.transform(y)
        # convert integers to dummy variables (i.e. one hot encoded)
    hot_encoded = np_utils.to_categorical(encoded_Y)
#     return hot_encoded.T
    return hot_encoded.T

In [447]:
hot_one_encoding(y_train).shape

(10, 60000)

In [461]:
np.sum(hot_one_encoding(y_train))

60000.0

In [448]:
### forward propogation with one hidden layer
### there were 10 units in layer1
### there were 10 units in out layer

### For 1st Hidden Layer

In [449]:
def initial_parameter(x_train):
#     x_train=np.reshape(x_train,(5000,784))
    m,input_param=x_train.shape
    n_units_hidden=10
    
    w1=np.random.rand(input_param,n_units_hidden)-0.5
    ##w1--784 X 10
    b1=np.random.rand(n_units_hidden,1)-0.5
    ##w1--10 X 1
    w2=np.random.rand(n_units_hidden,n_units_hidden)-0.5
    ##w1--10 X 10
    b2=np.random.rand(n_units_hidden,1)-0.5
    ##w1--10 X 1
    return w1,b1,w2,b2

In [450]:
def forward_propogation(x_train,w1,b1,w2,b2):
#     print("shape of x_train",x_train.shape,"shape of w1",w1.shape,"shape of b1",b1.shape,"shape of w2",w2.shape,"shape of b2",b2.shape)
    A0=x_train.T
    #A0--784 X m
    z1=np.dot(w1.T,A0)+b1 
    ##w1.T--10 X 784 A0--784 X m
    ##z1--10 X m
    A1=relu(z1)
    ##A1--10 X m
    z2=np.dot(w2,A1)+b2
    ##w2--10 X 10 A1--10 X m
    ##z2--10 X m
    A2=softmax(z2)
    ##A2--10 X m
    return z1,A1,z2,A2

In [451]:
def softmax(z):
#     f=np.exp(z-np.max(z,axis=0))
    f=np.exp(z)
    return f/np.sum(f,axis=0)

In [452]:
def relu(z1):
    return np.maximum(z1,0)

### Get Pridiction

In [453]:
def get_prediction(A2):
    return np.argmax(A2,0)

### Accuracy Calculation

In [454]:
def accuracy(prediction,y):
    print("correct outputs ", np.sum(prediction==y),"out of ", len(y))
#     print(np.sum(prediction==y))
    return np.sum(prediction==y)/len(y)

### Backward Propogation

In [455]:
def backward_propogation(w1,w2,A1,A2,z1,z2,x,y):
    m=len(y)
    dz2=A2-hot_one_encoding(y)
    ##A2--10 X m dz2--10 X m
    ##dz2--10 X m
    dw2=1/m*(np.dot(dz2,A1.T))
    ##dz2--10 X m A1.T-- m X 10
    ##dw2--10 X 10
#     db2=1/m*(np.sum(dz2,1,keepdims=True))
    db2=1/m*np.sum(dz2)
    ##db2--10 X 1
    dz1=np.dot(w2.T,dz2)*derivative_relu(z1)
    ##w2--10 X 10 dz2--10 X m z1--10 X m
    ##dz1--10 X m
    dw1=1/m*(np.dot(dz1,x))
    ##dz1--10 X m  x--m X 784
    ##dw1--10 X 784
    db1=1/m*np.sum(dz1)
    ##dz1--10 X m
    ##db1--10 X 1
    return dw1,db1,dw2,db2
    

In [456]:
def update_parameter(w1,dw1,b1,db1,w2,dw2,b2,db2,l_r):
    w1=w1-l_r*dw1.T
    b1=b1-l_r*db1
    w2=w2-l_r*dw2
    b2=b2-l_r*db2
    return w1,b1,w2,b2
    

In [457]:
def derivative_relu(z):
    return z>0

In [458]:
def gradient_descent(x,y,no_iteration,l_r):
    w1,b1,w2,b2=initial_parameter(x)
    for i in range(no_iteration):
        z1,A1,z2,A2=forward_propogation(x,w1,b1,w2,b2)
        dw1,db1,dw2,db2=backward_propogation(w1,w2,A1,A2,z1,z2,x,y)
        w1,b1,w2,b2=update_parameter(w1,dw1,b1,db1,w2,dw2,b2,db2,l_r)
        if (i%50==0):
            print("itration:", i)
            print("accuracy:",accuracy(get_prediction(A2),y))
    return w1,b1,w2,b2
        
    

In [460]:
gradient_descent(x_train,y_train,500,0.1)

itration: 0
correct outputs  6763 out of  60000
accuracy: 0.11271666666666667
itration: 50
correct outputs  33776 out of  60000
accuracy: 0.5629333333333333
itration: 100
correct outputs  42200 out of  60000
accuracy: 0.7033333333333334
itration: 150
correct outputs  45366 out of  60000
accuracy: 0.7561
itration: 200
correct outputs  47105 out of  60000
accuracy: 0.7850833333333334
itration: 250
correct outputs  48325 out of  60000
accuracy: 0.8054166666666667
itration: 300
correct outputs  49160 out of  60000
accuracy: 0.8193333333333334
itration: 350
correct outputs  49770 out of  60000
accuracy: 0.8295
itration: 400
correct outputs  50225 out of  60000
accuracy: 0.8370833333333333
itration: 450
correct outputs  50637 out of  60000
accuracy: 0.84395


(array([[-0.29506211, -0.21792867, -0.24853345, ...,  0.17228014,
         -0.13386121, -0.22401101],
        [-0.42842424,  0.28404602, -0.11778225, ...,  0.12717305,
         -0.19821192,  0.21218229],
        [-0.30647306, -0.23761044,  0.04079665, ..., -0.03780257,
         -0.43847643, -0.09675978],
        ...,
        [-0.21808602,  0.34437177, -0.12970453, ...,  0.4831638 ,
          0.04722593,  0.42281829],
        [ 0.33025018, -0.4424234 ,  0.36884073, ...,  0.4508786 ,
         -0.35996143, -0.42066507],
        [-0.34260181, -0.44659152, -0.45711022, ...,  0.40322488,
          0.06045593,  0.15804158]]),
 array([[0.71831544],
        [0.33017504],
        [1.13252562],
        [0.95224621],
        [0.81861369],
        [0.4408373 ],
        [0.98893046],
        [1.06573395],
        [0.96343941],
        [0.47795732]]),
 array([[ 0.0811205 ,  0.28042149,  0.73067641, -0.26083963,  0.09288527,
          0.79068118, -0.31959682, -0.72698541, -0.54579437, -0.23450396],
  