In [90]:
import numpy as np
# Read the feature and label matrices for the training and test splits from
# comma-separated text files located in the working directory.
X_train = np.loadtxt('train_X.csv', delimiter=',')
Y_train = np.loadtxt('train_label.csv', delimiter=',')

X_test = np.loadtxt('test_X.csv', delimiter=',')
Y_test = np.loadtxt('test_label.csv', delimiter=',')

# Sanity check: show the shape of each loaded array, one per line.
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape, sep='\n')

(1000, 784)
(1000, 10)
(350, 784)
(350, 10)


In [91]:
def tanh(x):
    """Element-wise hyperbolic tangent.

    Delegates to ``np.tanh`` instead of evaluating
    ``(e^x - e^-x) / (e^x + e^-x)`` explicitly: for large ``|x|`` the
    explicit form overflows inside ``np.exp`` and produces ``inf / inf = nan``,
    whereas ``np.tanh`` saturates cleanly at -1 / +1.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    Same shape as ``x``, with values in [-1, 1].
    """
    return np.tanh(x)
def dertanh(x):
    """Derivative of tanh evaluated at ``x``: ``1 - tanh(x)**2``, element-wise."""
    activation = np.tanh(x)
    return 1 - np.power(activation, 2)
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Each row is shifted by its own maximum before exponentiation. The shift
    cancels in the ratio, so the result is mathematically unchanged, but the
    largest exponent becomes ``exp(0) = 1``; without it, large scores overflow
    in ``np.exp`` and the row turns into ``nan``.

    Parameters
    ----------
    x : array of shape (rows, classes)
        Raw scores; normalisation runs along axis 1.

    Returns
    -------
    Array of the same shape whose rows are non-negative and sum to 1.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    expX = np.exp(shifted)
    return expX / np.sum(expX, axis=1, keepdims=True)

In [92]:
def initializeparameters(n0,n1,n2):
    """Create the initial weights and biases of the two-layer network.

    Weights are standard-normal samples from NumPy's global random state,
    scaled by 0.01; biases are zero column vectors.

    Returns a dict with ``W1`` (n1, n0), ``B1`` (n1, 1), ``W2`` (n2, n1)
    and ``B2`` (n2, 1).
    """
    scale = 0.01
    # Draw W1 before W2 so the global random stream is consumed in a fixed order.
    hidden_weights = np.random.randn(n1, n0) * scale
    output_weights = np.random.randn(n2, n1) * scale
    return {
        'W1': hidden_weights,
        'B1': np.zeros((n1, 1)),
        'W2': output_weights,
        'B2': np.zeros((n2, 1)),
    }
def forwardpropogation(X,parameters):
    """Run one forward pass through the tanh hidden layer and softmax output.

    Each pre-activation is computed weights-first (``W . X^T + B``) and then
    transposed back, so for a 2-D ``X`` every returned array has one row per
    row of ``X``.

    Returns a dict holding the pre-activations ``Z1``/``Z2`` and the
    activations ``A1`` (tanh) / ``A2`` (softmax).
    """
    w_hidden, w_out, b_hidden, b_out = (
        parameters[key] for key in ('W1', 'W2', 'B1', 'B2')
    )

    hidden_pre = (np.dot(w_hidden, X.T) + b_hidden).T
    hidden_act = tanh(hidden_pre)

    output_pre = (np.dot(w_out, hidden_act.T) + b_out).T
    output_act = softmax(output_pre)

    return {
        'Z1': hidden_pre,
        'A1': hidden_act,
        'Z2': output_pre,
        'A2': output_act,
    }
def costfunction(Y,A2):
    """Mean categorical cross-entropy between labels ``Y`` and predictions ``A2``.

    Computes ``-(1/m) * sum(Y * log(A2))`` where ``m`` is the number of rows
    of ``Y``.

    Predicted probabilities are clipped from below to a tiny positive value
    before the logarithm. Without that, a probability that underflows to
    exactly 0 gives ``log(0) = -inf`` and the product with a zero label
    becomes ``0 * -inf = nan``, which poisons the whole sum.

    Parameters
    ----------
    Y : array (m, classes)
        Target distribution per row (e.g. one-hot labels).
    A2 : array (m, classes)
        Predicted probabilities per row.

    Returns
    -------
    Scalar cost.
    """
    m = Y.shape[0]
    eps = 1e-12  # lower bound that keeps log() finite
    log_probs = np.log(np.clip(A2, eps, None))
    cost = -(1/m)*np.sum(Y*log_probs)
    return cost
def backwardpropagation(X,Y,parameters,forward_cache):
    """Gradients of the mean cross-entropy cost w.r.t. W1, B1, W2 and B2.

    Parameters
    ----------
    X : array (m, n0)
        Inputs used for the forward pass.
    Y : array (m, n2)
        Target rows; ``m`` is taken from ``Y.shape[0]``.
    parameters : dict
        Only ``'W2'`` is read here.
    forward_cache : dict
        Must contain ``'A1'`` (hidden tanh activations) and ``'A2'``
        (softmax outputs) from the matching forward pass.

    Returns
    -------
    dict with ``dW1``, ``dB1``, ``dW2``, ``dB2``, each shaped like the
    parameter it corresponds to.
    """
    W2 = parameters['W2']
    A1 = forward_cache['A1']
    A2 = forward_cache['A2']
    m = Y.shape[0]

    # Output layer: softmax combined with cross-entropy gives (A2 - Y).
    dZ2 = A2 - Y
    dW2 = (1/m)*np.dot(dZ2.T, A1)
    dB2 = (1/m)*np.sum(dZ2, axis=0, keepdims=True).T

    # Hidden layer: tanh'(Z1) = 1 - tanh(Z1)^2 = 1 - A1^2, so the cached
    # activation is reused instead of evaluating tanh over Z1 a second time.
    dA1 = np.dot(dZ2, W2)
    dZ1 = dA1*(1 - A1*A1)
    dW1 = (1/m)*np.dot(dZ1.T, X)
    dB1 = (1/m)*np.sum(dZ1, axis=0, keepdims=True).T

    gradients = {
        'dW1': dW1,
        'dB1': dB1,
        'dW2': dW2,
        'dB2': dB2
    }
    return gradients
def updateparameters(parameters,gradients,learningrate):
    """Apply one gradient-descent step and return the updated parameters.

    Each parameter ``P`` in (W1, B1, W2, B2) becomes
    ``P - learningrate * dP``. The result is built from fresh arrays: the
    arrays held by the ``parameters`` argument are left untouched, so a
    caller that keeps the previous dict (e.g. for comparison or rollback)
    does not see it change underneath them.

    Parameters
    ----------
    parameters : dict with keys 'W1', 'B1', 'W2', 'B2'
    gradients : dict with keys 'dW1', 'dB1', 'dW2', 'dB2'
    learningrate : float
        Step size.

    Returns
    -------
    New dict with keys 'W1', 'B1', 'W2', 'B2'.
    """
    return {
        name: parameters[name] - learningrate*gradients['d' + name]
        for name in ('W1', 'B1', 'W2', 'B2')
    }
def model(X,Y,n1,learningrate,iterations):
    """Train the two-layer network with full-batch gradient descent.

    Parameters
    ----------
    X : array (m, n0)
        Training inputs; ``n0`` is taken from ``X.shape[1]``.
    Y : array (m, n2)
        Training targets; ``n2`` is taken from ``Y.shape[1]``.
    n1 : int
        Number of hidden units.
    learningrate : float
        Gradient-descent step size.
    iterations : int
        Number of parameter updates to perform.

    Returns
    -------
    (parameters, costlist) : the trained parameter dict and the costs that
    were recorded at the logging steps.
    """
    n0 = X.shape[1]
    n2 = Y.shape[1]
    costlist = []
    parameters = initializeparameters(n0, n1, n2)

    # Log roughly ten times per run. The interval is an integer of at least 1:
    # a float interval (iterations/10) logs every step when iterations < 10 and
    # at irregular steps when iterations is not a multiple of 10.
    log_every = max(1, iterations // 10)

    for i in range(iterations):
        forward_cache = forwardpropogation(X, parameters)
        # Cost is measured with the parameters as they were before this step's update.
        cost = costfunction(Y, forward_cache['A2'])
        gradients = backwardpropagation(X, Y, parameters, forward_cache)
        parameters = updateparameters(parameters, gradients, learningrate)
        if i % log_every == 0:
            costlist.append(cost)
            print('cost after ',i,'th iterations is ',cost)
    return parameters,costlist

In [93]:
# Fit the network on the training split: 100 hidden units, step size 0.05,
# 900 gradient-descent iterations.
parameters, costlist = model(
    X_train,
    Y_train,
    n1=100,
    learningrate=0.05,
    iterations=900,
)

cost after  0 th iterations is  2.3079089673988737
cost after  90 th iterations is  0.15019614345366408
cost after  180 th iterations is  0.06296696929333889
cost after  270 th iterations is  0.0453066521147352
cost after  360 th iterations is  0.03612514736846104
cost after  450 th iterations is  0.03017478001643724
cost after  540 th iterations is  0.025902022524667768
cost after  630 th iterations is  0.02236105633179549
cost after  720 th iterations is  0.019845346015825386
cost after  810 th iterations is  0.017758172566727917


In [94]:
def accuracy(X,Y,parameters):
    """Percentage of rows classified correctly, rounded to two decimals.

    A row counts as correct when the index of the largest predicted
    probability equals the index of the largest entry in the matching row
    of ``Y``.
    """
    probabilities = forwardpropogation(X, parameters)['A2']
    predicted = np.argmax(probabilities, 1)
    expected = np.argmax(Y, 1)
    total = predicted.shape[0]
    hits = sum(1 for row in range(0, total) if predicted[row] == expected[row])
    return round((hits/total)*100, 2)
# Report accuracy on the training split first, then on the test split.
print(
    'accuracy of the model on training set',
    accuracy(X_train, Y_train, parameters),
    '%',
)
print(
    'accuracy of the model on testing set',
    accuracy(X_test, Y_test, parameters),
    '%',
)

accuracy of the model on training set 99.9 %
accuracy of the model on testing set 86.0 %
