In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report

In [2]:
columns=['Pixel'+str(x) for x in range(784)]
train_images=pd.read_csv('./train_image.csv',header=None)
train_images.columns=columns
train_labels=pd.read_csv('./train_label.csv',header=None)
train_labels.columns=['Label']
test_images=pd.read_csv('./test_image.csv',header=None)
test_images.columns=columns
test_labels=pd.read_csv('./test_label.csv',header=None)
test_labels.columns=['Label']
train_data=train_labels.join(train_images)

In [3]:
train_data=np.array(train_data)
np.random.shuffle(train_data)
n_rows,n_columns=train_data.shape
print(train_data.shape)

(60000, 785)


In [4]:
cross_validation_datasplit=train_data[0:1000].T
y_cross=cross_validation_datasplit[0]
x_cross=cross_validation_datasplit[1:n_columns]
x_cross=x_cross/255.

In [5]:
train_datasplit=train_data[1000:n_rows].T
y_train=train_datasplit[0]
x_train=train_datasplit[1:n_columns]
x_train=x_train/255.
x_train_rows,x_train_columns=x_train.shape

In [6]:
def initialize_neural_network():
    W1=np.random.rand(32,784)-0.5
    b1=np.random.rand(32,1)-0.5
    W2=np.random.rand(16,32)-0.5
    b2=np.random.rand(16,1)-0.5
    W3=np.random.rand(10,16)-0.5
    b3=np.random.rand(10,1)-0.5
    return W1,b1,W2,b2,W3,b3

In [7]:
def sigmoid_activation_function(X):
    return 1/(1+np.exp(-X))

In [8]:
def softmax_activation_function(X):
    return np.exp(X)/sum(np.exp(X))

In [9]:
def forward_propagation(W1,b1,W2,b2,W3,b3,X):
    Z1=np.dot(W1,X)+b1
    A1=sigmoid_activation_function(Z1)
    Z2=np.dot(W2,A1)+b2
    A2=sigmoid_activation_function(Z2)
    Z3=np.dot(W3,A2)+b3
    A3=softmax_activation_function(Z3)
    return Z1,A1,Z2,A2,Z3,A3

In [10]:
def one_hot_encoding(y_pred):
    one_hot_encoded_Y = np.zeros((y_pred.size, y_pred.max() + 1))
    one_hot_encoded_Y[np.arange(y_pred.size), y_pred] = 1
    one_hot_encoded_Y = one_hot_encoded_Y.T
    return one_hot_encoded_Y

In [11]:
def back_propogation(Z1,A1,W1,Z2,A2,W2,Z3,A3,W3,X,Y):
    one_hot_encoded_Y = one_hot_encoding(Y)
    dZ3=A3-one_hot_encoded_Y
    dW3=(1./x_train_rows)*np.dot(dZ3,A2.T)
    db3=(1./x_train_rows)*np.sum(dZ3)
    dA2=np.dot(W3.T,dZ3)
    dZ2=dA2*sigmoid_activation_function(Z2)*(1-sigmoid_activation_function(Z2))
    dW2=(1./x_train_rows)*np.dot(dZ2,A1.T)
    db2=(1./x_train_rows)*np.sum(dZ2)
    dA1=np.dot(W2.T,dZ2)
    dZ1=dA1*sigmoid_activation_function(Z1)*(1-sigmoid_activation_function(Z1))
    dW1=(1./x_train_rows)*np.dot(dZ1,X.T)
    db1=(1./x_train_rows)*np.sum(dZ1)
    return dW1,db1,dW2,db2,dW3,db3

In [12]:
def update_parameters(W1,b1,dW1,db1,W2,b2,dW2,db2,W3,b3,dW3,db3,alpha):
    W1=W1-alpha*dW1
    b1=b1-alpha*db1
    W2=W2-alpha*dW2
    b2=b2-alpha*db2
    W3=W3-alpha*dW3
    b3=b3-alpha*db3
    return W1,b1,W2,b2,W3,b3

In [13]:
def gradient_decent(X,Y,alpha,iterations):
    W1,b1,W2,b2,W3,b3=initialize_neural_network()
    for i in range(iterations):
        Z1,A1,Z2,A2,Z3,A3=forward_propagation(W1,b1,W2,b2,W3,b3,X)
        dW1,db1,dW2,db2,dW3,db3=back_propogation(Z1,A1,W1,Z2,A2,W2,Z3,A3,W3,X,Y)
        W1,b1,W2,b2,W3,b3=update_parameters(W1,b1,dW1,db1,W2,b2,dW2,db2,W3,b3,dW3,db3,alpha)
        if i % 10 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(A3)
            print(get_accuracy(predictions, Y))
    return W1,b1,W2,b2,W3,b3

In [14]:
def get_predictions(A3):
    return np.argmax(A3,0)

In [15]:
def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

In [16]:
W1,b1,W2,b2,W3,b3=gradient_decent(x_train,y_train,0.10,1000)

Iteration:  0
[1 1 1 ... 1 1 1] [6 4 4 ... 6 8 8]
0.11230508474576271
Iteration:  10
[7 7 7 ... 7 2 7] [6 4 4 ... 6 8 8]
0.22852542372881357
Iteration:  20
[4 4 4 ... 4 4 4] [6 4 4 ... 6 8 8]
0.45528813559322034
Iteration:  30
[6 4 4 ... 6 8 4] [6 4 4 ... 6 8 8]
0.6585593220338983
Iteration:  40
[2 4 4 ... 6 2 4] [6 4 4 ... 6 8 8]
0.7509152542372881
Iteration:  50
[6 4 4 ... 6 8 4] [6 4 4 ... 6 8 8]
0.702322033898305
Iteration:  60
[6 4 4 ... 6 8 8] [6 4 4 ... 6 8 8]
0.8270847457627118
Iteration:  70
[6 4 4 ... 6 3 8] [6 4 4 ... 6 8 8]
0.8392542372881356
Iteration:  80
[6 4 4 ... 6 2 8] [6 4 4 ... 6 8 8]
0.8759322033898305
Iteration:  90
[6 4 4 ... 6 2 8] [6 4 4 ... 6 8 8]
0.8936610169491526
Iteration:  100
[2 4 4 ... 6 2 9] [6 4 4 ... 6 8 8]
0.815677966101695
Iteration:  110
[6 4 4 ... 6 2 8] [6 4 4 ... 6 8 8]
0.9179830508474577
Iteration:  120
[6 4 4 ... 6 2 8] [6 4 4 ... 6 8 8]
0.9228983050847458
Iteration:  130
[6 4 4 ... 6 2 8] [6 4 4 ... 6 8 8]
0.9271864406779661
Iteration:  140


In [17]:
Z1,A1,Z2,A2,Z3,A3=forward_propagation(W1,b1,W2,b2,W3,b3,x_cross)

In [18]:
predictions = get_predictions(A3)

In [19]:
get_accuracy(predictions, y_cross)

[1 2 8 3 3 5 4 0 8 2 9 6 3 2 2 0 3 6 1 8 2 9 6 0 5 1 0 7 4 8 2 2 6 3 1 2 3
 5 1 9 5 6 2 5 9 0 5 1 8 5 2 8 9 7 1 1 0 6 1 7 3 6 6 2 8 6 7 1 9 4 5 7 5 3
 7 5 0 4 2 7 4 8 1 9 9 6 8 6 8 8 8 6 8 7 0 6 8 0 8 8 6 4 0 1 4 7 9 8 9 7 6
 9 4 2 8 5 5 5 6 0 8 3 5 7 8 6 1 7 2 6 2 8 6 5 7 3 6 9 1 8 7 4 3 5 3 7 9 0
 3 0 6 1 9 2 3 3 5 0 1 4 4 4 6 1 5 7 5 7 8 2 4 4 8 5 2 4 8 9 2 5 4 0 5 5 9
 0 0 0 8 8 3 4 2 4 0 1 5 0 6 6 9 8 6 0 0 4 4 6 2 3 5 2 3 9 4 8 7 9 2 7 6 2
 4 3 4 7 4 6 9 9 5 0 9 1 9 2 3 5 1 9 7 1 7 7 8 4 9 2 3 4 6 1 6 5 1 2 6 0 6
 2 1 6 0 9 7 8 0 8 9 3 0 0 6 6 6 1 2 0 4 3 6 3 3 2 9 5 2 9 2 1 6 6 1 0 7 8
 2 0 6 2 1 8 7 8 4 7 3 3 4 2 1 2 8 8 5 2 2 3 3 2 0 3 5 6 0 9 7 5 2 2 6 3 8
 1 0 6 8 8 3 4 5 5 6 8 7 9 4 6 3 6 7 0 0 6 6 0 5 7 8 7 9 0 5 6 6 3 1 6 2 7
 9 1 1 6 7 1 4 8 3 4 8 2 1 3 3 5 2 3 9 6 7 8 6 1 7 3 4 2 0 4 0 7 9 6 2 4 8
 3 3 4 6 4 7 3 0 9 1 1 1 2 1 4 0 2 9 0 6 2 0 9 6 2 1 4 4 9 6 4 6 2 5 1 7 5
 2 0 9 9 5 4 3 0 1 5 3 3 8 8 8 4 1 6 3 5 5 4 2 1 7 8 7 1 0 9 3 2 6 9 5 5 9
 0 0 0 1 9 2 9 3 0 1 9 7 

0.968

In [20]:
import csv

In [30]:
print(predictions.shape)

(1000,)
(1000,)
