In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# train_data = pd.read_csv('data/mnist_train.csv')
# x_train = train_data.iloc[:, 1:].values.astype('uint8').reshape(-1, 28, 28)  # reshape to (num_samples, 28, 28)
# y_train = train_data.iloc[:, 0].values  # labels

# # Load the test data
# test_data = pd.read_csv('data/mnist_test.csv')
# x_test = test_data.iloc[:, 1:].values.astype('uint8').reshape(-1, 28, 28)  # reshape to (num_samples, 28, 28)
# y_test = test_data.iloc[:, 0].values  # labels


In [36]:
data = np.loadtxt('data/mnist_train.csv',skiprows=1,delimiter=',')
m,n = data.shape
np.random.shuffle(data)

data_test = data[0:10000].T
x_test = data_test[1:n]
y_test = data_test[0]
# x_test = x_test/255

data_train = data[10000:m].T
y_train = data_train[0]
x_train = data_train[1:n]
# x_train = x_train/255
_,m_train = x_train.shape



# train_X = train_array[:,1:].T
# train_Y = train_array[:,0].T

# test_array = np.loadtxt('data/mnist_test.csv',skiprows=1,delimiter=',')
# np.random.shuffle(test_array)
# test_X = test_array[:,1:].T
# test_Y = test_array[:,0].T

In [37]:
# One Hot encoding the output

def one_hot_encode(Y):
    Y = Y.astype(int)
    encoded_y = np.zeros((Y.size,int(Y.max())+1))
    encoded_y[np.arange(Y.size),Y] = 1
    encoded_y = encoded_y.T
    return encoded_y

In [38]:
# Parameter initialization

def init_params():
    W1 = np.random.rand(10,784)
    b1 = np.random.rand(10,1)
    W2 = np.random.rand(10,10)
    b2 = np.random.rand(10,1)
    return W1,b1,W2,b2

In [39]:
# Activation functions

def relu(z):
    return np.maximum(0,z)

def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=0))  # Subtracting max for numerical stability
    return exp_z / np.sum(exp_z, axis=0)

def deriv_relu(z):
    return z>0

In [40]:
# Forward Propogation phase

def forward_propogation(W1,b1,W2,b2,X):
    Z1 = W1.dot(X) + b1
    A1 = relu(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)

    return Z1,A1,Z2,A2

In [41]:
# Backpropogation phase

def backpropogation(Z1,A1,Z2,A2,W1,W2,Y,X):
    y_encoded = one_hot_encode(Y)
    m = Y.size
    dZ2 = A2 - y_encoded         #gradient of softmax
    dW2 = 1/m * dZ2.dot(A1.T)
    db2 = 1/m * np.sum(dZ2,axis = 1).reshape(-1,1)
    dZ1 = W2.T.dot(dZ2) * deriv_relu(Z1)
    dW1 = 1/m * dZ1.dot(X.T)
    db1 = 1/m * np.sum(dZ1,axis=1).reshape(-1,1)
    return dW1,db1,dW2,db2

In [42]:
# Update parameters

def param_update(W1,dW1,b1,db1,W2,dW2,b2,db2,alpha):
    W1 = W1 - alpha* dW1
    b1 = b1 - alpha* db1
    W2 = W2 - alpha* dW2
    b2 = b2 - alpha* db2
    return W1,b1,W2,b2

In [43]:
# Accuracy check at every epoch:
def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

In [44]:
# Gradient Descent

def grad_desc(X,Y,iterations,alpha):
    W1,b1,W2,b2 = init_params()
    for i in range(iterations):
        Z1,A1,Z2,A2 = forward_propogation(W1,b1,W2,b2,X)
        dW1,db1,dW2,db2 = backpropogation(Z1,A1,Z2,A2,W1,W2,Y,X)
        W1,b1,W2,b2 = param_update(W1,dW1,b1,db1,W2,dW2,b2,db2,alpha)
        if i % 10 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(A2)
            print(get_accuracy(predictions, Y))
    return W1,b1,W2,b2

In [45]:
# Training:

W1,b1,W2,b2 = grad_desc(x_train,y_train,200,0.01)

Iteration:  0
[3 3 3 ... 3 3 3] [9. 7. 2. ... 5. 0. 4.]
0.10156
Iteration:  10
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218
Iteration:  20
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218
Iteration:  30
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218
Iteration:  40
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218
Iteration:  50
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218
Iteration:  60
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218
Iteration:  70
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218
Iteration:  80
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218
Iteration:  90
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218
Iteration:  100
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218
Iteration:  110
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218
Iteration:  120
[1 1 1 ... 1 1 1] [9. 7. 2. ... 5. 0. 4.]
0.11218


KeyboardInterrupt: 