In [None]:
import numpy as np 
import pandas as pd 
import os
from matplotlib import pyplot as plt

In [None]:
# Read the MNIST training CSV and transpose it so that every column is one
# sample: row 0 carries the labels, the remaining rows carry the pixel values.
training_data = pd.read_csv('/kaggle/input/mnist-in-csv/mnist_train.csv')
data = np.array(training_data).T

# Samples 0-999 are the training split. Pixels are divided by 255
# (presumably to map 0-255 intensities into [0, 1] — confirm against the CSV).
X_training = data[1:, :1000] / 255  # (784, 1000)
Y_training = data[0, :1000]  # (1000,)

# Samples 1000-1999 are held out as the test split, scaled the same way.
X_test = data[1:, 1000:2000] / 255  # (784, 1000)
Y_test = data[0, 1000:2000]  # (1000,)

In [None]:
def init_params(m):
    """Draw random starting weights and biases for the two-layer network.

    Every entry is sampled from a standard normal distribution via
    ``np.random.randn``.

    Args:
        m: Accepted for interface compatibility; not used by this function.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(10, 784)``, ``(10, 1)``,
        ``(10, 10)`` and ``(10, 1)``.
    """
    # Order matters: the arrays are drawn from the global RNG in exactly this
    # sequence (W1, b1, W2, b2).
    layer_shapes = ((10, 784), (10, 1), (10, 10), (10, 1))
    return tuple(np.random.randn(*shape) for shape in layer_shapes)

def ReLU(Z):
    """Apply the rectified linear unit element-wise.

    Args:
        Z: Array (or scalar) of pre-activation values.

    Returns:
        ``Z`` with every entry below zero replaced by zero.
    """
    rectified = np.maximum(0, Z)
    return rectified

def softmax(Z):
    """Column-wise softmax that stays finite for large logits.

    Args:
        Z: Array of logits; the softmax is taken along axis 0, so for a 2-D
            input every column is normalised independently.

    Returns:
        Array with the same shape as ``Z`` whose entries are positive and sum
        to 1 along axis 0.
    """
    # Shifting each column by its maximum does not change the softmax value
    # (the factor cancels in the ratio) but keeps np.exp from overflowing to
    # inf, which would otherwise turn the result into nan.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)

def forward_propagation(X, W1, b1, W2, b2):
    """Run the two-layer network forward on a batch.

    Args:
        X: Input batch; multiplied on the left by ``W1``.
        W1, b1: Weights and bias of the first (ReLU) layer.
        W2, b2: Weights and bias of the second (softmax) layer.

    Returns:
        Tuple ``(A1, Z1, A2, Z2)``: first-layer activation and pre-activation,
        then second-layer activation and pre-activation.
    """
    # First layer: affine map followed by ReLU.
    pre_hidden = W1.dot(X) + b1
    act_hidden = ReLU(pre_hidden)

    # Second layer: affine map followed by softmax.
    pre_output = W2.dot(act_hidden) + b2
    act_output = softmax(pre_output)

    return act_hidden, pre_hidden, act_output, pre_output

def derivative_ReLU(Z):
    """Return the element-wise derivative of ReLU as a boolean mask.

    Args:
        Z: Array (or scalar) of pre-activation values.

    Returns:
        ``True`` where ``Z`` is strictly positive, ``False`` elsewhere; the
        mask acts as 1/0 when multiplied with a numeric array.
    """
    is_active = Z > 0
    return is_active

def one_hot_encoding(Y, num_classes=None):
    """One-hot encode a 1-D collection of integer class labels.

    Args:
        Y: Integer labels, one per sample.
        num_classes: Number of columns in the result. When ``None`` (the
            default) the width is inferred as ``max(Y) + 1``. Pass it
            explicitly when a batch may not contain the highest class, since
            the inferred width would then be too small.

    Returns:
        Float array of shape ``(len(Y), num_classes)`` with a single 1 per row.

    Raises:
        ValueError: If a label is negative or not smaller than ``num_classes``.
    """
    labels = np.asarray(Y)
    if num_classes is None:
        # Backward-compatible default: infer the width from the largest label.
        # An empty input has no maximum, so it maps to zero columns.
        num_classes = int(labels.max()) + 1 if labels.size else 0

    encoded = np.zeros((labels.size, num_classes))
    if labels.size:
        # Negative labels would otherwise wrap around silently via NumPy's
        # negative indexing and mark the wrong class.
        if labels.min() < 0 or labels.max() >= num_classes:
            raise ValueError(
                "labels must lie in [0, {}), got range [{}, {}]".format(
                    num_classes, labels.min(), labels.max()))
        encoded[np.arange(labels.size), labels] = 1
    return encoded

def backward_propagation(X, Y, W1, A1, Z1, W2, A2):
    """Compute gradients of the loss for the two-layer network.

    Args:
        X: Input batch, laid out so that ``dZ1.dot(X.T)`` matches ``W1``.
        Y: Integer class labels, one per sample.
        W1: First-layer weights (accepted for interface compatibility; not
            used in the computation).
        A1: First-layer activations.
        Z1: First-layer pre-activations.
        W2: Second-layer weights.
        A2: Softmax output with one row per class and one column per sample.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``. The weight gradients have the shapes
        of ``W1`` and ``W2``; the bias gradients are column vectors with one
        entry per unit of the corresponding layer.
    """
    Y = np.asarray(Y)
    m = Y.size

    # One-hot targets in the same (classes, samples) layout as A2. Sizing them
    # from A2 rather than from max(Y) keeps the shapes aligned even when the
    # batch happens not to contain the highest class label.
    targets = np.zeros(A2.shape)
    targets[Y, np.arange(m)] = 1

    dZ2 = A2 - targets
    dW2 = 1/m * dZ2.dot(A1.T)
    # Sum over samples only (axis=1). Summing every element would collapse the
    # gradient to one scalar shared by all units — and for dZ2 that scalar is
    # ~0, because each softmax column and each one-hot column sums to 1.
    db2 = 1/m * np.sum(dZ2, axis=1, keepdims=True)

    # (Z1 > 0) is the derivative of ReLU evaluated at the pre-activations.
    dZ1 = W2.T.dot(dZ2) * (Z1 > 0)
    dW1 = 1/m * dZ1.dot(X.T)
    db1 = 1/m * np.sum(dZ1, axis=1, keepdims=True)

    return dW1, db1, dW2, db2

def update_params(W1, b1, dW1, db1, W2, b2, dW2, db2, alpha):
    """Apply one gradient-descent step to every parameter.

    Args:
        W1, b1, W2, b2: Current weights and biases.
        dW1, db1, dW2, db2: Gradients matching the parameters above.
        alpha: Learning rate that scales each gradient.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` holding the updated parameters; the inputs
        are not modified in place.
    """
    def descend(param, grad):
        # Move a single parameter against its gradient.
        return param - alpha * grad

    return descend(W1, dW1), descend(b1, db1), descend(W2, dW2), descend(b2, db2)
    
def get_accuracy(A2, Y):
    """Percentage of samples whose highest-scoring class equals the label.

    Args:
        A2: Score array with one row per class and one column per sample.
        Y: Array of true labels, one per sample.

    Returns:
        Accuracy as a percentage in the range 0-100.
    """
    predicted = np.argmax(A2, 0)
    n_correct = np.sum(predicted == Y)
    return n_correct * 100 / Y.size

def train(X, Y, iterations, alpha):
    """Fit the two-layer network with full-batch gradient descent.

    Args:
        X: Training inputs, passed unchanged to ``forward_propagation``.
        Y: Training labels, one per sample.
        iterations: Number of gradient-descent steps to run.
        alpha: Learning rate handed to ``update_params``.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with the parameters after the last step.
    """
    W1, b1, W2, b2 = init_params(Y.size)

    for step in range(iterations):
        A1, Z1, A2, Z2 = forward_propagation(X, W1, b1, W2, b2)
        gradients = backward_propagation(X, Y, W1, A1, Z1, W2, A2)
        dW1, db1, dW2, db2 = gradients
        W1, b1, W2, b2 = update_params(W1, b1, dW1, db1, W2, b2, dW2, db2, alpha)

        if step % 100 != 0:
            continue
        # Progress report every 100 steps. A2 comes from the forward pass made
        # before this step's update, so the figure describes the parameters as
        # they were at the start of the step.
        print("{}/{}: accuracy = {}%".format(step, iterations, get_accuracy(A2, Y)))

    return W1, b1, W2, b2

def make_prediction(X, Y, W1, b1, W2, b2, index):
    """Print the predicted and true label of one sample and display its image.

    Args:
        X: Input batch with one column per sample.
        Y: True labels, one per sample.
        W1, b1, W2, b2: Network parameters used for the forward pass.
        index: Position of the sample to inspect.

    Returns:
        None. The labels are printed and the image is shown with matplotlib.
    """
    _, _, A2, _ = forward_propagation(X, W1, b1, W2, b2)

    # The arg-max of the network output is the model's prediction, so it is
    # labelled "predicted"; the ground truth from Y is labelled "actual".
    predicted = np.argmax(A2[:, index])
    print("predicted:", predicted)
    print("actual:", Y[index])

    # The sample is reshaped to 28x28 for display. The *255 presumably undoes
    # an earlier division by 255 — confirm against how X was prepared.
    plt.imshow((X.T[index]*255).reshape(28, 28))
    plt.show()

In [None]:
# Seed NumPy's global RNG before training: the initial parameters are drawn
# with np.random.randn, so without a fixed seed every "Restart & Run All"
# produces a different model and different accuracy figures.
np.random.seed(42)
W1, b1, W2, b2 = train(X_training, Y_training, iterations=2000, alpha=1)

In [None]:
# Inspect one held-out sample (position 102 of the test split) with the
# trained parameters.
make_prediction(X_test, Y_test, W1, b1, W2, b2, index=102)