In [532]:
import pandas as pd
import numpy as np

In [533]:
#colnames = ['result'] + ['label' + str(i) for i in range(784)]
train_set = pd.read_csv('mnist_train.csv', header=None, sep=',')
test_set = pd.read_csv('mnist_test.csv', header=None, sep=',')

In [534]:
train_set = np.array(train_set).T 
test_set = np.array(test_set).T 

In [615]:
Y_train = train_set[0]
Y_train_ = np.array([[(1 if i == y else 0) for i in range(10)] for y in train_set[0]]).T
X_train = train_set[1:] / 255

Y_test = test_set[0]
X_test = test_set[1:] / 255

In [703]:
def relu(X):
    return np.maximum(0, X)

def d_relu(X):
    return (X > 0) * 1

def softmax(X):    
    return np.exp(X) / np.sum(np.exp(X), axis=0, keepdims=True)

def is_correct(A, y):
    return np.argmax(A) == y


In [704]:
def init_weights():
    W1 = np.random.rand(20, 784) - 0.5
    b1 = np.random.rand(20, 1) - 0.5

    W2 = np.random.rand(10, 20) - 0.5
    b2 = np.random.rand(10, 1) - 0.5

    return W1, b1, W2, b2

def forward_propagation(W1, b1, W2, b2, X):
    Z1 = W1.dot(X) + b1
    A1 = relu(Z1)

    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)

    return Z1, A1, Z2, A2

def backward_propagation(Z1, A1, Z2, A2, W1, b1, W2, b2, X, Y):
    m = Y.shape[1]
    
    dZ2 = A2 - Y
    dW2 = 1/m * dZ2.dot(A1.T)
    db2 = 1/m * np.sum(dZ2)

    dZ1 = W2.T.dot(dZ2) * d_relu(Z1)
    dW1 = 1/m * dZ1.dot(X.T)
    db1 = 1/m * np.sum(dZ1)

    return dW1, db1, dW2, db2

def update_weights(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1

    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2

    return W1, b1, W2, b2

def train_model(W1, b1, W2, b2, X, Y, n_iter, learning_rate = 0.01):

    for i in range(1, n_iter + 1):
        Z1, A1, Z2, A2 = forward_propagation(W1, b1, W2, b2, X)

        dW1, db1, dW2, db2 = backward_propagation(Z1, A1, Z2, A2, W1, b1, W2, b2, X, Y)

        W1, b1, W2, b2 = update_weights(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)

    return W1, b1, W2, b2

def test_model(W1, b1, W2, b2, X_test, Y_test):
    n_correct = 0

    for i in range(X_test.shape[1]):
        Z1, A1, Z2, A2 = forward_propagation(W1, b1, W2, b2, X_test[:, i:i+1])

        if is_correct(A2, Y_test[i]):
            n_correct += 1
            
    return f"Model precision: {np.round(n_correct / X_test.shape[1] * 100, 2)}% ({n_correct}/{X_test.shape[1]})"

In [696]:
W1, b1, W2, b2 = init_weights()
print(test_model(W1, b1, W2, b2, X_test, Y_test))
n = 0

Model precision: 11.36% (1136/10000)


In [697]:
for i in range(5):
    W1, b1, W2, b2 = train_model(W1, b1, W2, b2, X_train, Y_train_, 50, 0.02)
    n += 50
    print(f"#{n}>> {test_model(W1, b1, W2, b2, X_test, Y_test)}")

#50>> Model precision: 21.97% (2197/10000)
#100>> Model precision: 32.25% (3225/10000)
#150>> Model precision: 39.8% (3980/10000)
#200>> Model precision: 46.23% (4623/10000)
#250>> Model precision: 52.31% (5231/10000)


In [705]:
def init_weights_2():
    W1 = np.random.rand(20, 784) - 0.5
    b1 = np.random.rand(20, 1) - 0.5

    W2 = np.random.rand(20, 20) - 0.5
    b2 = np.random.rand(20, 1) - 0.5

    W3 = np.random.rand(10, 20) - 0.5
    b3 = np.random.rand(10, 1) - 0.5
    
    return W1, b1, W2, b2, W3, b3

def forward_propagation_2(W1, b1, W2, b2, W3, b3, X):
    Z1 = W1.dot(X) + b1
    A1 = relu(Z1)

    Z2 = W2.dot(A1) + b2
    A2 = relu(Z2)

    Z3 = W3.dot(A2) + b3
    A3 = softmax(Z3)

    return Z1, A1, Z2, A2, Z3, A3

def backward_propagation_2(Z1, A1, Z2, A2, Z3, A3, W1, b1, W2, b2, W3, b3, X, Y):
    m = Y.shape[1]

    dZ3 = A3 - Y
    dW3 = 1/m * dZ3.dot(A2.T)
    db3 = 1/m * np.sum(dZ3)

    dZ2 = W3.T.dot(dZ3) * d_relu(Z2)
    dW2 = 1/m * dZ2.dot(A1.T)
    db2 = 1/m * np.sum(dZ2)

    dZ1 = W2.T.dot(dZ2) * d_relu(Z1)
    dW1 = 1/m * dZ1.dot(X.T)
    db1 = 1/m * np.sum(dZ1)

    return dW1, db1, dW2, db2, dW3, db3

def update_weights_2(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, learning_rate):
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1

    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2

    W3 = W3 - learning_rate * dW3
    b3 = b3 - learning_rate * db3

    return W1, b1, W2, b2, W3, b3

def train_model_2(W1, b1, W2, b2, W3, b3, X, Y, n_iter, learning_rate = 0.01):

    for i in range(1, n_iter + 1):
        Z1, A1, Z2, A2, Z3, A3 = forward_propagation_2(W1, b1, W2, b2, W3, b3, X)

        dW1, db1, dW2, db2, dW3, db3 = backward_propagation_2(Z1, A1, Z2, A2, Z3, A3, W1, b1, W2, b2, W3, b3, X, Y)

        W1, b1, W2, b2, W3, b3 = update_weights_2(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, learning_rate)

    return W1, b1, W2, b2, W3, b3

def test_model_2(W1, b1, W2, b2, W3, b3, X_test, Y_test):
    n_correct = 0

    for i in range(X_test.shape[1]):
        Z1, A1, Z2, A2, Z3, A3 = forward_propagation_2(W1, b1, W2, b2, W3, b3, X_test[:, i:i+1])

        if is_correct(A2, Y_test[i]):
            n_correct += 1
            
    return f"Model precision: {np.round(n_correct / X_test.shape[1] * 100, 2)}% ({n_correct}/{X_test.shape[1]})"

In [706]:
W1, b1, W2, b2, W3, b3 = init_weights_2()
print(test_model_2(W1, b1, W2, b2, W3, b3, X_test, Y_test))
n = 0

Model precision: 6.87% (687/10000)


In [707]:
for i in range(5):
    W1, b1, W2, b2, W3, b3 = train_model_2(W1, b1, W2, b2, W3, b3, X_train, Y_train_, 50, 0.02)
    n += 50
    print(f"#{n}>> {test_model_2(W1, b1, W2, b2, W3, b3, X_test, Y_test)}")

#50>> Model precision: 8.14% (814/10000)


KeyboardInterrupt: 