In [65]:
import tensorflow as tf
import numpy as np

In [66]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

x_train = x_train.reshape(len(x_train), -1) / 255
x_test = x_test.reshape(len(x_test), -1) / 255

print("Number of original training examples:", len(x_train))
print("Number of original test examples:", len(x_test))

print(x_train[0].reshape(1, 784).shape)
print(y_train[0])

Number of original training examples: 60000
Number of original test examples: 10000
(1, 784)
5


In [67]:
tiny_const = 0.005

def sigmoid(x):
    return 1 / (1 + np.exp(-x))
    
def sigmoid_d(x):
    s = sigmoid(x)
    return s * (1 - s)
    
def relu(x):
    return np.maximum(x, 0)

def relu_d(x):
    return 1 * (x > 0)

def J(y, a):
    return -np.sum(y * np.log(a + tiny_const) + (1 - y) * np.log(1 - (a + tiny_const)))

def J_d(y, a):
    return (a - y) / ((1 - a + tiny_const) * (a + tiny_const))

In [75]:
def classify(x):
    output_layer = sigmoid(W_2 @ relu(W_1 @ x + b_1) + b_2)
    digit = np.argmax(output_layer)
    return digit, output_layer

In [76]:
def train_one_example(num_iterations, learning_rate):
    global W_2, b_2, W_1, b_1
    
    a_0 = x_train[0]
    y = np.zeros(10)
    y[y_train[0]] = 1
    print(y)
    
    for _ in range(num_iterations):
        z_1 = W_1 @ a_0 + b_1
        a_1 = relu(z_1)
        
        z_2 = W_2 @ a_1 + b_2
        a_2 = sigmoid(z_2)
        
        delta_2 = J_d(y, a_2) * sigmoid_d(z_2)
        delta_1 = W_2.T @ delta_2 * relu_d(z_1)
        
        adj_W_2 = delta_2 @ a_2.T
        adj_W_1 = delta_1 @ a_1.T
        
        W_2 = W_2 - learning_rate * adj_W_2
        b_2 = b_2 - learning_rate * delta_2
        
        W_1 = W_1 - learning_rate * adj_W_1
        b_1 = b_1 - learning_rate * delta_1

In [77]:
W_1 = np.random.rand(300, 784) * 2 - 1
b_1 = np.random.rand(300) * 2 - 1

W_2 = np.random.rand(10, 300) * 2 - 1
b_2 = np.random.rand(10) * 2 - 1

In [78]:
ex1 = classify(x_train[0])
print(ex1)
print("Desired: " + str(y_train[0]))

train_one_example(100000, 0.00005)

ex1 = classify(x_train[0])
print(ex1)

(1, array([9.99942676e-01, 1.00000000e+00, 9.99985586e-01, 1.00000000e+00,
       9.72364251e-10, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
       8.32545204e-23, 2.23337371e-31]))
Desired: 5
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
(5, array([0.13690762, 0.17824638, 0.19706437, 0.23019584, 0.22316264,
       0.86214071, 0.22557013, 0.14352549, 0.14106328, 0.2314917 ]))
