# Three-layer network on MNIST

In [23]:
"""
This code trains a three-layer neural network on the MNIST dataset of handwritten digits.

The network has the following architecture:
- Input layer with 784 nodes (28x28 pixel images flattened)
- Hidden layer with 40 nodes and ReLU activation
- Output layer with 10 nodes (one for each digit class) and no activation

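The network is trained using backpropagation with one weight update per image (stochastic gradient descent), a learning rate of 0.005, and 350 iterations, where each iteration is one full pass over the first 1,000 training images. The training and test data are loaded from the Keras MNIST dataset.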

The code calculates the mean squared error and classification accuracy on the training set during each iteration and prints the results.
"""
import sys
import numpy as np
from keras.datasets import mnist  # the Keras submodule is `datasets` (plural)

# Fetch the MNIST train/test splits through Keras.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Train on the first 1000 images only. Each 28x28 image is flattened to a
# 784-element row and divided by 255 (assumes 8-bit pixel values, which maps
# them into [0, 1]). `-1` lets NumPy infer the row count from the slice.
images = x_train[0:1000].reshape(-1, 28 * 28) / 255
labels = y_train[0:1000]

# One-hot encode the digit labels: row i gets a 1 in column labels[i].
one_hot_labels = np.zeros((len(labels), 10))
one_hot_labels[np.arange(len(labels)), labels] = 1
labels = one_hot_labels

# Same preprocessing for the whole test split.
test_images = x_test.reshape(len(x_test), 28 * 28) / 255
test_labels = np.zeros((len(y_test), 10))
test_labels[np.arange(len(y_test)), y_test] = 1

np.random.seed(1)  # fixed seed so the random weight initialisation is reproducible


def relu(x):
    """Return x where x >= 0 and 0 elsewhere (element-wise ReLU)."""
    return (x >= 0) * x


def relu2deriv(x):
    """Return the ReLU slope mask: True (1) where x > 0, False (0) elsewhere.

    The comparison must be strict. This function is applied to the ReLU
    output, which is never negative, so `x >= 0` would be True for every
    unit and the backward pass would ignore the ReLU entirely.
    """
    return x > 0


# Hyperparameters
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

# Weights drawn uniformly from [-0.1, 0.1). The draw order matters for
# reproducibility under the seed above, so keep weights_0_1 first.
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1  # 784 x 40
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1  # 40 x 10

# Train with per-example gradient descent: every image triggers one forward
# pass, one backward pass and one update of both weight matrices.
for j in range(iterations):
    error, correct_cnt = (0.0, 0)  # running totals for this pass over the data

    for i in range(len(images)):
        # Slicing (rather than plain indexing) keeps a leading axis of
        # length 1, so the dot products and transposes below see 2-D arrays.
        layer_0 = images[i:i+1]
        goal = labels[i:i+1]

        # Forward pass: hidden layer through relu, then a linear output layer.
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)

        # The output delta is the raw difference between target and prediction.
        # Its squared sum is this example's contribution to the error total.
        layer_2_delta = goal - layer_2
        error += np.sum(layer_2_delta ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(goal))

        # Backward pass: push the output delta back through weights_1_2 and
        # mask it with the hidden layer's slope before updating the weights.
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    # Report the per-image averages for this pass. A format spec is used
    # instead of slicing str(float), which would cut the exponent off values
    # printed in scientific notation. "\r" rewrites the same console line, so
    # flush explicitly: without a newline, buffered output may never appear.
    num_images = float(len(images))
    sys.stdout.write("\rI:{} Error:{:.3f} Correct:{}".format(
        j, error / num_images, correct_cnt / num_images))
    sys.stdout.flush()


ModuleNotFoundError: No module named 'tensorflow'