In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def relu(x): return (x>0) * x

In [3]:
def relu_derivative(x): return x > 0

In [4]:
def tanh(x): return np.tanh(x)
def tanh_derivative(output): return 1.0 - output**2

In [5]:
def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

In [6]:
alpha, iterations, hidden_size = (0.005, 300, 100)
pixels_per_image, num_labels = (784, 10)

In [7]:
from torchvision.datasets import MNIST
train_dataset = MNIST(root='./data', train=True, download=True)
test_dataset = MNIST(root='./data', train=False, download=True)
import torch.nn.functional as F
x_train, y_train, x_test, y_test = train_dataset.data / 255., F.one_hot(train_dataset.targets, num_classes=10), test_dataset.data / 255., F.one_hot(test_dataset.targets, num_classes=10)
x_train = x_train.reshape(-1, 28 ** 2)
x_test = x_test.reshape(-1, 28 ** 2)
images, labels = x_train.numpy(), y_train.numpy()
test_images, test_labels = x_test.numpy(), y_test.numpy()

In [8]:
images = images[:1000]
labels = labels[:1000]

In [10]:
np.random.seed(1)
weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1
for j in range(iterations):
    error, correct_cnt = 0, 0
    for i in range(images.shape[0]):
        layer_0 = images[i:i+1]
        layer_1 = tanh(layer_0 @ weights_0_1)
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(layer_1 @ weights_1_2)
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) ==  np.argmax(labels[i:i+1]))
        layer_2_delta = layer_2 - labels[i:i+1]
        layer_1_delta = layer_2_delta @ weights_1_2.T * tanh_derivative(layer_1)
        layer_1_delta *= dropout_mask
        weights_1_2 -= layer_1.T @ layer_2_delta * alpha
        weights_0_1 -= layer_0.T @ layer_1_delta * alpha
    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        print("\n" + \
        "I:" + str(j) + \
        " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
        " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
        " Train-Err:" + str(error/ float(len(images)))[0:5] +\
        " Train-Acc:" + str(correct_cnt/ float(len(images))))


I:0 Test-Err:2.371 Test-Acc:0.5606 Train-Err:0.749 Train-Acc:0.498

I:10 Test-Err:62.90 Test-Acc:0.5906 Train-Err:0.158 Train-Acc:0.901

I:20 Test-Err:260.4 Test-Acc:0.1069 Train-Err:0.316 Train-Acc:0.776

I:30 Test-Err:60382 Test-Acc:0.0617 Train-Err:1.051 Train-Acc:0.084

I:40 Test-Err:12674 Test-Acc:0.0982 Train-Err:0.957 Train-Acc:0.087

I:50 Test-Err:14147 Test-Acc:0.1017 Train-Err:0.958 Train-Acc:0.086

I:60 Test-Err:18394 Test-Acc:0.1926 Train-Err:0.953 Train-Acc:0.099

I:70 Test-Err:25396 Test-Acc:0.0358 Train-Err:0.960 Train-Acc:0.092

I:80 Test-Err:36530 Test-Acc:0.0283 Train-Err:0.960 Train-Acc:0.081

I:90 Test-Err:51100 Test-Acc:0.0958 Train-Err:0.960 Train-Acc:0.08

I:100 Test-Err:49828 Test-Acc:0.0958 Train-Err:0.957 Train-Acc:0.095

I:110 Test-Err:40013 Test-Acc:0.101 Train-Err:0.956 Train-Acc:0.098

I:120 Test-Err:48825 Test-Acc:0.1029 Train-Err:0.955 Train-Acc:0.095

I:130 Test-Err:54961 Test-Acc:0.1111 Train-Err:0.957 Train-Acc:0.1
