In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def relu(x): return (x>0) * x

In [3]:
def relu_derivative(x): return x > 0

In [4]:
def tanh(x): return np.tanh(x)
def tanh_derivative(output): return 1.0 - output**2

In [5]:
def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

In [6]:
alpha, iterations, hidden_size = (0.005, 300, 100)
pixels_per_image, num_labels = (784, 10)

In [7]:
from torchvision.datasets import MNIST
train_dataset = MNIST(root='./data', train=True, download=True)
test_dataset = MNIST(root='./data', train=False, download=True)
import torch.nn.functional as F
x_train, y_train, x_test, y_test = train_dataset.data / 255., F.one_hot(train_dataset.targets, num_classes=10), test_dataset.data / 255., F.one_hot(test_dataset.targets, num_classes=10)
x_train = x_train.reshape(-1, 28 ** 2)
x_test = x_test.reshape(-1, 28 ** 2)
images, labels = x_train.numpy(), y_train.numpy()
test_images, test_labels = x_test.numpy(), y_test.numpy()

In [8]:
images = images[:1000]
labels = labels[:1000]

In [9]:
def get_image_section(layer, row_from, row_to, col_from, col_to):
    section = layer[:, row_from:row_to, col_from:col_to]
    return section.reshape(-1, 1, row_to-row_from, col_to-col_from)

In [10]:
alpha, iterations = 2, 300

In [11]:
pixels_per_image, num_labels = 784, 10

In [12]:
batch_size = 128
input_rows = 28
input_cols = 28

In [13]:
kernel_rows = 3
kernel_cols = 3

In [14]:
num_kernels = 16

In [15]:
hidden_size = (input_rows - kernel_rows) * (input_cols - kernel_cols) * num_kernels

In [16]:
np.random.seed(1)
kernels = .02 * np.random.random((kernel_rows * kernel_cols, num_kernels)) - .01
weights_1_2 = .2 * np.random.random((hidden_size, num_labels)) - .1

In [17]:
for j in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end=((i * batch_size),((i+1)*batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_0 = layer_0.reshape(layer_0.shape[0],28,28)
        layer_0.shape

        sects = list()
        for row_start in range(layer_0.shape[1]-kernel_rows):
            for col_start in range(layer_0.shape[2] - kernel_cols):
                sect = get_image_section(layer_0,
                                         row_start,
                                         row_start+kernel_rows,
                                         col_start,
                                         col_start+kernel_cols)
                sects.append(sect)

        expanded_input = np.concatenate(sects,axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1],-1)

        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0],-1))
        dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1,weights_1_2))

        for k in range(batch_size):
            labelset = labels[batch_start+k:batch_start+k+1]
            _inc = int(np.argmax(layer_2[k:k+1]) == 
                               np.argmax(labelset))
            correct_cnt += _inc

        layer_2_delta = (labels[batch_start:batch_end]-layer_2)\
                        / (batch_size * layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * \
                        tanh_derivative(layer_1)
        layer_1_delta *= dropout_mask
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        l1d_reshape = layer_1_delta.reshape(kernel_output.shape)
        k_update = flattened_input.T.dot(l1d_reshape)
        kernels -= alpha * k_update
    
    test_correct_cnt = 0

    for i in range(len(test_images)):

        layer_0 = test_images[i:i+1]
#         layer_1 = tanh(np.dot(layer_0,weights_0_1))
        layer_0 = layer_0.reshape(layer_0.shape[0],28,28)
        layer_0.shape

        sects = list()
        for row_start in range(layer_0.shape[1]-kernel_rows):
            for col_start in range(layer_0.shape[2] - kernel_cols):
                sect = get_image_section(layer_0,
                                         row_start,
                                         row_start+kernel_rows,
                                         col_start,
                                         col_start+kernel_cols)
                sects.append(sect)

        expanded_input = np.concatenate(sects,axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1],-1)

        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0],-1))
        layer_2 = np.dot(layer_1,weights_1_2)

        test_correct_cnt += int(np.argmax(layer_2) == 
                                np.argmax(test_labels[i:i+1]))
    print("\n"+ \
        "I:" + str(j) + \
        " Test-Acc:"+str(test_correct_cnt/float(len(test_images)))+\
        " Train-Acc:" + str(correct_cnt/float(len(images))))


I:0 Test-Acc:0.0288 Train-Acc:0.055

I:1 Test-Acc:0.0273 Train-Acc:0.037

I:2 Test-Acc:0.028 Train-Acc:0.037

I:3 Test-Acc:0.0292 Train-Acc:0.04

I:4 Test-Acc:0.0339 Train-Acc:0.046

I:5 Test-Acc:0.0478 Train-Acc:0.068

I:6 Test-Acc:0.076 Train-Acc:0.083

I:7 Test-Acc:0.1316 Train-Acc:0.096

I:8 Test-Acc:0.2137 Train-Acc:0.127

I:9 Test-Acc:0.2941 Train-Acc:0.148

I:10 Test-Acc:0.3563 Train-Acc:0.181

I:11 Test-Acc:0.4023 Train-Acc:0.209

I:12 Test-Acc:0.4358 Train-Acc:0.238

I:13 Test-Acc:0.4473 Train-Acc:0.286

I:14 Test-Acc:0.4389 Train-Acc:0.274

I:15 Test-Acc:0.3951 Train-Acc:0.257

I:16 Test-Acc:0.2222 Train-Acc:0.243

I:17 Test-Acc:0.0613 Train-Acc:0.112

I:18 Test-Acc:0.0266 Train-Acc:0.035
