In [1]:
import numpy as np, sys
np.random.seed(1)
from keras.datasets import mnist

In [2]:
"""
training and testing data
"""

(X_Train, Y_Train), (x_test, y_test) = mnist.load_data()

images, labels = (X_Train[0:1000].reshape(1000, 28*28)/ 255, Y_Train[0:1000])

test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))

one_hot_labels = np.zeros((len(labels), 10))

for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1

labels = one_hot_labels

for i, l in enumerate(y_test): 
    test_labels[i][l] = 1
    

In [3]:
"""
Activation Functions (non linearities)
"""
def tanh(x):
    return np.tanh(x)

def tanh2deriv(output): 
    return 1 - (output ** 2)

def softmax(x): 
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)




In [4]:
"""
Specification Variables
"""

alpha, iterations = (2, 300)
pixels_per_image, num_labels = (784, 10)
batch_size = 128

input_rows = 28
input_cols = 28

kernel_rows = 3
kernel_cols = 3
num_kernels = 16

# the hidden size of the layer is the difference of the columns/rows of the input 
# to the colums/rows of the kernel defined above. 
hidden_size = ((input_rows - kernel_rows) * (input_cols - kernel_cols)) * num_kernels
kernels = .02 * np.random.random((kernel_rows*kernel_cols, num_kernels)) - 0.01
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1




In [5]:
"""
Conv net image structuring function
"""
def get_image_section(layer, row_from, row_to, col_from, col_to): 
    section = layer[:, row_from:row_to, col_from:col_to]
    return section.reshape(-1, 1, row_to - row_from, col_to- col_from)

In [6]:
""" 
Main Training Loop
"""

# Training Loop
for j in range(iterations): 
    correct_cnt = 0
    # iterate through the batch size 
    for i in range(int(len(images)/ batch_size)-1): 
        
        """
        Define our input layers Prior to Convolution List
        """
        batch_start, batch_end=((i * batch_size), ((i+1)*batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)
        layer_0.shape
        
        """
        Make our list of conv layers (kernels to iterate through)
        """
        sects = list()
        for row_start in range(layer_0.shape[1]-kernel_rows): 
            for col_start in range(layer_0.shape[2] - kernel_cols): 
                sect = get_image_section(layer_0, 
                                        row_start, 
                                        row_start+kernel_rows, 
                                        col_start, 
                                        col_start+kernel_cols)
                sects.append(sect)
        
        expanded_input = np.concatenate(sects, axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1], -1)
        
        """
        Build out our traditional layers using Conv. Listing
        """
        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0], -1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask*2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))
        
        for k in range(batch_size):
            labelset = labels[batch_start+k:batch_start+k+1]
            _inc = int(np.argmax(layer_2[k:k+1]) == np.argmax(labelset))
            correct_cnt += _inc
        
        """
        Define our deltas
        """
        layer_2_delta = (labels[batch_start:batch_end] - layer_2) / (batch_size * layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        """
        Adjust the weights
        """
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        lld_reshape = layer_1_delta.reshape(kernel_output.shape)
        k_update = flattened_input.T.dot(lld_reshape)
        kernels -= alpha * k_update
        
    test_correct_cnt = 0
    
    for i in range(len(test_images)): 
        layer_0 = test_images[i:i+1]
        layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)
        layer_0.shape
        
        sects = list()
        for row_start in range(layer_0.shape[1]-kernel_rows): 
            for col_start in range(layer_0.shape[2] - kernel_cols):
                sect = get_image_section(layer_0, 
                                        row_start, 
                                        row_start+kernel_rows, 
                                        col_start, 
                                        col_start+kernel_cols)
                sects.append(sect)
        
        expanded_input = np.concatenate(sects, axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1], -1)
        
        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0], -1))
        layer_2 = np.dot(layer_1, weights_1_2)
        
        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
    if(j%1 == 0): 
        sys.stdout.write("\n" + \
                        "I: " + str(j) + \
                        " Test-Acc: " + str(test_correct_cnt/float(len(test_images))) + \
                        " Train-Acc: " + str(correct_cnt/float(len(images))))


I: 0 Test-Acc: 0.0331 Train-Acc: 0.05
I: 1 Test-Acc: 0.0294 Train-Acc: 0.042
I: 2 Test-Acc: 0.0299 Train-Acc: 0.029
I: 3 Test-Acc: 0.0292 Train-Acc: 0.024
I: 4 Test-Acc: 0.0307 Train-Acc: 0.035
I: 5 Test-Acc: 0.0349 Train-Acc: 0.042
I: 6 Test-Acc: 0.0433 Train-Acc: 0.056
I: 7 Test-Acc: 0.0621 Train-Acc: 0.072
I: 8 Test-Acc: 0.1037 Train-Acc: 0.055
I: 9 Test-Acc: 0.1688 Train-Acc: 0.099
I: 10 Test-Acc: 0.2343 Train-Acc: 0.112
I: 11 Test-Acc: 0.2923 Train-Acc: 0.145
I: 12 Test-Acc: 0.3387 Train-Acc: 0.175
I: 13 Test-Acc: 0.3792 Train-Acc: 0.178
I: 14 Test-Acc: 0.41 Train-Acc: 0.204
I: 15 Test-Acc: 0.4326 Train-Acc: 0.232
I: 16 Test-Acc: 0.437 Train-Acc: 0.244
I: 17 Test-Acc: 0.4272 Train-Acc: 0.243
I: 18 Test-Acc: 0.3784 Train-Acc: 0.221
I: 19 Test-Acc: 0.2658 Train-Acc: 0.189
I: 20 Test-Acc: 0.0674 Train-Acc: 0.124
I: 21 Test-Acc: 0.0134 Train-Acc: 0.037
I: 22 Test-Acc: 0.0075 Train-Acc: 0.016
I: 23 Test-Acc: 0.009 Train-Acc: 0.008
I: 24 Test-Acc: 0.0124 Train-Acc: 0.017
I: 25 Test-Acc

KeyboardInterrupt: 