In [25]:
import numpy as np, sys
np.random.seed(1)
from keras.datasets import mnist


from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train[0:1000]
y_train = y_train[0:1000]

transformed_y_train = []

for y_label in y_train:
    zero = np.zeros((10,))
    zero[y_label] = 1
    transformed_y_train.append(zero)

y_train = np.array(transformed_y_train)

transformed_y_test = []

for y_label in y_test:
    zero = np.zeros((10,))
    zero[y_label] = 1
    transformed_y_test.append(zero)

y_test = np.array(transformed_y_test)

# normalize input, avoid divergence
x_train = x_train / 255
x_test = x_test / 255

def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

BATCH_SIZE = 128
ALPHA = 2
EPOCHS = 300

INPUT_ROWS = 28
INPUT_COLUMNS = 28

KERNEL_ROWS = 3
KERNEL_COLUMNS = 3
KERNEL_COUNT = 16
OUTPUT_SIZE = 10


NUMBER_OF_SAMPLES_PER_IMAGE = (INPUT_ROWS - KERNEL_ROWS) * (INPUT_COLUMNS - KERNEL_COLUMNS)               

# weights_0_1 = 0.02*np.random.random((pixels_per_image,hidden_size))-0.01
kernels = 0.02*np.random.random((KERNEL_ROWS*KERNEL_COLUMNS,
                                 KERNEL_COUNT))-0.01

weights_1_2 = 0.2*np.random.random((NUMBER_OF_SAMPLES_PER_IMAGE * KERNEL_COUNT,
                                    OUTPUT_SIZE)) - 0.1



# def get_image_section(layer,row_from, row_to, col_from, col_to):
#     section = layer[:,row_from:row_to,col_from:col_to]
#     return section.reshape(-1,1,row_to-row_from, col_to-col_from)

def get_sections_from_input(input_data):
    sections = []
    
    for row in range(INPUT_ROWS - KERNEL_ROWS):
        for column in range(INPUT_COLUMNS - KERNEL_COLUMNS):
            section = input_data[:, row:row + KERNEL_ROWS, column:column + KERNEL_COLUMNS]
            
            # extend by one axis for future concatenation
            sections.append(section.reshape(-1,1,KERNEL_ROWS, KERNEL_COLUMNS))
    
    return sections

def predict(input_data, kernels, weights_1_2):
    # list of len 25x25 of (128, 3, 3)
    sections = get_sections_from_input(input_data)

    # concatenate sections by axis=1
    # (X, 625, 3, 3)
    sections_array = np.concatenate(sections, axis=1)

    # reshape to (80 000, 9) 625*128 = 80k
    input_flattened = sections_array.reshape(-1, KERNEL_ROWS * KERNEL_COLUMNS)

    layer_1 = input_flattened.dot(kernels).reshape(sections_array.shape[0], -1)
    layer_1 = tanh(layer_1)
    layer_2 = np.dot(layer_1,weights_1_2)
    
    return layer_2

sects = get_sections_from_input(layer_0)

expanded_input = np.concatenate(sects,axis=1)
es = expanded_input.shape
flattened_input = expanded_input.reshape(es[0]*es[1],-1)

kernel_output = flattened_input.dot(kernels)
layer_1 = tanh(kernel_output.reshape(es[0],-1))
layer_2 = np.dot(layer_1,weights_1_2)

def accuracy(x_input, y_input, kernels, weights_1_2):
    prediction = predict(x_input, kernels, weights_1_2)
    
    match = 0
    for predicted, label in zip(prediction, y_input):
        if np.argmax(predicted) == np.argmax(label):
            match += 1

    return match / prediction.shape[0] 

def train(x_train, y_train, kernels, weights_1_2):
    for epoch in range(EPOCHS):
        for batch_id in range(len(x_train) // BATCH_SIZE):
#             batch_start, batch_end=((i * BATCH_SIZE),((i+1)*BATCH_SIZE))
            batch_start = batch_id * BATCH_SIZE
            batch_end = batch_start + BATCH_SIZE
        
            # (128, 28, 28)
            x_train_batch = x_train[batch_start:batch_end]
#             y_train_batch = y_train[batch_index_start:batch_index_end]

            sects = get_sections_from_input(x_train_batch)

            expanded_input = np.concatenate(sects,axis=1)
            es = expanded_input.shape
            flattened_input = expanded_input.reshape(es[0]*es[1],-1)

            kernel_output = flattened_input.dot(kernels)
            layer_1 = tanh(kernel_output.reshape(es[0],-1))
            dropout_mask = np.random.randint(2,size=layer_1.shape)
            layer_1 *= dropout_mask * 2
            layer_2 = softmax(np.dot(layer_1,weights_1_2))
            

            layer_2_delta = (y_train[batch_start:batch_end]-layer_2)\
                            / (BATCH_SIZE * layer_2.shape[0])
            layer_1_delta = layer_2_delta.dot(weights_1_2.T) * \
                            tanh2deriv(layer_1)
            layer_1_delta *= dropout_mask
            weights_1_2 += ALPHA * layer_1.T.dot(layer_2_delta)
            l1d_reshape = layer_1_delta.reshape(kernel_output.shape)
            k_update = flattened_input.T.dot(l1d_reshape)
            kernels -= ALPHA * k_update

        test_correct_cnt = 0

        for i in range(len(x_test)):

            layer_0 = x_test[i:i+1]

            sects = get_sections_from_input(layer_0)

            expanded_input = np.concatenate(sects,axis=1)
            es = expanded_input.shape
            flattened_input = expanded_input.reshape(es[0]*es[1],-1)

            kernel_output = flattened_input.dot(kernels)
            layer_1 = tanh(kernel_output.reshape(es[0],-1))
            layer_2 = np.dot(layer_1,weights_1_2)

            test_correct_cnt += int(np.argmax(layer_2) == 
                                    np.argmax(y_test[i:i+1]))
        
          
        print(accuracy(x_train, y_train, kernels, weights_1_2))
        print(accuracy(x_test, y_test, kernels, weights_1_2))
        print()

        
    return kernels, weights_1_2

train(x_train, y_train, kernels, weights_1_2)

0.025
0.0288

0.018
0.0273

0.021
0.028

0.029
0.0292

0.031
0.0339

0.039
0.0478

0.068
0.076

0.123
0.1316

0.21
0.2137

0.276
0.2941

0.33
0.3563

0.386
0.4023

0.428
0.4358

0.436
0.4473

0.433
0.4389

0.386
0.3951

0.212
0.2222

0.059
0.0613

0.017
0.0266

0.008
0.0127

0.008
0.0133

0.015
0.0185

0.031
0.0363

0.089
0.0928

0.195
0.1994



KeyboardInterrupt: 

In [None]:
sdd