In [7]:
from init_mnist import init, load
import numpy as np

np.random.seed(1)

In [8]:
# in case that dataset is missing or pickle is broken uncomment this
# init()

x_train, y_train, x_test, y_test = load()

# take first 1000 samples
x_train = x_train[0:1000]
y_train = y_train[0:1000]

# transform labels from [2] to [0,0,1,0,0,0,0,0,0,0]
OUT_CLASSES = 10

transformed_y_train = []

for y_label in y_train:
    zero = np.zeros((OUT_CLASSES,))
    zero[y_label] = 1
    transformed_y_train.append(zero)

y_train = np.array(transformed_y_train)

transformed_y_test = []

for y_label in y_test:
    zero = np.zeros((OUT_CLASSES,))
    zero[y_label] = 1
    transformed_y_test.append(zero)

y_test = np.array(transformed_y_test)

# normalize input, avoid divergence
x_train = x_train / 255
x_test = x_test / 255

# prepare input for conv layer
x_train = x_train.reshape(x_train.shape[0], 28, 28)
x_test = x_test.reshape(x_test.shape[0], 28, 28)

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(1000, 28, 28)
(1000, 10)
(10000, 28, 28)
(10000, 10)


In [33]:
x_train.shape

(1000, 28, 28)

In [9]:
def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

def softmax2deriv(output):
    return output / (output.shape[0] * BATCH_SIZE)

In [12]:
BATCH_SIZE = 128
ALPHA = 2
EPOCHS = 300

INPUT_ROWS = 28
INPUT_COLUMNS = 28

KERNEL_ROWS = 3
KERNEL_COLUMNS = 3
KERNEL_COUNT = 16
OUTPUT_SIZE = 10

NUMBER_OF_SAMPLES_PER_IMAGE = (INPUT_ROWS - KERNEL_ROWS) * (INPUT_COLUMNS - KERNEL_COLUMNS)

# (9, 16)
kernels = 0.02 * np.random.rand(KERNEL_ROWS * KERNEL_COLUMNS, KERNEL_COUNT) - 0.01

# (25*25*16, OUTPUT_SIZE)
weights_1_2 = 0.2 * np.random.rand(NUMBER_OF_SAMPLES_PER_IMAGE * KERNEL_COUNT, OUTPUT_SIZE) - 0.1


def get_sections_from_input(input_data):
    sections = []
    
    for row in range(INPUT_ROWS - KERNEL_ROWS):
        for column in range(INPUT_COLUMNS - KERNEL_COLUMNS):
            section = input_data[:, row:row + KERNEL_ROWS, column:column + KERNEL_COLUMNS]
            
            # extend by one axis for future concatenation
            sections.append(section.reshape(-1,1,KERNEL_ROWS, KERNEL_COLUMNS))
    
    return sections

def predict(input_data, kernels, weights_1_2):
    # list of len 25x25 of (128, 3, 3)
    sections = get_sections_from_input(input_data)
    
    # (625, 128, 3, 3)
    sections_array = np.array(sections)
    
    # reshape to (80 000, 9) 625*128 = 80k
    input_flattened = sections_array.reshape(-1, KERNEL_ROWS * KERNEL_COLUMNS)
    
    # reshape to (128, 25*25*16)
    layer_1 = input_flattened.dot(kernels).reshape(input_data.shape[0], -1)
    layer_1 = tanh(layer_1)
    
#     dropout_mask = np.random.randint(2, size=layer_1.shape)
#     layer_1 = layer_1 * dropout_mask * 2
    
    layer_2 = layer_1.dot(weights_1_2)
#     layer_2 = softmax(layer_2)
    
    return layer_2
    
def accuracy(x_input, y_input, kernels, weights_1_2):
    prediction = predict(x_input, kernels, weights_1_2)
    
    match = 0
    for predicted, label in zip(prediction, y_input):
        if np.argmax(predicted) == np.argmax(label):
            match += 1

    return match / len(prediction)  

def train(x_train, y_train, kernels, weights_1_2):
    for epoch in range(EPOCHS):
        print(f"Epoch: {epoch}")
        for batch_id in range(x_train.shape[0] // BATCH_SIZE):
            batch_index_start = batch_id * BATCH_SIZE
            batch_index_end = batch_index_start + BATCH_SIZE

            # (128, 28, 28)
            x_train_batch = x_train[batch_index_start:batch_index_end]
            y_train_batch = y_train[batch_index_start:batch_index_end]

            # list of len 25x25 of (128, 1, 3, 3)
            sections = get_sections_from_input(x_train_batch)
            
            # concatenate sections by axis=1
            # (128, 625, 3, 3)
            sections_array = np.concatenate(sections, axis=1)

            # reshape to (80 000, 9) 625*128 = 80k
            input_flattened = sections_array.reshape(-1, KERNEL_ROWS * KERNEL_COLUMNS)
            
            # reshape to (128, 25*25*16)
            layer_1 = input_flattened.dot(kernels).reshape(x_train_batch.shape[0], -1)
            layer_1 = tanh(layer_1)

            dropout_mask = np.random.randint(2, size=layer_1.shape)
            layer_1 = layer_1 * dropout_mask * 2

            layer_2 = layer_1.dot(weights_1_2)
            layer_2 = softmax(layer_2)

            # backpropagation
            # BATCH_SIZE because delta is calculated from number of BATCH_SIZE samples
            # (BATCH_SIZE, 10)
            layer_2_delta = softmax2deriv((y_train_batch - layer_2))

            # (128, 25*25*16)
            layer_1_delta = layer_2_delta.dot(weights_1_2.T)
            layer_1_delta = layer_1_delta * tanh2deriv(layer_1)
            layer_1_delta *= dropout_mask

            # weighted delta - how much network misses because of wrong weights
            # (25*25*16, OUTPUT_SIZE)
            weighted_delta_1_2 = layer_1.T.dot(layer_2_delta)
            weights_1_2 += ALPHA * weighted_delta_1_2

            # weighted delta - how much network misses because of wrong weights
            # (9, 80 000) * (80 000, 16)
            weighted_delta_kernels = input_flattened.T.dot(layer_1_delta.reshape(-1, KERNEL_COUNT))
            kernels += ALPHA * weighted_delta_kernels
               
        if epoch % 50 == 0:    
            print(f"Train acc: {accuracy(x_train, y_train, kernels, weights_1_2)}")
            print(f"Test acc: {accuracy(x_test, y_test, kernels, weights_1_2)}")
        
    return kernels, weights_1_2

In [13]:
kernels, weights_1_2 = train(x_train, y_train, kernels, weights_1_2)

Epoch: 0
Train acc: 0.107
Test acc: 0.0992
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
Epoch: 10
Epoch: 11
Epoch: 12
Epoch: 13
Epoch: 14
Epoch: 15
Epoch: 16
Epoch: 17
Epoch: 18
Epoch: 19
Epoch: 20
Epoch: 21
Epoch: 22
Epoch: 23
Epoch: 24
Epoch: 25
Epoch: 26
Epoch: 27
Epoch: 28
Epoch: 29
Epoch: 30
Epoch: 31
Epoch: 32
Epoch: 33
Epoch: 34
Epoch: 35
Epoch: 36
Epoch: 37
Epoch: 38
Epoch: 39
Epoch: 40
Epoch: 41
Epoch: 42
Epoch: 43
Epoch: 44
Epoch: 45
Epoch: 46
Epoch: 47
Epoch: 48
Epoch: 49
Epoch: 50
Train acc: 0.099
Test acc: 0.0982
Epoch: 51
Epoch: 52
Epoch: 53
Epoch: 54
Epoch: 55
Epoch: 56
Epoch: 57
Epoch: 58
Epoch: 59
Epoch: 60
Epoch: 61
Epoch: 62
Epoch: 63
Epoch: 64
Epoch: 65
Epoch: 66
Epoch: 67
Epoch: 68
Epoch: 69
Epoch: 70
Epoch: 71
Epoch: 72
Epoch: 73
Epoch: 74
Epoch: 75
Epoch: 76
Epoch: 77
Epoch: 78
Epoch: 79
Epoch: 80
Epoch: 81
Epoch: 82
Epoch: 83
Epoch: 84
Epoch: 85
Epoch: 86
Epoch: 87
Epoch: 88
Epoch: 89
Epoch: 90


KeyboardInterrupt: 

In [None]:
from matplotlib.pyplot import plot

# plot history and see generalization
plot(list(range(len(acc_history))), acc_history, '-')
plot(list(range(len(test_history))), test_history, 'g-')

In [53]:
import numpy as np, sys
np.random.seed(1)

from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000,28, 28) / 255,
                  y_train[0:1000])


one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test),28,28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1
    
x_train = images
y_train = labels
x_test = test_images
y_test = test_labels
# in case that dataset is missing or pickle is broken uncomment this
# init()

# x_train, y_train, x_test, y_test = load()

# # take first 1000 samples
# x_train = x_train[0:1000]
# y_train = y_train[0:1000]

# # transform labels from [2] to [0,0,1,0,0,0,0,0,0,0]
# OUT_CLASSES = 10

# transformed_y_train = []

# for y_label in y_train:
#     zero = np.zeros((OUT_CLASSES,))
#     zero[y_label] = 1
#     transformed_y_train.append(zero)

# y_train = transformed_y_train

# transformed_y_test = []

# for y_label in y_test:
#     zero = np.zeros((OUT_CLASSES,))
#     zero[y_label] = 1
#     transformed_y_test.append(zero)

# y_test = transformed_y_test

# # normalize input, avoid divergence
# x_train = x_train / 255
# x_test = x_test / 255

# # prepare input for conv layer
# x_train = x_train.reshape(x_train.shape[0], 28, 28)
# x_test = x_test.reshape(x_test.shape[0], 28, 28)

def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

def softmax2deriv(output):
    return output / (output.shape[0] * BATCH_SIZE)

BATCH_SIZE = 128
ALPHA = 2
EPOCHS = 300

INPUT_ROWS = 28
INPUT_COLUMNS = 28

KERNEL_ROWS = 3
KERNEL_COLUMNS = 3
KERNEL_COUNT = 16
OUTPUT_SIZE = 10

NUMBER_OF_SAMPLES_PER_IMAGE = (INPUT_ROWS - KERNEL_ROWS) * (INPUT_COLUMNS - KERNEL_COLUMNS)

# (9, 16)
kernels = 0.02 * np.random.random((KERNEL_ROWS * KERNEL_COLUMNS, KERNEL_COUNT)) - 0.01

# (25*25*16, OUTPUT_SIZE)
weights_1_2 = 0.02 * np.random.random((NUMBER_OF_SAMPLES_PER_IMAGE * KERNEL_COUNT, OUTPUT_SIZE)) - 0.01


def get_sections_from_input(input_data):
    sections = []
    
    for row in range(INPUT_ROWS - KERNEL_ROWS):
        for column in range(INPUT_COLUMNS - KERNEL_COLUMNS):
            section = input_data[:, row:row + KERNEL_ROWS, column:column + KERNEL_COLUMNS]
            
            # extend by one axis for future concatenation
            sections.append(section.reshape(-1,1,KERNEL_ROWS, KERNEL_COLUMNS))
    
    return sections

def predict(input_data, kernels, weights_1_2):
    # list of len 25x25 of (128, 3, 3)
    sections = get_sections_from_input(input_data)
            
    # concatenate sections by axis=1
    # (128, 625, 3, 3)
    sections_array = np.concatenate(sections, axis=1)

    # reshape to (80 000, 9) 625*128 = 80k
    # input_flattened = sections_array.reshape(-1, KERNEL_ROWS * KERNEL_COLUMNS)
    s = sections_array.shape
    input_flattened = sections_array.reshape(s[0]*s[1], -1)

    # reshape to (128, 25*25*16)
    # layer_1 = input_flattened.dot(kernels).reshape(x_train_batch.shape[0], -1)
    layer_1 = input_flattened.dot(kernels).reshape(s[0], -1)
    layer_1 = tanh(layer_1)

    dropout_mask = np.random.randint(2, size=layer_1.shape)
    layer_1 *= dropout_mask * 2

    layer_2 = layer_1.dot(weights_1_2)
    layer_2 = softmax(layer_2)
    
    return layer_2
    
def accuracy(x_input, y_input, kernels, weights_1_2):
    prediction = predict(x_input, kernels, weights_1_2)
    
    match = 0
    for predicted, label in zip(prediction, y_input):
        if np.argmax(predicted) == np.argmax(label):
            match += 1

    return match / len(prediction)  

def train(x_train, y_train, kernels, weights_1_2):
    for epoch in range(EPOCHS):
        print(f"Epoch: {epoch}")
        for batch_id in range(x_train.shape[0] // BATCH_SIZE):
            batch_index_start = batch_id * BATCH_SIZE
            batch_index_end = batch_index_start + BATCH_SIZE

            # (128, 28, 28)
            x_train_batch = x_train[batch_index_start:batch_index_end]
            y_train_batch = y_train[batch_index_start:batch_index_end]

            # list of len 25x25 of (128, 1, 3, 3)
            sections = get_sections_from_input(x_train_batch)
            
            # concatenate sections by axis=1
            # (128, 625, 3, 3)
            sections_array = np.concatenate(sections, axis=1)

            # reshape to (80 000, 9) 625*128 = 80k
            # input_flattened = sections_array.reshape(-1, KERNEL_ROWS * KERNEL_COLUMNS)
            s = sections_array.shape
            input_flattened = sections_array.reshape(s[0]*s[1], -1)
            
            # reshape to (128, 25*25*16)
            # layer_1 = input_flattened.dot(kernels).reshape(x_train_batch.shape[0], -1)
            kernel_output = input_flattened.dot(kernels)
            layer_1 = kernel_output.reshape(s[0], -1)
            layer_1 = tanh(layer_1)

            dropout_mask = np.random.randint(2, size=layer_1.shape)
            layer_1 *= dropout_mask * 2

            layer_2 = layer_1.dot(weights_1_2)
            layer_2 = softmax(layer_2)

            # backpropagation
            # BATCH_SIZE because delta is calculated from number of BATCH_SIZE samples
            # (BATCH_SIZE, 10)
            # layer_2_delta = softmax2deriv((y_train_batch - layer_2))
            layer_2_delta = (y_train_batch - layer_2)\
                        / (BATCH_SIZE * layer_2.shape[0])

            # (128, 25*25*16)
            layer_1_delta = layer_2_delta.dot(weights_1_2.T)
            layer_1_delta = layer_1_delta * tanh2deriv(layer_1)
            layer_1_delta *= dropout_mask

            # weighted delta - how much network misses because of wrong weights
            # (25*25*16, OUTPUT_SIZE)
            weighted_delta_1_2 = layer_1.T.dot(layer_2_delta)
            weights_1_2 += ALPHA * weighted_delta_1_2

            # weighted delta - how much network misses because of wrong weights
            # (9, 80 000) * (80 000, 16)
            weighted_delta_kernels = input_flattened.T.dot(layer_1_delta.reshape(kernel_output.shape))
            kernels -= ALPHA * weighted_delta_kernels
               
        if epoch % 50 == 0:    
            print(f"Train acc: {accuracy(x_train, y_train, kernels, weights_1_2)}")
            print(f"Test acc: {accuracy(x_test, y_test, kernels, weights_1_2)}")
        
    return kernels, weights_1_2

In [54]:
kernels, weights_1_2 = train(x_train, y_train, kernels, weights_1_2)

Epoch: 0
Train acc: 0.084
Test acc: 0.0837
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
Epoch: 10
Epoch: 11
Epoch: 12
Epoch: 13
Epoch: 14
Epoch: 15
Epoch: 16
Epoch: 17
Epoch: 18
Epoch: 19
Epoch: 20
Epoch: 21
Epoch: 22
Epoch: 23
Epoch: 24
Epoch: 25
Epoch: 26
Epoch: 27
Epoch: 28
Epoch: 29
Epoch: 30
Epoch: 31
Epoch: 32
Epoch: 33
Epoch: 34
Epoch: 35
Epoch: 36
Epoch: 37
Epoch: 38
Epoch: 39
Epoch: 40
Epoch: 41
Epoch: 42
Epoch: 43
Epoch: 44
Epoch: 45
Epoch: 46
Epoch: 47
Epoch: 48
Epoch: 49
Epoch: 50
Train acc: 0.15
Test acc: 0.1299
Epoch: 51
Epoch: 52
Epoch: 53
Epoch: 54
Epoch: 55
Epoch: 56
Epoch: 57
Epoch: 58
Epoch: 59
Epoch: 60
Epoch: 61
Epoch: 62
Epoch: 63
Epoch: 64
Epoch: 65
Epoch: 66
Epoch: 67
Epoch: 68
Epoch: 69
Epoch: 70
Epoch: 71
Epoch: 72
Epoch: 73
Epoch: 74
Epoch: 75
Epoch: 76
Epoch: 77
Epoch: 78
Epoch: 79
Epoch: 80
Epoch: 81
Epoch: 82
Epoch: 83
Epoch: 84
Epoch: 85
Epoch: 86
Epoch: 87
Epoch: 88
Epoch: 89
Epoch: 90
Epoch: 91
Epoch: 92
Epoch: 93
Epo

KeyboardInterrupt: 

In [2]:
from keras.datasets import mnist
import numpy as np
(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000,28*28) / 255,
                  y_train[0:1000])


one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

print(images.shape)
print(labels.shape)
print(test_images.shape)
print(test_labels.shape)

(1000, 784)
(1000, 10)
(10000, 784)
(10000, 10)


In [4]:
from init_mnist import init, load
import numpy as np

np.random.seed(1)
x_train, y_train, x_test, y_test = load()

# take first 1000 samples
x_train = x_train[0:1000]
y_train = y_train[0:1000]

# transform labels from [2] to [0,0,1,0,0,0,0,0,0,0]
OUT_CLASSES = 10

transformed_y_train = []

for y_label in y_train:
    zero = np.zeros((OUT_CLASSES,))
    zero[y_label] = 1
    transformed_y_train.append(zero)

y_train = transformed_y_train

transformed_y_test = []

for y_label in y_test:
    zero = np.zeros((OUT_CLASSES,))
    zero[y_label] = 1
    transformed_y_test.append(zero)

y_test = transformed_y_test

# normalize input, avoid divergence
x_train = x_train / 255
x_test = x_test / 255

# prepare input for conv layer
x_train = x_train.reshape(x_train.shape[0], 28, 28)
x_test = x_test.reshape(x_test.shape[0], 28, 28)

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(1000, 28, 28)


AttributeError: 'list' object has no attribute 'shape'