In [219]:
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt

In [237]:
class Conv_op:
    """Convolution layer holding `num_filters` 3x3 filters applied to a 2-D image.

    No padding and stride 1 are used, so an (h, w) input produces an
    (h - 2, w - 2, num_filters) output.
    """

    def __init__(self, num_filters):
        """Create `num_filters` random 3x3 filters (standard normal draws divided by 9)."""
        self.num_filters = num_filters
        self.conv_filter = np.random.randn(num_filters, 3, 3) / 9

    def image_region(self, image):
        """Yield `(patch, row, col)` for every 3x3 window of the 2-D `image`.

        Windows are visited row by row; `(row, col)` is the window's top-left corner.
        """
        n_rows, n_cols = image.shape
        for row in range(n_rows - 2):
            for col in range(n_cols - 2):
                yield image[row : row + 3, col : col + 3], row, col

    def forward_prop(self, image):
        """Convolve `image` with every filter.

        The input is cached on `self.cache` for use by `back_prop`.

        Returns an array of shape (height - 2, width - 2, num_filters).
        """
        n_rows, n_cols = image.shape
        self.cache = image
        feature_maps = np.zeros((n_rows - 2, n_cols - 2, self.num_filters))
        for patch, row, col in self.image_region(image):
            # Broadcasting (3, 3) against (num_filters, 3, 3) gives one response per filter.
            feature_maps[row, col] = (patch * self.conv_filter).sum(axis=(1, 2))
        return feature_maps

    def back_prop(self, dL_dout, learning_rate):
        """Update the filters with one gradient-descent step.

        dL_dout: loss gradient w.r.t. this layer's output, indexed as
            [row, col, filter] like the array returned by `forward_prop`.
        learning_rate: step size applied to the filter gradient.

        Returns None: no gradient w.r.t. the input image is computed.
        """
        filter_grads = np.zeros(self.conv_filter.shape)
        for patch, row, col in self.image_region(self.cache):
            # Each filter's gradient accumulates the patch scaled by that
            # filter's output gradient at this position.
            for filter_idx, out_grad in enumerate(dL_dout[row, col]):
                filter_grads[filter_idx] += patch * out_grad
        self.conv_filter -= learning_rate * filter_grads
        return None

In [221]:
class Max_pool:
    """2x2 max-pooling layer with stride 2 over a (height, width, channels) array."""

    def image_region(self, image):
        """Yield `(patch, i, j)` for every non-overlapping 2x2 window of `image`.

        `(i, j)` indexes the pooled output; a trailing odd row or column is skipped.
        """
        pooled_rows = image.shape[0] // 2
        pooled_cols = image.shape[1] // 2
        for i in range(pooled_rows):
            top = i * 2
            for j in range(pooled_cols):
                left = j * 2
                yield image[top : top + 2, left : left + 2], i, j

    def forward_prop(self, image):
        """Return the per-channel maximum of every 2x2 window.

        The input is stored on `self.image` for use by `back_prop`.
        Output shape is (height // 2, width // 2, channels).
        """
        self.image = image
        pooled = np.zeros((image.shape[0] // 2, image.shape[1] // 2, image.shape[2]))
        for window, i, j in self.image_region(image):
            pooled[i, j] = window.max(axis=(0, 1))
        return pooled

    def back_prop(self, dL_dout):
        """Route `dL_dout` back to the input positions that held each window's maximum.

        Every position equal to the window maximum receives the gradient (so
        ties all receive it); every other position stays zero.

        Returns an array with the same shape as the cached forward input.
        """
        grad_input = np.zeros(self.image.shape)
        for window, i, j in self.image_region(self.image):
            is_max = window == np.amax(window, axis=(0, 1))
            # View into the result covering exactly this window.
            target = grad_input[i * 2 : i * 2 + 2, j * 2 : j * 2 + 2]
            target[is_max] = np.broadcast_to(dL_dout[i, j], window.shape)[is_max]
        return grad_input

In [222]:
class Softmax:
    """Fully connected layer followed by a softmax activation."""

    def __init__(self, input_len, nodes):
        """Create an (input_len, nodes) weight matrix and a zero bias vector.

        Weights are standard-normal draws divided by `input_len`.
        """
        self.weights = np.random.randn(input_len, nodes) / input_len
        self.biases = np.zeros(nodes)

    @staticmethod
    def _softmax(totals):
        """Softmax of a 1-D vector of logits.

        The maximum is subtracted before exponentiating; this leaves the result
        mathematically unchanged but keeps `np.exp` from overflowing to `inf`
        (and the division from producing NaN) when the logits are large.
        """
        shifted_exp = np.exp(totals - np.max(totals))
        return shifted_exp / np.sum(shifted_exp, axis=0)

    def forward_prop(self, image):
        """Flatten `image`, apply the affine map and return softmax probabilities.

        The input shape, the flattened input and the pre-softmax totals are
        cached on the instance for use by `back_prop`.

        Returns a 1-D array with one probability per node.
        """
        self.last_input_shape = image.shape

        image = image.flatten()
        self.last_input = image

        totals = np.dot(image, self.weights) + self.biases
        self.last_totals = totals

        return self._softmax(totals)

    def back_prop(self, d_L_d_out, learn_rate):
        """Back-propagate through softmax and the affine map, then update parameters.

        d_L_d_out: loss gradient w.r.t. the probabilities returned by
            `forward_prop` (one entry per node). Any number of entries may be
            non-zero; an all-zero gradient leaves the parameters unchanged.
        learn_rate: gradient-descent step size.

        Returns the loss gradient w.r.t. the layer input, reshaped to the shape
        of the array passed to `forward_prop`.
        """
        d_L_d_out = np.asarray(d_L_d_out, dtype=float)
        probs = self._softmax(self.last_totals)

        # Softmax Jacobian: d out_i / d t_k = p_i * (delta_ik - p_k).
        # Contracting it with d_L_d_out gives p_k * (g_k - sum_i g_i * p_i),
        # which covers every non-zero gradient entry at once.
        d_L_d_t = probs * (d_L_d_out - np.dot(d_L_d_out, probs))

        # totals = input @ weights + biases, so:
        d_L_d_w = np.outer(self.last_input, d_L_d_t)
        d_L_d_b = d_L_d_t
        # Computed before the weight update so it uses the forward-pass weights.
        d_L_d_inputs = self.weights @ d_L_d_t

        self.weights -= learn_rate * d_L_d_w
        self.biases -= learn_rate * d_L_d_b

        return d_L_d_inputs.reshape(self.last_input_shape)

In [223]:
import keras
from keras.datasets import mnist

# Load the MNIST train and test splits as (images, labels) pairs.
(x_train, y_train), (x_test, y_test) = mnist.load_data()


# Layers used by forward()/train() for the MNIST run.
# Conv_op(8): eight 3x3 filters.
conn = Conv_op(8)
pool = Max_pool()
# Softmax input length 13 * 13 * 8 with 10 output nodes.
# NOTE(review): assumes 28x28 images -> 26x26x8 after the convolution ->
# 13x13x8 after 2x2 pooling - confirm against the dataset shape.
sm = Softmax(13 * 13 * 8, 10)

In [224]:
def forward(image, label):
    """Run one image through conn -> pool -> sm and score it against `label`.

    The image is rescaled with `image / 255 - 0.5` before the convolution
    (presumably mapping 0-255 pixel values to [-0.5, 0.5] - verify against the data).

    Returns `(out, loss, acc)`: the softmax output, the negative log of the
    probability assigned to `label`, and 1 if the arg-max matches `label` else 0.
    """
    scaled = (image / 255) - 0.5
    out = sm.forward_prop(pool.forward_prop(conn.forward_prop(scaled)))

    loss = -np.log(out[label])
    if np.argmax(out) == label:
        acc = 1
    else:
        acc = 0
    return out, loss, acc
#print (y_train[0])
#print (forward(x_train[0], y_train[0]))

In [225]:
def train(im, label, lr = 0.005):
    """Run one training step on a single image.

    Performs a forward pass, seeds the gradient of `-log(out[label])` with
    respect to the softmax output, and back-propagates through sm, pool and
    conn, using `lr` as the learning rate for the two trainable layers.

    Returns `(loss, acc)` from the forward pass.
    """
    out, loss, acc = forward(im, label)

    # d(-log(out[label])) / d out is non-zero only at index `label`.
    d_L_d_out = np.zeros(10)
    d_L_d_out[label] = -1 / out[label]

    d_L_d_pool_out = sm.back_prop(d_L_d_out, lr)
    d_L_d_conv_out = pool.back_prop(d_L_d_pool_out)
    conn.back_prop(d_L_d_conv_out, lr)
    return loss, acc

#print (train(x_train[0], y_train[0]))

In [226]:
# Train on the first 1000 MNIST samples only. The slice is taken once, before
# the epoch loop; x_train / y_train are still rebound to the subset as before.
x_train = x_train[:1000]
y_train = y_train[:1000]

for epoch in range(5):
    print('--- Epoch %d ---' % (epoch + 1))

    # Visit the samples in a fresh random order every epoch.
    permutation = np.random.permutation(len(x_train))
    train_images = x_train[permutation]
    train_labels = y_train[permutation]

    loss = 0
    num_correct = 0
    for i, (im, label) in enumerate(zip(train_images, train_labels)):
        l, acc = train(im, label)
        loss += l
        num_correct += acc

        # Report AFTER the step so each window covers exactly the 100 samples
        # just trained (reporting before the step averaged only 99 samples in
        # the first window and never reported the epoch's last sample).
        if i % 100 == 99:
            print(
                '[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %
                (i + 1, loss / 100, num_correct)
            )
            loss = 0
            num_correct = 0

--- Epoch 1 ---
[Step 100] Past 100 steps: Average Loss 2.243 | Accuracy: 18%
[Step 200] Past 100 steps: Average Loss 1.974 | Accuracy: 38%
[Step 300] Past 100 steps: Average Loss 1.549 | Accuracy: 51%
[Step 400] Past 100 steps: Average Loss 1.124 | Accuracy: 64%
[Step 500] Past 100 steps: Average Loss 0.925 | Accuracy: 67%
[Step 600] Past 100 steps: Average Loss 0.682 | Accuracy: 79%
[Step 700] Past 100 steps: Average Loss 0.684 | Accuracy: 78%
[Step 800] Past 100 steps: Average Loss 0.759 | Accuracy: 78%
[Step 900] Past 100 steps: Average Loss 0.821 | Accuracy: 73%
[Step 1000] Past 100 steps: Average Loss 0.753 | Accuracy: 72%
--- Epoch 2 ---
[Step 100] Past 100 steps: Average Loss 0.543 | Accuracy: 85%
[Step 200] Past 100 steps: Average Loss 0.633 | Accuracy: 84%
[Step 300] Past 100 steps: Average Loss 0.730 | Accuracy: 81%
[Step 400] Past 100 steps: Average Loss 0.402 | Accuracy: 87%
[Step 500] Past 100 steps: Average Loss 0.417 | Accuracy: 89%
[Step 600] Past 100 steps: Average Lo

In [227]:
# Running totals for the MNIST test evaluation.
loss, num_correct = 0, 0
# Predicted class of each evaluated image, in evaluation order.
prediction = list()

def get_label(probs):
    """Return the index of the largest entry of `probs`.

    Ties resolve to the lowest index, because a later entry replaces the
    current best only when it is strictly greater.
    """
    best_index, best_prob = 0, probs[0]
    for index, prob in enumerate(probs[1:], start=1):
        if prob > best_prob:
            best_index, best_prob = index, prob
    return best_index

# Evaluate on the first 1000 MNIST test samples. These names were previously
# left over from hidden kernel state, so the cell raised NameError on a fresh
# "Restart & Run All"; define them explicitly from the loaded test split.
test_images = x_test[:1000]
test_labels = y_test[:1000]

for im, label in zip(test_images, test_labels):
    out, l, acc = forward(im, label)
    prediction.append(get_label(out))
    loss += l
    num_correct += acc

num_tests = len(test_images)
print('mnist test loss:', loss / num_tests)
print('mnist test accuracy:', num_correct / num_tests)
error_rate = 1 - num_correct / num_tests
print('mnist error rate:', error_rate)

mnist test loss: 0.5208169985731882
mnist test accuracy: 0.827
mnist error rate: 0.17300000000000004


In [228]:
from pandas import *
# 10x10 confusion matrix: row = true label, column = predicted label.
cm = [[0] * 10 for _ in range(10)]
for predicted, actual in zip(prediction, test_labels):
    cm[actual][predicted] += 1

print(DataFrame(cm))

    0    1   2   3    4   5   6   7   8   9
0  78    0   2   0    1   2   0   0   2   0
1   0  118   1   0    0   3   0   0   4   0
2   0    0  97   3    1   0   1   4  10   0
3   0    0   1  71    1  25   0   1   7   1
4   0    0   0   0  102   1   1   0   2   4
5   2    0   1   0    1  72   1   6   3   1
6   3    0   2   0    9  11  61   0   1   0
7   0    0   6   1    4   1   0  82   2   3
8   2    0   0   0    3   5   0   1  76   2
9   0    0   0   1   13   0   0   6   4  70


In [229]:
import tensorflow as tf
# Load the Fashion-MNIST train and test splits as (images, labels) pairs via
# tf.keras (the recorded output below shows the archive files being downloaded).
(x_train_fmnist, y_train_fmnist), (x_test_fmnist, y_test_fmnist) = tf.keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [235]:
# Work with the first 1000 Fashion-MNIST samples of each split.
train_images, train_labels = x_train_fmnist[:1000], y_train_fmnist[:1000]
test_images, test_labels = x_test_fmnist[:1000], y_test_fmnist[:1000]

# Fresh, untrained layers for the Fashion-MNIST run (same sizes as the MNIST run).
conv = Conv_op(num_filters=8)
pool = Max_pool()
softmax = Softmax(input_len=13 * 13 * 8, nodes=10)

def forward(image, label):
    """Run one image through conv -> pool -> softmax and score it against `label`.

    The image is rescaled with `image / 255 - 0.5` before the convolution
    (presumably mapping 0-255 pixel values to [-0.5, 0.5] - verify against the data).

    Returns `(out, loss, acc)`: the softmax output, the negative log of the
    probability assigned to `label`, and 1 if the arg-max matches `label` else 0.
    """
    scaled = (image / 255) - 0.5
    out = softmax.forward_prop(pool.forward_prop(conv.forward_prop(scaled)))

    loss = -np.log(out[label])
    if np.argmax(out) == label:
        acc = 1
    else:
        acc = 0
    return out, loss, acc


def train(im, label, lr=.005):
    """Run one training step on a single image.

    Performs a forward pass, seeds the gradient of `-log(out[label])` with
    respect to the softmax output, and back-propagates through softmax, pool
    and conv, using `lr` as the learning rate for the two trainable layers.

    Returns `(loss, acc)` from the forward pass.
    """
    out, loss, acc = forward(im, label)

    # d(-log(out[label])) / d out is non-zero only at index `label`.
    d_L_d_out = np.zeros(10)
    d_L_d_out[label] = -1 / out[label]

    d_L_d_pool_out = softmax.back_prop(d_L_d_out, lr)
    d_L_d_conv_out = pool.back_prop(d_L_d_pool_out)
    conv.back_prop(d_L_d_conv_out, lr)
    return loss, acc


for epoch in range(5):
    print('--- Epoch %d ---' % (epoch + 1))

    # Reshuffle images and labels together at the start of every epoch.
    permutation = np.random.permutation(len(train_images))
    train_images = train_images[permutation]
    train_labels = train_labels[permutation]

    loss = 0
    num_correct = 0
    for i, (im, label) in enumerate(zip(train_images, train_labels)):
        l, acc = train(im, label)
        loss += l
        num_correct += acc

        # Report AFTER the step so each window covers exactly the 100 samples
        # just trained (reporting before the step averaged only 99 samples in
        # the first window and never reported the epoch's last sample).
        if i % 100 == 99:
            print(
                '[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %
                (i + 1, loss / 100, num_correct)
            )
            loss = 0
            num_correct = 0

--- Epoch 1 ---
[Step 100] Past 100 steps: Average Loss 2.189 | Accuracy: 29%
[Step 200] Past 100 steps: Average Loss 1.763 | Accuracy: 42%
[Step 300] Past 100 steps: Average Loss 1.295 | Accuracy: 53%
[Step 400] Past 100 steps: Average Loss 0.935 | Accuracy: 69%
[Step 500] Past 100 steps: Average Loss 1.062 | Accuracy: 64%
[Step 600] Past 100 steps: Average Loss 0.896 | Accuracy: 71%
[Step 700] Past 100 steps: Average Loss 0.886 | Accuracy: 69%
[Step 800] Past 100 steps: Average Loss 0.986 | Accuracy: 62%
[Step 900] Past 100 steps: Average Loss 0.822 | Accuracy: 72%
[Step 1000] Past 100 steps: Average Loss 0.730 | Accuracy: 77%
--- Epoch 2 ---
[Step 100] Past 100 steps: Average Loss 0.676 | Accuracy: 70%
[Step 200] Past 100 steps: Average Loss 0.923 | Accuracy: 70%
[Step 300] Past 100 steps: Average Loss 0.782 | Accuracy: 75%
[Step 400] Past 100 steps: Average Loss 0.583 | Accuracy: 75%
[Step 500] Past 100 steps: Average Loss 0.691 | Accuracy: 76%
[Step 600] Past 100 steps: Average Lo

In [236]:
# Evaluate the trained network on the Fashion-MNIST test subset.
loss = 0
num_correct = 0
prediction = []  # predicted class per test image, in evaluation order

for im, label in zip(test_images, test_labels):
    out, l, acc = forward(im, label)
    prediction.append(get_label(out))
    loss += l
    num_correct += acc

num_tests = len(test_images)
# Label spelling fixed ('fminst' -> 'fmnist') to match the two lines below.
print('fmnist test loss:', loss / num_tests)
print('fmnist test accuracy:', num_correct / num_tests)
error_rate = 1 - num_correct / num_tests
print('fmnist error rate:', error_rate)

# 10x10 confusion matrix: row = true label, column = predicted label.
cm = [[0] * 10 for _ in range(10)]
for predicted, actual in zip(prediction, test_labels):
    cm[actual][predicted] += 1

print(DataFrame(cm))

fminst test loss: 0.5981562575862959
fmnist test accuracy: 0.776
fmnist error rate: 0.22399999999999998
    0   1   2   3   4   5   6   7   8   9
0  95   0   2   0   0   1   9   0   0   0
1   4  97   0   3   0   0   1   0   0   0
2   4   0  72   0  19   0  16   0   0   0
3  14   4   0  59   4   0  11   0   1   0
4   0   0  15   1  88   0  11   0   0   0
5   0   0   0   0   0  77   0   4   1   5
6  21   0  15   1  16   1  41   0   2   0
7   0   0   0   0   0  10   0  73   0  12
8   0   0   0   0   0   5   4   0  86   0
9   0   0   0   0   0   4   0   3   0  88
