In [102]:
import numpy as np
np.random.seed(98)

In [62]:
class Conv:
    def __init__(self, num_filters):
        self.num_filters = num_filters
        self.filter = np.random.rand(num_filters, 3, 3)*np.sqrt(1/(3*3*num_filters))       #Xavier Initialization

    def iterate_regions(self, image):
        h, w = image.shape
        
        for i in range(h-2):
            for j in range(w-2):
                im_region = image[i:(i+3), j:(j+3)]
                yield im_region, i, j

    def forward(self,input):
        self.last_input = input
        h,w = input.shape
        output = np.zeros((h-2, w-2, self.num_filters))

        for im_regions, i, j in self.iterate_regions(input):
            output[i,j] = np.sum(im_regions*self.filter, axis = (1,2))
            return output
        
    def backprop(self, d_l_d_out, learnrate):
        #d-l-d -->> loss gradient for layer's output
        d_l_d_filters = np.zeros(self.filter.shape)

        for im_regions, i, j in self.iterate_regions(self.last_input):
            for f in range(self.num_filters):
                d_l_d_filters[f] += d_l_d_out[i, j, f]*im_regions

        self.filter -= learnrate * d_l_d_filters
        return None


In [63]:
class Max_Pool:
    def iterate_regions(self, image):
        h,w, d = image.shape

        new_h = h//2
        new_w = w//2

        for i in range(new_h):
            for j in range(new_w):
                im_region = image[(i*2):(i*2 + 2), (j*2):(j*2 + 2)]
                yield im_region, i, j

    
    def forward(self,input):
        self.last_input = input
        h, w, num_filters = input.shape
        output = np.zeros((h//2, w//2, num_filters))

        for im_region, i, j in self.iterate_regions(input):
            output[i,j] = np.amax(im_region, axis = (0,1))

        return output
    
    def backprop(self, d_l_d_out):
        d_l_d_input = np.zeros(self.last_input.shape)

        for im_region, i, j in self.iterate_regions(self.last_input):
            h, w, f = im_region.shape
            max = np.amax(im_region, axis = (0, 1))

            for i2 in range(h):
                for j2 in range(w):
                    for f2 in range(f):
                        if im_region[i2, j2, f2] == max[f2]:
                            d_l_d_input[i*2+i2, j*2+j2, f2] = d_l_d_out[i, j, f2]
                            break

        return d_l_d_input

In [64]:
class Softmax:
    def __init__(self, input_len, nodes):
        self.weights = np.random.randn(input_len, nodes)/input_len
        self.biases = np.zeros(nodes)


    def forward(self, input):
        self.last_input_shape = input.shape
        input = input.flatten()
        self.last_input = input

        input_len, nodes = self.weights.shape

        totals = np.dot(input , self.weights) + self.biases
        self.last_totals = totals

        exp = np.exp(totals)
        return(exp/np.sum(exp, axis = 0))
    

    def backprop(self, dl_dout, learn_rate):
        for i, grad in enumerate(dl_dout):
            if grad == 0:
                continue
            
            t_exp = np.exp(self.last_totals)
            S = np.sum(t_exp)

            dout_dt = -t_exp[i]*t_exp/(S**2)
            dout_dt[i] = t_exp[i]*(S-t_exp[i])/(S**2)

            dt_dw = self.last_input
            dt_db = 1
            dt_dinput = self.weights

            dl_dt = grad*dout_dt

            dl_dw = dt_dw[np.newaxis].T @ dl_dt[np.newaxis]
            dl_db = dl_dt * dt_db
            dl_dinputs = dt_dinput @ dl_dt

            self.weights -= learn_rate*dl_dw
            self.biases -= learn_rate * dl_db

            return dl_dinputs.reshape(self.last_input_shape)


In [65]:
import mnist


In [66]:
from tensorflow.keras.datasets import mnist

In [87]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [88]:
x_train = x_train[:1000]
y_train = y_train[:1000]
x_test = x_test[:1000]
y_test = y_test[:1000]

In [90]:
import matplotlib.pyplot as plt

In [73]:
conv = Conv(8)
pool = Max_Pool()
soft = Softmax(13*13*8, 10)     #nodes --> probable number of classes

def forward(img, label):
    out = conv.forward((img/255) -0.5)
    out = pool.forward(out)
    out = soft.forward(out)

    loss = -np.log(out[label])
    acc = 1 if (np.argmax(out) == label)    else 0

    return out, loss, acc
    
def train(img, label, alpha = 0.005):
    out, loss, acc = forward(img, label)
    grad = np.zeros(10)
    grad[label] = -1/out[label]

    grad = soft.backprop(grad, alpha)
    grad = pool.backprop(grad)
    grad = conv.backprop(grad, alpha)

    return loss, acc


In [106]:
for epoch in range(3):
    print('EPOCH %d' %(epoch+1))

    perm = np.random.permutation(len(x_train))
    x_train = x_train[perm]
    y_train = y_train[perm]

    loss = 0
    num_correct = 0

    for i, (img, label) in enumerate(zip(x_train, y_train)):
        if i>0 and i % 100 == 99:
            print('[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %(i+1, loss/100, num_correct))
            loss = 0
            num_correct = 0
            l, acc = train(img, label)
            loss+=1
            num_correct += acc


EPOCH 1
[Step 100] Past 100 steps: Average Loss 0.000 | Accuracy: 0%
[Step 200] Past 100 steps: Average Loss 0.010 | Accuracy: 0%
[Step 300] Past 100 steps: Average Loss 0.010 | Accuracy: 0%
[Step 400] Past 100 steps: Average Loss 0.010 | Accuracy: 0%
[Step 500] Past 100 steps: Average Loss 0.010 | Accuracy: 0%
[Step 600] Past 100 steps: Average Loss 0.010 | Accuracy: 0%
[Step 700] Past 100 steps: Average Loss 0.010 | Accuracy: 0%
[Step 800] Past 100 steps: Average Loss 0.010 | Accuracy: 1%
[Step 900] Past 100 steps: Average Loss 0.010 | Accuracy: 0%
[Step 1000] Past 100 steps: Average Loss 0.010 | Accuracy: 0%
EPOCH 2
[Step 100] Past 100 steps: Average Loss 0.000 | Accuracy: 0%
[Step 200] Past 100 steps: Average Loss 0.010 | Accuracy: 0%
[Step 300] Past 100 steps: Average Loss 0.010 | Accuracy: 0%
[Step 400] Past 100 steps: Average Loss 0.010 | Accuracy: 1%
[Step 500] Past 100 steps: Average Loss 0.010 | Accuracy: 0%
[Step 600] Past 100 steps: Average Loss 0.010 | Accuracy: 1%
[Step 7