In [None]:
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Load MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Preprocess data: append a trailing channel axis so every image is
# (28, 28, 1), convert to float32 and divide by 255. The sample count is
# inferred with -1 rather than hard-coded, so the reshape keeps working if the
# arrays are subsetted before this point.
train_images = train_images.reshape((-1, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((-1, 28, 28, 1)).astype('float32') / 255

# One-hot encode the integer class labels.
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

# Keep only the first TRAIN_SUBSET_SIZE samples for training.
TRAIN_SUBSET_SIZE = 1000
train_images = train_images[:TRAIN_SUBSET_SIZE]
train_labels = train_labels[:TRAIN_SUBSET_SIZE]

class ConvLayer:
    """Stride-1, no-padding sliding-window convolution layer.

    Images are handled channels-last, ``(height, width, channels)``, while the
    filter bank is stored channels-first as
    ``(num_filters, channels, filter_size, filter_size)``. Every image patch is
    therefore moved to channels-first before it is combined with a filter.
    """

    def __init__(self, num_filters, filter_size, input_shape):
        """Create the layer with small random filters.

        Args:
            num_filters: Number of filters (output channels).
            filter_size: Side length of each square filter.
            input_shape: ``(height, width, channels)`` of the expected input;
                only the channel count ``input_shape[2]`` is used here.
        """
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.input_shape = input_shape
        self.filters = np.random.randn(
            num_filters, input_shape[2], filter_size, filter_size
        ) * 0.01

    def iterate_regions(self, image):
        """Yield ``(region, i, j)`` for every filter-sized window of *image*.

        ``region`` is the ``(filter_size, filter_size, channels)`` slice whose
        top-left corner is at row ``i``, column ``j``.
        """
        h, w = image.shape[0], image.shape[1]
        for i in range(h - self.filter_size + 1):
            for j in range(w - self.filter_size + 1):
                im_region = image[i:i + self.filter_size, j:j + self.filter_size, :]
                yield im_region, i, j

    def forward(self, input):
        """Apply every filter to *input* and return the feature maps.

        Args:
            input: Array of shape ``(height, width, channels)``.

        Returns:
            Array of shape
            ``(height - filter_size + 1, width - filter_size + 1, num_filters)``.
        """
        # Cached for backprop.
        self.last_input = input
        h, w = input.shape[0], input.shape[1]
        output = np.zeros(
            (h - self.filter_size + 1, w - self.filter_size + 1, self.num_filters)
        )
        for im_region, i, j in self.iterate_regions(input):
            # Align the patch with the channels-first filters. Multiplying the
            # channels-last patch directly would broadcast (k, k, C) against
            # (C, k, k) instead of pairing matching elements.
            region_chw = im_region.transpose(2, 0, 1)
            # Contract channel, row and column axes: one value per filter.
            output[i, j] = np.tensordot(self.filters, region_chw, axes=3)
        return output

    def backprop(self, d_L_d_out, learn_rate):
        """Update the filters with one gradient-descent step.

        Args:
            d_L_d_out: Loss gradient w.r.t. this layer's output, same shape as
                the array returned by ``forward``.
            learn_rate: Step size of the update.

        Returns:
            None. No gradient w.r.t. the input is computed.
        """
        d_L_d_filters = np.zeros(self.filters.shape)

        for im_region, i, j in self.iterate_regions(self.last_input):
            region_chw = im_region.transpose(2, 0, 1)
            # Each filter's gradient is the patch scaled by that filter's
            # upstream gradient at (i, j); (F, 1, 1, 1) * (C, k, k) -> (F, C, k, k).
            d_L_d_filters += d_L_d_out[i, j][:, None, None, None] * region_chw

        self.filters -= learn_rate * d_L_d_filters
        return None



class MaxPoolLayer:
    """2x2 max pooling with stride 2 over ``(height, width, channels)`` maps."""

    def iterate_regions(self, image):
        """Yield ``(window, i, j)`` for each non-overlapping 2x2 window.

        A trailing odd row or column is never visited.
        """
        rows, cols, _ = image.shape
        for i in range(rows // 2):
            top = 2 * i
            for j in range(cols // 2):
                left = 2 * j
                yield image[top:top + 2, left:left + 2], i, j

    def forward(self, input):
        """Return the per-channel maximum of every 2x2 window.

        The result has shape ``(height // 2, width // 2, channels)``.
        """
        # Cached for backprop.
        self.last_input = input
        rows, cols, depth = input.shape
        pooled = np.zeros((rows // 2, cols // 2, depth))
        for window, i, j in self.iterate_regions(input):
            pooled[i, j] = np.max(window, axis=(0, 1))
        return pooled

    def backprop(self, d_L_d_out):
        """Route each output gradient back to the position(s) of its maximum.

        Positions that were not a window maximum keep a zero gradient; when
        several positions tie for the maximum, each receives the gradient.
        """
        grad_input = np.zeros(self.last_input.shape)
        for window, i, j in self.iterate_regions(self.last_input):
            # Boolean mask of the entries equal to their channel's maximum.
            is_max = window == np.max(window, axis=(0, 1))
            row_off, col_off, chan = np.nonzero(is_max)
            grad_input[2 * i + row_off, 2 * j + col_off, chan] = d_L_d_out[i, j, chan]
        return grad_input


class Softmax:
    """Fully connected layer followed by a softmax activation."""

    def __init__(self, input_len, nodes):
        """Create the layer.

        Args:
            input_len: Number of values in the flattened input.
            nodes: Number of output nodes.
        """
        # Dividing by input_len shrinks the initial weights.
        self.weights = np.random.randn(input_len, nodes) / input_len
        self.biases = np.zeros(nodes)

    def forward(self, input):
        """Flatten *input*, apply the dense layer and return the softmax output.

        Args:
            input: Array of any shape holding ``input_len`` values in total.

        Returns:
            1-D array of length ``nodes`` that sums to 1.
        """
        # Cached for backprop.
        self.last_input_shape = input.shape
        input = input.flatten()
        self.last_input = input
        totals = np.dot(input, self.weights) + self.biases
        self.last_totals = totals
        # Subtracting the maximum leaves the softmax unchanged but keeps
        # np.exp from overflowing to inf (and the result from becoming nan).
        exp = np.exp(totals - np.max(totals))
        return exp / np.sum(exp, axis=0)

    def backprop(self, d_L_d_out, learn_rate):
        """Update weights and biases and return the gradient w.r.t. the input.

        Every entry of *d_L_d_out* contributes, and an all-zero gradient
        yields a zero array instead of ``None``.

        Args:
            d_L_d_out: Loss gradient w.r.t. the softmax output, length ``nodes``.
            learn_rate: Step size of the update.

        Returns:
            Loss gradient w.r.t. the layer input, shaped like the array that
            was last passed to ``forward``.
        """
        d_L_d_out = np.asarray(d_L_d_out, dtype=float)

        # Recompute the softmax output from the cached pre-activation totals.
        exp_totals = np.exp(self.last_totals - np.max(self.last_totals))
        probs = exp_totals / np.sum(exp_totals)

        # Softmax Jacobian applied to the upstream gradient:
        # d_L_d_t[k] = probs[k] * (d_L_d_out[k] - sum_i d_L_d_out[i] * probs[i])
        d_L_d_t = probs * (d_L_d_out - np.dot(d_L_d_out, probs))

        d_L_d_w = np.outer(self.last_input, d_L_d_t)
        # Computed with the weights as they were before this update.
        d_L_d_inputs = self.weights @ d_L_d_t

        self.weights -= learn_rate * d_L_d_w
        # The derivative of the totals w.r.t. the biases is 1.
        self.biases -= learn_rate * d_L_d_t
        return d_L_d_inputs.reshape(self.last_input_shape)


class CNN:
    """Small network: 3x3 convolution (8 filters) -> 2x2 max pool -> softmax."""

    def __init__(self, pixel_max=1.0):
        """Build the three layers for 28x28 single-channel images.

        Args:
            pixel_max: Largest pixel value of the images that will be passed
                in. The default of 1.0 matches images already scaled to
                [0, 1]; pass 255 for raw 8-bit images.
        """
        self.pixel_max = pixel_max
        self.conv1 = ConvLayer(8, 3, (28, 28, 1))
        self.pool1 = MaxPoolLayer()
        # 28x28 -> 26x26 after the 3x3 convolution -> 13x13 after pooling.
        self.softmax = Softmax(13 * 13 * 8, 10)

    def forward(self, image, label):
        """Run a forward pass and score it against *label*.

        Args:
            image: Input image with values in ``[0, pixel_max]``.
            label: Integer index of the correct class.

        Returns:
            Tuple ``(softmax_out, loss, acc)``: the class probabilities, the
            cross-entropy loss for *label*, and 1 if the most probable class
            equals *label*, otherwise 0.
        """
        # Centre the pixels on zero: [0, pixel_max] -> [-0.5, 0.5]. Dividing
        # by a fixed 255 here would rescale images that are already in [0, 1]
        # a second time and squash them to almost exactly -0.5.
        conv_out = self.conv1.forward((image / self.pixel_max) - 0.5)
        pooled_out = self.pool1.forward(conv_out)
        softmax_out = self.softmax.forward(pooled_out)
        loss = -np.log(softmax_out[label])
        acc = 1 if np.argmax(softmax_out) == label else 0
        return softmax_out, loss, acc

    def train(self, image, label, lr=0.0010):
        """Run one forward/backward pass on a single sample.

        Args:
            image: Input image, as for ``forward``.
            label: Integer index of the correct class.
            lr: Learning rate handed to the trainable layers.

        Returns:
            Tuple ``(loss, acc)`` from the forward pass.
        """
        softmax_out, loss, acc = self.forward(image, label)

        # Gradient of -log(softmax_out[label]) w.r.t. the softmax output: only
        # the entry of the correct class is non-zero.
        gradient = np.zeros_like(softmax_out)
        gradient[label] = -1 / softmax_out[label]

        gradient = self.softmax.backprop(gradient, lr)
        gradient = self.pool1.backprop(gradient)
        # First layer of the network: its return value is not needed.
        self.conv1.backprop(gradient, lr)
        return loss, acc

# Build the network once; the same instance is trained across all epochs.
cnn = CNN()

for epoch in range(3):
    print('Epoch:', epoch + 1)

    # Apply one random ordering to images and labels so the pairs stay aligned.
    permutation = np.random.permutation(len(train_images))
    train_images, train_labels = train_images[permutation], train_labels[permutation]

    # Running totals since the last progress report.
    loss, num_correct = 0, 0

    for i, (image, label) in enumerate(zip(train_images, train_labels)):
        # Labels are one-hot encoded; argmax recovers the integer class index.
        step_loss, step_correct = cnn.train(image, np.argmax(label))
        loss = loss + step_loss
        num_correct = num_correct + step_correct

        # Report after every 100th sample, then start a new window.
        if (i + 1) % 100 != 0:
            continue
        print('[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' % (i + 1, loss / 100, num_correct))
        loss, num_correct = 0, 0

In [5]:
# Scratch check: a zero array built from a random (3, 3, 3) filter's shape has
# that same shape. NOTE: `filter` shadows the Python builtin of the same name.
filter = np.random.standard_normal((3, 3, 3)) / 9
d_l_d_filters = np.zeros_like(filter)
d_l_d_filters.shape

(3, 3, 3)