In [15]:
import tensorflow as tf
import random
import math

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

x_train, x_test = x_train / 255.0, x_test / 255.0

def one_hot_encode(labels, num_classes):
    return [[1 if i == label else 0 for i in range(num_classes)] for label in labels]

y_train = one_hot_encode(y_train, 10)
y_test = one_hot_encode(y_test, 10)

In [22]:
import math
import random


def matrix_multiply(A, B):
    result = [[0] * len(B[0]) for _ in range(len(A))]
    for i in range(len(A)):
        for j in range(len(B[0])):
            for k in range(len(B)):
                result[i][j] += A[i][k] * B[k][j]
    return result


def add_matrices(A, B):
    result = [[A[i][j] + B[i][j] for j in range(len(A[0]))] for i in range(len(A))]
    return result


def relu(x):
    return max(0, x)


def relu_derivative(x):
    return 1 if x > 0 else 0


def softmax(x):
    max_x = max(x)
    exps = [math.exp(i - max_x) for i in x]
    sum_exps = sum(exps)
    return [j / sum_exps for j in exps]


def convolve2d(image, kernel, stride=1, padding=0):
    kernel_height, kernel_width = len(kernel), len(kernel[0])
    image_height, image_width = len(image), len(image[0])

    output_height = (image_height - kernel_height + 2 * padding) // stride + 1
    output_width = (image_width - kernel_width + 2 * padding) // stride + 1
    output = [[0] * output_width for _ in range(output_height)]

    padded_image = [[0] * (image_width + 2 * padding) for _ in range(image_height + 2 * padding)]
    for i in range(image_height):
        for j in range(image_width):
            padded_image[i + padding][j + padding] = image[i][j]

    for y in range(0, output_height):
        for x in range(0, output_width):
            for i in range(kernel_height):
                for j in range(kernel_width):
                    output[y][x] += padded_image[y * stride + i][x * stride + j] * kernel[i][j]
    return output


class ConvLayer:
    def __init__(self, num_filters, kernel_size, stride=1, padding=0):
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.kernels = [[[random.random() for _ in range(kernel_size)] for _ in range(kernel_size)] for _ in
                        range(num_filters)]
        self.biases = [random.random() for _ in range(num_filters)]

    def forward(self, input_image):
        self.input_image = input_image
        self.output = []
        for i in range(self.num_filters):
            convolved_image = convolve2d(input_image, self.kernels[i], self.stride, self.padding)
            self.output.append([[relu(value + self.biases[i]) for value in row] for row in convolved_image])
        return self.output

    def backward(self, d_out, learning_rate):
        d_input = [[0 for _ in range(len(self.input_image[0]))] for _ in range(len(self.input_image))]
        d_kernels = [[[0 for _ in range(self.kernel_size)] for _ in range(self.kernel_size)] for _ in
                     range(self.num_filters)]
        d_biases = [0 for _ in range(self.num_filters)]

        for i in range(self.num_filters):
            for y in range(len(d_out[i])):
                for x in range(len(d_out[i][0])):
                    d_biases[i] += d_out[i][y][x] * relu_derivative(self.output[i][y][x])
                    for ky in range(self.kernel_size):
                        for kx in range(self.kernel_size):
                            if y * self.stride + ky < len(self.input_image) and x * self.stride + kx < len(
                                    self.input_image[0]):
                                d_kernels[i][ky][kx] += d_out[i][y][x] * relu_derivative(self.output[i][y][x]) * \
                                                        self.input_image[y * self.stride + ky][x * self.stride + kx]
                                d_input[y * self.stride + ky][x * self.stride + kx] += d_out[i][y][x] * relu_derivative(
                                    self.output[i][y][x]) * self.kernels[i][ky][kx]

        for i in range(self.num_filters):
            for ky in range(self.kernel_size):
                for kx in range(self.kernel_size):
                    self.kernels[i][ky][kx] -= learning_rate * d_kernels[i][ky][kx]
            self.biases[i] -= learning_rate * d_biases[i]

        return d_input


class PoolingLayer:
    def __init__(self, pool_size, stride):
        self.pool_size = pool_size
        self.stride = stride

    def forward(self, input_image):
        self.input_image = input_image
        input_height, input_width = len(input_image), len(input_image[0])
        output_height = (input_height - self.pool_size) // self.stride + 1
        output_width = (input_width - self.pool_size) // self.stride + 1
        self.output = [[0] * output_width for _ in range(output_height)]
        self.mask = [[[[0] * self.pool_size for _ in range(self.pool_size)] for _ in range(output_width)] for _ in
                     range(output_height)]

        for y in range(output_height):
            for x in range(output_width):
                pool_region = [
                    (input_image[y * self.stride + i][x * self.stride + j], i, j)
                    for i in range(self.pool_size) for j in range(self.pool_size)
                ]
                max_value, max_i, max_j = max(pool_region)
                self.output[y][x] = max_value
                self.mask[y][x][max_i][max_j] = 1
        return self.output

    def backward(self, d_out):
        d_input = [[0 for _ in range(len(self.input_image[0]))] for _ in range(len(self.input_image))]

        for y in range(len(d_out)):
            for x in range(len(d_out[0])):
                for i in range(self.pool_size):
                    for j in range(self.pool_size):
                        if self.mask[y][x][i][j] == 1:
                            d_input[y * self.stride + i][x * self.stride + j] += d_out[y][x]

        return d_input


class FullyConnectedLayer:
    def __init__(self, input_size, output_size):
        self.weights = [[random.random() for _ in range(output_size)] for _ in range(input_size)]
        self.biases = [random.random() for _ in range(output_size)]

    def forward(self, input_vector):
        self.input_vector = input_vector
        self.logits = [
            sum(input_vector[j] * self.weights[j][i] for j in range(len(input_vector))) + self.biases[i]
            for i in range(len(self.biases))
        ]
        self.output = softmax(self.logits)
        return self.output

    def backward(self, d_out, learning_rate):
        d_input = [0 for _ in range(len(self.input_vector))]
        d_weights = [[0 for _ in range(len(self.weights[0]))] for _ in range(len(self.weights))]
        d_biases = [0 for _ in range(len(self.biases))]

        for i in range(len(d_out)):
            for j in range(len(self.input_vector)):
                d_weights[j][i] += d_out[i] * self.input_vector[j]
                d_input[j] += d_out[i] * self.weights[j][i]
            d_biases[i] += d_out[i]

        for i in range(len(self.weights)):
            for j in range(len(self.weights[0])):
                self.weights[i][j] -= learning_rate * d_weights[i][j]
        for i in range(len(self.biases)):
            self.biases[i] -= learning_rate * d_biases[i]

        return d_input


class CNN:
    def __init__(self):
        self.conv1 = ConvLayer(8, 3)
        self.pool1 = PoolingLayer(2, 2)
        self.fc1 = FullyConnectedLayer(13 * 13 * 8, 10)

    def forward(self, input_image):
        self.conv_output = self.conv1.forward(input_image)
        self.pooled_output = [self.pool1.forward(fmap) for fmap in self.conv_output]
        self.flattened_output = [value for fmap in self.pooled_output for row in fmap for value in row]
        output = self.fc1.forward(self.flattened_output)
        return output

    def backward(self, d_out, learning_rate):
        d_fc1 = self.fc1.backward(d_out, learning_rate)

        d_fc1 = [d_fc1[i * 13 * 13: (i + 1) * 13 * 13] for i in range(8)]
        d_fc1 = [[d_fc1[i][j * 13: (j + 1) * 13] for j in range(13)] for i in range(8)]

        d_pool1 = [self.pool1.backward(d_fc1[i]) for i in range(8)]

        d_conv1 = d_pool1

        self.conv1.backward(d_conv1, learning_rate)


def cross_entropy_loss(predictions, labels):
    epsilon = 1e-10
    loss = -sum([labels[i] * math.log(predictions[i] + epsilon) for i in range(len(labels))])
    return loss


def train_cnn(cnn, images, labels, epochs, learning_rate):
    for epoch in range(epochs):
        total_loss = 0
        correct_predictions = 0

        for i in range(len(images)):
            predictions = cnn.forward(images[i])

            loss = cross_entropy_loss(predictions, labels[i])
            total_loss += loss

            if predictions.index(max(predictions)) == labels[i].index(1):
                correct_predictions += 1

            d_out = [predictions[j] - labels[i][j] for j in range(len(predictions))]
            cnn.backward(d_out, learning_rate)

        average_loss = total_loss / len(images)
        accuracy = correct_predictions / len(images)

        print(f"Epoch {epoch + 1}/{epochs}, Loss: {average_loss}, Accuracy: {accuracy * 100}%")

cnn = CNN()
train_images, train_labels = x_train[:1000], y_train[:1000]
train_cnn(cnn, train_images, train_labels, epochs=10, learning_rate=0.01)

Epoch 1/10, Loss: 1.7662877784032567, Accuracy: 65.2%
Epoch 2/10, Loss: 0.5974659121399905, Accuracy: 82.6%
Epoch 3/10, Loss: 0.46546614127758545, Accuracy: 86.6%
Epoch 4/10, Loss: 0.3805264049173322, Accuracy: 88.4%
Epoch 5/10, Loss: 0.3133890954707715, Accuracy: 90.10000000000001%
Epoch 6/10, Loss: 0.2559184245755764, Accuracy: 92.2%
Epoch 7/10, Loss: 0.20922362379665335, Accuracy: 93.4%
Epoch 8/10, Loss: 0.16682317388443968, Accuracy: 95.19999999999999%
Epoch 9/10, Loss: 0.13593946695497136, Accuracy: 96.7%
Epoch 10/10, Loss: 0.10865583039963435, Accuracy: 97.0%
