In [None]:
# import necessary libraries

import numpy as np
from keras.datasets import mnist
from PIL import Image

In [None]:
# load & pre-process MNIST dataset

def load_data():
    """Fetch MNIST through ``mnist.load_data`` and rescale the image arrays.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)``. Both image arrays are
        divided by 255.0 (normalizing them to [0, 1]); the label arrays are
        returned exactly as loaded.
    """
    train_split, test_split = mnist.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split
    # normalize to [0, 1]
    return train_images / 255.0, train_labels, test_images / 255.0, test_labels

# convert labels to one-hot encoding
def one_hot_encode(labels, num_classes=10):
    """Turn integer class labels into one-hot rows.

    Args:
        labels: Integer index, or array of integer indices, used to select
            rows of a ``num_classes`` x ``num_classes`` identity matrix.
        num_classes: Length of each one-hot vector. Defaults to 10.

    Returns:
        numpy.ndarray: Float array holding the selected identity rows; an
        index array of shape ``(n,)`` yields shape ``(n, num_classes)``.
    """
    identity = np.identity(num_classes)
    return identity[labels]

In [None]:
# utility functions

# sigmoid activation function
def sigmoid(x):
    """Elementwise logistic function ``1 / (1 + exp(-x))``.

    Args:
        x: Scalar or array of pre-activation values.

    Returns:
        The logistic function of ``x`` after ``x`` has been limited to the
        range [-500, 500].
    """
    # Clamp first so that exp() cannot overflow for large-magnitude inputs.
    bounded = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

# sigmoid derivative
def sigmoid_derivative(x):
    """Derivative of the logistic function evaluated at ``x``.

    ``x`` is run through ``sigmoid`` inside this function, so it is treated
    as a pre-activation value rather than an already-activated output.

    Args:
        x: Scalar or array of pre-activation values.

    Returns:
        ``sigmoid(x) * (1 - sigmoid(x))``, elementwise.
    """
    activated = sigmoid(x)
    complement = 1 - activated
    return activated * complement

# softmax activation function
def softmax(x):
    """Numerically stable softmax taken over the last axis.

    Works for arrays of any rank >= 1: a 1-D input is normalized as a single
    vector, and for higher ranks every slice along the last axis is
    normalized independently (for 2-D input that is one distribution per row).

    Args:
        x: Array-like of scores (logits).

    Returns:
        numpy.ndarray: Array of the same shape as ``x`` whose entries along
        the last axis are non-negative and sum to 1.

    Raises:
        ValueError: If ``x`` is 0-dimensional, since there is no axis to
            normalize over.
    """
    x = np.asarray(x)
    if x.ndim == 0:
        raise ValueError("softmax expects an array with at least one dimension")
    # Subtracting the per-slice maximum leaves the result unchanged but keeps
    # exp() from overflowing on large logits.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

# forward pass for convolution
def conv2d(input, kernel):
    """Slide ``kernel`` over ``input`` and sum the elementwise products.

    The kernel is applied as given (it is not flipped), with no padding and a
    step of one, so the result has shape
    ``(h - kh + 1, w - kw + 1)``.

    Args:
        input: 2-D array of shape ``(h, w)``.
        kernel: 2-D array of shape ``(kh, kw)``.

    Returns:
        numpy.ndarray: 2-D float array of window sums.
    """
    in_h, in_w = input.shape
    k_h, k_w = kernel.shape
    result = np.zeros((in_h - k_h + 1, in_w - k_w + 1))
    # np.ndindex walks the output positions in row-major order.
    for row, col in np.ndindex(*result.shape):
        window = input[row:row + k_h, col:col + k_w]
        result[row, col] = np.sum(window * kernel)
    return result

# average pooling
def avg_pooling(input, pool_size, stride):
    """Average-pool a 2-D array.

    Args:
        input: 2-D array of shape ``(h, w)``.
        pool_size: Pair ``(ph, pw)`` giving the window height and width.
        stride: Step between consecutive window origins, used for both axes.

    Returns:
        numpy.ndarray: Array whose ``[r, c]`` entry is the mean of the window
        starting at ``(r * stride, c * stride)``. Only windows that fit fully
        inside ``input`` are used.
    """
    in_h, in_w = input.shape
    win_h, win_w = pool_size
    row_starts = range(0, in_h - win_h + 1, stride)
    col_starts = range(0, in_w - win_w + 1, stride)
    pooled = [
        [np.mean(input[r:r + win_h, c:c + win_w]) for c in col_starts]
        for r in row_starts
    ]
    return np.array(pooled)

# flatten function
def flatten(input):
    """Return ``input`` collapsed to one dimension.

    Delegates to the object's own ``flatten`` method.

    Args:
        input: Object exposing ``flatten()`` (e.g. a NumPy array).

    Returns:
        Whatever ``input.flatten()`` returns.
    """
    flat = input.flatten()
    return flat

# cross-entropy loss
def cross_entropy_loss(y_pred, y_true):
    """Mean cross-entropy between predicted probabilities and one-hot targets.

    Args:
        y_pred: Predicted probabilities, either a single 1-D vector of class
            probabilities or a 2-D array with one row per sample.
        y_true: One-hot targets that broadcast against ``y_pred``.

    Returns:
        float: ``-sum(y_true * log(y_pred + 1e-15))`` divided by the number
        of samples.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    # A 1-D prediction is ONE sample; dividing by shape[0] there would divide
    # by the number of classes and under-report the loss.
    n = y_pred.shape[0] if y_pred.ndim > 1 else 1
    # The 1e-15 offset keeps log() finite when a probability is exactly 0.
    return -np.sum(y_true * np.log(y_pred + 1e-15)) / n

In [None]:
# implement CNN architecture

# initialize weights for CNN
def initialize_weights():
    """Create the initial convolution kernels and fully connected weights.

    Seeds NumPy's global random generator with 0 before drawing, so repeated
    calls return the same values (and reset the global random state).

    Returns:
        tuple: ``(conv_kernel, fc_weights)`` where ``conv_kernel`` has shape
        ``(2, 3, 3)`` and ``fc_weights`` has shape ``(10, 2 * 13 * 13)``; both
        are standard-normal draws scaled by 0.1.
    """
    scale = 0.1
    n_kernels, kernel_side = 2, 3  # two kernels of size 3x3
    n_classes, pooled_side = 10, 13
    fc_inputs = n_kernels * pooled_side * pooled_side

    np.random.seed(0)
    # Draw order matters for reproducibility: kernels first, then FC weights.
    conv_kernel = np.random.randn(n_kernels, kernel_side, kernel_side) * scale
    fc_weights = np.random.randn(n_classes, fc_inputs) * scale
    return conv_kernel, fc_weights

# forward propagation
def forward_propagation(x, conv_kernel, fc_weights):
    """Run one input through conv -> sigmoid -> avg-pool -> dense -> softmax.

    Args:
        x: 2-D input array passed to ``conv2d``.
        conv_kernel: Iterable of 2-D kernels; each produces one feature map.
        fc_weights: Weight matrix applied to the concatenated pooled features.

    Returns:
        tuple: ``(predictions, conv_outputs, pooled_outputs, flattened,
        logits)`` where ``conv_outputs`` and ``pooled_outputs`` are lists with
        one entry per kernel, ``flattened`` is the concatenation of the
        flattened pooled maps, ``logits`` is ``fc_weights . flattened`` and
        ``predictions`` is ``softmax(logits)``.
    """
    conv_outputs = []
    pooled_outputs = []
    for kernel in conv_kernel:
        activation = sigmoid(conv2d(x, kernel))
        conv_outputs.append(activation)
        # 2x2 window, stride 2
        pooled_outputs.append(avg_pooling(activation, (2, 2), 2))

    # Feature vector layout: kernel 0's pooled map, then kernel 1's, ...
    flattened = np.concatenate([pooled.flatten() for pooled in pooled_outputs])
    logits = np.dot(fc_weights, flattened)
    predictions = softmax(logits)
    return predictions, conv_outputs, pooled_outputs, flattened, logits

# backward propagation
def backward_propagation(x, y_true, conv_outputs, pooled_outputs, flattened, logits, conv_kernel, fc_weights, lr=0.01):
    """Backpropagate one sample and apply a gradient-descent step in place.

    The error is propagated through every layer of ``forward_propagation``:
    softmax/cross-entropy -> fully connected layer -> un-flatten ->
    average pooling -> sigmoid -> convolution kernels.

    Args:
        x: 2-D input array that was fed to ``forward_propagation``.
        y_true: One-hot target for this sample; any shape that flattens to
            the length of ``logits`` (e.g. ``(C,)`` or ``(1, C)``).
        conv_outputs: Per-kernel sigmoid activations from the forward pass.
        pooled_outputs: Per-kernel pooled maps from the forward pass.
        flattened: Concatenated pooled features from the forward pass.
        logits: Fully connected outputs from the forward pass.
        conv_kernel: Kernel array, updated in place.
        fc_weights: Fully connected weight matrix, updated in place.
        lr: Learning rate. Defaults to 0.01.

    Returns:
        None. ``conv_kernel`` and ``fc_weights`` are modified in place.
    """
    # Window size and stride of the pooling step; must match the
    # avg_pooling(output, (2, 2), 2) call in forward_propagation.
    pool = 2

    # Softmax + cross-entropy: dL/dlogits = p - y. Flattening the target keeps
    # the gradient 1-D even when y_true arrives as a (1, C) row.
    target = np.asarray(y_true).reshape(-1)
    logits_grad = softmax(logits) - target

    # Fully connected layer: dL/dW = outer(dL/dlogits, features).
    fc_grad = np.outer(logits_grad, flattened)

    # Error reaching the flattened features; computed with the weights that
    # were used in the forward pass, i.e. before they are updated below.
    flat_grad = np.dot(fc_weights.T, logits_grad)

    kernel_grads = np.zeros_like(conv_kernel)
    offset = 0
    for i, activation in enumerate(conv_outputs):
        # Undo the concatenate/flatten: take this kernel's slice of the
        # feature gradient and restore the pooled map's shape.
        pooled = pooled_outputs[i]
        pooled_grad = flat_grad[offset:offset + pooled.size].reshape(pooled.shape)
        offset += pooled.size

        # Average pooling backward: each pooled cell shares its gradient
        # equally among the pool x pool inputs it averaged.
        spread = np.repeat(np.repeat(pooled_grad, pool, axis=0), pool, axis=1) / (pool * pool)
        # Rows/columns that no pooling window covered receive zero gradient.
        activation_grad = np.zeros_like(activation)
        activation_grad[:spread.shape[0], :spread.shape[1]] = spread

        # Sigmoid backward: `activation` is already sigmoid(z), so the local
        # derivative is a * (1 - a) (sigmoid must not be applied again).
        pre_activation_grad = activation_grad * activation * (1 - activation)

        # dL/dK[u, v] = sum_ij x[i + u, j + v] * dL/dz[i, j], which is exactly
        # the sliding-window product of x with the pre-activation gradient.
        kernel_grads[i] = conv2d(x, pre_activation_grad)

    # update weights
    fc_weights -= lr * fc_grad
    conv_kernel -= lr * kernel_grads

# training loop
def train(x_train, y_train, conv_kernel, fc_weights, epochs=10, lr=0.01):
    """Train the network with per-sample gradient descent.

    For every epoch each sample is run through ``forward_propagation`` and
    ``backward_propagation`` in order, and the mean per-sample cross-entropy
    loss of the epoch is printed.

    Args:
        x_train: Sequence of 2-D input arrays.
        y_train: Sequence of integer class labels, indexed like ``x_train``.
        conv_kernel: Kernel array, updated in place by backpropagation.
        fc_weights: Fully connected weights, updated in place.
        epochs: Number of passes over the data. Defaults to 10.
        lr: Learning rate forwarded to ``backward_propagation``.

    Returns:
        None. Progress is reported via ``print``.
    """
    for epoch in range(epochs):
        loss = 0
        for i in range(len(x_train)):
            x = x_train[i]
            y = y_train[i]
            y_true = one_hot_encode(np.array([y]))  # shape (1, num_classes)
            predictions, conv_outputs, pooled_outputs, flattened, logits = forward_propagation(x, conv_kernel, fc_weights)
            # predictions is a 1-D probability vector. Give it a batch axis so
            # the loss is averaged over 1 sample; passing it as-is makes
            # cross_entropy_loss divide by the number of classes instead.
            loss += cross_entropy_loss(predictions.reshape(1, -1), y_true)
            backward_propagation(x, y_true, conv_outputs, pooled_outputs, flattened, logits, conv_kernel, fc_weights, lr)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss / len(x_train)}")

# evaluation function
def evaluate(x_test, y_test, conv_kernel, fc_weights):
    """Print the classification accuracy on a labelled set.

    A sample counts as correct when the index of the largest predicted
    probability equals its label.

    Args:
        x_test: Sequence of 2-D input arrays.
        y_test: Sequence of integer class labels, indexed like ``x_test``.
        conv_kernel: Convolution kernels passed to ``forward_propagation``.
        fc_weights: Fully connected weights passed to ``forward_propagation``.

    Returns:
        None. The accuracy is printed as a percentage with two decimals.
    """
    hits = 0
    for idx, sample in enumerate(x_test):
        label = y_test[idx]
        # Only the first element of the forward result (the predictions) is needed.
        predictions = forward_propagation(sample, conv_kernel, fc_weights)[0]
        if np.argmax(predictions) == label:
            hits += 1
    accuracy = hits / len(x_test)
    print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:
# train & view results of CNN

# Load the normalized MNIST images together with their integer labels.
x_train, y_train, x_test, y_test = load_data()
# One-hot copy of the training labels. It is not passed to train() below,
# which encodes each label itself via one_hot_encode.
y_train_one_hot = one_hot_encode(y_train)

# Initial kernels and fully connected weights (seeded inside initialize_weights).
conv_kernel, fc_weights = initialize_weights()

# backward_propagation (called from train) updates conv_kernel and fc_weights
# in place, so evaluate() below runs on the trained parameters.
train(x_train, y_train, conv_kernel, fc_weights, epochs=10, lr=0.01)
evaluate(x_test, y_test, conv_kernel, fc_weights)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/10, Loss: 0.05028540449492675
Epoch 2/10, Loss: 0.038176612262655424
Epoch 3/10, Loss: 0.03637649503355718
Epoch 4/10, Loss: 0.03547008456537964
Epoch 5/10, Loss: 0.03490383922425014
Epoch 6/10, Loss: 0.034509492658570236
Epoch 7/10, Loss: 0.03421602293758765
Epoch 8/10, Loss: 0.0339876292365444
Epoch 9/10, Loss: 0.033804064069005886
Epoch 10/10, Loss: 0.03365290203624081
Accuracy: 88.19%
