<a href="https://colab.research.google.com/github/sidhu2690/ai-from-scratch/blob/main/03_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import numpy as np

In [26]:
class Conv2D:
    """Single-channel 2D convolution layer with stride 1 and no padding.

    The layer owns ``num_filters`` square kernels of side ``filter_size`` and
    one bias per kernel. ``forward`` caches its input so that ``backward`` can
    compute gradients and apply a plain gradient-descent step in place.

    Note: the kernels are slid over the image without being flipped, i.e. the
    operation is a cross-correlation, as is usual for CNN layers.
    """

    def __init__(self, num_filters, filter_size):
        """Create the layer with randomly initialised kernels and zero biases.

        Args:
            num_filters: Number of kernels (output channels).
            filter_size: Side length of each square kernel.
        """
        self.num_filters = num_filters
        self.filter_size = filter_size
        # Gaussian init scaled by 1/sqrt(kernel area) to keep early outputs small.
        self.filters = np.random.randn(num_filters, filter_size, filter_size) / np.sqrt(filter_size * filter_size)
        self.bias = np.zeros(num_filters)

    def forward(self, input):
        """Apply every kernel to a 2D image.

        Args:
            input: 2D array of shape ``(h, w)``.

        Returns:
            Array of shape ``(h - f + 1, w - f + 1, num_filters)`` where ``f``
            is the kernel side length.
        """
        self.last_input = input
        h, w = input.shape
        f = self.filter_size

        output = np.zeros((h - f + 1, w - f + 1, self.num_filters))

        for i in range(h - f + 1):
            for j in range(w - f + 1):
                region = input[i:i+f, j:j+f]
                # Broadcast the window against all kernels at once and reduce
                # over the two spatial axes: one response per kernel.
                output[i, j] = np.sum(self.filters * region, axis=(1, 2)) + self.bias

        return output

    def backward(self, d_out, lr):
        """Back-propagate through the layer and update its parameters.

        Must be called after ``forward``, whose cached input it reuses.

        Args:
            d_out: Gradient of the loss w.r.t. the layer output, indexed as
                ``[row, col, filter]``.
            lr: Learning rate for the in-place parameter update.

        Returns:
            Gradient of the loss w.r.t. the layer input, same shape as the
            cached input and always of a floating (inexact) dtype.
        """
        h, w = self.last_input.shape
        f = self.filter_size

        d_filters = np.zeros_like(self.filters)
        d_bias = np.zeros_like(self.bias)

        # The gradient buffer must not inherit an integer dtype from the input
        # (e.g. raw uint8 pixels): accumulating float gradients into it fails.
        in_dtype = self.last_input.dtype
        grad_dtype = in_dtype if np.issubdtype(in_dtype, np.inexact) else self.filters.dtype
        d_input = np.zeros(self.last_input.shape, dtype=grad_dtype)

        for i in range(h - f + 1):
            for j in range(w - f + 1):
                region = self.last_input[i:i+f, j:j+f]
                # Upstream gradient for every kernel at this output position.
                grad = d_out[i, j, :self.num_filters]

                d_filters += grad[:, None, None] * region
                d_bias += grad
                # Sum of kernels weighted by their upstream gradients; uses the
                # kernels as they were during the forward pass (update is below).
                d_input[i:i+f, j:j+f] += np.tensordot(grad, self.filters, axes=1)

        self.filters -= lr * d_filters
        self.bias -= lr * d_bias

        return d_input

In [27]:
class AvgPool2D:
    """Non-overlapping average pooling over the two leading (spatial) axes.

    Windows are ``pool_size x pool_size`` with a stride of ``pool_size``.
    Trailing rows/columns that do not fill a whole window are ignored in the
    forward pass and receive zero gradient in the backward pass.
    """

    def __init__(self, pool_size=2):
        """Store the side length of the square pooling window."""
        self.pool_size = pool_size

    def forward(self, input):
        """Average each pooling window, channel by channel.

        Args:
            input: Array of shape ``(h, w, c)``.

        Returns:
            Array of shape ``(h // p, w // p, c)`` where ``p`` is the pool size.
        """
        self.last_input = input
        h, w, c = input.shape
        p = self.pool_size

        out_h = h // p
        out_w = w // p

        output = np.zeros((out_h, out_w, c))

        for i in range(out_h):
            for j in range(out_w):
                window = input[i*p:(i+1)*p, j*p:(j+1)*p]
                # Reduce over the two spatial axes only: one mean per channel.
                output[i, j] = window.mean(axis=(0, 1))

        return output

    def backward(self, d_out):
        """Spread each output gradient evenly over its pooling window.

        Must be called after ``forward``, whose cached input shape it reuses.

        Args:
            d_out: Gradient w.r.t. the pooled output, indexed as
                ``[row, col, channel]``.

        Returns:
            Gradient w.r.t. the layer input, same shape as the cached input and
            always of a floating (inexact) dtype.
        """
        h, w, c = self.last_input.shape
        p = self.pool_size

        # Do not inherit an integer dtype from the input: assigning fractional
        # gradients into an integer buffer would silently truncate them to 0.
        in_dtype = self.last_input.dtype
        grad_dtype = in_dtype if np.issubdtype(in_dtype, np.inexact) else np.float64
        d_input = np.zeros(self.last_input.shape, dtype=grad_dtype)

        out_h = h // p
        out_w = w // p

        for i in range(out_h):
            for j in range(out_w):
                # Each of the p*p inputs contributed 1/(p*p) to the mean.
                d_input[i*p:(i+1)*p, j*p:(j+1)*p] = d_out[i, j, :c] / (p * p)

        return d_input

In [28]:
class ReLU:
    """Element-wise rectified linear activation."""

    def forward(self, x):
        """Clamp negative entries of ``x`` to zero, caching ``x`` for backward."""
        self.x = x
        activated = np.maximum(0, x)
        return activated

    def backward(self, d_out):
        """Pass ``d_out`` through only where the cached input was positive."""
        positive_mask = self.x > 0
        return d_out * positive_mask


In [29]:
class Dense:
    """Fully connected layer operating on column vectors: ``y = W @ x + b``.

    ``x`` has shape ``(in_dim, N)``; ``N`` is 1 for a single sample, and larger
    values are treated as a batch of column vectors.
    """

    def __init__(self, in_dim, out_dim):
        """Create the layer with random weights and zero biases.

        Args:
            in_dim: Number of input features.
            out_dim: Number of output features.
        """
        # Gaussian init with standard deviation sqrt(2 / in_dim).
        self.W = np.random.randn(out_dim, in_dim) * np.sqrt(2.0 / in_dim)
        self.b = np.zeros((out_dim, 1))

    def forward(self, x):
        """Return ``W @ x + b`` and cache ``x`` for the backward pass."""
        self.x = x
        return self.W @ x + self.b

    def backward(self, d_out, lr):
        """Back-propagate through the layer and update its parameters.

        Args:
            d_out: Gradient w.r.t. the layer output, shape ``(out_dim, N)``.
            lr: Learning rate for the in-place parameter update.

        Returns:
            Gradient w.r.t. the layer input, shape ``(in_dim, N)``.
        """
        dW = d_out @ self.x.T
        # The bias is shared by every column, so its gradient is the sum over
        # the batch axis. For N == 1 this equals d_out itself; for N > 1 it
        # keeps the (out_dim, 1) shape required by the in-place update below.
        db = np.sum(d_out, axis=1, keepdims=True)
        # Computed before the update so it uses the forward-pass weights.
        dx = self.W.T @ d_out

        self.W -= lr * dW
        self.b -= lr * db

        return dx

In [31]:
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements."""
    # Shifting by the maximum does not change the result but keeps the
    # exponentials from overflowing for large inputs.
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    total = np.sum(exps)
    return exps / total


def cross_entropy(pred, label):
    """Return the negative log of the probability at row ``label`` of ``pred``.

    ``pred`` is indexed as ``pred[label, 0]``, i.e. a 2D array whose first
    column holds the class probabilities.
    """
    prob_true = pred[label, 0]
    # The small constant keeps log() finite when the probability is exactly 0.
    return -np.log(prob_true + 1e-9)

In [32]:
# Network layers, shared as module-level objects by forward() and backward().
conv = Conv2D(num_filters=8, filter_size=3)
relu = ReLU()
pool = AvgPool2D(pool_size=2)
# 13 * 13 * 8 is the flattened pooled size; this assumes 28x28 input images
# (28 - 3 + 1 = 26 after the convolution, 26 // 2 = 13 after pooling, 8 filters).
dense = Dense(in_dim=13 * 13 * 8, out_dim=10)

In [33]:
def forward(image):
    """Run one image through conv -> ReLU -> average pool -> dense -> softmax.

    Uses the module-level ``conv``, ``relu``, ``pool`` and ``dense`` layers,
    each of which caches what it needs for the subsequent backward pass.

    Returns:
        Column vector of softmax outputs, one row per output unit of ``dense``.
    """
    feature_maps = conv.forward(image)
    activated = relu.forward(feature_maps)
    pooled = pool.forward(activated)
    # Flatten to a single column, the layout the dense layer multiplies against.
    flattened = pooled.reshape(-1, 1)
    logits = dense.forward(flattened)
    return softmax(logits)


def backward(pred, label, lr):
    """Back-propagate one sample through the network and update all layers.

    Must be called after ``forward`` for the same sample, because every layer
    relies on the values it cached during that forward pass.

    Args:
        pred: Softmax output returned by ``forward`` (column vector).
        label: Integer index of the true class.
        lr: Learning rate passed on to the trainable layers.
    """
    # For softmax followed by cross-entropy, the gradient w.r.t. the logits is
    # the prediction minus the one-hot encoded label.
    d_out = pred.copy()
    d_out[label] -= 1

    d_out = dense.backward(d_out, lr)

    # Undo the flatten from forward(): recover the pooled shape from the
    # pooling layer itself instead of hard-coding the architecture's sizes.
    in_h, in_w, channels = pool.last_input.shape
    p = pool.pool_size
    d_out = d_out.reshape(in_h // p, in_w // p, channels)

    d_out = pool.backward(d_out)
    d_out = relu.backward(d_out)
    conv.backward(d_out, lr)

In [34]:
from keras.datasets import mnist

# Load the MNIST train/test splits as (images, labels) pairs.
(trainX, trainY), (testX, testY) = mnist.load_data()

# Rescale pixel intensities by 1/255 (presumably 8-bit values, giving [0, 1]).
trainX, testX = trainX / 255.0, testX / 255.0

# Learning rate used for every parameter update during training.
lr = 0.005

In [35]:
# Train for three epochs with per-sample gradient descent.
for epoch in range(3):
    # Running totals for this epoch's report.
    loss, correct = 0, 0

    # Draw a fresh random subset of 1000 training samples each epoch.
    indices = np.random.permutation(len(trainX))[:1000]

    for i in indices:
        x, y = trainX[i], trainY[i]

        pred = forward(x)
        loss += cross_entropy(pred, y)
        correct += int(np.argmax(pred) == y)

        # Update the weights immediately after each sample.
        backward(pred, y, lr)

    print(f"Epoch {epoch+1}, Loss: {loss/len(indices):.4f}, Acc: {correct/len(indices):.4f}")

Epoch 1, Loss: 1.1973, Acc: 0.6680
Epoch 2, Loss: 0.6299, Acc: 0.8080
Epoch 3, Loss: 0.4861, Acc: 0.8440
