**Data Import and Preprocessing**

In [3]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
from hyperopt import fmin, tpe, hp, Trials
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout
from keras.optimizers import Adam
from keras.datasets import cifar10
import numpy as np
import tensorflow as tf
from hyperopt import space_eval

# Load Data
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Name Classes
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Preprocess Data
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)
x_train = x_train / 255.0
x_test = x_test / 255.0
x_train = x_train.astype(np.float32)
x_test = x_test.astype(np.float32)

**Perform Zero-Padding, Kernel and get Feature Map**

In [None]:
def convolve(image, kernel, stride=1, padding=0):
    _, _, channels = image.shape  
    kernel_height, kernel_weight, _ = kernel.shape
    padded_image = np.pad(image, ((padding, padding), (padding, padding), (0, 0)), mode='constant')
    padded_height, padded_weight, _ = padded_image.shape

    feature_map_height = (padded_height - kernel_height) // stride + 1 # floor division
    feature_map_width = (padded_weight - kernel_weight) // stride + 1
    feature_map = np.zeros((feature_map_height, feature_map_width))

    # Apply the Kernel over the Image
    for i in range(feature_map_height):
        for j in range(feature_map_width):
            region = padded_image[i * stride:i * stride + kernel_height, j * stride:j * stride + kernel_weight, :]
            feature_map[i, j] = np.sum(region * kernel)

    return feature_map


**Activation Function -> ReLU**

In [5]:
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

**Activation Function -> Softmax**

In [6]:
def softmax(z):
    exp_z = np.exp(z - np.max(z))
    return exp_z / np.sum(exp_z)

def softmax_derivative(output, true_label):
    return output - true_label  # Derivative for softmax + cross-entropy

In [7]:
def upsample(grad_pooled, pool_size, original_shape):
    """
    Upsample the gradient from the pooled feature map back to the original size.
    """
    h, w = original_shape
    upsampled = np.zeros(original_shape)
    pooled_h, pooled_w = grad_pooled.shape

    for i in range(pooled_h):
        for j in range(pooled_w):
            start_i, end_i = i * pool_size, (i + 1) * pool_size
            start_j, end_j = j * pool_size, (j + 1) * pool_size
            upsampled[start_i:end_i, start_j:end_j] = grad_pooled[i, j]

    return upsampled


**Max Pooling**

In [8]:
def max_pooling(feature_map, n):
    rows, cols = feature_map.shape
    new_rows = rows // n
    new_cols = cols // n
    new_array = np.zeros((new_rows, new_cols))

    for i in range(new_rows):
        for j in range(new_cols):
            region = feature_map[i * n:(i + 1) * n, j * n:(j + 1) * n]
            new_array[i, j] = np.max(region)

    return new_array



**Flattening Feature Map after MaxPooling**

In [9]:
def flatten(feature_map):
    return feature_map.flatten()


**Dense Layer Implementation**

In [10]:
def dense_layer(inputs, weights, bias, activation=None):
    z = np.dot(weights, inputs) + bias
    if activation == 'relu':
        return relu(z)
    elif activation == 'softmax':
        return softmax(z)
    return z


In [11]:
def backprop_dense(dense_output, dense_weights, dense_bias,  prev_activation, true_label, activation='relu'):
    grad_output = dense_output - true_label
    grad_weights = np.outer(grad_output, prev_activation)
    grad_bias = grad_output
    grad_prev_activation = np.dot(dense_weights.T, grad_output)
    return grad_weights, grad_bias, grad_prev_activation



In [12]:
def backprop_conv(feature_map, kernel, input_image, stride, padding):
    kernel_h, kernel_w, kernel_c = kernel.shape
    grad_kernel = np.zeros_like(kernel)
    grad_input = np.zeros_like(input_image)

    padded_input = np.pad(input_image, ((padding, padding), (padding, padding), (0, 0)), mode='constant')
    grad_padded_input = np.pad(grad_input, ((padding, padding), (padding, padding), (0, 0)), mode='constant')

    for i in range(feature_map.shape[0]):
        for j in range(feature_map.shape[1]):
            for k in range(kernel_c):
                region = padded_input[i * stride:i * stride + kernel_h, j * stride:j * stride + kernel_w, k]
                grad_kernel[:, :, k] += feature_map[i, j] * region
                grad_padded_input[i * stride:i * stride + kernel_h, j * stride:j * stride + kernel_w, k] += kernel[:, :, k] * feature_map[i, j]

    if padding > 0:
        grad_input = grad_padded_input[padding:-padding, padding:-padding, :]
    else:
        grad_input = grad_padded_input

    return grad_kernel, grad_input

**Categorical Cross Entropy**

In [13]:

def categorical_cross_entropy(y_true, y_pred):
    epsilon = 1e-8
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    return -np.sum(y_true * np.log(y_pred))


In [None]:

# Training Function
def train_cnn(x_train, y_train, kernel, dense_weights, dense_bias, epochs, learning_rate, padding, pool_size):
    for epoch in range(epochs):
        total_loss = 0
        for i in range(len(x_train)):
            image = x_train[i]
            true_label = y_train[i]

            conv_output = convolve(image, kernel, stride=1, padding=padding)
            relu_output = relu(conv_output)
            pooled_output = max_pooling(relu_output, pool_size)
            flat_output = flatten(pooled_output)
            dense_output = dense_layer(flat_output, dense_weights, dense_bias, activation='softmax')
            loss = -np.sum(true_label * np.log(dense_output + 1e-8))
            total_loss += loss
    
            grad_weights_dense, grad_bias_dense, grad_flat = backprop_dense(dense_output, dense_weights, dense_bias, flat_output, true_label)
            grad_pooled = grad_flat.reshape(pooled_output.shape)

            grad_upsampled = upsample(grad_pooled, pool_size, relu_output.shape)
            grad_relu = relu_derivative(relu_output) * grad_upsampled
            grad_kernel, _ = backprop_conv(grad_relu, kernel, image, stride=1, padding=padding)

            dense_weights -= learning_rate * grad_weights_dense
            dense_bias -= learning_rate * grad_bias_dense
            kernel -= learning_rate * grad_kernel

        print(f"Epoch {epoch + 1}, Loss: {total_loss / len(x_train):.4f}")

kernel = np.random.randn(3, 3, 3) * 0.1  
dense_weights = np.random.randn(10, (32 // 2) * (32 // 2)) * 0.1  
dense_bias = np.random.randn(10) * 0.1

# Hyperparameters
epochs = 5
learning_rate = 0.001
padding = 1
pool_size = 2

# Train the CNN
train_cnn(x_train, y_train, kernel, dense_weights, dense_bias, epochs, learning_rate, padding, pool_size)

Epoch 1, Loss: 2.1457
Epoch 2, Loss: 1.8736
Epoch 3, Loss: 1.8042
