# Convolutional Layer

In [None]:
import numpy as np, sys
# Seed NumPy's global random generator so that later np.random calls
# (e.g. the weight initialization) produce the same values on every run.
np.random.seed(1)
from keras.datasets import mnist

# Load the MNIST dataset as (images, labels) pairs for training and testing
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Training data: keep the first 1000 images, flatten each 28x28 image into a
# 784-element row and divide by 255 to normalize the pixel values
images, labels = (x_train[0:1000].reshape(1000, 28 * 28) / 255, y_train[0:1000])

# One-hot encode the training labels: row i gets a 1 in column labels[i].
# Integer-array indexing fills every row in one step instead of a Python loop.
one_hot_labels = np.zeros((len(labels), 10))
one_hot_labels[np.arange(len(labels)), labels] = 1
labels = one_hot_labels

# Test data: same flattening, normalization and one-hot encoding, applied to
# the whole test set
test_images = x_test.reshape(len(x_test), 28 * 28) / 255
test_labels = np.zeros((len(y_test), 10))
test_labels[np.arange(len(y_test)), y_test] = 1

# Activation function: hyperbolic tangent
def tanh(x):
    """Apply the hyperbolic tangent to ``x`` element-wise.

    Thin wrapper around ``np.tanh``; accepts whatever ``np.tanh`` accepts
    (scalars or arrays) and returns the result unchanged.
    """
    activated = np.tanh(x)
    return activated

# Derivative of tanh for backpropagation
def tanh2deriv(output):
    """Return ``1 - output ** 2``, the tanh derivative in terms of its output.

    ``output`` is the value already produced by tanh (not the pre-activation
    input), since d/dx tanh(x) = 1 - tanh(x)^2.
    """
    squared = output ** 2
    return 1 - squared

# Softmax activation function for output layer
def softmax(x):
    """Compute a row-wise softmax of a 2-D array.

    Args:
        x: Array of scores with shape (rows, classes); normalization runs
            along axis 1, so each row is treated independently.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtracting each row's maximum leaves the softmax unchanged
    # mathematically but keeps every exponent <= 0, so np.exp cannot overflow
    # to inf (which would turn the whole row into nan).
    shifted = x - np.max(x, axis=1, keepdims=True)
    temp = np.exp(shifted)
    return temp / np.sum(temp, axis=1, keepdims=True)

# Training hyperparameters
alpha = 2  # learning rate
iterations = 300  # number of training iterations
pixels_per_image = 784  # input size
num_labels = 10  # number of classes
batch_size = 128  # samples per batch

# Input image dimensions
input_rows = input_cols = 28

# Convolutional kernel dimensions and count
kernel_rows = kernel_cols = 3
num_kernels = 16

# Size of the hidden layer after convolution: one value per kernel for each
# of the (input_rows - kernel_rows) x (input_cols - kernel_cols) positions.
# NOTE(review): this omits the usual "+ 1" per dimension of a valid
# convolution; presumably it matches the sliding loop used elsewhere - confirm.
hidden_size = num_kernels * (input_rows - kernel_rows) * (input_cols - kernel_cols)

# Weight initialization (kernels are drawn first, then the output weights, so
# the sequence of random draws is unchanged):
#   kernels     - uniform in [-0.01, 0.01), one column per kernel
#   weights_1_2 - uniform in [-0.1, 0.1), hidden layer -> class scores
kernels = np.random.random((kernel_rows * kernel_cols, num_kernels)) * 0.02 - 0.01
weights_1_2 = np.random.random((hidden_size, num_labels)) * 0.2 - 0.1

# Function to extract a section of the image for convolution
def get_image_section(layer, row_from, row_to, col_from, col_to):
    """Cut the same rectangular window out of every image in a batch.

    Args:
        layer: Array indexed as (image, row, column).
        row_from, row_to: Row slice bounds (``row_to`` exclusive).
        col_from, col_to: Column slice bounds (``col_to`` exclusive).

    Returns:
        The window reshaped to (-1, 1, row_to - row_from, col_to - col_from),
        i.e. with an extra length-1 axis inserted after the image axis.
    """
    height = row_to - row_from
    width = col_to - col_from
    window = layer[:, row_from:row_to, col_from:col_to]
    return window.reshape(-1, 1, height, width)