In [None]:
import numpy as np

def convolve_and_pool(input_image, weight_matrix):
    """Cross-correlate a 2-D image with a 2-D kernel, then average the result.

    The kernel is slid over every position where it fits entirely inside the
    image (no padding, stride 1); at each position the element-wise product
    of the covered patch and the kernel is summed.

    Parameters
    ----------
    input_image : np.ndarray
        2-D array of shape ``(ih, iw)``.
    weight_matrix : np.ndarray
        2-D kernel of shape ``(wh, ww)``.

    Returns
    -------
    tuple
        ``(output, average_pooling)`` where ``output`` is a float array of
        shape ``(ih - wh + 1, iw - ww + 1)`` and ``average_pooling`` is the
        mean of all entries of ``output``.
    """
    image_rows, image_cols = input_image.shape
    kernel_rows, kernel_cols = weight_matrix.shape
    out_shape = (image_rows - kernel_rows + 1, image_cols - kernel_cols + 1)

    feature_map = np.zeros(out_shape)
    # Visit every output cell in row-major order.
    for row, col in np.ndindex(*out_shape):
        patch = input_image[row:row + kernel_rows, col:col + kernel_cols]
        feature_map[row, col] = np.sum(patch * weight_matrix)

    return feature_map, np.mean(feature_map)

# Example 3x3 input and 3x2 kernel
I = np.array([
    [1, 2, 2],
    [1, 3, 4],
    [1, 0, 0],
])
W = np.array([
    [1, -1],
    [1, -1],
    [0, 1],
])

# Feature map plus the mean over all of its entries
convolved_output, average_pooling_result = convolve_and_pool(I, W)

print("Convolved Output:\n", convolved_output)
print("Average Pooling Result:", average_pooling_result)

In [None]:
# Using Keras (TensorFlow runs as the backend)



import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.optimizers import Adam
from keras.utils import to_categorical

# Load MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Preprocess data: add a trailing channel axis, cast to float32 and divide by
# 255 so that all values lie in the range [0, 1]; one-hot encode the labels
X_train = X_train.reshape(-1, 28, 28, 1).astype('float32') / 255
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Define CNN architecture, one layer at a time:
# two conv + max-pool stages followed by a small dense classifier
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))

# Compile the model
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model; the test split is passed as validation data
model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=128,
    validation_data=(X_test, y_test),
)

In [None]:
!pip install --upgrade scikit-learn

In [None]:
# Without using Keras or TensorFlow


import numpy as np
from sklearn.datasets import fetch_openml

# Load MNIST dataset
# as_frame=False requests plain NumPy arrays. With the default setting recent
# scikit-learn versions return pandas objects for this dataset, and a
# DataFrame has no .reshape, so the reshape below would fail.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist.data, mnist.target

# Normalize and reshape data: scale by 1/255 and view each 784-long row as a
# 28x28 image with one channel
X = X / 255.0
X = X.reshape(-1, 28, 28, 1)

# Convert labels to one-hot encoding (row k of the identity matrix encodes class k)
y = y.astype(int)
num_classes = 10
y_one_hot = np.eye(num_classes)[y]

# Split dataset into training and testing sets: first 60000 rows / the rest
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y_one_hot[:60000], y_one_hot[60000:]

# Define convolution operation
def convolution(image, filt, bias):
    """Unpadded, stride-1 cross-correlation of ONE image with a bank of filters.

    Parameters
    ----------
    image : np.ndarray
        A single image shaped ``(H, W)`` or ``(H, W, C)``. Batches are not
        accepted; call this function once per sample.
    filt : np.ndarray
        Filter bank in one of two layouts:

        * ``(k_h, k_w, C, num_filters)`` -- the layout of ``conv1_filters``
          and ``conv2_filters`` in this notebook;
        * ``(num_filters, k_h, k_w)`` -- single-channel filters indexed by
          their first axis.
    bias : array_like
        One bias per filter, or a single value shared by every filter.

    Returns
    -------
    np.ndarray
        Float array of shape ``(H - k_h + 1, W - k_w + 1, num_filters)``.

    Raises
    ------
    ValueError
        If ``image`` or ``filt`` has an unsupported number of dimensions, the
        channel counts disagree, the kernel is larger than the image, or
        ``bias`` cannot be broadcast to ``num_filters`` values.
    """
    image = np.asarray(image)
    filt = np.asarray(filt)

    # Normalise the image to (H, W, C).
    if image.ndim == 2:
        image = image[:, :, np.newaxis]
    elif image.ndim != 3:
        raise ValueError(
            "convolution expects a single image of shape (H, W) or (H, W, C), "
            "got shape %s" % (image.shape,)
        )

    # Normalise the filters to (k_h, k_w, C, num_filters).
    if filt.ndim == 3:
        filt = np.transpose(filt, (1, 2, 0))[:, :, np.newaxis, :]
    elif filt.ndim != 4:
        raise ValueError(
            "filt must have shape (k_h, k_w, C, num_filters) or "
            "(num_filters, k_h, k_w), got shape %s" % (filt.shape,)
        )

    kernel_h, kernel_w, in_channels, num_filters = filt.shape
    height, width, channels = image.shape
    if channels != in_channels:
        raise ValueError(
            "image has %d channel(s) but the filters expect %d"
            % (channels, in_channels)
        )
    if kernel_h > height or kernel_w > width:
        raise ValueError("filter is larger than the image")

    # Raises ValueError when bias is neither one value nor one per filter.
    bias = np.broadcast_to(np.asarray(bias, dtype=float), (num_filters,))

    stride = 1
    out_h = (height - kernel_h) // stride + 1
    out_w = (width - kernel_w) // stride + 1
    output = np.zeros((out_h, out_w, num_filters))

    for row in range(out_h):
        for col in range(out_w):
            top, left = row * stride, col * stride
            patch = image[top:top + kernel_h, left:left + kernel_w, :]
            # Contract (k_h, k_w, C) against the filters: all filters at once.
            output[row, col, :] = np.tensordot(patch, filt, axes=3) + bias

    return output

# Define max pooling operation
def max_pooling(image, size=2):
    """Non-overlapping ``size`` x ``size`` max pooling of one feature map.

    Parameters
    ----------
    image : np.ndarray
        Feature map of shape ``(H, W, C)``.
    size : int, optional
        Edge length of the pooling window, which is also the stride.

    Returns
    -------
    np.ndarray
        Float array of shape ``(H // size, W // size, C)``. Trailing rows and
        columns that do not fill a complete window are ignored, so odd-sized
        inputs are handled.
    """
    height, width, channels = image.shape
    out_h = height // size
    out_w = width // size

    # Drop the incomplete border, then split each spatial axis into
    # (window index, offset inside the window) and reduce over the offsets.
    cropped = image[:out_h * size, :out_w * size, :]
    blocks = cropped.reshape(out_h, size, out_w, size, channels)
    return blocks.max(axis=(1, 3)).astype(float)

# Define ReLU activation function
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

# Define softmax activation function
def softmax(x):
    """Row-wise softmax of a 2-D array of logits.

    Parameters
    ----------
    x : np.ndarray
        Logits of shape ``(batch, classes)``.

    Returns
    -------
    np.ndarray
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Shift each row by its OWN maximum. Shifting by the global maximum can
    # underflow every entry of a row that lies far below it, giving 0 / 0.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

# Initialize weights and biases
# Fixed seed so that a fresh run starts from the same weights.
np.random.seed(42)
# Convolution filters are laid out as (k_h, k_w, in_channels, num_filters) and
# use He scaling, std = sqrt(2 / fan_in) with fan_in = k_h * k_w * in_channels,
# so that activations keep a comparable scale from layer to layer instead of
# growing with the number of inputs summed by each filter.
conv1_filters = np.random.randn(3, 3, 1, 32) * np.sqrt(2.0 / (3 * 3 * 1))
conv1_biases = np.zeros(32)
conv2_filters = np.random.randn(3, 3, 32, 64) * np.sqrt(2.0 / (3 * 3 * 32))
conv2_biases = np.zeros(64)
# Dense layer: 7*7*64 flattened features -> 10 class scores
fc_weights = np.random.randn(7*7*64, 10) / (7*7*64)
fc_biases = np.zeros(10)

# Define forward pass through the network
def forward_pass(image):
    """Run the network on a batch of images and return class probabilities.

    Parameters
    ----------
    image : np.ndarray
        Batch of shape ``(N, 28, 28, 1)``.

    Returns
    -------
    np.ndarray
        ``(N, 10)`` array of softmax probabilities.

    Notes
    -----
    ``convolution`` and ``max_pooling`` are called once per sample with a
    single ``(H, W, C)`` feature map; ``convolution`` is given the
    ``(k, k, C, F)`` filter banks ``conv1_filters`` / ``conv2_filters``.

    ``fc_weights`` has ``7*7*64`` rows, which requires the spatial size to go
    28 -> 14 -> 7. Each feature map is therefore zero-padded before the
    unpadded convolution so that the convolution keeps the spatial size.
    """
    def _pad_same(feature_map, filters):
        # Zero border of half the kernel size on both spatial axes.
        pad = filters.shape[0] // 2
        return np.pad(feature_map, ((pad, pad), (pad, pad), (0, 0)))

    flattened = []
    for sample in image:
        # Convolutional Layer 1 + ReLU
        conv1_output = convolution(_pad_same(sample, conv1_filters),
                                   conv1_filters, conv1_biases)
        conv1_output = relu(conv1_output)
        # Max Pooling Layer 1
        pool1_output = max_pooling(conv1_output, size=2)

        # Convolutional Layer 2 + ReLU
        conv2_output = convolution(_pad_same(pool1_output, conv2_filters),
                                   conv2_filters, conv2_biases)
        conv2_output = relu(conv2_output)
        # Max Pooling Layer 2
        pool2_output = max_pooling(conv2_output, size=2)

        # Flatten this sample's (h, w, c) feature map
        flattened.append(pool2_output.ravel())

    flatten_output = np.reshape(flattened, (len(image), -1))

    # Fully Connected Layer
    fc_output = np.dot(flatten_output, fc_weights) + fc_biases

    # Softmax activation
    output = softmax(fc_output)

    return output

# Define cross-entropy loss function
def cross_entropy_loss(y_true, y_pred):
    """Mean categorical cross-entropy over a batch.

    Parameters
    ----------
    y_true : np.ndarray
        One-hot labels of shape ``(m, classes)``.
    y_pred : np.ndarray
        Predicted probabilities of shape ``(m, classes)``.

    Returns
    -------
    float
        Average of ``-log(p)`` where ``p`` is the probability predicted for
        each sample's true class.
    """
    m = y_true.shape[0]
    true_class = np.argmax(y_true, axis=1)
    picked = y_pred[np.arange(m), true_class]
    # A predicted probability of exactly 0 would give log(0) = -inf and an
    # infinite loss; bound it away from zero.
    picked = np.clip(picked, 1e-12, None)
    loss = -np.sum(np.log(picked)) / m
    return loss

# Define accuracy function
def accuracy(y_true, y_pred):
    """Fraction of rows where ``y_pred`` and ``y_true`` peak at the same column."""
    true_labels = np.argmax(y_true, axis=1)
    predicted_labels = np.argmax(y_pred, axis=1)
    hits = true_labels == predicted_labels
    return np.mean(hits)

# Training loop
# Mini-batch SGD over X_train (N, 28, 28, 1) and one-hot y_train (N, 10).
# The forward pass is done here, batch-wise, because the backward pass needs
# every intermediate activation. Both convolutions are zero-padded so that the
# flattened feature size is 7*7*64, the number of rows of fc_weights.
learning_rate = 0.01
epochs = 20
batch_size = 128
num_batches = len(X_train) // batch_size


def _conv_same_forward(inputs, filters, biases):
    """Stride-1, zero-padded convolution of a batch.

    inputs is (N, H, W, C), filters is (k, k, C, F) with odd k, biases is (F,)
    or a scalar. Returns (outputs, windows): outputs is (N, H, W, F) and
    windows is the (N, H, W, C, k, k) sliding view of the padded input, which
    the filter gradient reuses.
    """
    k = filters.shape[0]
    pad = k // 2
    padded = np.pad(inputs, ((0, 0), (pad, pad), (pad, pad), (0, 0)))
    windows = np.lib.stride_tricks.sliding_window_view(padded, (k, k), axis=(1, 2))
    # Contract the (k_row, k_col, channel) axes of the windows with the filters.
    outputs = np.tensordot(windows, filters, axes=([4, 5, 3], [0, 1, 2])) + biases
    return outputs, windows


def _conv_param_grads(grad_outputs, windows):
    """Gradients of the loss w.r.t. a conv layer's filters (k, k, C, F) and biases (F,)."""
    # Sum over batch and positions: result is (C, k, k, F), reorder to (k, k, C, F).
    grad_filters = np.tensordot(windows, grad_outputs, axes=([0, 1, 2], [0, 1, 2]))
    grad_filters = grad_filters.transpose(1, 2, 0, 3)
    grad_biases = grad_outputs.sum(axis=(0, 1, 2))
    return grad_filters, grad_biases


def _conv_input_grads(grad_outputs, filters):
    """Gradient of the loss w.r.t. the input of a zero-padded, stride-1 conv layer."""
    # Same-padded convolution of the output gradient with the spatially
    # flipped filters, input and output channel axes swapped.
    flipped = filters[::-1, ::-1].transpose(0, 1, 3, 2)
    grad_inputs, _ = _conv_same_forward(grad_outputs, flipped, 0.0)
    return grad_inputs


def _pool2x2_forward(feature_maps):
    """2x2 max pooling of (N, H, W, C) with even H and W.

    Returns (pooled, blocks) where blocks is the (N, H/2, 2, W/2, 2, C) view
    used to locate the maxima in the backward pass.
    """
    n, h, w, c = feature_maps.shape
    blocks = feature_maps.reshape(n, h // 2, 2, w // 2, 2, c)
    return blocks.max(axis=(2, 4)), blocks


def _pool2x2_backward(grad_pooled, blocks, pooled):
    """Route each pooled gradient back to the position(s) holding the window maximum."""
    winners = blocks == pooled[:, :, np.newaxis, :, np.newaxis, :]
    # Tied maxima share the gradient equally; every window has at least one winner.
    share = grad_pooled[:, :, np.newaxis, :, np.newaxis, :] / winners.sum(axis=(2, 4), keepdims=True)
    n, half_h, _, half_w, _, c = blocks.shape
    return (winners * share).reshape(n, half_h * 2, half_w * 2, c)


for epoch in range(epochs):
    epoch_loss = 0.0
    epoch_accuracy = 0.0
    for i in range(num_batches):
        start = i * batch_size
        end = start + batch_size
        batch_X = X_train[start:end]
        batch_y = y_train[start:end]

        # Forward pass (activations are kept for the backward pass)
        conv1_output, conv1_windows = _conv_same_forward(batch_X, conv1_filters, conv1_biases)
        conv1_output = relu(conv1_output)
        pool1_output, pool1_blocks = _pool2x2_forward(conv1_output)

        conv2_output, conv2_windows = _conv_same_forward(pool1_output, conv2_filters, conv2_biases)
        conv2_output = relu(conv2_output)
        pool2_output, pool2_blocks = _pool2x2_forward(conv2_output)

        flatten_output = pool2_output.reshape((len(batch_X), -1))
        fc_output = np.dot(flatten_output, fc_weights) + fc_biases
        output = softmax(fc_output)

        # Compute loss
        loss = cross_entropy_loss(batch_y, output)
        epoch_loss += loss
        epoch_accuracy += accuracy(batch_y, output)

        # Compute gradients
        # Softmax + cross-entropy: d(loss)/d(logits) = (p - y) / batch, because
        # the loss is averaged over the batch.
        grad_fc_output = (output - batch_y) / len(batch_X)
        grad_fc_weights = np.dot(flatten_output.T, grad_fc_output)
        grad_fc_biases = np.sum(grad_fc_output, axis=0)

        grad_pool2_output = np.dot(grad_fc_output, fc_weights.T)
        grad_pool2_output = grad_pool2_output.reshape(pool2_output.shape)

        # Back through pool 2 and ReLU 2 (gradient passes only where the unit was active)
        grad_conv2_output = _pool2x2_backward(grad_pool2_output, pool2_blocks, pool2_output)
        grad_conv2_output = grad_conv2_output * (conv2_output > 0)
        grad_conv2_filters, grad_conv2_biases = _conv_param_grads(grad_conv2_output, conv2_windows)

        # Back through conv 2 to its input, then pool 1 and ReLU 1
        grad_pool1_output = _conv_input_grads(grad_conv2_output, conv2_filters)
        grad_conv1_output = _pool2x2_backward(grad_pool1_output, pool1_blocks, pool1_output)
        grad_conv1_output = grad_conv1_output * (conv1_output > 0)
        grad_conv1_filters, grad_conv1_biases = _conv_param_grads(grad_conv1_output, conv1_windows)

        # Update weights and biases
        conv1_filters -= learning_rate * grad_conv1_filters
        conv1_biases -= learning_rate * grad_conv1_biases
        conv2_filters -= learning_rate * grad_conv2_filters
        conv2_biases -= learning_rate * grad_conv2_biases
        fc_weights -= learning_rate * grad_fc_weights
        fc_biases -= learning_rate * grad_fc_biases

    # Averages over the batches of this epoch (guard against zero batches)
    batches_seen = max(num_batches, 1)
    print(f"Epoch {epoch + 1}/{epochs} - loss: {epoch_loss / batches_seen:.4f}"
          f" - accuracy: {epoch_accuracy / batches_seen:.4f}")

In [None]:
# We will use the same architecture which was used in Question 2 above



from keras.layers import Dropout, BatchNormalization
from keras.regularizers import l1, l2, l1_l2
from keras.callbacks import EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.datasets import mnist

# Load MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Preprocess data: add a trailing channel axis, cast to float32, divide by 255,
# and one-hot encode the labels
X_train = X_train.reshape(-1, 28, 28, 1).astype('float32') / 255
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Define CNN architecture with all regularizers, one layer at a time
model_regularized = Sequential()
# Conv block 1: L2 weight penalty, then batch normalization
model_regularized.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1),
                             kernel_regularizer=l2(0.01)))
model_regularized.add(BatchNormalization())
model_regularized.add(MaxPooling2D((2, 2)))
# Conv block 2: L1 weight penalty
model_regularized.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l1(0.01)))
model_regularized.add(MaxPooling2D((2, 2)))
model_regularized.add(Flatten())
# Dense head: combined L1-L2 penalty, dropout, batch normalization
model_regularized.add(Dense(64, activation='relu',
                            kernel_regularizer=l1_l2(l1=0.01, l2=0.01)))
model_regularized.add(Dropout(0.5))
model_regularized.add(BatchNormalization())
# Output layer: L1 weight penalty
model_regularized.add(Dense(10, activation='softmax', kernel_regularizer=l1(0.01)))

# Compile the regularized model
model_regularized.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the regularized model with early stopping on the validation loss
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
model_regularized.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=128,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)

# Data Augmentation: random rotations, shifts and zooms, then continue
# training the same model on the augmented stream
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
)
datagen.fit(X_train)
model_regularized.fit(
    datagen.flow(X_train, y_train, batch_size=128),
    epochs=20,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)