In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
from PIL import Image

In [4]:
# Locations of the training and testing CSV files (relative to the notebook)
train_csv_path = './archive/sign_mnist_train.csv'
test_csv_path = './archive/sign_mnist_test.csv'

# Read both splits into DataFrames; the generator is consumed in order, so the
# training file is read first and the testing file second.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in (train_csv_path, test_csv_path)
)

In [5]:
# Preprocess the data: take the "label" column as targets, reshape every
# remaining flat pixel row into a 28x28 image and divide by 255.0 to normalize.
y_train = train_data["label"].to_numpy()
X_train = train_data.drop(columns="label").to_numpy().reshape(-1, 28, 28) / 255.0

y_test = test_data["label"].to_numpy()
X_test = test_data.drop(columns="label").to_numpy().reshape(-1, 28, 28) / 255.0

# Report the resulting array shapes, one per line
print(
    f"Training data shape: {X_train.shape}",
    f"Testing data shape: {X_test.shape}",
    sep="\n",
)


Training data shape: (27455, 28, 28)
Testing data shape: (7172, 28, 28)


In [8]:
import numpy as np

class CustomCNN:
    """
    Minimal NumPy CNN for single-channel images.

    Architecture: 8 convolution filters of size 3x3 (valid, stride 1) -> ReLU ->
    2x2 max pooling (stride 2) -> flatten -> fully connected layer -> softmax.
    Samples are processed one at a time and parameters are updated with plain
    gradient descent after every sample.
    """

    def __init__(self, input_shape, num_classes, learning_rate=0.001, seed=None):
        """
        Initialize the Custom CNN.
        Parameters:
        - input_shape: (height, width) of one input image, e.g. (28, 28). Only the
          first two entries are read; images passed to `forward` must be 2-D.
        - num_classes: Number of output classes for classification.
        - learning_rate: Learning rate for gradient descent.
        - seed: Optional seed for the weight initialization. When None (default)
          the global NumPy random state is used.
        """
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.learning_rate = learning_rate

        # A private RandomState makes the initialization reproducible without
        # touching the global RNG; fall back to the global one when unseeded.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Initialize filters for the convolution layer: 8 filters of size 3x3
        self.filters = rng.randn(8, 3, 3) * 0.1
        num_filters, fh, fw = self.filters.shape

        # Size of the flattened layer after convolution and pooling. Height and
        # width are handled separately so non-square inputs get a matching
        # dense layer; a 1-element shape is treated as square.
        height = input_shape[0]
        width = input_shape[1] if len(input_shape) > 1 else height
        pooled_h = (height - fh + 1) // 2  # valid convolution, then 2x2 pooling
        pooled_w = (width - fw + 1) // 2
        flattened_size = pooled_h * pooled_w * num_filters

        # Initialize weights and biases for the fully connected layer
        self.fc_weights = rng.randn(flattened_size, num_classes) * 0.1
        self.fc_biases = np.zeros((1, num_classes))

    def relu(self, x):
        """Apply ReLU activation element-wise."""
        return np.maximum(0, x)

    def relu_derivative(self, x):
        """Derivative of ReLU: 1.0 where x > 0, else 0.0."""
        return (x > 0).astype(float)

    def softmax(self, x):
        """Apply a row-wise softmax to a 2-D array (max-shifted for stability)."""
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def convolve(self, image, filters):
        """
        Valid, stride-1 cross-correlation of a 2-D image with a filter bank.
        Parameters:
        - image: 2-D array of shape (h, w).
        - filters: array of shape (num_filters, fh, fw).
        Returns an array of shape (h - fh + 1, w - fw + 1, num_filters).
        """
        h, w = image.shape
        num_filters, fh, fw = filters.shape
        out_h, out_w = h - fh + 1, w - fw + 1
        output = np.zeros((out_h, out_w, num_filters))

        # Loop over the (small) kernel offsets instead of every output pixel:
        # each offset contributes a shifted copy of the image scaled by the
        # corresponding weight of every filter.
        for di in range(fh):
            for dj in range(fw):
                shifted = image[di:di + out_h, dj:dj + out_w]
                output += shifted[:, :, None] * filters[:, di, dj]
        return output

    def pool(self, feature_map):
        """
        Apply 2x2 max pooling with stride 2.
        A trailing row/column that does not fill a complete 2x2 window (odd
        height/width) is ignored, so the output shape is (h // 2, w // 2, c).
        """
        h, w, c = feature_map.shape
        out_h, out_w = h // 2, w // 2
        pooled = np.zeros((out_h, out_w, c))

        # Crop to complete windows, split each spatial axis into
        # (window index, position inside window) and reduce over the latter.
        complete = feature_map[:out_h * 2, :out_w * 2, :]
        pooled[...] = complete.reshape(out_h, 2, out_w, 2, c).max(axis=(1, 3))
        return pooled

    def forward(self, x):
        """
        Forward pass for one 2-D image.
        Caches the intermediate activations on the instance (used by
        `backward`) and returns the class probabilities, shape (1, num_classes).
        """
        # Step 1: Convolution + ReLU
        self.conv_output = self.relu(self.convolve(x, self.filters))

        # Step 2: Pooling
        self.pooled_output = self.pool(self.conv_output)

        # Step 3: Flatten
        self.flattened = self.pooled_output.flatten().reshape(1, -1)

        # Step 4: Fully connected layer + Softmax
        self.fc_output = np.dot(self.flattened, self.fc_weights) + self.fc_biases
        self.probs = self.softmax(self.fc_output)

        return self.probs

    def backward(self, x, y_true):
        """
        Backward pass and parameter update for one sample.
        Must be called right after `forward(x)` for the same image, because it
        reads the activations cached there.
        Parameters:
        - x: the 2-D input image given to `forward`.
        - y_true: integer class index of the sample.
        """
        # Convert y_true to one-hot encoding
        y_one_hot = np.zeros((1, self.num_classes))
        y_one_hot[0, y_true] = 1

        # Step 1: Gradient of softmax cross-entropy w.r.t. the dense layer
        grad_fc_output = self.probs - y_one_hot
        grad_fc_weights = np.dot(self.flattened.T, grad_fc_output)
        grad_fc_biases = grad_fc_output

        # Step 2: Backprop through flatten (uses the weights before the update)
        grad_flattened = np.dot(grad_fc_output, self.fc_weights.T)
        grad_pooled = grad_flattened.reshape(self.pooled_output.shape)

        # Step 3: Backprop through pooling. Each pooled gradient is routed to
        # the position(s) of its 2x2 window that hold the window maximum.
        # Rows/columns outside a complete window never reached the pooled
        # output and therefore keep a zero gradient.
        pooled_h, pooled_w = self.pooled_output.shape[:2]
        span_h, span_w = pooled_h * 2, pooled_w * 2
        window_max = np.repeat(np.repeat(self.pooled_output, 2, axis=0), 2, axis=1)
        window_grad = np.repeat(np.repeat(grad_pooled, 2, axis=0), 2, axis=1)
        grad_conv_output = np.zeros_like(self.conv_output)
        covered = self.conv_output[:span_h, :span_w, :]
        grad_conv_output[:span_h, :span_w, :] = (covered == window_max) * window_grad

        # Step 4: Backprop through ReLU (conv_output is post-ReLU, so > 0
        # marks exactly the units that were active)
        grad_conv_output *= self.relu_derivative(self.conv_output)

        # Step 5: Backprop through convolution. The gradient of one filter
        # weight is the shifted image correlated with that filter's output
        # gradient, summed over all output positions.
        grad_filters = np.zeros_like(self.filters)
        _, fh, fw = self.filters.shape
        out_h, out_w = grad_conv_output.shape[:2]
        for di in range(fh):
            for dj in range(fw):
                shifted = x[di:di + out_h, dj:dj + out_w]
                grad_filters[:, di, dj] = np.tensordot(
                    shifted, grad_conv_output, axes=([0, 1], [0, 1])
                )

        # Update parameters
        self.filters -= self.learning_rate * grad_filters
        self.fc_weights -= self.learning_rate * grad_fc_weights
        self.fc_biases -= self.learning_rate * grad_fc_biases

    def train(self, X, y, epochs=10):
        """
        Train the CNN with per-sample gradient descent.
        Parameters:
        - X: sequence of 2-D images.
        - y: sequence of integer class indices, aligned with X.
        - epochs: number of passes over X.
        Prints the mean cross-entropy loss after every epoch.
        """
        for epoch in range(epochs):
            total_loss = 0
            for i in range(len(X)):
                # Forward pass
                probs = self.forward(X[i])

                # Compute loss (cross-entropy); the floor keeps the loss finite
                # if the softmax underflows to exactly 0 for the true class.
                loss = -np.log(max(probs[0, y[i]], 1e-12))
                total_loss += loss

                # Backward pass
                self.backward(X[i], y[i])

            print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(X):.4f}")

    def predict(self, X):
        """Return an array with the most probable class index for each image in X."""
        predictions = []
        for i in range(len(X)):
            probs = self.forward(X[i])
            predictions.append(np.argmax(probs))
        return np.array(predictions)


In [None]:
# Experiment settings. Training and evaluation use small leading slices of the
# data to keep the run time manageable.
SEED = 42
N_TRAIN_SAMPLES = 1000
N_TEST_SAMPLES = 200

# CustomCNN draws its initial weights from the global NumPy RNG, so seed it
# here to make the reported loss and accuracy reproducible across re-runs.
np.random.seed(SEED)

# Initialize and train the CNN
cnn = CustomCNN(input_shape=(28, 28), num_classes=25, learning_rate=0.001)  # Set number of classes appropriately
cnn.train(X_train[:N_TRAIN_SAMPLES], y_train[:N_TRAIN_SAMPLES], epochs=10)  # Train on a subset for faster results

# Predict and evaluate on the first N_TEST_SAMPLES test images
y_pred = cnn.predict(X_test[:N_TEST_SAMPLES])
accuracy = np.mean(y_pred == y_test[:N_TEST_SAMPLES])
print(f"Test Accuracy: {accuracy:.2f}")


Epoch 1/10, Loss: 3.2367
