In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from PIL import Image

In [3]:
# Load the training dataset
train_csv_path = './archive/sign_mnist_train.csv'
train_data = pd.read_csv(train_csv_path)

# Load the testing dataset
test_csv_path = './archive/sign_mnist_test.csv'
test_data = pd.read_csv(test_csv_path)

In [4]:
# Separate features and labels for training
X_train = train_data.iloc[:, 1:].values  # Pixel values
Y_train = train_data.iloc[:, 0].values  # Labels

# Separate features and labels for testing
X_test = test_data.iloc[:, 1:].values
Y_test = test_data.iloc[:, 0].values

In [5]:
# Normalize the pixel values (scale to 0-1)
X_train = X_train / 255.0
X_test = X_test / 255.0

In [10]:

# Helper functions
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))  # Stability trick
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def cross_entropy_loss(y_true, y_pred):
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

def cross_entropy_derivative(y_true, y_pred):
    return y_pred - y_true

# Convolutional Layer
class ConvLayer:
    def __init__(self, num_filters, filter_size):
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.filters = np.random.randn(num_filters, filter_size, filter_size) * 0.1  # Initialize filters

    def forward(self, X):
        self.input = X
        h, w = X.shape[1], X.shape[2]
        output_height = h - self.filter_size + 1
        output_width = w - self.filter_size + 1

        self.output = np.zeros((X.shape[0], self.num_filters, output_height, output_width))
        for i in range(self.num_filters):
            for j in range(output_height):
                for k in range(output_width):
                    region = X[:, j:j + self.filter_size, k:k + self.filter_size]
                    self.output[:, i, j, k] = np.sum(region * self.filters[i], axis=(1, 2))

        return self.output

    def backward(self, dL_dout, learning_rate):
        dL_dfilters = np.zeros_like(self.filters)
        for i in range(self.num_filters):
            for j in range(dL_dout.shape[2]):
                for k in range(dL_dout.shape[3]):
                    region = self.input[:, j:j + self.filter_size, k:k + self.filter_size]
                    dL_dfilters[i] += np.sum(dL_dout[:, i, j, k][:, None, None] * region, axis=0)

        # Update filters
        self.filters -= learning_rate * dL_dfilters
        return None  # Pooling and later layers will handle further backpropagation

# MaxPooling Layer
class MaxPoolLayer:
    def __init__(self, pool_size):
        self.pool_size = pool_size

    def forward(self, X):
        self.input = X
        h, w = X.shape[2], X.shape[3]
        output_height = h // self.pool_size
        output_width = w // self.pool_size

        self.output = np.zeros((X.shape[0], X.shape[1], output_height, output_width))
        for i in range(output_height):
            for j in range(output_width):
                region = X[:, :, i * self.pool_size:(i + 1) * self.pool_size,
                            j * self.pool_size:(j + 1) * self.pool_size]
                self.output[:, :, i, j] = np.max(region, axis=(2, 3))

        return self.output

    def backward(self, dL_dout):
        return None  # Not required in this simplified implementation

# Fully Connected Layer
class FullyConnectedLayer:
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(input_size, output_size) * 0.1
        self.biases = np.zeros(output_size)

    def forward(self, X):
        self.input = X
        return np.dot(X, self.weights) + self.biases

    def backward(self, dL_dout, learning_rate):
        dL_dweights = np.dot(self.input.T, dL_dout)
        dL_dbiases = np.sum(dL_dout, axis=0)

        # Update weights and biases
        self.weights -= learning_rate * dL_dweights
        self.biases -= learning_rate * dL_dbiases

        # Return gradient for the next layer
        return np.dot(dL_dout, self.weights.T)

# CNN Model
class CNN:
    def __init__(self):
        self.conv1 = ConvLayer(num_filters=8, filter_size=3)
        self.pool1 = MaxPoolLayer(pool_size=2)
        self.fc = FullyConnectedLayer(input_size=13 * 13 * 8, output_size=25)

    def forward(self, X):
        X = self.conv1.forward(X)
        X = relu(X)
        X = self.pool1.forward(X)
        X = X.reshape(X.shape[0], -1)  # Flatten
        X = self.fc.forward(X)
        return softmax(X)

    def backward(self, X, y, y_pred, learning_rate):
        dL_dout = cross_entropy_derivative(y, y_pred)
        dL_dout = self.fc.backward(dL_dout, learning_rate)
        # Skipping backpropagation through pool1 and conv1 for simplicity

    def train(self, X_train, y_train, epochs, learning_rate):
        for epoch in range(epochs):
            y_pred = self.forward(X_train)
            loss = cross_entropy_loss(y_train, y_pred)
            print(f"Epoch {epoch + 1}, Loss: {loss:.4f}")
            self.backward(X_train, y_train, y_pred, learning_rate)

def preprocess_data(X, Y, one_hot=True):
    # Normalize pixel values to [0, 1]
    X = X / 255.0
    # Reshape for CNN
    X = X.reshape(-1, 28, 28)
    
    if not one_hot:  # Perform one-hot encoding only if required
        # Ensure Y is integer type if not one-hot encoded
        Y = Y.astype(int)
        # One-hot encode labels
        num_classes = np.max(Y) + 1
        Y_one_hot = np.zeros((Y.size, num_classes))
        Y_one_hot[np.arange(Y.size), Y] = 1
        return X, Y_one_hot
    else:
        # If Y is already one-hot encoded, return it as-is
        return X, Y





In [12]:
# Main Script
if __name__ == "__main__":
    # Preprocess test data
    X_test, Y_test = preprocess_data(X_test, Y_test, one_hot=True)  # Adjust `one_hot` flag as needed
    Y_pred = cnn.forward(X_test)

    # Check accuracy based on the format of Y_test
    if Y_test.ndim == 2:  # One-hot encoded
        accuracy = np.mean(np.argmax(Y_pred, axis=1) == np.argmax(Y_test, axis=1))
    else:  # Not one-hot encoded
        accuracy = np.mean(np.argmax(Y_pred, axis=1) == Y_test)

    print(f"Test Accuracy: {accuracy * 100:.2f}%")

    print("Shape of Y_test:", Y_test.shape)
    print("Shape of Y_pred:", Y_pred.shape)




Test Accuracy: 3.72%
Shape of Y_test: (7172,)
Shape of Y_pred: (7172, 25)
