# Task 1: Data Preprocessing

Imports & Dependencies

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the dataset files stored under MyDrive can be read.
# The mount only needs to happen once; a remount error on re-running this cell can be ignored.
drive.mount('/content/drive')

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense,Flatten,Conv2D,MaxPool2D,Dropout
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import LabelBinarizer

Data Preprocessing

In [None]:
# Root folder of the dataset on the mounted Drive. Defined once so the
# location only has to be changed in a single place.
DATA_DIR = '/content/drive/MyDrive/sign-language-mnist'

# List every file under the dataset folder as a quick check that the mount worked.
for dirname, _, filenames in os.walk(DATA_DIR):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Each CSV holds a 'label' column plus the pixel columns (split apart in the next cell).
train_df = pd.read_csv(os.path.join(DATA_DIR, 'sign_mnist_train', 'sign_mnist_train.csv'))
test_df = pd.read_csv(os.path.join(DATA_DIR, 'sign_mnist_test', 'sign_mnist_test.csv'))
train_df.info()
test_df.info()
#train_df.head(10) #run this to check data was properly imported

In [None]:
# --- Separate labels from pixel features ---------------------------------
train_label = train_df['label']
test_label = test_df['label']
trainset = train_df.drop(['label'], axis=1)
X_train = trainset.values
X_test = test_df.drop(['label'], axis=1).values

# --- Standardise with statistics taken from the TRAINING set only --------
# The training transform is unchanged: centre on the global training mean,
# then scale by the per-pixel standard deviation. The test set now reuses
# those same statistics instead of its own, so both splits live in the same
# feature space and no information from the test set is used.
train_mean = np.mean(X_train)
X_train_std = X_train - train_mean
train_scale = np.std(X_train_std, axis=0)
# A pixel that is constant over the training set has zero std; dividing by 1
# instead keeps that feature at its centred value rather than producing NaN/inf.
train_scale = np.where(train_scale == 0, 1.0, train_scale)
X_train = X_train_std / train_scale
X_test_std = X_test - train_mean
X_test = X_test_std / train_scale

# --- One-hot encode the labels -------------------------------------------
# Fit the binarizer on the training labels only and reuse it for the test
# labels so both splits share one class-to-column mapping.
lb = LabelBinarizer()
y_train = lb.fit_transform(train_label)
y_test = lb.transform(test_label)

# --- Reshape flat pixel rows into 28x28 single-channel images -------------
X_test = X_test.reshape(-1, 28, 28, 1)
X_train = X_train.reshape(-1, 28, 28, 1)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Task 2: MLP Implementation

In [None]:
import numpy as np
from IPython.core.debugger import set_trace
import warnings
warnings.filterwarnings('ignore')
from typing import List
from tqdm import tqdm

def softmax(a):
    """Row-wise softmax of a 2-D array of scores.

    The row maximum is subtracted before exponentiating so that large scores
    do not overflow; the result is unchanged by that shift.
    """
    row_max = np.max(a, axis=1, keepdims=True)
    exponentials = np.exp(a - row_max)
    row_totals = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / row_totals

def relu(a):
    """Rectified linear unit: element-wise max(0, a)."""
    rectified = np.maximum(0, a)
    return rectified

def relu_derivative(a):
    """Derivative of ReLU: 0 where a <= 0, 1 elsewhere (element-wise)."""
    inactive = a <= 0
    return np.where(inactive, 0, 1)

def sigmoid(a):
    """Logistic sigmoid 1 / (1 + e^-a), applied element-wise."""
    denominator = 1 + np.exp(-a)
    return 1 / denominator

def sigmoid_derivative(a):
    """Derivative of the logistic sigmoid at a: s * (1 - s) with s = sigmoid(a)."""
    s = sigmoid(a)
    return s * (1 - s)

def leaky_relu(a):
    """Leaky ReLU: a where a > 0, otherwise 0.01 * a (element-wise)."""
    positive = a > 0
    leaked = a * 0.01
    return np.where(positive, a, leaked)

def leaky_relu_derivative(a):
    """Derivative of leaky ReLU: 1 where a > 0, otherwise 0.01 (element-wise)."""
    positive = a > 0
    return np.where(positive, 1, 0.01)

class MLP:
    """Fully connected network trained with full-batch gradient descent.

    Hidden layers use the activation supplied to the constructor; the output
    layer is a softmax. Training minimises cross-entropy plus an L2 penalty
    on the weights (biases are not penalised).
    """

    def __init__(self, input_units, output_units, activation_function, activation_derivative, hidden_layers, layer_units, l2_lambda=0.01):
        """Create the network and initialise its parameters.

        Args:
            input_units: Number of input features.
            output_units: Number of output classes.
            activation_function: Callable applied to every hidden pre-activation.
            activation_derivative: Callable returning the derivative of
                ``activation_function`` evaluated at the pre-activation.
            hidden_layers: Number of hidden layers. Stored for reference; the
                layers actually built come from ``layer_units``.
            layer_units: Sequence with the width of each hidden layer.
            l2_lambda: L2 regularisation strength.
        """
        self.activation_function = activation_function
        self.activation_derivative = activation_derivative
        self.hidden_layers = hidden_layers
        self.layer_units = [input_units] + list(layer_units) + [output_units]
        n_weight_layers = len(self.layer_units) - 1
        # Weights: standard normal draws scaled by sqrt(2 / fan_in); biases start at zero.
        self.weights = [
            np.random.randn(self.layer_units[i], self.layer_units[i + 1]) * np.sqrt(2. / self.layer_units[i])
            for i in range(n_weight_layers)
        ]
        self.biases = [np.zeros((1, self.layer_units[i + 1])) for i in range(n_weight_layers)]
        self.l2_lambda = l2_lambda

    def forward_pass(self, X):
        """Propagate ``X`` through the network.

        Returns:
            (activations, zs): ``activations[0]`` is ``X`` and
            ``activations[-1]`` the softmax output; ``zs`` holds the
            pre-activation of every layer.
        """
        a = X
        activations = [X]
        zs = []
        last_layer = len(self.weights) - 1
        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            z = np.dot(a, w) + b
            zs.append(z)
            if i == last_layer:
                a = softmax(z)
            else:
                # Use the activation this instance was built with rather than
                # whatever global of the same name happens to be defined.
                a = self.activation_function(z)
            activations.append(a)
        return activations, zs

    def backprop(self, X, y):
        """Compute gradients of the regularised loss for one full batch.

        Args:
            X: Input matrix, one sample per row.
            y: One-hot targets with one row per sample.

        Returns:
            (dWs, dbs): Gradients ordered like ``self.weights`` / ``self.biases``.
        """
        m = X.shape[0]
        activations, zs = self.forward_pass(X)
        # Softmax combined with cross-entropy: output error is (prediction - target).
        delta = activations[-1] - y
        dWs = [(np.dot(activations[-2].T, delta) + (self.l2_lambda * self.weights[-1])) / m]
        dbs = [np.sum(delta, axis=0, keepdims=True) / m]
        # Walk backwards over the remaining layers. The bound comes from the
        # weights that actually exist, so it cannot disagree with them even if
        # the `hidden_layers` argument was inconsistent with `layer_units`.
        for l in range(2, len(self.weights) + 1):
            delta = np.dot(delta, self.weights[-l + 1].T) * self.activation_derivative(zs[-l])
            dW = (np.dot(activations[-l - 1].T, delta) + (self.l2_lambda * self.weights[-l])) / m
            db = np.sum(delta, axis=0, keepdims=True) / m
            dWs.insert(0, dW)
            dbs.insert(0, db)
        return dWs, dbs

    def update_params(self, dWs, dbs, lr):
        """Take one gradient-descent step with learning rate ``lr``."""
        self.weights = [w - lr * dw for w, dw in zip(self.weights, dWs)]
        self.biases = [b - lr * db for b, db in zip(self.biases, dbs)]

    def compute_loss(self, output, y):
        """Mean cross-entropy of ``output`` against one-hot ``y`` plus the L2 penalty."""
        m = y.shape[0]
        # Clip probabilities so log() never sees exactly 0.
        output_clipped = np.clip(output, 1e-7, 1 - 1e-7)
        log_probs = -np.log(output_clipped[range(m), y.argmax(axis=1)])
        data_loss = np.sum(log_probs) / m
        l2_penalty = sum([np.sum(w ** 2) for w in self.weights]) * (self.l2_lambda / (2 * m))
        return data_loss + l2_penalty

    def compute_accuracy(self, output, y):
        """Fraction of rows whose arg-max prediction matches the one-hot target."""
        predictions = np.argmax(output, axis=1)
        accuracy = np.mean(predictions == np.argmax(y, axis=1))
        return accuracy

    def _run_epochs(self, X, y, epochs, lr):
        """Shared training loop: one full-batch update per epoch, logging every 10th epoch."""
        for epoch in tqdm(range(epochs)):
            dWs, dbs = self.backprop(X, y)
            self.update_params(dWs, dbs, lr)
            if epoch % 10 == 0:
                output = self.forward_pass(X)[0][-1]
                loss = self.compute_loss(output, y)
                accuracy = self.compute_accuracy(output, y)
                print(f'Epoch {epoch}, Loss: {loss}, Accuracy: {accuracy}')

    def fit(self, X, y, epochs, lr):
        """Train for ``epochs`` full-batch gradient steps; returns nothing."""
        self._run_epochs(X, y, epochs, lr)

    def fit_plot(self, X, y, epochs, lr):
        """Train like ``fit`` and return the training accuracy after the final update.

        The accuracy is recomputed from a fresh forward pass once training has
        finished, so it reflects the final parameters rather than the most
        recently logged epoch.
        """
        self._run_epochs(X, y, epochs, lr)
        final_output = self.forward_pass(X)[0][-1]
        return self.compute_accuracy(final_output, y)

    def predict(self, X):
        """Return the index of the most probable class for every row of ``X``."""
        output = self.forward_pass(X)[0][-1]
        return np.argmax(output, axis=1)

# Task 3.1

No Hidden Layers:

In [None]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
# One-hot encode the labels; the binarizer is fitted on the training labels only.
lb = LabelBinarizer()
y_train = lb.fit_transform(train_label)
y_test = lb.transform(test_label)

# Architecture: flattened image in, one output unit per class, no hidden layers.
input_units = X_train.shape[1] * X_train.shape[2]
output_units = y_train.shape[1]
hidden_layers = 0
layer_units = []
activation_function = relu
activation_derivative = relu_derivative

mlp = MLP(input_units, output_units, activation_function, activation_derivative, hidden_layers, layer_units)
epochs = 250
learning_rate = 0.01
mlp.fit(X_train.reshape(X_train.shape[0], -1), y_train, epochs, learning_rate)

# predict() returns column indices of the one-hot encoding. Translate them
# back to label values through lb.classes_ before comparing with the raw
# labels; the two only coincide when the label values are exactly 0..K-1.
y_pred = mlp.predict(X_test.reshape(X_test.shape[0], -1))
accuracy = np.mean(lb.classes_[y_pred] == test_label.values)

print(f'Test Accuracy: {accuracy}')


One Hidden Layer:

In [None]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
# One-hot encode the labels; the binarizer is fitted on the training labels only.
lb = LabelBinarizer()
y_train = lb.fit_transform(train_label)
y_test = lb.transform(test_label)

# Architecture: flattened image in, one hidden layer of 256 units, one output unit per class.
input_units = X_train.shape[1] * X_train.shape[2]
output_units = y_train.shape[1]
hidden_layers = 1
layer_units = [256]
activation_function = relu
activation_derivative = relu_derivative

mlp = MLP(input_units, output_units, activation_function, activation_derivative, hidden_layers, layer_units)
epochs = 250
learning_rate = 0.01
mlp.fit(X_train.reshape(X_train.shape[0], -1), y_train, epochs, learning_rate)

# predict() returns column indices of the one-hot encoding. Translate them
# back to label values through lb.classes_ before comparing with the raw
# labels; the two only coincide when the label values are exactly 0..K-1.
y_pred = mlp.predict(X_test.reshape(X_test.shape[0], -1))
accuracy = np.mean(lb.classes_[y_pred] == test_label.values)

print(f'Test Accuracy: {accuracy}')

Two Hidden Layers:

In [None]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
# One-hot encode the labels; the binarizer is fitted on the training labels only.
lb = LabelBinarizer()
y_train = lb.fit_transform(train_label)
y_test = lb.transform(test_label)

# Architecture: flattened image in, two hidden layers of 256 units, one output unit per class.
input_units = X_train.shape[1] * X_train.shape[2]
output_units = y_train.shape[1]
hidden_layers = 2
layer_units = [256, 256]
activation_function = relu
activation_derivative = relu_derivative

mlp = MLP(input_units, output_units, activation_function, activation_derivative, hidden_layers, layer_units)
epochs = 250
learning_rate = 0.01
mlp.fit(X_train.reshape(X_train.shape[0], -1), y_train, epochs, learning_rate)

# predict() returns column indices of the one-hot encoding. Translate them
# back to label values through lb.classes_ before comparing with the raw
# labels; the two only coincide when the label values are exactly 0..K-1.
y_pred = mlp.predict(X_test.reshape(X_test.shape[0], -1))
accuracy = np.mean(lb.classes_[y_pred] == test_label.values)

print(f'Test Accuracy: {accuracy}')

# Task 3.2

Sigmoid Activation:

In [None]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
# One-hot encode the labels; the binarizer is fitted on the training labels only.
lb = LabelBinarizer()
y_train = lb.fit_transform(train_label)
y_test = lb.transform(test_label)

# Architecture: two hidden layers of 256 units with sigmoid activations.
input_units = X_train.shape[1] * X_train.shape[2]
output_units = y_train.shape[1]
hidden_layers = 2
layer_units = [256, 256]
activation_function = sigmoid
activation_derivative = sigmoid_derivative

mlp = MLP(input_units, output_units, activation_function, activation_derivative, hidden_layers, layer_units)
epochs = 250
learning_rate = 0.01
mlp.fit(X_train.reshape(X_train.shape[0], -1), y_train, epochs, learning_rate)

# predict() returns column indices of the one-hot encoding. Translate them
# back to label values through lb.classes_ before comparing with the raw
# labels; the two only coincide when the label values are exactly 0..K-1.
y_pred = mlp.predict(X_test.reshape(X_test.shape[0], -1))
accuracy = np.mean(lb.classes_[y_pred] == test_label.values)

print(f'Test Accuracy: {accuracy}')

Leaky ReLU Activation:

In [None]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
# One-hot encode the labels; the binarizer is fitted on the training labels only.
lb = LabelBinarizer()
y_train = lb.fit_transform(train_label)
y_test = lb.transform(test_label)

# Architecture: two hidden layers of 256 units with leaky-ReLU activations.
input_units = X_train.shape[1] * X_train.shape[2]
output_units = y_train.shape[1]
hidden_layers = 2
layer_units = [256, 256]
activation_function = leaky_relu
activation_derivative = leaky_relu_derivative

mlp = MLP(input_units, output_units, activation_function, activation_derivative, hidden_layers, layer_units)
epochs = 250
learning_rate = 0.01
mlp.fit(X_train.reshape(X_train.shape[0], -1), y_train, epochs, learning_rate)

# predict() returns column indices of the one-hot encoding. Translate them
# back to label values through lb.classes_ before comparing with the raw
# labels; the two only coincide when the label values are exactly 0..K-1.
y_pred = mlp.predict(X_test.reshape(X_test.shape[0], -1))
accuracy = np.mean(lb.classes_[y_pred] == test_label.values)

print(f'Test Accuracy: {accuracy}')

# Task 3.3

Lambda = 0

In [None]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
# One-hot encode the labels; the binarizer is fitted on the training labels only.
lb = LabelBinarizer()
y_train = lb.fit_transform(train_label)
y_test = lb.transform(test_label)

# Architecture: two hidden layers of 256 ReLU units.
input_units = X_train.shape[1] * X_train.shape[2]
output_units = y_train.shape[1]
hidden_layers = 2
layer_units = [256, 256]
activation_function = relu
activation_derivative = relu_derivative

# Last argument is l2_lambda: 0 switches the L2 penalty off.
mlp = MLP(input_units, output_units, activation_function, activation_derivative, hidden_layers, layer_units, 0)
epochs = 250
learning_rate = 0.01
mlp.fit(X_train.reshape(X_train.shape[0], -1), y_train, epochs, learning_rate)

# predict() returns column indices of the one-hot encoding. Translate them
# back to label values through lb.classes_ before comparing with the raw
# labels; the two only coincide when the label values are exactly 0..K-1.
y_pred = mlp.predict(X_test.reshape(X_test.shape[0], -1))
accuracy = np.mean(lb.classes_[y_pred] == test_label.values)

print(f'Test Accuracy: {accuracy}')

Lambda = 1

In [None]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
# One-hot encode the labels; the binarizer is fitted on the training labels only.
lb = LabelBinarizer()
y_train = lb.fit_transform(train_label)
y_test = lb.transform(test_label)

# Architecture: two hidden layers of 256 ReLU units.
input_units = X_train.shape[1] * X_train.shape[2]
output_units = y_train.shape[1]
hidden_layers = 2
layer_units = [256, 256]
activation_function = relu
activation_derivative = relu_derivative

# Last argument is l2_lambda: 1 applies a strong L2 penalty.
mlp = MLP(input_units, output_units, activation_function, activation_derivative, hidden_layers, layer_units, 1)
epochs = 250
learning_rate = 0.01
mlp.fit(X_train.reshape(X_train.shape[0], -1), y_train, epochs, learning_rate)

# predict() returns column indices of the one-hot encoding. Translate them
# back to label values through lb.classes_ before comparing with the raw
# labels; the two only coincide when the label values are exactly 0..K-1.
y_pred = mlp.predict(X_test.reshape(X_test.shape[0], -1))
accuracy = np.mean(lb.classes_[y_pred] == test_label.values)

print(f'Test Accuracy: {accuracy}')

# Task 3.4

In [None]:
from tensorflow.keras import regularizers, layers, models
from tensorflow.keras.optimizers import Adam, SGD, RMSprop

# Width of the final softmax layer built by build_model().
num_classes = 24

def build_model(hidden_units, dropout_rate=0.5, l2_rate=1e-4, optimizer='adam', lr=1e-3):
    """Build and compile the CNN classifier.

    Three convolutional layers (with max-pooling after the first two) feed two
    L2-regularised dense layers of ``hidden_units`` units, each followed by
    dropout, and a ``num_classes``-way softmax output.

    Args:
        hidden_units: Width of each of the two dense layers.
        dropout_rate: Dropout rate applied after each dense layer.
        l2_rate: L2 coefficient for the dense-layer kernels.
        optimizer: One of 'adam', 'sgd' or 'rmsprop'.
        lr: Learning rate passed to the optimizer.

    Returns:
        The compiled model (categorical cross-entropy loss, accuracy metric).

    Raises:
        ValueError: If ``optimizer`` is not one of the supported names.
    """
    if optimizer not in ('adam', 'sgd', 'rmsprop'):
        raise ValueError("Unsupported optimizer")

    # Deferred constructors so that only the requested optimizer is created.
    optimizer_factories = {
        'adam': lambda: Adam(learning_rate=lr),
        'sgd': lambda: SGD(learning_rate=lr),
        'rmsprop': lambda: RMSprop(learning_rate=lr),
    }
    opt = optimizer_factories[optimizer]()

    def regularised_dense():
        return layers.Dense(hidden_units, activation='relu', kernel_regularizer=regularizers.l2(l2_rate))

    convolutional_stack = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.Flatten(),
    ]
    classifier_head = [
        regularised_dense(),
        layers.Dropout(dropout_rate),
        regularised_dense(),
        layers.Dropout(dropout_rate),
        layers.Dense(num_classes, activation='softmax'),
    ]

    model = models.Sequential(convolutional_stack + classifier_head)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Hyper-parameter grid: dense-layer width x optimizer (12 configurations in total).
hidden_units_options = [ 32,64,128,256]
optimizer_options = ['adam', 'sgd', 'rmsprop']

# Running best over the grid, plus one record per configuration for the plots further down.
best_accuracy = 0
best_model_config = None
config_accuracies = []

for units in hidden_units_options:
    for optimizer in optimizer_options:
        # A fresh model per configuration; the remaining build_model arguments keep their defaults.
        model = build_model(units, optimizer=optimizer)
        # validation_split=0.1 makes Keras hold out 10% of the training data for validation.
        history = model.fit(
            X_train, y_train,
            epochs=25,
            validation_split=0.1,
        )
        # evaluate() returns [loss, accuracy] here because the model is compiled with one metric.
        _, accuracy = model.evaluate(X_test, y_test)
        print(f"Model with {units} hidden units and {optimizer} optimizer has accuracy: {accuracy:.2f}")

        # Keep the per-epoch training/validation curves and the final test accuracy.
        config_accuracies.append({
            'units': units,
            'optimizer': optimizer,
            'train_acc': history.history['accuracy'],
            'val_acc': history.history['val_accuracy'],
            'test_acc': accuracy
        })

        # NOTE(review): the best configuration is selected on TEST accuracy, so the
        # reported best score is optimistically biased; selecting on the validation
        # accuracy would keep the test set untouched.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model_config = (units, optimizer)

# After testing all configurations
print(f"Best performing model configuration: {best_model_config} with accuracy: {best_accuracy:.2f}")


# Task 3.5

In [None]:
# Retrain the most recently configured MLP for 1000 epochs. The architecture,
# activation and learning rate are whatever the previously executed
# experiment cell left in the notebook's globals.
mlp = MLP(input_units, output_units, activation_function, activation_derivative, hidden_layers, layer_units)
train_accuracy = mlp.fit_plot(
    X_train.reshape(X_train.shape[0], -1),
    y_train,
    1000,
    learning_rate,
)

# Compare predicted class indices with the class indices of the one-hot test labels.
y_pred = mlp.predict(X_test.reshape(X_test.shape[0], -1))
test_accuracy = np.mean(y_pred == np.argmax(y_test, axis=1))

print(f"Hidden Layers: {hidden_layers!s}")
print(f"Hidden Layer Units: {layer_units!s}")
print("Activation Function: ReLU")
print(f"Training Accuracy: {train_accuracy!s} Testing Accuracy: {test_accuracy!s}")

# Task 3.6

MLP

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelBinarizer

def train_and_evaluate_mlp(epochs):
    """Train a fresh MLP for ``epochs`` epochs and report its accuracies.

    The network configuration, learning rate and data are read from the
    notebook's globals.

    Returns:
        (train_accuracy, test_accuracy)
    """
    flat_train = X_train.reshape(X_train.shape[0], -1)
    flat_test = X_test.reshape(X_test.shape[0], -1)

    network = MLP(input_units, output_units, activation_function, activation_derivative, hidden_layers, layer_units)
    train_accuracy = network.fit_plot(flat_train, y_train, epochs, learning_rate)

    predicted_classes = network.predict(flat_test)
    true_classes = np.argmax(y_test, axis=1)
    test_accuracy = np.mean(predicted_classes == true_classes)

    return train_accuracy, test_accuracy

# Train one fresh MLP per epoch budget and record both accuracies.
epoch_list = [10, 50, 100, 200, 500, 1000, 2000, 3000]
train_accuracies = []
test_accuracies = []

for epochs in epoch_list:
    train_acc, test_acc = train_and_evaluate_mlp(epochs)
    train_accuracies.append(train_acc)
    test_accuracies.append(test_acc)

# Grouped bar chart: one pair of bars (training / testing) per epoch budget.
bar_width = 0.35
index = np.arange(len(epoch_list))

fig, ax = plt.subplots()
ax.bar(index, train_accuracies, bar_width, label='Training Accuracy')
ax.bar(index + bar_width, test_accuracies, bar_width, label='Testing Accuracy')

ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.set_title('Training vs Testing Accuracy at Different Epochs')
# Centre each tick between the two bars of its group.
ax.set_xticks(index + bar_width / 2)
ax.set_xticklabels(epoch_list)
ax.legend()
fig.tight_layout()
plt.show()


CNN

In [None]:
# put cnn plot code here
import matplotlib.pyplot as plt

def plot_accuracy_curves(configs, history_key, title, linestyle='-'):
    """Plot one per-epoch accuracy curve per CNN configuration.

    Args:
        configs: Records with 'units', 'optimizer' and the per-epoch accuracy
            list stored under ``history_key``.
        history_key: Which curve to draw, e.g. 'train_acc' or 'val_acc'.
        title: Figure title.
        linestyle: Matplotlib line style used for every curve.
    """
    fig, ax = plt.subplots(figsize=(14, 8))
    for config in configs:
        curve = config[history_key]
        # Epochs are numbered from 1 on the x-axis.
        epoch_axis = range(1, len(curve) + 1)
        label = f"Units: {config['units']}, Optimizer: {config['optimizer']}"
        ax.plot(epoch_axis, curve, linestyle, label=label)
    ax.set_title(title)
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Accuracy')
    ax.legend()
    plt.show()


# config_accuracies is filled by the CNN grid search in Task 3.4.
plot_accuracy_curves(
    config_accuracies, 'train_acc',
    'Training Accuracy with Different Optimization Algorithm',
)
plot_accuracy_curves(
    config_accuracies, 'val_acc',
    'Validation Accuracy with Different Optimization Algorithm',
    linestyle='--',
)
