# Coding Exercise #

This Jupyter notebook contains a copy of the exercises used in the penultimate Workshop Meeting hosted by RAISO during the Winter 2025 quarter. 

## Credits ##

This exercise could not have been created without the following sources:

The code in this Jupyter notebook was adapted from the following GitHub repository:
https://github.com/mg343/Sign-Language-Detection?tab=readme-ov-file

The dataset associated with training this model was downloaded from Kaggle:
https://www.kaggle.com/datamunge/sign-language-mnist

## Importing Libraries ##

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import keras
import pandas as pd
import kagglehub
import tensorflow as tf
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout , BatchNormalization
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Retrieving Data ##

In [1]:
# Download the dataset from Kaggle; `path` is the local folder that holds the CSV files
path = kagglehub.dataset_download("datamunge/sign-language-mnist")

print("Path to dataset files:", path)

# Read the CSV data from the downloaded files into Pandas DataFrames
# This will allow us to train our neural network on the data
train_df = pd.read_csv(path + "/" + "sign_mnist_train.csv")
test_df = pd.read_csv(path + "/" + "sign_mnist_test.csv")

# Separate the labels (what we want to predict) from the features (the pixel columns).
# pop() returns the 'label' column and removes it from the DataFrame in one step,
# so the model never receives the answer as part of its input.
y_train = train_df.pop('label')
y_test = test_df.pop('label')

# Convert the labels to one-hot (binary) vectors to make it easier for the model to train.
# The binarizer is fitted on the TRAINING labels only and then reused (transform, not
# fit_transform) on the test labels, so both sets share the same class -> column mapping.
label_binarizer = LabelBinarizer()
y_train = label_binarizer.fit_transform(y_train)
y_test = label_binarizer.transform(y_test)

# Prepare the features:
#   Divide by 255 so each grayscale pixel value is represented on a scale of 0 to 1
#   Reshape every row into a 28 x 28 pixel image with a single channel
x_train = (train_df.values / 255).reshape(-1, 28, 28, 1)
x_test = (test_df.values / 255).reshape(-1, 28, 28, 1)

# Data augmentation: every training batch gets small random rotations, zooms and shifts.
# Flips are disabled.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range=0.1,  # randomly zoom image
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images horizontally
        vertical_flip=False)  # randomly flip images vertically

# fit() computes dataset-wide statistics, which per the Keras docs are only required by the
# featurewise_* and zca_whitening options. Those are all off here; the call is kept so the
# generator is ready if you switch one of them on.
datagen.fit(x_train)

# Split the held-out data in half (no shuffling):
#   the second half becomes the validation set monitored during training,
#   the first half stays as the final test set used for scoring.
midpoint = len(x_test) // 2
x_valid, y_valid = x_test[midpoint:], y_test[midpoint:]
x_test, y_test = x_test[:midpoint], y_test[:midpoint]

NameError: name 'kagglehub' is not defined

## Our Model ##

Here is the code to produce the model we made last week. The hyperparameters you can change are marked with `TODO` comments. See if you can beat the final accuracy we got (96.77%). Whoever has the highest accuracy gets the ultimate prize: BRAGGING RIGHTS!

In [None]:
# Build, train and score the convolutional network.
# Relies on x_train / y_train / x_valid / y_valid / x_test / y_test and datagen
# from the "Retrieving Data" cell.
model = Sequential()

# Declare the input explicitly: 28 x 28 images with a single channel.
# (Passing input_shape= to the first layer instead makes Keras emit a warning.)
model.add(keras.Input(shape = (28,28,1)))

# Convolutional layers
model.add(Conv2D(75 , (3,3) , strides = 1 , padding = 'same' , activation = 'relu'))
model.add(BatchNormalization())
model.add(MaxPool2D((2,2) , strides = 2 , padding = 'same'))

model.add(Conv2D(50 , (3,3) , strides = 1 , padding = 'same' , activation = 'relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(MaxPool2D((2,2) , strides = 2 , padding = 'same'))

model.add(Conv2D(25 , (3,3) , strides = 1 , padding = 'same' , activation = 'relu'))
model.add(BatchNormalization())
model.add(MaxPool2D((2,2) , strides = 2 , padding = 'same'))

# Convolutional -> Linear layers
model.add(Flatten())

# Linear layers
model.add(Dense(units = 512 , activation = 'relu'))
model.add(Dropout(0.3))

# TODO: Add more linear layers using the two lines above as a template!
# TODO: Add a layer with as many neurons (units) as you'd like!
# TODO: Add a layer with a dropout of your choice to see how it affects the accuracy!
# Warning: The more layers and neurons, the more complex the model, and the slower it might train!


# Final layer that converts to different letters (one output per class, 24 in total)
model.add(Dense(units = 24 , activation = 'softmax'))

model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.summary()

# TODO: change the inputs to see how the learning rate affects the results!
# for reference, review the ReduceLROnPlateau documentation
# As configured: watch val_accuracy, and after 2 epochs without improvement
# multiply the learning rate by 0.5, never going below 0.00001.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience = 2, verbose=1, factor=0.5, min_lr=0.00001)

# TODO: Increase the epochs to see if the validation accuracy levels off!
# Warning: Increasing epochs might increase training time
# Training batches come from the augmenting generator; validation uses the raw images.
history = model.fit(datagen.flow(x_train,y_train, batch_size = 128), epochs = 3, validation_data = (x_valid, y_valid), callbacks = [learning_rate_reduction])

# Evaluate the model on the test data. This final accuracy will be your score!
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)

print(f'Loss: {loss:.4f}')
print(f'Accuracy: {accuracy:.4f}')

# TODO: Use a Hyperparameter Tuning Algorithm like Grid Search!

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/3


  self._warn_if_super_not_called()


[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 118ms/step - accuracy: 0.4583 - loss: 1.8312 - val_accuracy: 0.0680 - val_loss: 4.0078 - learning_rate: 0.0010
Epoch 2/3
[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 114ms/step - accuracy: 0.9121 - loss: 0.2599 - val_accuracy: 0.6852 - val_loss: 0.9902 - learning_rate: 0.0010
Epoch 3/3
[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 104ms/step - accuracy: 0.9674 - loss: 0.1037 - val_accuracy: 0.9621 - val_loss: 0.1313 - learning_rate: 0.0010
Loss: 0.1210
Accuracy: 0.9674


## Extra: Transformers ##

In [None]:
# Run this code block and see if you can understand different parts. Feel free to look up Keras and Tensorflow documentation for more information!
from tensorflow.keras import Model
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, GlobalAveragePooling1D, Reshape, Embedding

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one transformer encoder block to `inputs`.

    The block has two stages, each preceded by layer normalization and
    followed by a residual (skip) connection:
      1. multi-head self-attention
      2. a two-layer feed-forward network

    Args:
        inputs: Tensor to transform. Its last dimension sets the width of the
            final feed-forward layer, so the output can be added back to it.
        head_size: Passed to MultiHeadAttention as `key_dim`.
        num_heads: Number of attention heads.
        ff_dim: Number of units in the hidden (ReLU) feed-forward layer.
        dropout: Rate used by both Dropout layers (default 0).

    Returns:
        The feed-forward output added to the attention-stage result.
    """
    feature_width = inputs.shape[-1]

    # Stage 1: normalize, let the input attend to itself, then add the skip connection
    normalized = LayerNormalization(epsilon=1e-6)(inputs)
    attended = MultiHeadAttention(num_heads=num_heads, key_dim=head_size)(normalized, normalized)
    attended = Dropout(dropout)(attended)
    res = attended + inputs

    # Stage 2: normalize, expand to ff_dim, project back to the input width, add the skip
    hidden = LayerNormalization(epsilon=1e-6)(res)
    hidden = Dense(ff_dim, activation="relu")(hidden)
    projected = Dense(feature_width)(hidden)
    projected = Dropout(dropout)(projected)
    return projected + res

# The code might look different, but it is functionally the same as what we've been doing
# Just with a transformer
# (This uses the Keras functional API: each layer is called on the output of the previous one.)


class PositionalEmbedding(keras.layers.Layer):
    """Adds a learned embedding for every sequence position to its input.

    Wrapping the Embedding in a layer that is called on the model's symbolic
    tensor makes the embedding weights part of the model, so they are trained.
    Calling Embedding directly on tf.range(...) outside the model only produces
    a fixed tensor of random initial values.

    Args:
        sequence_length: Number of positions in the sequence.
        embed_dim: Size of each position's embedding; must match the last
            dimension of the input so the two can be added.
    """

    def __init__(self, sequence_length, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.sequence_length = sequence_length
        self.embed_dim = embed_dim
        self.position_embedding = Embedding(input_dim=sequence_length, output_dim=embed_dim)

    def call(self, inputs):
        positions = tf.range(start=0, limit=self.sequence_length, delta=1)
        # (sequence_length, embed_dim) broadcasts over the batch dimension of `inputs`
        return inputs + self.position_embedding(positions)

    def get_config(self):
        # Lets the layer be re-created from its constructor arguments when the model is saved
        config = super().get_config()
        config.update({"sequence_length": self.sequence_length, "embed_dim": self.embed_dim})
        return config


inputs = keras.Input(shape=(28,28,1))

# Same three convolutional stages as the Sequential model
x = Conv2D(75, (3,3), strides=1, padding='same', activation='relu')(inputs)
x = BatchNormalization()(x)
x = MaxPool2D((2,2), strides=2, padding='same')(x)

x = Conv2D(50, (3,3), strides=1, padding='same', activation='relu')(x)
x = Dropout(0.2)(x)
x = BatchNormalization()(x)
x = MaxPool2D((2,2), strides=2, padding='same')(x)

x = Conv2D(25, (3,3), strides=1, padding='same', activation='relu')(x)
x = BatchNormalization()(x)
cnn_output = MaxPool2D((2,2), strides=2, padding='same')(x)


# Transformer Integration (4x4x25 -> 16x25 sequence)
# Each of the 16 spatial locations becomes one "token" with 25 features
x = Reshape((16, 25))(cnn_output)

# Add (trainable) positional embeddings so the transformer knows where each token came from
x = PositionalEmbedding(sequence_length=16, embed_dim=25)(x)

# Transformer Encoder Block
x = transformer_encoder(x, head_size=25, num_heads=4, ff_dim=128, dropout=0.1)


# Average over the 16 tokens, then classify with the same dense head as before
x = GlobalAveragePooling1D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)
outputs = Dense(24, activation='softmax')(x)

model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Reuses datagen, the data splits and the learning_rate_reduction callback from the earlier cells
history = model.fit(datagen.flow(x_train,y_train, batch_size = 128), epochs = 3, validation_data = (x_valid, y_valid), callbacks = [learning_rate_reduction])
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)

print(f'Loss: {loss:.4f}')
print(f'Accuracy: {accuracy:.4f}')


Epoch 1/3
[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 113ms/step - accuracy: 0.2754 - loss: 2.2922 - val_accuracy: 0.0268 - val_loss: 10.4395 - learning_rate: 0.0010
Epoch 2/3
[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 108ms/step - accuracy: 0.7695 - loss: 0.6296 - val_accuracy: 0.1564 - val_loss: 4.9145 - learning_rate: 0.0010
Epoch 3/3
[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 105ms/step - accuracy: 0.8951 - loss: 0.2928 - val_accuracy: 0.2819 - val_loss: 5.5113 - learning_rate: 0.0010
Loss: 5.5711
Accuracy: 0.2856
