## Data Preparation

The code that follows uses the data exported from the ASL_Preprocessing phase to train a model. The model is a classifier built with a neural network.

In [1]:
import pandas as pd

In [2]:
# Attach Google Drive to the Colab runtime; the landmark CSV read below lives on it.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [3]:
# Constants used from here on out: path of the landmark CSV on the mounted Google Drive.
csv_path = '/content/drive/MyDrive/landmarks_v3.csv'

In [4]:
# Load the landmark table from Drive into a DataFrame.
df = pd.read_csv(csv_path)

In [5]:
# Preview the first rows to sanity-check the columns that were loaded.
df.head()

Unnamed: 0,1_X,1_Y,2_X,2_Y,3_X,3_Y,4_X,4_Y,5_X,5_Y,...,17_Y,18_X,18_Y,19_X,19_Y,20_X,20_Y,21_X,21_Y,Label
0,0.91,0.913043,1.0,0.623188,0.91,0.246377,0.72,0.086957,0.55,0.014493,...,0.869565,0.46,0.927536,0.38,0.869565,0.55,0.942029,0.68,1.0,G
1,0.253968,0.221053,0.619048,0.315789,0.777778,0.473684,0.809524,0.673684,0.777778,0.852632,...,1.0,0.0,0.252632,0.174603,0.547368,0.333333,0.736842,0.460317,0.905263,M
2,0.253968,0.221053,0.619048,0.315789,0.777778,0.473684,0.809524,0.673684,0.777778,0.852632,...,1.0,0.0,0.252632,0.174603,0.547368,0.333333,0.736842,0.460317,0.905263,M
3,0.253968,0.221053,0.619048,0.315789,0.777778,0.473684,0.809524,0.673684,0.777778,0.852632,...,1.0,0.0,0.252632,0.174603,0.547368,0.333333,0.736842,0.460317,0.905263,M
4,0.253968,0.221053,0.619048,0.315789,0.777778,0.473684,0.809524,0.673684,0.777778,0.852632,...,1.0,0.0,0.252632,0.174603,0.547368,0.333333,0.736842,0.460317,0.905263,M


In [6]:
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the 'Label' column so that every class gets its own 0/1 indicator column.
encoder = OneHotEncoder(sparse_output=False)
labels = df[['Label']].values
ohe_labels = encoder.fit_transform(labels)

# Retrieve the generated column names (rendered as 'Label_<class>') and
# build the indicator frame with its header in a single step.
headers = encoder.get_feature_names_out(['Label'])
ohe_df = pd.DataFrame(ohe_labels, columns=headers)

ohe_df.head()

Unnamed: 0,Label_A,Label_B,Label_C,Label_D,Label_E,Label_F,Label_G,Label_H,Label_I,Label_J,...,Label_S,Label_T,Label_U,Label_V,Label_W,Label_X,Label_Y,Label_Z,Label_d,Label_s
0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
from sklearn.model_selection import train_test_split

# Features are every column except the string label; targets are the one-hot label frame.
X = df.drop('Label', axis=1)
y = ohe_df

print(type(X))
print(type(y))

# Fix the shuffling seed so the three-way split (and therefore every metric computed
# from it) is the same on each Restart & Run All.
SPLIT_SEED = 42

# First hold out 25% of all rows as the test set ...
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=SPLIT_SEED
)
# ... then take 25% of the remainder as validation (about 18.75% of the data overall),
# leaving about 56.25% for training. Both splits are stratified on the label columns.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.25, stratify=y_temp, random_state=SPLIT_SEED
)

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


## Model Testing and Selection

We have 21 data points, each with two coordinates (X and Y), so the model has 42 inputs. The output is one of 28 possible classes: 26 alphabetical letters and 2 control characters. Consequently, the output layer needs to be a softmax over 28 neurons.

I plan to experiment with a few different dense-layer configurations to see which gives the best performance. I will use ReLU as the activation for the hidden layers and softmax for the final layer.

|Model|Configuration|Reasoning|
|-------|-------|--------|
|model1|Input(42), Softmax(28)|Test the ANN at its most basic, i.e. only the minimum input and output layers|
|model2|Input(42), Dense(21), Dense(10), Softmax(28)|Use an autoencoder-style architecture to reduce the number of neurons to a code of 10 and then inflate the number of neurons in the output|
|model3|Input(42), Dense(84), Softmax(28)| Increase the number of neurons for more data representation|


In [8]:
from keras import Sequential
from keras.layers import Input, Dense, Softmax
from sklearn.metrics import f1_score
import numpy as np

In [9]:
# Seed Python, NumPy and the Keras backend before any layer is created, so that weight
# initialisation (and other seeded randomness during training) is repeatable between runs.
from keras.utils import set_random_seed

set_random_seed(42)

# model1: the minimal baseline - 42 landmark coordinates fed straight into a 28-way softmax.
model1 = Sequential()

model1.add(Input(shape=(42,)))
model1.add(Dense(28, activation='softmax'))
model1.summary()

In [10]:
# model2: bottleneck layout - 42 inputs narrowed through 21 and then 10 ReLU units
# before widening again to the 28-way softmax output.
model2 = Sequential([
    Input(shape=(42,)),
    Dense(21, activation='relu'),
    Dense(10, activation='relu'),
    Dense(28, activation='softmax'),
])
model2.summary()

In [11]:
# model3: one wide hidden layer - 42 inputs expanded to 84 ReLU units, then the 28-way softmax.
model3 = Sequential([
    Input(shape=(42,)),
    Dense(84, activation='relu'),
    Dense(28, activation='softmax'),
])
model3.summary()

#### Model 1 Testing

In [12]:
# Multi-class setup: softmax outputs scored against one-hot targets, optimised with Adam,
# with accuracy tracked alongside the loss.
model1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [13]:
# Train model1 on the training split for 20 epochs in mini-batches of 32.
# The History object returned by fit() is the cell's displayed value.
model1.fit(x=X_train, y=y_train, batch_size=32, epochs=20)

Epoch 1/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.2771 - loss: 2.9438
Epoch 2/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8413 - loss: 1.6330
Epoch 3/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9034 - loss: 1.0939
Epoch 4/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9188 - loss: 0.8250
Epoch 5/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9313 - loss: 0.6645
Epoch 6/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9363 - loss: 0.5605
Epoch 7/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9390 - loss: 0.4939
Epoch 8/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9409 - loss: 0.4473
Epoch 9/20
[1m698/698[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x786cfc31f940>

In [14]:
# Score model1 on the validation split (X_val / y_val).
# evaluate() yields the loss followed by the accuracy metric requested at compile time.
model1_loss, model1_accuracy = model1.evaluate(X_val, y_val)
# These figures come from the validation split, not the test split, so label them as such.
print('Validation loss', model1_loss)
print('Validation accuracy', model1_accuracy)

[1m233/233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9660 - loss: 0.2250
Test loss 0.23166416585445404
Accuracy 0.9634408354759216


In [15]:
# Predict class probabilities for the test split and score the hard predictions.
y_pred = model1.predict(X_test)
# Collapse each softmax row to the index of its most probable class.
y_inv_pred = np.argmax(y_pred, axis=1)
# y_test is a pandas DataFrame of one-hot columns; convert it to a NumPy array explicitly
# so the argmax does not depend on how NumPy dispatches reductions on DataFrame objects.
y_inv_test = np.argmax(y_test.to_numpy(), axis=1)

# NOTE(review): for single-label multiclass targets, micro-averaged F1 equals plain accuracy;
# consider average='macro' if a per-class view is wanted.
model1_f1 = f1_score(y_inv_test, y_inv_pred, average='micro')
print('F1 score ', model1_f1 )



[1m310/310[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
F1 score  0.9554390563564875


#### Model 2 Testing

In [16]:
# Same training setup as the other candidates: categorical cross-entropy loss,
# Adam optimiser, accuracy tracked as a metric.
model2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [17]:
# Train model2 on the training split for 20 epochs in mini-batches of 32.
# The History object returned by fit() is the cell's displayed value.
model2.fit(x=X_train, y=y_train, batch_size=32, epochs=20)

Epoch 1/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.2309 - loss: 2.8182
Epoch 2/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8712 - loss: 0.6906
Epoch 3/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9201 - loss: 0.4142
Epoch 4/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9364 - loss: 0.3373
Epoch 5/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9411 - loss: 0.2991
Epoch 6/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9516 - loss: 0.2583
Epoch 7/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9532 - loss: 0.2402
Epoch 8/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9562 - loss: 0.2213
Epoch 9/20
[1m698/698[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x786cfc31ead0>

In [18]:
# Score model2 on the validation split (X_val / y_val).
# evaluate() yields the loss followed by the accuracy metric requested at compile time.
model2_loss, model2_accuracy = model2.evaluate(X_val, y_val)
# These figures come from the validation split, not the test split, so label them as such.
print('Validation loss', model2_loss)
print('Validation accuracy', model2_accuracy)

[1m233/233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9698 - loss: 0.1335
Test loss 0.13793140649795532
Accuracy 0.9701613187789917


In [19]:
# Predict class probabilities for the test split and score the hard predictions.
y_pred = model2.predict(X_test)
# Collapse each softmax row to the index of its most probable class.
y_inv_pred = np.argmax(y_pred, axis=1)
# y_test is a pandas DataFrame of one-hot columns; convert it to a NumPy array explicitly
# so the argmax does not depend on how NumPy dispatches reductions on DataFrame objects.
y_inv_test = np.argmax(y_test.to_numpy(), axis=1)

# NOTE(review): for single-label multiclass targets, micro-averaged F1 equals plain accuracy;
# consider average='macro' if a per-class view is wanted.
model2_f1 = f1_score(y_inv_test, y_inv_pred, average='micro')
print('F1 score ', model2_f1 )


[1m310/310[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
F1 score  0.9689484827099506


#### Model 3 Testing

In [20]:
# Same training setup as the other candidates: categorical cross-entropy loss,
# Adam optimiser, accuracy tracked as a metric.
model3.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [21]:
# Train model3 on the training split for 20 epochs in mini-batches of 32.
# The History object returned by fit() is the cell's displayed value.
model3.fit(x=X_train, y=y_train, batch_size=32, epochs=20)

Epoch 1/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5878 - loss: 2.0562
Epoch 2/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9326 - loss: 0.4048
Epoch 3/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9523 - loss: 0.2687
Epoch 4/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9617 - loss: 0.2084
Epoch 5/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9646 - loss: 0.1811
Epoch 6/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9677 - loss: 0.1577
Epoch 7/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9681 - loss: 0.1505
Epoch 8/20
[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9737 - loss: 0.1338
Epoch 9/20
[1m698/698[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x786cf540d7e0>

In [22]:
# Score model3 on the validation split (X_val / y_val).
# evaluate() yields the loss followed by the accuracy metric requested at compile time.
model3_loss, model3_accuracy = model3.evaluate(X_val, y_val)
# These figures come from the validation split, not the test split, so label them as such.
print('Validation loss', model3_loss)
print('Validation accuracy', model3_accuracy)

[1m233/233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9817 - loss: 0.0777
Test loss 0.08169762045145035
Accuracy 0.9810484051704407


In [23]:
# Predict class probabilities for the test split and score the hard predictions.
y_pred = model3.predict(X_test)
# Collapse each softmax row to the index of its most probable class.
y_inv_pred = np.argmax(y_pred, axis=1)
# y_test is a pandas DataFrame of one-hot columns; convert it to a NumPy array explicitly
# so the argmax does not depend on how NumPy dispatches reductions on DataFrame objects.
y_inv_test = np.argmax(y_test.to_numpy(), axis=1)

# NOTE(review): for single-label multiclass targets, micro-averaged F1 equals plain accuracy;
# consider average='macro' if a per-class view is wanted.
model3_f1 = f1_score(y_inv_test, y_inv_pred, average='micro')
print('F1 score ', model3_f1 )


[1m310/310[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
F1 score  0.9786268777094466


### Summary

From the results, model3 provides the best model performance of the three configurations.

|Model|Validation loss (cross-entropy)|Validation accuracy|Test F1 (micro)|
|----|---|---|---|
|model1|0.2317|96.34%|95.54%|
|model2|0.1379|97.02%|96.89%|
|model3|0.0817|98.10%|97.86%|

In [24]:
# Persist the selected network (model3) to Google Drive as a .keras file.
model3.save(filepath='/content/drive/MyDrive/asl_model.keras')

In [25]:
# Show the one-hot column names; their order is the order of the target columns the models were trained on.
y_train.columns

Index(['Label_A', 'Label_B', 'Label_C', 'Label_D', 'Label_E', 'Label_F',
       'Label_G', 'Label_H', 'Label_I', 'Label_J', 'Label_K', 'Label_L',
       'Label_M', 'Label_N', 'Label_O', 'Label_P', 'Label_Q', 'Label_R',
       'Label_S', 'Label_T', 'Label_U', 'Label_V', 'Label_W', 'Label_X',
       'Label_Y', 'Label_Z', 'Label_d', 'Label_s'],
      dtype='object')

In [30]:
# Write the one-hot column names to Drive, in the same folder as the saved model.
labels = y_test.columns
labels_frame = pd.DataFrame(labels)
# NOTE(review): to_csv keeps its default index=True here, so the file also contains a
# row-index column - confirm that whatever reads this file expects that layout.
labels_frame.to_csv('/content/drive/MyDrive/asl_labels.csv')
