Import dependencies

In [2]:
import data_prep as prep
import random
import numpy as np
import keras
from sklearn.model_selection import train_test_split
from keras import layers
from tensorflow.keras.callbacks import EarlyStopping

Set variables

In [3]:
words = ['deaf', 'eat', 'fish', 'friend', 'like', 'milk', 'nice', 'no', 'orange', 'teacher', 'want', 'what', 'where', 'white', 'yes']
select_words = ['eat', 'teacher', 'want']
path = '../preprocessing/dataset/'
detector_path = '../models/hand_landmarker.task'
num_features = 126
model_name = 'draft_model'
fps = 20

Get data from mediapipe


In [4]:
X, y, num_videos, highest_frame, bad_videos = prep.get_data(select_words, path, detector_path)

print('Number of videos:', num_videos)
print('Highest frame:', highest_frame)
print('Videos with no landmarkers detected: ', bad_videos)


  0%|          | 0/3 [00:00<?, ?it/s]

eat:   0%|          | 0/98 [00:00<?, ?it/s]

teacher:   0%|          | 0/101 [00:00<?, ?it/s]

want:   0%|          | 0/80 [00:00<?, ?it/s]

Number of videos: 251
Highest frame: 230
Videos with no landmarkers detected:  28


Padding and Masking X

In [12]:
padded_X, mask = prep.padX(X, num_videos, highest_frame, num_features)
print(padded_X.shape)

(251, 230, 126)


Split data

In [13]:
X_train, X_test, y_train, y_test = train_test_split(padded_X, y, test_size=0.2, random_state=42)


X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

Create model

In [14]:
model = keras.Sequential()

model.add(keras.Input(shape=(highest_frame, num_features)))
model.add(layers.Masking(mask_value=0.0))
model.add(layers.LSTM(64))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(len(select_words), activation='sigmoid'))


model.summary()

Train model

In [15]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

model.fit(X_train, y_train, epochs=100, callbacks=[early_stopping])

model.save(f'../models/{model_name}.keras')

with open(f"../models/{model_name}.env", "w") as file:
    file.write(f"MAX_FRAMES={highest_frame}\n")
    file.write(f"NUM_FEATURES={num_features}\n")
    file.write(f"WORDS={",".join(select_words)}\n")
    file.write(f"FPS={fps}\n")

Epoch 1/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 63ms/step - accuracy: 0.4112 - loss: 1.0790
Epoch 2/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.4822 - loss: 1.0227
Epoch 3/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.4574 - loss: 1.0255
Epoch 4/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.5440 - loss: 0.9337
Epoch 5/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.6315 - loss: 0.8796
Epoch 6/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.6848 - loss: 0.7859
Epoch 7/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.7136 - loss: 0.7417
Epoch 8/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.7462 - loss: 0.6461
Epoch 9/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

Evaluate model

In [16]:
results = model.evaluate(X_test, y_test)

print('Test loss:', results)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.8145 - loss: 0.5129 
Test loss: [0.5353675484657288, 0.7843137383460999]


Use model

In [19]:
i = random.randint(0,X_test.shape[0]-1)


X_prediction = X_test[i,:,:]
y_prediction = select_words[y_test[i]]

print(model.predict(np.array([X_prediction])))
print("should be", y_prediction)
print("predicted", select_words[np.argmax(model.predict(np.array([X_prediction])))])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[[0.8321267  0.19363368 0.64248514]]
should be eat
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
predicted eat


Get test accuracy for each word

In [18]:
dic = {}
for word in select_words:
    dic[word] = [0,0]

for i in range(X_test.shape[0]):
    X_prediction = X_test[i,:,:]
    y_prediction = select_words[y_test[i]]
    prediction = select_words[np.argmax(model.predict(np.array([X_prediction]), verbose=0))]
    if y_prediction == prediction:
        dic[y_prediction][0] += 1
    dic[y_prediction][1] += 1

for key in dic:
    print(f"{key}: {dic[key][0]}/{dic[key][1]}")

eat: 16/22
teacher: 14/15
want: 10/14
