Import dependencies

In [20]:
import landmark_detector as ld
import data_prep as prep
import numpy as np
import keras
from keras import layers

Set variables

In [21]:
words = ['deaf', 'eat', 'fish', 'friend', 'like', 'milk', 'nice', 'no', 'orange', 'teacher', 'want', 'what', 'where', 'white', 'yes']
select_words = ['no', 'eat', 'teacher']
modes = ['train', 'val', 'test']
path = '../preprocessing/dataset/'
num_features = 126
model_name = 'draft_model'
detector_path = '../models/hand_landmarker.task'

Get data from mediapipe


In [22]:
training_X, training_y, num_videos, highest_frame = prep.get_data('train', select_words, path, detector_path)

print('Number of videos:', num_videos)
print('Highest frame:', highest_frame)

Number of videos: 91
Highest frame: 47


Padding and Masking X

In [23]:
padded_X, mask = prep.padX(training_X, num_videos, highest_frame, num_features)
print(padded_X.shape)

(91, 47, 126)


Create model

In [None]:
model = keras.Sequential()

model.add(keras.Input(shape=(highest_frame, num_features)))
model.add(layers.Masking(mask_value=0.0))
model.add(layers.LSTM(64))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(len(select_words), activation='softmax'))


model.summary()

Train model

In [16]:
val_X, val_y, num_val_videos, highest_frame_val = prep.get_data('val', select_words, path, detector_path)
padded_val_X, val_mask = prep.padX(val_X, num_val_videos, highest_frame, num_features)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(padded_X, np.array(training_y), epochs=10, validation_data=(padded_val_X, np.array(val_y)))

model.save(f'../models/{model_name}.keras')

with open(f"../models/{model_name}.env", "w") as file:
    file.write(f"MAX_FRAMES={highest_frame}\n")
    file.write(f"NUM_FEATURES={num_features}\n")
    file.write(f"WORDS={",".join(select_words)}\n")

Epoch 1/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 165ms/step - accuracy: 0.4463 - loss: 1.1011 - val_accuracy: 0.5833 - val_loss: 1.0585
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.5834 - loss: 1.0028 - val_accuracy: 0.6250 - val_loss: 1.0135
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.4644 - loss: 0.9965 - val_accuracy: 0.5833 - val_loss: 0.9849
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.5255 - loss: 0.9488 - val_accuracy: 0.5833 - val_loss: 0.9543
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.5709 - loss: 0.9227 - val_accuracy: 0.5833 - val_loss: 0.9237
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.6860 - loss: 0.8029 - val_accuracy: 0.6667 - val_loss: 0.9022
Epoch 7/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━

Evaluate model

In [17]:
test_X, test_y, num_test_videos, highest_frame_test = prep.get_data('test', select_words, path, detector_path)
padded_test_X, test_mask = prep.padX(test_X, num_test_videos, highest_frame, num_features)
results = model.evaluate(padded_test_X, np.array(test_y))

print('Test loss:', results)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.6667 - loss: 0.7309
Test loss: [0.7308791279792786, 0.6666666865348816]


Use model

In [18]:
detector = ld.get_detector(detector_path)

prediction_X = []
prediction_y = []

video_path = path + 'test/teacher/0004.mp4'

video_X = []
landmarks, frame_count = ld.get_landmarks(video_path, detector)
if len(landmarks) == 0:
    print('No landmarks detected')
else:
    for frame in range(len(landmarks)):
        features = np.array(landmarks[frame]).flatten()
        features = np.pad(features, (0, num_features - len(features)), 'constant')
        video_X.append(features)
    for i in range(highest_frame-len(video_X)):
        temp = np.zeros((num_features))
        video_X.append(temp)

    prediction_X.append(video_X)
    prediction_y.append(select_words.index('teacher'))


    print(np.shape(prediction_X))
    print(model.predict(np.array(prediction_X)))

(1, 47, 126)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step
[[0.00360834 0.01320584 0.98318577]]
