Import dependencies

In [59]:
import landmark_detector as ld
import os
import numpy as np
import json
import cv2 as cv

import tensorflow as tf
import keras
from keras import layers

Set variables

In [60]:
words = ['deaf', 'eat', 'fish', 'friend', 'like', 'milk', 'nice', 'no', 'orange', 'teacher', 'want', 'what', 'where', 'yes']
select_words = ['deaf', 'eat', 'fish']
path = '../preprocessing/dataset/train/'
num_features = 126

Get the number of videos

In [61]:
num_videos = 0

for word in select_words:
    i = 1
    video_path = path + word + '/0001.mp4'
    while os.path.exists(video_path):
        try:
            num_videos += 1
            i += 1
        except:
            break
        video_path = path + word + '/' + str(i).zfill(4) + '.mp4'

print('Number of videos:', num_videos)

Number of videos: 118


Check highest number of frames

In [62]:
highest_frame = 0

for word in select_words:
    i = 1
    video_path = path + word + '/0001.mp4'
    while os.path.exists(video_path):
        try:
            cap = cv.VideoCapture(video_path)
            if not cap.isOpened():
                raise FileNotFoundError("The video file not found")
            curr_frame= int(cap.get(cv.CAP_PROP_FRAME_COUNT))
            if curr_frame > highest_frame:
                highest_frame = curr_frame
            i += 1
        except:
            break
        video_path = path + word + '/' + str(i).zfill(4) + '.mp4'

print('Highest frame count:', highest_frame)


Highest frame count: 42


Data schema:

(number of videos,max frames, 2, 21, 3)


In [64]:
detector = ld.get_detector('../models/hand_landmarker.task')

training_X = np.zeros((num_videos, highest_frame, num_features))
training_y = np.zeros(num_videos)

video = 0

for word in select_words:
    i = 1
    video_path = path + word + '/0001.mp4'
    while os.path.exists(video_path):
        try:
            landmarks = ld.get_landmarks(video_path, detector)
            for frame in range(len(landmarks)):
                features = np.array(landmarks[frame]).flatten()
                training_X[video,frame,:] = np.pad(features, (0, num_features - len(features)), 'constant')
            training_y[video] = words.index(word)
            video += 1
            i += 1
        except Exception as e:
            print(e)
            break
        video_path = path + word + '/' + str(i).zfill(4) + '.mp4'

print(training_X)
print(training_y)

[[[ 5.39468527e-01  6.34025156e-01  6.37598561e-08 ...  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  [ 5.81657350e-01  6.59672081e-01 -3.04205514e-08 ...  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  [ 6.96257055e-01  7.73334503e-01 -4.23549665e-07 ...  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  ...
  [ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ...  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  [ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ...  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  [ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ...  0.00000000e+00
    0.00000000e+00  0.00000000e+00]]

 [[ 2.85132617e-01  4.56699103e-01 -2.08529291e-10 ...  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  [ 3.37124765e-01  6.62779331e-01 -2.83490976e-07 ...  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  [ 2.86495477e-01  6.92171931e-01 -1.70053028e-07 ...  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  ...
  [ 0.00000000e+00  0.0

Normalize X

In [65]:
def  featureNormalize(X):
    X_norm = X.copy()
    mu = np.zeros(X.shape[1])
    sigma = np.zeros(X.shape[1])

    for i in range (X.shape[1]):
        mu[i] = np.mean(X[:,i])
        X_norm[:,i] -= mu[i]
        sigma[i] = np.std(X[:,i])
        X_norm[:,i] /= sigma[i]

    return X_norm, mu, sigma

Create model

In [None]:
model = keras.Sequential()

model.add(keras.Input(shape=(highest_frame, num_features)))

model.add(keras.SimpleRNN(len(select_words), activation='relu'))