# 1. Imports and Seed

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import random
import tensorflow as tf
import math

In [2]:
def seed_everything(seed):
    """Seed every random number source this notebook relies on.

    Seeds Python's `random` module, NumPy's global generator and TensorFlow's
    global generator, and stores the value in the PYTHONHASHSEED environment
    variable.

    Args:
        seed: Integer seed applied to all of the generators above.
    """
    # Environment variables are text, hence the str() conversion.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)

# Single seed value reused by the seeding calls and the data splits below.
SEED = 22
# Seed all generators once up front, before any random work happens.
seed_everything(seed=SEED)

# 2. Keypoints using MP Holistic

In [3]:
# Short aliases for the two MediaPipe sub-modules used throughout the notebook.
mp_holistic = mp.solutions.holistic # Holistic module: Holistic class and the *_CONNECTIONS / FACEMESH_* constants
mp_drawing = mp.solutions.drawing_utils # Drawing utilities: draw_landmarks and DrawingSpec

In [4]:
def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and return the frame together with the results.

    Args:
        image: Frame in BGR channel order (it is converted with COLOR_BGR2RGB
            before being handed to the model).
        model: Object exposing `process(rgb_image)`; the extraction loop in
            this notebook passes a MediaPipe Holistic instance.

    Returns:
        Tuple `(image, results)`: the frame converted back to BGR, and
        whatever `model.process` returned.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # colour conversion BGR -> RGB
    rgb.flags.writeable = False                   # mark the buffer read-only while the model runs
    results = model.process(rgb)                  # make the prediction
    rgb.flags.writeable = True                    # buffer is writeable again
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)    # colour conversion RGB -> BGR
    return bgr, results

In [5]:
def draw_landmarks(image, results):
    """Draw the face, pose and both hand landmark sets on `image`.

    Uses the default drawing style of `mp_drawing.draw_landmarks`.

    Args:
        image: Frame that the landmarks are drawn onto.
        results: Holistic results object providing `face_landmarks`,
            `pose_landmarks`, `left_hand_landmarks` and `right_hand_landmarks`.
    """
    overlays = (
        # Face: contour connections (FACEMESH_TESSELATION was the previously tried alternative).
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmarks, connections in overlays:
        mp_drawing.draw_landmarks(image, landmarks, connections)

In [6]:
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks on `image` with per-part colours.

    Every part is drawn with two DrawingSpecs, passed to
    `mp_drawing.draw_landmarks` in this order: the landmark (point) style
    first, then the connection (line) style.

    Args:
        image: Frame that the landmarks are drawn onto.
        results: Holistic results object providing `face_landmarks`,
            `pose_landmarks`, `left_hand_landmarks` and `right_hand_landmarks`.
    """
    spec = mp_drawing.DrawingSpec
    styled_parts = (
        # (landmarks, connections, landmark style, connection style)
        # Face: contour connections (FACEMESH_TESSELATION is the alternative mesh).
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         # Colour channels are 8-bit, so 255 is the largest valid value.
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=2)),
        # Left hand
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Right hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )
    for landmarks, connections, point_style, line_style in styled_parts:
        mp_drawing.draw_landmarks(image, landmarks, connections, point_style, line_style)

# 3. Extract Keypoint Values

In [7]:
def extract_keypoints(results):
    """Flatten one frame's holistic landmarks into a single 1-D feature vector.

    Layout of the returned vector (1662 values in total):
      * pose:       33 landmarks x (x, y, z, visibility) = 132
      * face:       468 landmarks x (x, y, z)            = 1404
      * left hand:  21 landmarks x (x, y, z)             = 63
      * right hand: 21 landmarks x (x, y, z)             = 63

    A part whose landmarks are missing (falsy) contributes zeros, so the
    layout stays the same for every frame.

    Args:
        results: Object with `pose_landmarks`, `face_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`; each is either
            falsy or has a `.landmark` sequence of points.

    Returns:
        1-D numpy array: pose, face, left hand, right hand, in that order.
    """
    xyz = ('x', 'y', 'z')

    def flatten_part(part, fields, expected_count):
        # Missing part -> zero block of the size the part would normally occupy.
        if not part:
            return np.zeros(expected_count * len(fields))
        rows = [[getattr(point, field) for field in fields] for point in part.landmark]
        return np.array(rows).flatten()

    pieces = [
        flatten_part(results.pose_landmarks, xyz + ('visibility',), 33),
        flatten_part(results.face_landmarks, xyz, 468),
        flatten_part(results.left_hand_landmarks, xyz, 21),
        flatten_part(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(pieces)

# 4. Collect Keypoint Values

In [3]:
# Source clips live in ASL_ORI_PATH/<action>/<video file>; extracted keypoints
# are written below ASL_MP_PATH.
ASL_ORI_PATH = 'videos'
ASL_MP_PATH = 'ASL_MP'

# One sub-directory per sign. os.listdir() returns entries in arbitrary order,
# but the position of each action in this list becomes its class index later
# on, so sort to keep the label mapping stable across runs and machines.
# Non-directory entries (e.g. .DS_Store) are dropped because every action is
# listed as a directory further down.
actions = sorted(
    entry for entry in os.listdir(ASL_ORI_PATH)
    if os.path.isdir(os.path.join(ASL_ORI_PATH, entry))
)

if os.path.exists(ASL_MP_PATH):
    print("exists")
else:
    print("create dir")
    os.makedirs(ASL_MP_PATH, exist_ok=True)

exists


In [9]:
# Pre-create one output directory per source video: ASL_MP_PATH/<action>/<1..N>.
for action in actions:
    num_videos = len(os.listdir(os.path.join(ASL_ORI_PATH, action)))
    for video in range(1, num_videos + 1):
        # exist_ok keeps re-runs harmless while still surfacing real failures
        # (permissions, a file in the way) that a bare `except: pass` would hide.
        os.makedirs(os.path.join(ASL_MP_PATH, action, str(video)), exist_ok=True)

In [21]:
# Fixed sequence length: every video is sampled down or padded up to this many frames.
max_frames = 64

In [62]:
def _select_frame_numbers(num_frames, target):
    """Return `target` 1-based frame numbers for a clip with `num_frames` frames.

    Longer clips are randomly sub-sampled (kept in ascending order). Shorter
    clips are padded by repeating the first frame at the start and the last
    frame at the end.
    """
    if num_frames > target:
        # Sample 1-based numbers: the caller seeks to `frame_num - 1`, so a
        # 0-based sample would seek to -1 and could never reach the last frame.
        return sorted(random.sample(range(1, num_frames + 1), target))
    lead = (target - num_frames) // 2        # repeats of the first frame
    tail = target - lead - num_frames        # repeats of the last frame
    return [1] * lead + list(range(1, num_frames + 1)) + [num_frames] * tail


# Extract keypoints for every sampled frame of every video and store them as
# ASL_MP_PATH/<action>/<video index>/<frame number>.npy.
# NOTE: files are named after the source frame number, so the padded repeats of
# the first/last frame land on the same file; the loading cell re-pads short
# clips. Start from an empty ASL_MP_PATH when re-extracting, otherwise files
# left by an earlier run stay next to the new ones.
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

    for action in actions:
        src_dir = os.path.join(ASL_ORI_PATH, action)

        for index, video in enumerate(os.listdir(src_dir)):
            video_path = os.path.join(src_dir, video)
            out_dir = os.path.join(ASL_MP_PATH, action, str(index + 1))
            os.makedirs(out_dir, exist_ok=True)

            cap = cv2.VideoCapture(video_path)
            try:
                len_video = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                if len_video <= 0:
                    # Unreadable or empty file: there is nothing to sample or pad with.
                    print('skipping video without frames:', video_path)
                    continue

                for frame_num in _select_frame_numbers(len_video, max_frames):
                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num - 1)
                    ret, frame = cap.read()
                    if not ret:
                        # The reported frame count can exceed what is actually
                        # decodable; skip instead of passing None to cvtColor.
                        print('could not read frame', frame_num, 'of', video_path)
                        continue

                    # Make detections
                    _, results = mediapipe_detection(frame, holistic)

                    keypoints = extract_keypoints(results)
                    np.save(os.path.join(out_dir, str(frame_num)), keypoints)
            finally:
                # Release every capture, not just the last one opened.
                cap.release()

      

# 6. Preprocess Data and Create Labels and Features

In [6]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [7]:
# Map each action name to its integer class index (its position in `actions`).
label_map = dict(zip(actions, range(len(actions))))

In [8]:
# Display the action -> class index mapping.
label_map

{'deaf': 0,
 'fine': 1,
 'good_morning': 2,
 'hearing': 3,
 'hello': 4,
 'i_love_you': 5,
 'nice_to_meet_you': 6,
 'no': 7,
 'thanks': 8,
 'yes': 9}

In [9]:
# Inspect the per-video folders of one action. The names are strings and the
# listing below is not in numeric order ('1', '10', '11', ...).
os.listdir(os.path.join(ASL_MP_PATH, 'deaf'))

['1',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '2',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '3',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '4',
 '40',
 '41',
 '42',
 '43',
 '44',
 '5',
 '6',
 '7',
 '8',
 '9']

In [10]:
import re
def sorted_alphanumeric(data):
    """Return `data` sorted in natural order, so that '2' comes before '10'.

    Each string is split into digit and non-digit runs; digit runs compare as
    integers and the remaining text compares case-insensitively.

    Args:
        data: Iterable of strings.

    Returns:
        A new sorted list; `data` itself is left untouched.
    """
    def natural_key(name):
        key = []
        # The capturing group keeps the digit runs in the split result.
        for chunk in re.split('([0-9]+)', name):
            key.append(int(chunk) if chunk.isdigit() else chunk.lower())
        return key

    return sorted(data, key=natural_key)

In [None]:
# Build one window (list of per-frame keypoint vectors) per video and the
# matching integer label.
features, labels = [], []
for action in actions:
    action_dir = os.path.join(ASL_MP_PATH, action)

    print('=================')
    print(action)
    for video in os.listdir(action_dir):
        print('video:', video)
        video_dir = os.path.join(action_dir, video)

        # Natural sort so frame '2' precedes frame '10'.
        frame_list = sorted_alphanumeric(os.listdir(video_dir))
        if not frame_list:
            # Nothing was extracted for this clip, so there is no frame to pad with.
            print('skipping empty video dir:', video_dir)
            continue

        # Each file is loaded exactly once; the arrays are kept as-is because
        # np.array(features) stacks them directly.
        frames = [np.load(os.path.join(video_dir, name)) for name in frame_list]

        # Clips with fewer than max_frames stored frames are padded at the
        # front with copies of their first frame.
        missing = max(max_frames - len(frames), 0)
        window = [frames[0]] * missing + frames

        features.append(window)
        labels.append(label_map[action])

# Build Arrays, Save, and Split Data

In [24]:
# Sanity check: (number of videos, frames per video, features per frame).
np.array(features).shape

(319, 64, 1662)

In [27]:
# Sanity check: one label per video.
np.array(labels).shape

(319,)

In [28]:
# Stack the per-video windows into one array: (videos, frames, features).
X = np.array(features)
X.shape

(319, 64, 1662)

In [29]:
# One-hot encode the integer class labels and store them as integers.
y = to_categorical(labels).astype(int)
y.shape

(319, 10)

In [31]:
# np.save does not create missing directories, so make sure 'Xy' exists first.
os.makedirs('Xy', exist_ok=True)
np.save('Xy/X', X)

In [32]:
# np.save does not create missing directories, so make sure 'Xy' exists first.
os.makedirs('Xy', exist_ok=True)
np.save('Xy/y', y)

In [41]:
# Hold out 5% of the samples as the test set; random_state fixes the shuffle.
seed_everything(seed=SEED)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=SEED)

In [42]:
# Carve a validation set (7.5%) out of the remaining training data.
# NOTE(review): this rebinds X_train / y_train, so re-running only this cell
# shrinks the training set again -- re-run the train/test split cell first.
seed_everything(seed=SEED)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.075, random_state=SEED)

In [43]:
# Report the size of each split, one per line: train, validation, test.
print(X_train.shape, X_val.shape, X_test.shape, sep='\n')

(280, 64, 1662)
(23, 64, 1662)
(16, 64, 1662)


# 7. Build and Train LSTM Neural Network

In [48]:
# Number of classes; used as the size of the softmax output layer.
len(actions)

10

In [107]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ReduceLROnPlateau
from tensorflow import keras

In [113]:
# Run identifier; selects the TensorBoard log sub-directory.
TRIAL = 5
log_dir = os.path.join('Logs', str(TRIAL))
tb_callback = TensorBoard(log_dir=log_dir)
# Stop training once val_loss has not improved for 20 consecutive epochs.
es_callback = EarlyStopping(monitor='val_loss', patience=20)
# The learning-rate reduction must use a shorter patience than early stopping;
# with a longer one training would always stop before the rate is lowered.
lr_callback = ReduceLROnPlateau(monitor='val_loss', patience=10)

In [114]:
seed_everything(seed=SEED)

# Length of one frame's feature vector as produced by extract_keypoints:
# pose 33*4 + face 468*3 + two hands 21*3 each = 1662.
num_features = 33*4 + 468*3 + 2*21*3

# Two stacked LSTMs over the frame sequence, followed by a small dense
# classifier with one softmax output per action.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(max_frames, num_features)),
    # No input_shape here: only the first layer declares it, and this layer
    # actually receives the (max_frames, 64) sequence emitted by the LSTM above.
    LSTM(32, return_sequences=False, activation='relu'),
    Dense(32, activation='relu'),
    Dense(len(actions), activation='softmax'),
])

In [115]:
# Adam with its default settings; categorical cross-entropy matches the one-hot labels in y.
# NOTE(review): with this loss, 'accuracy' presumably resolves to categorical
# accuracy, so the two metrics likely report the same number -- confirm.
opt = keras.optimizers.Adam()
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['categorical_accuracy', 'accuracy'])

In [116]:
# Print each layer's output shape and parameter count.
model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_26 (LSTM)              (None, 64, 64)            442112    
                                                                 
 lstm_27 (LSTM)              (None, 32)                12416     
                                                                 
 dense_32 (Dense)            (None, 32)                1056      
                                                                 
 dense_33 (Dense)            (None, 10)                330       
                                                                 
Total params: 455,914
Trainable params: 455,914
Non-trainable params: 0
_________________________________________________________________


In [117]:
seed_everything(seed=SEED)

# `epochs` is only an upper bound: es_callback ends training once val_loss
# stops improving, so the run no longer has to be interrupted by hand, and
# lr_callback lowers the learning rate when val_loss plateaus.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=2000,
    callbacks=[tb_callback, es_callback, lr_callback],
)

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

KeyboardInterrupt: 