In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import time
import cv2
import mediapipe as mp

### DEFINE HELPER FUNCTIONS

In [3]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [4]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and return the frame together with the results.

    Parameters
    ----------
    image : array
        Frame in BGR channel order (it is converted with ``cv2.COLOR_BGR2RGB``).
    model : object
        Anything exposing ``process(rgb_image)``; its return value is passed
        through untouched.

    Returns
    -------
    tuple
        ``(bgr_image, results)`` where ``bgr_image`` is the RGB working copy
        converted back to BGR and ``results`` is what ``model.process`` returned.
    """
    # The model is fed an RGB copy of the frame.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Lock the buffer while the model reads it, then unlock it again.
    # NOTE(review): presumably this lets the model use the array without
    # copying it -- confirm against the MediaPipe documentation.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Hand back a BGR image so it can be drawn on / displayed with OpenCV.
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [5]:
def draw_landmarks(image, results):
    """Draw the face, pose and both hand landmark groups from ``results`` onto ``image``.

    Each group is drawn with ``mp_drawing.draw_landmarks`` using its own
    landmark style and connection style. The groups are drawn in the order
    face, pose, left hand, right hand. Nothing is returned; the drawing call
    receives ``image`` itself.
    """
    # One row per landmark group:
    # (attribute on `results`, connection set on `mp_holistic`,
    #  DrawingSpec kwargs for the landmarks, DrawingSpec kwargs for the connections)
    layers = (
        ("face_landmarks", "FACEMESH_CONTOURS",
         dict(color=(0, 0, 255), thickness=1, circle_radius=1),
         dict(thickness=1, circle_radius=1)),
        ("pose_landmarks", "POSE_CONNECTIONS",
         dict(color=(0, 0, 255), thickness=2, circle_radius=4),
         dict(thickness=2, circle_radius=2)),
        ("left_hand_landmarks", "HAND_CONNECTIONS",
         dict(color=(121, 22, 76), thickness=4, circle_radius=1),
         dict(color=(121, 44, 250), thickness=2, circle_radius=1)),
        ("right_hand_landmarks", "HAND_CONNECTIONS",
         dict(color=(245, 123, 55), thickness=4, circle_radius=1),
         dict(color=(245, 66, 230), thickness=2, circle_radius=1)),
    )

    for landmarks_attr, connections_attr, landmark_style, connection_style in layers:
        mp_drawing.draw_landmarks(
            image,
            getattr(results, landmarks_attr),
            getattr(mp_holistic, connections_attr),
            mp_drawing.DrawingSpec(**landmark_style),
            mp_drawing.DrawingSpec(**connection_style),
        )

In [78]:
def _landmarks_to_vector(landmark_list, num_points, fields):
    """Flatten one landmark group into a 1-D array, or zeros if it is missing.

    ``landmark_list`` is expected to expose a ``.landmark`` iterable whose items
    carry the attributes named in ``fields``. When ``landmark_list`` is falsy
    (e.g. ``None`` because nothing was detected) a zero vector of length
    ``num_points * len(fields)`` is returned so the output size never changes.
    """
    if not landmark_list:
        return np.zeros(num_points * len(fields))
    return np.array(
        [[getattr(point, field) for field in fields] for point in landmark_list.landmark]
    ).flatten()


def extract_keypoints(results):
    """Turn a holistic ``results`` object into one flat feature vector.

    The vector is the concatenation, in this order, of:

    * pose       -- 33 points x (x, y, z, visibility) = 132 values
    * face       -- 468 points x (x, y, z)            = 1404 values
    * left hand  -- 21 points x (x, y, z)             = 63 values
    * right hand -- 21 points x (x, y, z)             = 63 values

    for a total of 1662 values. Any group that is missing from ``results`` is
    replaced by zeros of the matching length.

    Each group is checked against its *own* attribute. Previously the pose
    group was gated on ``results.left_hand_landmarks``, which zeroed the pose
    whenever the left hand was out of frame and raised ``AttributeError`` when
    the left hand was present but the pose was not.
    """
    xyz = ("x", "y", "z")
    pose = _landmarks_to_vector(results.pose_landmarks, 33, xyz + ("visibility",))
    face = _landmarks_to_vector(results.face_landmarks, 468, xyz)
    lh = _landmarks_to_vector(results.left_hand_landmarks, 21, xyz)
    rh = _landmarks_to_vector(results.right_hand_landmarks, 21, xyz)
    return np.concatenate([pose, face, lh, rh])

(1662,)

In [45]:
# Live preview: run holistic detection on webcam frames and show the drawn
# landmarks until 'q' is pressed. Nothing is written to disk in this cell.

cap = cv2.VideoCapture(0)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # A failed read gives no usable frame; passing it on would
                # make cv2.cvtColor raise inside mediapipe_detection.
                print("Could not read a frame from the webcam; stopping preview.")
                break

            image, results = mediapipe_detection(frame, holistic)
            draw_landmarks(image, results)

            cv2.imshow("OpenCV Feed", image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame fails
    # part-way through -- otherwise the device stays locked until the kernel restarts.
    cap.release()
    cv2.destroyAllWindows()

### GETTING DATA

In [89]:
# Root folder that holds the exported keypoint arrays.
DATA_PATH = "MP_DATA"

# Action labels to record and classify.
actions = np.array(["hello", "thanks", "iloveyou"])

# Number of recorded sequences per action.
no_sequences = 30

# Number of frames in each sequence.
sequence_length = 30

In [92]:
def build_fold():
    """Create ``DATA_PATH/<action>/<sequence>`` for every action and sequence index.

    Folders that already exist are left alone, so the function is safe to call
    repeatedly. Any other failure (e.g. a permission error) is raised instead
    of being silently swallowed, because later cells write into these folders.
    """
    for action in actions:
        for sequence in range(no_sequences):
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)
build_fold()

In [93]:
# Record training data: for every action and sequence, grab `sequence_length`
# webcam frames, extract the keypoints of each frame and save them as
# DATA_PATH/<action>/<sequence>/<frame_num>.npy. Press 'q' to abort.

cap = cv2.VideoCapture(0)
build_fold()
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        quit_requested = False
        for action in actions:
            for sequence in range(no_sequences):
                for frame_num in range(sequence_length):
                    ret, frame = cap.read()
                    if not ret:
                        # Without a frame there is nothing valid to save; fail
                        # loudly rather than letting cv2.cvtColor raise an
                        # obscure error or leaving a silently incomplete dataset.
                        raise RuntimeError(
                            'Could not read a frame from the webcam while collecting '
                            '{} sequence {} frame {}'.format(action, sequence, frame_num)
                        )

                    image, results = mediapipe_detection(frame, holistic)

                    draw_landmarks(image, results)

                    if frame_num == 0:
                        cv2.putText(image, 'STARTING COLLECTION', (120,200), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video number {}'.format(action, sequence), (15,12), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0,0,255),1, cv2.LINE_AA)

                    # Show the frame *before* pausing so the banner is actually
                    # on screen during the 2 s gap between sequences.
                    cv2.imshow("OpenCV Feed", image)
                    if frame_num == 0:
                        cv2.waitKey(2000)

                    keypoints = extract_keypoints(results)
                    # np.save appends the '.npy' extension to this path.
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)

                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        quit_requested = True
                        break

                # A bare `break` only leaves the innermost loop, so propagate
                # the quit request through the outer loops explicitly.
                if quit_requested:
                    break
            if quit_requested:
                break
finally:
    # Always free the camera and close the window, even on errors.
    cap.release()
    cv2.destroyAllWindows()
    

### PREPROCESS DATA AND CREATE LABELS

In [94]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [97]:
# Map every action name to its integer class index (position in `actions`).
label_map = {name: index for index, name in enumerate(actions)}

# `sequences[i]` is the list of per-frame keypoint arrays of one recording,
# `labels[i]` is the class index of that recording.
sequences, labels = [], []

for action in actions:
    action_dir = os.path.join(DATA_PATH, action)
    for sequence in range(no_sequences):
        # Load the saved frames of this recording in frame order.
        window = [
            np.load(os.path.join(action_dir, str(sequence), '{}.npy'.format(frame_num)))
            for frame_num in range(sequence_length)
        ]
        sequences.append(window)
        labels.append(label_map[action])

In [116]:
# Stack the recordings into one feature array and one-hot encode the labels.
X = np.array(sequences)
y = to_categorical(labels).astype(int)

# Fix the seed so the (small) held-out set is the same on every run; without it
# a Restart & Run All produces a different split and non-comparable results.
RANDOM_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.05, random_state=RANDOM_SEED
)

### BUILD AND TRAIN LSTM NETWORK

In [117]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [118]:
# Folder that receives the TensorBoard event files written during training.
log_dir = 'Logs'
tb_callbacks = TensorBoard(log_dir=log_dir)

In [119]:
# Three stacked LSTM layers followed by a small dense classifier head.
# The first layer takes sequences of 30 frames with 1662 features each;
# the output layer has one softmax unit per entry in `actions`.
model = Sequential([
    LSTM(64, input_shape=(30, 1662), activation='relu', return_sequences=True),
    LSTM(128, activation='relu', return_sequences=True),
    # Last recurrent layer returns only its final output (return_sequences=False).
    LSTM(64, activation='relu', return_sequences=False),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])

In [120]:
# Multi-class setup: one-hot targets, so categorical cross-entropy loss and
# categorical accuracy as the reported metric.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [122]:
# Train for at most 2000 epochs, but stop automatically once the training loss
# has not improved for a while. Without a stopping rule the run had to be
# interrupted by hand, which leaves the cell in an error state and makes
# Restart & Run All impractical.
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='loss', patience=50, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callbacks, early_stopping])

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

Epoch 74/2000
Epoch 75/2000
Epoch 76/2000
Epoch 77/2000
Epoch 78/2000
Epoch 79/2000
Epoch 80/2000
Epoch 81/2000
Epoch 82/2000
Epoch 83/2000
Epoch 84/2000
Epoch 85/2000
Epoch 86/2000
Epoch 87/2000
Epoch 88/2000
Epoch 89/2000
Epoch 90/2000
Epoch 91/2000
Epoch 92/2000
Epoch 93/2000
Epoch 94/2000
Epoch 95/2000
Epoch 96/2000

KeyboardInterrupt: 