# 1. Import and Install Dependencies

In [None]:
#!pip install tensorflow opencv-python mediapipe sklearn matplotlib

In [1]:
import os
import cv2
import mediapipe as mp
import numpy as np
from matplotlib import pyplot as plt
import time

# 2. Keypoints using MP Holistic

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
def mediapipe_detection(image, model):
    # Transfer image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    # Make prediction
    results = model.process(image)
    return results

In [4]:
def draw_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION)
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    # Draw right hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

In [5]:
def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(
        image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
        mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
        mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
    )
    # Draw pose connections
    mp_drawing.draw_landmarks(
        image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
        mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
        mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
    )
    # Draw left hand connections
    mp_drawing.draw_landmarks(
        image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
        mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
        mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
    ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(
        image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
        mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
        mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
    ) 

### Capture Landmarks with VideoCam

In [6]:
def capture_landmarks():
    cap = cv2.VideoCapture(0)
    # Set mediapipe model 
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        # Read feed
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Make detections
            results = mediapipe_detection(frame, holistic)
            # Draw landmarks
            draw_styled_landmarks(frame, results)
            # Show to screen
            cv2.imshow('OpenCV Feed', frame)

            # Break gracefully
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break

        cap.release()
        cv2.destroyAllWindows()

In [None]:
# capture_landmarks()

# 3. Extract Keypoint Values

In [7]:
def extract_keypoints(results):
    # Extract keypoints if landmarks captured, else fill with zeros
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

# 4. Collect Keypoints Values from Videos

In [None]:
def slice_video(path):
    cap = cv2.VideoCapture(path)
    
    count = 0
    action = {}
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        # Read feed
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("No frame received...")
                break

            # Make detections
            results = mediapipe_detection(frame, holistic)
            # Draw landmarks
            draw_styled_landmarks(frame, results)

            # Draw frame counter
            count += 1
            cv2.putText(frame, f'Frame {count}', (10,30), 
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA) 

            # Show to screen
            cv2.imshow('OpenCV Feed', frame)

            # Extract frame
            img_path = os.path.join('MP_Video', video_name, f'{count}.jpg')
            cv2.imwrite(img_path, frame)

            # Extract keypoints
            keypoints = extract_keypoints(results)
            npy_path = os.path.join('MP_Video', video_name, str(count))
            np.save(npy_path, keypoints)

            # Break & pause
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            if key == ord(' '):
                i = len(action) + 1
                action[i] = count

        print(f"{video_name} = {action}")
        cap.release()
        cv2.destroyAllWindows()

In [None]:
def check_collection(filename, start_frame, length=29):
    end_frame = start_frame + length
    i = start_frame
    while True:

        # Reply logic
        i = start_frame if i == end_frame else i + 1

        # Show image
        img_path = os.path.join('MP_Video', filename, f"{i}.jpg")
        frame = cv2.imread(img_path)
        cv2.imshow(f'Frame {start_frame} to {end_frame}', frame)

        # Break gracefully
        key = cv2.waitKey(10) & 0xFF
        if key == ord('q'):
            break

    cv2.destroyAllWindows()

In [None]:
def compile_data(dic, file_name, action_name):
    data_num = int(file_name.split('_')[-1]) * 10 - 10
    for key, value in dic.items():
        if type(value) == tuple:
            value = value[0]
        print(key + data_num, value)
        seq = []
        for i in range(value, value+30):
            frame = np.load(os.path.join('MP_Video', file_name, "{}.npy".format(i)))
            seq.append(frame)
        seq_npy = np.array(seq)
        np.save(os.path.join('SL_Data', action_name, "{}.npy".format(key + data_num)), seq_npy)

# 5. Preprocess Data and Create Labels and Features

In [8]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [9]:
actions = np.array(['i',
                    'my',
                    'children',
                    'have',
                    'dont_have',
                    'only_have',
                    'is',
                    'good',
                    'need',
                    'deal', 
                    'forget',
                    'bring',
                    'replace',
                    'help',
                    'single',
                    'below',
                    'stamp',
                    'card',
                    'account'])

In [10]:
label_map = {label:num for num, label in enumerate(actions)}

In [11]:
label_map

{'end': 0, 'meeting': 1, 'thank': 2}

In [12]:
sequences, labels = None, []
for action in actions:
    for i in range(40):
        data = np.load(os.path.join('SL_Data', action, i, "{}.npy".format(i+1)))
        if sequences is None:
            sequences = data
        else:
            sequences = np.append(sequences, data, axis=0)
        labels.append(label_map[action])

In [13]:
X = sequences.reshape(-1,30,1662)
X.shape

(120, 30, 1662)

In [14]:
y = to_categorical(labels).astype(int)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [16]:
y_test.shape

(12, 3)

# 6. Build and Train LSTM Neural Network

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [18]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [19]:
# Layer
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(30,1662)))
model.add(Dense(64, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [20]:
# optimizer, loss, accuracy
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [21]:
# epochs, callback
model.fit(X_train, y_train, epochs=1000, callbacks=[tb_callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

Epoch 147/1000
Epoch 148/1000
Epoch 149/1000
Epoch 150/1000
Epoch 151/1000
Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/

Epoch 291/1000
Epoch 292/1000
Epoch 293/1000
Epoch 294/1000
Epoch 295/1000
Epoch 296/1000
Epoch 297/1000
Epoch 298/1000
Epoch 299/1000
Epoch 300/1000
Epoch 301/1000
Epoch 302/1000
Epoch 303/1000
Epoch 304/1000
Epoch 305/1000
Epoch 306/1000
Epoch 307/1000
Epoch 308/1000
Epoch 309/1000
Epoch 310/1000
Epoch 311/1000
Epoch 312/1000
Epoch 313/1000
Epoch 314/1000
Epoch 315/1000
Epoch 316/1000
Epoch 317/1000
Epoch 318/1000
Epoch 319/1000
Epoch 320/1000
Epoch 321/1000
Epoch 322/1000
Epoch 323/1000
Epoch 324/1000
Epoch 325/1000
Epoch 326/1000
Epoch 327/1000
Epoch 328/1000
Epoch 329/1000
Epoch 330/1000
Epoch 331/1000
Epoch 332/1000
Epoch 333/1000
Epoch 334/1000
Epoch 335/1000
Epoch 336/1000
Epoch 337/1000
Epoch 338/1000
Epoch 339/1000
Epoch 340/1000
Epoch 341/1000
Epoch 342/1000
Epoch 343/1000
Epoch 344/1000
Epoch 345/1000
Epoch 346/1000
Epoch 347/1000
Epoch 348/1000
Epoch 349/1000
Epoch 350/1000
Epoch 351/1000
Epoch 352/1000
Epoch 353/1000
Epoch 354/1000
Epoch 355/1000
Epoch 356/1000
Epoch 357/

KeyboardInterrupt: 

In [22]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 64)                442112    
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 3)                 195       
                                                                 
Total params: 446,467
Trainable params: 446,467
Non-trainable params: 0
_________________________________________________________________


In [23]:
def accuracy(X, y):
    res = model.predict(X)
    accuracy = (np.argmax(res, axis=1) == np.argmax(y, axis=1)).sum()/len(res)
    print(actions[np.argmax(res, axis=1)])
    print(actions[np.argmax(y, axis=1)])
    return accuracy

In [24]:
accuracy(X_train, y_train)

['meeting' 'meeting' 'meeting' 'end' 'meeting' 'thank' 'meeting' 'end'
 'thank' 'thank' 'thank' 'thank' 'end' 'meeting' 'thank' 'end' 'thank'
 'thank' 'end' 'thank' 'thank' 'meeting' 'thank' 'end' 'end' 'thank'
 'thank' 'thank' 'thank' 'thank' 'meeting' 'thank' 'end' 'thank' 'end'
 'meeting' 'end' 'meeting' 'end' 'end' 'end' 'thank' 'thank' 'thank'
 'thank' 'end' 'meeting' 'thank' 'thank' 'end' 'end' 'thank' 'end' 'end'
 'thank' 'thank' 'thank' 'end' 'end' 'thank' 'end' 'thank' 'meeting'
 'thank' 'thank' 'thank' 'thank' 'thank' 'end' 'end' 'meeting' 'thank'
 'end' 'meeting' 'end' 'meeting' 'meeting' 'end' 'meeting' 'thank' 'thank'
 'end' 'thank' 'thank' 'thank' 'meeting' 'end' 'meeting' 'meeting'
 'meeting' 'thank' 'thank' 'end' 'end' 'thank' 'end' 'end' 'thank' 'end'
 'end' 'end' 'meeting' 'end' 'end' 'meeting' 'end' 'thank' 'end']
['meeting' 'meeting' 'meeting' 'end' 'meeting' 'thank' 'meeting' 'end'
 'thank' 'thank' 'thank' 'meeting' 'end' 'meeting' 'thank' 'end' 'thank'
 'thank' 'e

0.8611111111111112

In [25]:
accuracy(X_test, y_test)

['end' 'end' 'meeting' 'end' 'end' 'end' 'meeting' 'thank' 'thank'
 'meeting' 'end' 'thank']
['end' 'thank' 'meeting' 'end' 'end' 'end' 'meeting' 'meeting' 'meeting'
 'meeting' 'end' 'meeting']


0.6666666666666666

In [None]:
res = model.predict(X[17].reshape(-1, 30, 1662))
actions[np.argmax(res, axis=1)]

In [None]:
actions[np.argmax(y[17])]

In [26]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [27]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Make detections
        results = mediapipe_detection(frame, holistic)
        # Draw landmarks
        draw_styled_landmarks(frame, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            frame = prob_viz(res, actions, frame, colors)
            
        cv2.rectangle(frame, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(frame, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', frame)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
meeting
end
end
end
end
thank
thank
thank
thank
thank
thank
thank
thank
thank
meeting
end
end
end
end
end
end
end
end
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
meeting
end
end
end
end
end
thank
thank
thank
meeting
meeting
thank
meeting
thank
thank
thank
meeting
thank
meeting
meeting
meeting
thank
thank
thank
thank
thank
thank
meeting
end
end
end
end
end
end
end
end
thank
thank
meeting
meeting
meeting
meeting
meeting
meeting
meeting
meeting
meeting
meeting
meeting
meeting
meeting
meeting
thank
thank
meeting
meeting
end
end
end
end
end
thank
thank
thank
end
end
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
thank
meeting
end
end
end
end
end
end
end
end
end
end
end
end
end
thank
meeting
meeting
meeting
end
end
end
end
end
thank
thank
thank
end
end
end
end
thank
end
thank
thank
thank
thank
thank
meeting
th