In [13]:
import os
import cv2
import mediapipe as mp
import numpy as np
from matplotlib import pyplot as plt
import time
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

## Load Data

In [30]:
actions = np.array(['i', 'my', 'children', 'have', 'dont_have', 
                    'only_have', 'is', 'good', 'need', 'deal', 
                    'forget', 'bring', 'replace', 'help', 'single',
                    'below', 'stamp', 'card', 'account'])
label_map = {label:num for num, label in enumerate(actions)}

In [7]:
def get_sequences(path, action):
    seqs = os.listdir(os.path.join('M_Data', action))
    if '.ipynb_checkpoints' in seqs: seqs.remove('.ipynb_checkpoints')
    if '.DS_Store' in seqs: seqs.remove('.DS_Store')
    for i in range(len(seqs)):
        seqs[i] = int(seqs[i])
    return sorted(seqs)

In [31]:
def load_data(actions, label_map):
    sequences, labels = None, []
    for action in actions:
        seqs = get_sequences('M_Data', action)
        for s in seqs:
            print(f"{action} [{s}/{len(seqs)}]", end="\r")
            for i in range(30):
                data = np.load(os.path.join('M_Data', action, str(s), f"{i}.npy"))
                if sequences is None:
                    sequences = data
                else:
                    sequences = np.append(sequences, data, axis=0)
            labels.append(label_map[action])
        print(f"{action} [{len(seqs)}/{len(seqs)}]")
    return sequences, labels

In [29]:
sequences, labels = load_data(actions)

i [30/30]
my [30/30]
children [30/30]
have [30/30]
dont_have [30/30]
only_have [30/30]
is [30/30]
good [30/30]
need [30/30]
deal [30/30]
forget [50/50]
bring [30/30]
replace [30/30]
help [30/30]
single [30/30]
below [30/30]
stamp [30/30]
card [30/30]
account [40/40]


In [12]:
X = sequences.reshape(-1,30,258)
y = to_categorical(labels).astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
print(X.shape)
print(X_train.shape)

(600, 30, 258)

# Train Model

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [18]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [22]:
# Layer
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(30,258)))
model.add(Dense(64, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [23]:
# optimizer, loss, accuracy
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [25]:
# epochs, callback
model.fit(X_train, y_train, epochs=1000, callbacks=[tb_callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
 1/17 [>.............................] - ETA: 0s - loss: 1.7274e-05 - categorical_accuracy: 1.0000

KeyboardInterrupt: 

In [32]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 64)                82688     
                                                                 
 dense_2 (Dense)             (None, 64)                4160      
                                                                 
 dense_3 (Dense)             (None, 19)                1235      
                                                                 
Total params: 88,083
Trainable params: 88,083
Non-trainable params: 0
_________________________________________________________________


In [42]:
def accuracy(X, y):
    res = model.predict(X)
    accuracy = (np.argmax(res, axis=1) == np.argmax(y, axis=1)).sum()/len(res)
#     print(actions[np.argmax(res, axis=1)])
#     print(actions[np.argmax(y, axis=1)])
    return accuracy

In [43]:
print(accuracy(X_train, y_train))
print(accuracy(X_test, y_test))

1.0
0.8833333333333333


In [50]:
model.save("model_0605")

INFO:tensorflow:Assets written to: model_0605/assets




# Realtime Test

In [46]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [59]:
colors = [(245,117,16)] * 19
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [51]:
def mediapipe_detection(image, model):
    # Transfer image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    # Make prediction
    results = model.process(image)
    return results

In [52]:
def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(
        image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
        mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
        mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
    )
    # Draw pose connections
    mp_drawing.draw_landmarks(
        image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
        mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
        mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
    )
    # Draw left hand connections
    mp_drawing.draw_landmarks(
        image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
        mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
        mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
    ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(
        image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
        mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
        mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
    ) 

In [55]:
def extract_keypoints_without_face(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, lh, rh])

In [60]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Make detections
        results = mediapipe_detection(frame, holistic)
        # Draw landmarks
        draw_styled_landmarks(frame, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints_without_face(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            frame = prob_viz(res, actions, frame, colors)
            
        cv2.rectangle(frame, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(frame, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', frame)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
good
good
good
good
good
good
good
good
good
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
below
below
below
below
below
below
below
below
below
below
only_have
only_have
only_have
replace
account
account
account
replace
replace
replace
replace
replace
replace
bring
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
below
below
below
i
i
i
i
i
i
good
good
good
good
good
good
good
good
good
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
forget
below
below
below
below
below
below
below
below
below
below
my
forget
forget
for

KeyboardInterrupt: 