# 1. Import and Install Dependencies

In [None]:
!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python mediapipe sklearn matplotlib

In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import copy
import itertools

# 2. Keypoints using MP Holistic

In [3]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
mp_drawing_styles = mp.solutions.drawing_styles

In [4]:
def mediapipe_detection(image, model):
    image = cv2.flip(image, 1)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [5]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections

In [6]:
def draw_styled_landmarks(image, results):
    # Draw face connections
    #mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, 
                             #mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                            # mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             #) 

    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing_styles.get_default_hand_landmarks_style(),
                              mp_drawing_styles.get_default_hand_connections_style())
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                              mp_drawing_styles.get_default_hand_landmarks_style(),
                              mp_drawing_styles.get_default_hand_connections_style())                            
                            

In [6]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(image, results)
        print(f'shape: {np.shape(keypoints)}')
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

[ WARN:0@2.364] global /io/opencv/modules/videoio/src/cap_v4l.cpp (889) open VIDEOIO(V4L2:/dev/video0): can't open camera by index
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [7]:
cap.release()
cv2.destroyAllWindows()

In [30]:
draw_landmarks(frame, results)

# 3. Extract Keypoint Values

In [341]:
len(results.left_hand_landmarks.landmark)

AttributeError: 'NoneType' object has no attribute 'landmark'

In [253]:
nose_landmark = results.face_landmarks.landmark[0]
image_width, image_height = image.shape[1], image.shape[0]
nose_landmark_x = min(int(nose_landmark.x * image_width), image_width - 1)
nose_landmark_y = min(int(nose_landmark.y * image_height), image_height - 1)
nose_landmarks = [nose_landmark_x, nose_landmark_y]

calc_landmark_list(image, results.right_hand_landmarks, nose_landmarks)

AttributeError: 'NoneType' object has no attribute 'landmark'

In [227]:
nose_landmark_x

344

In [60]:
pose = []
for res in results.pose_landmarks.landmark:
    test = np.array([res.x, res.y, res.z, res.visibility])
    pose.append(test)

In [95]:
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)

In [101]:
len(lh)

63

In [423]:
results.pose_landmarks.landmark[0]

x: 0.7505885362625122
y: 0.40703168511390686
z: -0.3763485550880432
visibility: 0.9999094009399414

In [None]:
calc_lh = np.random.rand(21)
calc_rh = np.random.rand(21)
pre_lh = np.random.rand(42)
pre_rh = np.random.rand(42)
nose_landmark_x = 0
nose_landmark_y = 0

calc_landmark_list(image, results.left_hand_landmarks, [])

In [247]:
def calc_landmark_list2(image, landmarks, nose_landmarks):
    image_width, image_height = image.shape[1], image.shape[0]

    landmark_point = []

      # Keypoint
    for _, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        #landmark_z = abs(landmark.z)
        landmark_point.append([landmark_x, landmark_y, nose_landmarks[0], nose_landmarks[1]])
    return landmark_point

In [7]:
def calc_landmark_list(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]

    landmark_point = []

      # Keypoint
    for _, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        #landmark_z = abs(landmark.z)
        landmark_point.append([landmark_x, landmark_y])
    return landmark_point

In [248]:
def pre_process_landmark2(landmark_list):
    temp_landmark_list = copy.deepcopy(landmark_list)
  
      # Convert to relative coordinates
    base_x, base_y = 0, 0
    for index, landmark_point in enumerate(temp_landmark_list):
        if index == 0:
            base_x, base_y = landmark_point[0], landmark_point[1]
  
        temp_landmark_list[index][0] = temp_landmark_list[index][0] - base_x
        temp_landmark_list[index][1] = temp_landmark_list[index][1] - base_y
        temp_landmark_list[index][2] = temp_landmark_list[index][0] - temp_landmark_list[index][2]
        temp_landmark_list[index][3] = temp_landmark_list[index][1] - temp_landmark_list[index][3]
      # Convert to a one-dimensional list
    temp_landmark_list = list(
        itertools.chain.from_iterable(temp_landmark_list))
      
      # Normalization
    max_value = max(list(map(abs, temp_landmark_list)))
  
    def normalize_(n):
        return n / max_value
  
    temp_landmark_list = list(map(normalize_, temp_landmark_list))
      
    return temp_landmark_list

In [8]:
def pre_process_landmark(landmark_list):
    temp_landmark_list = copy.deepcopy(landmark_list)
  
      # Convert to relative coordinates
    base_x, base_y = 0, 0
    for index, landmark_point in enumerate(temp_landmark_list):
        if index == 0:
            base_x, base_y = landmark_point[0], landmark_point[1]
  
        temp_landmark_list[index][0] = temp_landmark_list[index][0] - base_x
        temp_landmark_list[index][1] = temp_landmark_list[index][1] - base_y
       
    temp_landmark_list = list(
        itertools.chain.from_iterable(temp_landmark_list))
      
      # Normalization
    max_value = max(list(map(abs, temp_landmark_list)))
  
    def normalize_(n):
        return n / max_value
  
    temp_landmark_list = list(map(normalize_, temp_landmark_list))
      
    return temp_landmark_list

In [9]:
def extract_keypoints2(image, results):
    calc_lh = np.random.rand(21)
    calc_rh = np.random.rand(21)
    pre_lh = np.random.rand(84)
    pre_rh = np.random.rand(84)
    nose_landmark_x = 0
    nose_landmark_y = 0
    
    if results.pose_world_landmarks:
        nose_landmark_x = results.pose_world_landmarks.landmark[0].x
        nose_landmark_y = results.pose_world_landmarks.landmark[0].y
    nose_landmarks = [nose_landmark_x, nose_landmark_y]
    if results.left_hand_landmarks:
        calc_lh = calc_landmark_list(image, results.left_hand_landmarks, nose_landmarks)
        pre_lh = pre_process_landmark(calc_lh)
    if results.right_hand_landmarks:
        calc_rh = calc_landmark_list(image, results.right_hand_landmarks, nose_landmarks)
        pre_rh = pre_process_landmark(calc_rh)

    return np.concatenate((np.array(pre_lh), np.array(pre_rh)))


In [88]:
def extract_keypoints(image, results):
    calc_lh = np.random.rand(21)
    calc_rh = np.random.rand(21)
    pre_lh = np.random.rand(42)
    pre_rh = np.random.rand(42)
    nose_landmark_x = 0
    nose_landmark_y = 0
    rh_world_x = 0
    lh_world_x = 0
    rh_world_y = 0
    lh_world_y = 0
    
    if results.pose_world_landmarks:
        nose_landmark_x = results.pose_world_landmarks.landmark[0].x
        nose_landmark_y = results.pose_world_landmarks.landmark[0].y
        rh_world_x = results.pose_world_landmarks.landmark[16].x
        lh_world_x = results.pose_world_landmarks.landmark[15].x
        rh_world_y = results.pose_world_landmarks.landmark[16].y
        lh_world_y = results.pose_world_landmarks.landmark[15].y
    nose_landmarks = [nose_landmark_x, nose_landmark_y]
    hand_world_landmarks = [rh_world_x, rh_world_y, lh_world_x, lh_world_y]
    if results.left_hand_landmarks:
        calc_lh = calc_landmark_list(image, results.left_hand_landmarks)
        pre_lh = pre_process_landmark(calc_lh)
    if results.right_hand_landmarks:
        calc_rh = calc_landmark_list(image, results.right_hand_landmarks)
        pre_rh = pre_process_landmark(calc_rh)

    return np.concatenate((np.array(pre_lh), np.array(pre_rh), nose_landmarks, hand_world_landmarks))


In [35]:
def extract_keypoints42(image, results):
    calc_lh = np.random.rand(21)
    calc_rh = np.random.rand(21)
    pre_lh = np.random.rand(21)
    pre_rh = np.random.rand(21)
    nose_landmark_x = 0
    nose_landmark_y = 0
    rh_world_x = 0
    lh_world_x = 0
    rh_world_y = 0
    lh_world_y = 0
    
    if results.pose_world_landmarks:
        nose_landmark_x = results.pose_world_landmarks.landmark[0].x
        nose_landmark_y = results.pose_world_landmarks.landmark[0].y
        rh_world_x = results.pose_world_landmarks.landmark[16].x
        lh_world_x = results.pose_world_landmarks.landmark[15].x
        rh_world_y = results.pose_world_landmarks.landmark[16].y
        lh_world_y = results.pose_world_landmarks.landmark[15].y
    nose_landmarks = [nose_landmark_x, nose_landmark_y]
    hand_world_landmarks = [rh_world_x, rh_world_y, lh_world_x, lh_world_y]
    if results.left_hand_landmarks:
        calc_lh = calc_landmark_list(image, results.left_hand_landmarks)
        pre_lh = pre_process_landmark(calc_lh)
    if results.right_hand_landmarks:
        calc_rh = calc_landmark_list(image, results.right_hand_landmarks)
        pre_rh = pre_process_landmark(calc_rh)

    return np.concatenate((np.array(pre_lh), np.array(pre_rh), nose_landmarks, hand_world_landmarks))


In [36]:
result_test = extract_keypoints42(image, results)

In [37]:
result_test

array([ 0.        ,  0.        , -0.09836066,  0.16393443, -0.03278689,
        0.40983607,  0.09836066,  0.6557377 ,  0.18032787,  0.85245902,
        0.32786885,  0.19672131,  0.31147541,  0.60655738,  0.2295082 ,
        0.81967213,  0.16393443,  0.96721311,  0.50819672,  0.21311475,
        0.44262295,  0.63934426,  0.3442623 ,  0.85245902,  0.24590164,
        1.        ,  0.57377049,  0.24590164,  0.50819672,  0.6557377 ,
        0.40983607,  0.83606557,  0.32786885,  0.96721311,  0.55737705,
        0.31147541,  0.49180328,  0.62295082,  0.39344262,  0.7704918 ,
        0.32786885,  0.8852459 ,  0.        ,  0.        ,  0.32786885,
       -0.04918033,  0.63934426,  0.06557377,  0.80327869,  0.26229508,
        0.8852459 ,  0.44262295,  0.86885246,  0.04918033,  1.        ,
        0.44262295,  1.        ,  0.6557377 ,  0.96721311,  0.78688525,
        0.7704918 ,  0.1147541 ,  0.90163934,  0.54098361,  0.90163934,
        0.7704918 ,  0.86885246,  0.91803279,  0.63934426,  0.21

In [38]:

np.shape(result_test)

(90,)

In [124]:
np.save('0', result_test)

In [125]:
np.load('0.npy')

array([ 0.3835876 ,  0.47759178, -0.77978629, ...,  0.        ,
        0.        ,  0.        ])

# 4. Setup Folders for Collection

In [107]:
np.save('0', result_test)

In [80]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data') 

# Actions that we try to detect
actions = np.array(['nothing', 'hey'])
#actions = np.array(['play', 'hey', 'record', 'feedback', 'stop', 'follow', 'nothing',
#                   'left', 'right', 'up', 'down', 'move'])

# Thirty videos worth of data
no_sequences = 5

# Videos are going to be 30 frames in length
sequence_length = 30


In [81]:
len(actions)

2

In [82]:
for action in actions: 
    dirmax = 0
    if os.path.exists(DATA_PATH) and len(np.array(os.listdir(DATA_PATH))) == len(actions):
        dirmax = np.max(np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int))
    for sequence in range(no_sequences):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(1+dirmax+sequence)))
        except:
            pass

In [14]:
# For adding a new actino to pre-existing dataset
action_to_add = 'move'
start_folder = 1
end_folder = 170
for sequence in range(start_folder,end_folder+1):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action_to_add, str(sequence)))
        except:
            pass

In [83]:
start_folder = dirmax if dirmax == 1 else dirmax+1

In [84]:
start_folder

1

In [15]:
start_folder

216

# 5. Collect Keypoint Values for Training and Testing

In [89]:
cap = cv2.VideoCapture(0)
ret, image = cap.read()
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    cont = True
    # NEW LOOP
    # Loop through actions
    for action in actions:
        if not cont: break
        cv2.putText(image, 'New sign get ready for: {}'.format(action.upper()), (120,200), 
        cv2.FONT_HERSHEY_SIMPLEX, 1, (255,29, 22), 4, cv2.LINE_AA)
        cv2.putText(image, 'Press "c" to continue'.format(action.upper()), (120,240), 
        cv2.FONT_HERSHEY_SIMPLEX, 1, (255,29, 22), 2, cv2.LINE_AA)
    # Show to screen
        cv2.imshow('OpenCV Feed', image)
        key = cv2.waitKey(100)
        while key != ord('c'):
            key = cv2.waitKey(10)
        cv2.waitKey(2000)
        # Loop through sequences aka videos
        for sequence in range(start_folder, start_folder+no_sequences):
            if not cont: break
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):
                if not cont: break

                # Read feed
                ret, frame = cap.read()

                # Make detections
                image, results = mediapipe_detection(frame, holistic)

                # Draw landmarks
                draw_styled_landmarks(image, results)
                
                # NEW Apply wait logic
                if frame_num == 0: 
                    cv2.putText(image, 'RESET for sign: {}'.format(action.upper()), (120,200), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)

                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(800)
                    
                else: 
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                
                # NEW Export keypoints
                keypoints = extract_keypoints(image, results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    cont = False
                    break

    cap.release()
    cv2.destroyAllWindows()

In [16]:
# For appending action to dataset
cap = cv2.VideoCapture(0)
ret, image = cap.read()
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    cont = True
    # NEW LOOP
    # Loop through actions
    action = action_to_add
    cv2.putText(image, 'New sign get ready for: {}'.format(action.upper()), (120,200), 
    cv2.FONT_HERSHEY_SIMPLEX, 1, (255,29, 22), 4, cv2.LINE_AA)
    cv2.putText(image, 'Press "c" to continue'.format(action.upper()), (120,240), 
    cv2.FONT_HERSHEY_SIMPLEX, 1, (255,29, 22), 2, cv2.LINE_AA)
# Show to screen
    cv2.imshow('OpenCV Feed', image)
    key = cv2.waitKey(100)
    while key != ord('c'):
        key = cv2.waitKey(10)
    cv2.waitKey(500)
    # Loop through sequences aka videos
    for sequence in range(start_folder, end_folder+1):
        if not cont: break
        # Loop through video length aka sequence length
        for frame_num in range(sequence_length):
            if not cont: break

            # Read feed
            ret, frame = cap.read()

            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # NEW Apply wait logic
            if frame_num == 0: 
                cv2.putText(image, 'RESET for sign: {}'.format(action.upper()), (120,200), 
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)

                cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                # Show to screen
                cv2.imshow('OpenCV Feed', image)
                cv2.waitKey(850)

            else: 
                cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                # Show to screen
                cv2.imshow('OpenCV Feed', image)

            # NEW Export keypoints
            keypoints = extract_keypoints(image, results)
            npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
            np.save(npy_path, keypoints)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                cont = False
                break

    cap.release()
    cv2.destroyAllWindows()

In [86]:
cap.release()
cv2.destroyAllWindows()

# 6. Preprocess Data and Create Labels and Features

In [90]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [91]:
label_map = {label:num for num, label in enumerate(actions)}

In [92]:
label_map

{'nothing': 0, 'hey': 1}

In [93]:
sequences, labels = [], []
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [94]:
np.array(sequences).shape

(10, 30, 90)

In [95]:
np.array(labels).shape

(10,)

In [96]:
X = np.array(sequences)

In [97]:
X.shape

(10, 30, 90)

In [98]:
y = to_categorical(labels).astype(int)

In [99]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [100]:
y_test.shape

(2, 2)

# 7. Build and Train LSTM Neural Network

In [101]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [102]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [103]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(sequence_length,90)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [104]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [105]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, callbacks=[tb_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f37af998940>

In [None]:
model.summary()

# 8. Make Predictions

In [28]:
res = model.predict(X_test)

In [29]:
actions[np.argmax(res[4])]

'hello'

In [30]:
actions[np.argmax(y_test[4])]

'hello'

# 9. Save Weights

In [80]:
model.save('06131658.h5')

In [217]:
del model

In [13]:
model.load_weights('05221720.h5')

# 10. Evaluation using Confusion Matrix and Accuracy

In [77]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [78]:
yhat = model.predict(X_test)



In [79]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [80]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[44,  0],
        [ 1,  3]],

       [[40,  1],
        [ 0,  7]],

       [[41,  1],
        [ 1,  5]],

       [[44,  0],
        [ 0,  4]],

       [[40,  0],
        [ 0,  8]],

       [[40,  0],
        [ 0,  8]],

       [[44,  0],
        [ 0,  4]],

       [[41,  0],
        [ 0,  7]]])

In [81]:
accuracy_score(ytrue, yhat)

0.9583333333333334

# 11. Test in Real Time

In [31]:
from scipy import stats

In [113]:
colors = [(245,117,16), (117,245,16), (16,117,245), (123,85,25), (11,42,180), (51,142,50), (75,75,50), 
          (200, 150, 200), (42, 42, 42), (80, 12, 200), (22, 200, 10), (255, 1, 225), (10, 180, 180)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [107]:
len(colors)

13

In [108]:
plt.figure(figsize=(18,18))
plt.imshow(prob_viz(res, actions, image, colors))

IndexError: index 2 is out of bounds for axis 0 with size 2

<Figure size 1296x1296 with 0 Axes>

In [112]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.65

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        #print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(image, results)
        sequence.append(keypoints)
        sequence = sequence[-sequence_length:]
        
        if len(sequence) == sequence_length:
            res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
#             print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

In [110]:
cap.release()
cv2.destroyAllWindows()