# 1. Imports and Seed

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import random
import tensorflow as tf
import math
from numba import jit
import wandb

In [2]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print(tf.config.list_physical_devices())

Num GPUs Available:  1
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
tf.test.gpu_device_name()

'/device:GPU:0'

In [5]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only use the first GPU
  try:
    tf.config.set_visible_devices(gpus[0], 'GPU')
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    print(logical_gpus)
    print(gpus)
  except RuntimeError as e:
    # Visible devices must be set before GPUs have been initialized
    print(e)

1 Physical GPUs, 1 Logical GPU
[LogicalDevice(name='/device:GPU:0', device_type='GPU')]
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [6]:
def seed_everything(seed):
  random.seed(seed)
  os.environ['PYTHONHASHSEED'] = str(seed)
  np.random.seed(seed)
  tf.random.set_seed(seed)

SEED = 22
seed_everything(seed=SEED)

# 2. Keypoints using MP Holistic

In [7]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [8]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [9]:
def draw_landmarks(image, results):
    # mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION) # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections

In [10]:
def draw_styled_landmarks(image, results):
    # # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, 
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

# 3. Extract Keypoint Values

In [11]:
# Original extracts
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

# Augment flip
def extract_keypoints_flip(results):
    pose = np.array([[1-res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[1-res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[1-res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[1-res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

# def a(results):    
#     pose = []
#     if results.pose_landmarks:
#         for res in results.pose_landmarks.landmark:
#             test = np.array([res.x, res.y, res.z, res.visibility])
#             pose.append(test)
#     else:
#         pose.append(np.zeros(33*4))

# Augment translation
def extract_keypoints_trans(results, transx, transy):
    pose = np.array([[res.x + transx, res.y + transy, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x + transx, res.y + transy, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x + transx, res.y + transy, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x + transx, res.y + transy, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])


# 4. Collect Keypoint Values

In [12]:
ASL_ORI_PATH = 'videos'
ASL_MP_PATH = 'data'

actions = os.listdir(ASL_ORI_PATH)
augments = ['original', 'flip']

for i in range(8):
  augments.append('trans{}'.format(i))

if not os.path.exists(ASL_MP_PATH):
  print("create dir")
  os.makedirs(os.path.join(ASL_MP_PATH))
else:
  print("exists")

exists


In [13]:
for action in actions:
  for augment in augments:
    try:
      os.makedirs(os.path.join(ASL_MP_PATH, action, augment))
      
      lendir = len((os.listdir(os.path.join(ASL_ORI_PATH, action))))
      for video in range(1, lendir+1):
        try:
          os.makedirs(os.path.join(ASL_MP_PATH, action, augment, str(video)))
        except:
          pass
    except:
      pass

In [14]:
max_frames = 64

In [15]:
seed_everything(seed=SEED)

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
  
  for action in actions:
    print("=" * 20)
    print(action)
    video_dir_list = os.listdir(os.path.join(ASL_ORI_PATH, action))
    
    for video_idx, video in enumerate(video_dir_list):
      cap = cv2.VideoCapture(os.path.join(ASL_ORI_PATH, action, video))
      len_video = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
      
      if len_video > max_frames: # if video longer
        seed_everything(seed=SEED)
        sample_frames = sorted(random.sample(range(0, len_video), max_frames))
      else: # shorter
        diff = max_frames - len_video
        begin = [1 for i in range(math.floor(diff/2))] # repeat first frame 
        mid = [i+1 for i in range(len_video)] # real frames
        end = [len_video for i in range(max_frames - len(begin) - len(mid))] # repeat rest of end frames until 64 frames
        sample_frames = begin + mid + end
        
      # Read sample 64 frames
      for frame_idx, frame_num in enumerate(sample_frames):
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num-1)
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        
        ori_keypoints = extract_keypoints(results)
        npy_path = os.path.join(ASL_MP_PATH, action, 'original', str(video_idx+1), str(frame_idx))
        np.save(npy_path, ori_keypoints)
        
        # Augment flip
        flip_keypoints = extract_keypoints_flip(results)
        npy_path = os.path.join(ASL_MP_PATH, action, 'flip', str(video_idx+1), str(frame_idx))
        np.save(npy_path, flip_keypoints)
        
        # Augment translation
        trans = 0.2
        trans_list = [[trans, 0], [0, trans], [-trans, 0], [0, -trans], [trans, trans], [-trans, trans], [-trans, -trans], [trans, -trans]]
        for idx, item in enumerate(trans_list):
          trans_keypoints = extract_keypoints_trans(results, item[0], item[1])
          npy_path = os.path.join(ASL_MP_PATH, action, 'trans{}'.format(idx), str(video_idx+1), str(frame_idx))
          np.save(npy_path, trans_keypoints)
        
                
        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
          break
  cap.release()

      

deaf


# 6. Preprocess Data and Create Labels and Features

In [15]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [16]:
label_map = {label:num for num, label in enumerate(actions)}

In [17]:
label_map

{'deaf': 0,
 'fine': 1,
 'good_morning': 2,
 'hearing': 3,
 'hello': 4,
 'i_love_you': 5,
 'nice_to_meet_you': 6,
 'no': 7,
 'thanks': 8,
 'yes': 9}

In [18]:
import re
def sorted_alphanumeric(data):
    convert = lambda text: int(text) if text.isdigit() else text.lower()
    alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] 
    return sorted(data, key=alphanum_key)

## a. Original Data

In [19]:
features, labels = [], []
for action in actions:
  for augment in augments:
    if augment == "original":
      video_dir_list = os.listdir(os.path.join(ASL_MP_PATH, action, augment))
    
      print('=================')
      print(action, augment)  
      for index, video in enumerate(video_dir_list):
        window = []
        # print('video:', video)
        
        frame_list = sorted_alphanumeric(os.listdir(os.path.join(ASL_MP_PATH, action, augment, video)))
            
        # Append frames
        for frame_num in frame_list:
          res = np.load(os.path.join(ASL_MP_PATH, action, augment, video, frame_num))
          window.append(res.tolist())
        features.append(window)
        labels.append(label_map[action])

deaf original
fine original
good_morning original
hearing original
hello original
i_love_you original
nice_to_meet_you original
no original
thanks original
yes original


In [20]:
np.array(features).shape

(319, 64, 1662)

In [21]:
np.array(labels).shape

(319,)

In [24]:
X = np.array(features)
X.shape

(319, 64, 1662)

In [25]:
y = to_categorical(labels).astype(int)
y.shape

(319, 10)

In [26]:
np.save('Xy/X-ori', X)

In [27]:
np.save('Xy/y-ori', y)

In [72]:
# X = np.load('Xy/X.npy')
# y = np.load('Xy/y.npy')

In [28]:
seed_everything(seed=SEED)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, stratify=y, random_state=SEED)

In [29]:
seed_everything(seed=SEED)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.075, random_state=SEED)

In [30]:
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)
print(y_train.shape)

(280, 64, 1662)
(23, 64, 1662)
(16, 64, 1662)
(280, 10)


## b. Augment Data

In [28]:
aug_features, aug_labels = [], []
for action in actions:
  for augment in augments:
    if augment != "original":
      video_dir_list = os.listdir(os.path.join(ASL_MP_PATH, action, augment))
    
      print('=================')
      print(action, augment)  
      for index, video in enumerate(video_dir_list):
        window = []
        # print('video:', video)
        
        frame_list = sorted_alphanumeric(os.listdir(os.path.join(ASL_MP_PATH, action, augment, video)))
            
        # Append frames
        for frame_num in frame_list:
          # print('frame:', frame_num)
          res = np.load(os.path.join(ASL_MP_PATH, action, augment, video, frame_num))
          window.append(res.tolist())
        aug_features.append(window)
        aug_labels.append(label_map[action])

deaf flip
deaf trans0
deaf trans1
deaf trans2
deaf trans3
deaf trans4
deaf trans5
deaf trans6
deaf trans7
fine flip
fine trans0
fine trans1
fine trans2
fine trans3
fine trans4
fine trans5
fine trans6
fine trans7
good_morning flip
good_morning trans0
good_morning trans1
good_morning trans2
good_morning trans3
good_morning trans4
good_morning trans5
good_morning trans6
good_morning trans7
hearing flip
hearing trans0
hearing trans1
hearing trans2
hearing trans3
hearing trans4
hearing trans5
hearing trans6
hearing trans7
hello flip
hello trans0
hello trans1
hello trans2
hello trans3
hello trans4
hello trans5
hello trans6
hello trans7
i_love_you flip
i_love_you trans0
i_love_you trans1
i_love_you trans2
i_love_you trans3
i_love_you trans4
i_love_you trans5
i_love_you trans6
i_love_you trans7
nice_to_meet_you flip
nice_to_meet_you trans0
nice_to_meet_you trans1
nice_to_meet_you trans2
nice_to_meet_you trans3
nice_to_meet_you trans4
nice_to_meet_you trans5
nice_to_meet_you trans6
nice_to_meet

In [29]:
X_aug = np.array(aug_features)
X_aug.shape

(2871, 64, 1662)

In [30]:
y_aug = to_categorical(aug_labels).astype(int)
y_aug.shape

(2871, 10)

In [31]:
np.save('Xy/X-9aug-all', X_aug)
np.save('Xy/y-9aug-all', y_aug)

### Only take augmented train data

In [23]:
len(augments)

10

In [33]:
datalen = 319
trainlen = 280

X_aug_train = np.zeros(shape=(9*trainlen, 64, 1662))
y_aug_train = np.zeros(shape=(9*trainlen, 10))
print(X_aug_train.shape)

for i in range(len(augments) - 1):
  cur_x = X_aug[i*datalen:(i+1)*datalen]
  cur_y = y_aug[i*datalen:(i+1)*datalen]
  # print(i, cur_x.shape, cur_y.shape)
  
  # Only take X_aug_train
  seed_everything(seed=SEED)
  X_aug_train_tmp, X_aug_test_tmp, y_aug_train_tmp, y_aug_test_tmp = train_test_split(cur_x, cur_y, test_size=0.05, stratify=cur_y, random_state=SEED)
  seed_everything(seed=SEED)
  X_aug_train_tmp, X_aug_val_tmp, y_aug_train_tmp, y_aug_val_tmp = train_test_split(X_aug_train_tmp, y_aug_train_tmp, stratify=y_aug_train_tmp, test_size=0.075, random_state=SEED)
  
  if i == 0:
    X_aug_train = X_aug_train[i*trainlen:(i+1)*trainlen]
    y_aug_train = y_aug_train[i*trainlen:(i+1)*trainlen]
  else:
    X_aug_train = np.vstack((X_aug_train, X_aug_train_tmp))
    y_aug_train = np.vstack((y_aug_train, y_aug_train_tmp))
  
  print(i, X_aug_train.shape)

(2520, 64, 1662)
0 (280, 64, 1662)
1 (560, 64, 1662)
2 (840, 64, 1662)
3 (1120, 64, 1662)
4 (1400, 64, 1662)
5 (1680, 64, 1662)
6 (1960, 64, 1662)
7 (2240, 64, 1662)
8 (2520, 64, 1662)


In [34]:
np.save('Xy/X-9aug', X_aug_train)
np.save('Xy/y-9aug', y_aug_train)

In [35]:
# X_aug_train = np.load('Xy/X-9aug.npy')
# y_aug_train = np.load('Xy/y-9aug.npy')

In [36]:
X_train = np.append(X_train, X_aug_train, 0)
X_train.shape

(2800, 64, 1662)

In [37]:
y_train = np.append(y_train, y_aug_train, 0)
y_train.shape

(2800, 10)

# 7. Build and Train LSTM Neural Network

In [31]:
len(actions)

10

In [32]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ReduceLROnPlateau
from tensorflow import keras
from wandb.keras import WandbCallback
import wandb

In [33]:
# del model

In [34]:
seed_everything(seed=SEED)

wandb.init(
  project="bidirectional-LSTM",
  entity="richardsonqiu", 
  config={
    "lstm_1": 64,
    "layer_1": 64,
    "act_1": "relu",
    
    "lstm_2": 64,
    "dropout_2": 0.1,
    "layer_2": 64,
    "act_2": "relu",
        
    "lstm_3": 32,
    "dropout_3": 0.1,
    
    "last_layer": len(actions),
    "last_act": "softmax",
    
    "optimizer": "adam",
    "init_lr": 0.01,
    "loss": "categorical_crossentropy",
    "metric": "accuracy",
    "epoch": 1000,
    "batch_size": 32  
    })
config = wandb.config

model = Sequential()
model.add(Bidirectional(LSTM(config.lstm_1, return_sequences=True), input_shape=(max_frames, 1662)))
model.add(Dense(config.layer_1, activation=config.act_1))

model.add(Bidirectional(LSTM(config.lstm_2, return_sequences=True, dropout=config.dropout_2)))
model.add(Dense(config.layer_2, activation=config.act_2))

model.add(Bidirectional(LSTM(config.lstm_3, return_sequences=False, dropout=config.dropout_3)))
model.add(Dense(config.last_layer, activation=config.last_act))

opt = keras.optimizers.Adam(learning_rate=config.init_lr)
model.compile(optimizer=opt, loss=config.loss, metrics=config.metric)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mrichardsonqiu[0m (use `wandb login --relogin` to force relogin)


In [35]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 64, 128)          884224    
 l)                                                              
                                                                 
 dense (Dense)               (None, 64, 64)            8256      
                                                                 
 bidirectional_1 (Bidirectio  (None, 64, 128)          66048     
 nal)                                                            
                                                                 
 dense_1 (Dense)             (None, 64, 64)            8256      
                                                                 
 bidirectional_2 (Bidirectio  (None, 64)               24832     
 nal)                                                            
                                                        

In [36]:
TRIAL = 24
log_dir = os.path.join('Logs/{}'.format(TRIAL))
tb_callback = TensorBoard(log_dir=log_dir)
wandb_callback = WandbCallback(log_evaluation=True)

es_callback = EarlyStopping(monitor='val_loss', patience=20)
lr_callback = ReduceLROnPlateau(monitor='val_loss', patience=100, factor=0.5, min_lr=0.0001)

In [37]:
seed_everything(seed=SEED)

history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=config.epoch, callbacks=[wandb_callback])

wandb.finish()

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

KeyboardInterrupt: 

# 8. Make Predictions

In [72]:
res = model.predict(X_test)

In [73]:
actions[np.argmax(res[4])]

'hello'

In [74]:
actions[np.argmax(y_test[4])]

'thanks'

# 9. Save Weights

In [75]:
model.save('sign{}.h5'.format(TRIAL))

In [49]:
del model

In [56]:
model.load_weights('sign{}.h5'.format(TRIAL))

# 10. Evaluation using Confusion Matrix and Accuracy

In [76]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [77]:
yhat = model.predict(X_test)

In [78]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [79]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[14,  0],
        [ 2,  0]],

       [[14,  1],
        [ 1,  0]],

       [[15,  0],
        [ 1,  0]],

       [[13,  1],
        [ 2,  0]],

       [[13,  1],
        [ 0,  2]],

       [[15,  0],
        [ 1,  0]],

       [[13,  2],
        [ 0,  1]],

       [[11,  3],
        [ 2,  0]],

       [[13,  1],
        [ 2,  0]],

       [[11,  3],
        [ 1,  1]]], dtype=int64)

In [61]:
accuracy_score(ytrue, yhat)

0.9375

# Test Real Time

In [62]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [63]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-max_frames:]
        
        if len(sequence) == max_frames:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

IndexError: list index out of range