In [1]:
import tensorflow as tf
import os
import random
import numpy as np

def seed_everything(seed):
  random.seed(seed)
  os.environ['PYTHONHASHSEED'] = str(seed)
  np.random.seed(seed)
  tf.random.set_seed(seed)

SEED = 22
seed_everything(seed=SEED)

In [2]:
from functools import reduce
from itertools import accumulate

landmark_lens = (
    (33, 4),
    (468, 3),
    (21, 3),
    (21, 3)
)
landmark_locs = list(accumulate(landmark_lens, lambda a, b: a + b[0]*b[1], initial=0))
landmarks_len = reduce(lambda r, loc: r + loc[0] * loc[1], landmark_lens, 0)
print(landmark_locs, landmarks_len)

[0, 132, 1536, 1599, 1662] 1662


In [3]:
# data_folder = 'tracks_binary_manual'
data_folder = 'tracks_binary'
labels = [label for label in os.listdir(data_folder) if os.path.isdir(f'{data_folder}/{label}')]
NUM_CLASSES = len(labels)

labels_tensor = tf.constant(labels)
ids_tensor = tf.constant(range(len(labels)))

ids_from_labels = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        labels_tensor,
        ids_tensor
    ),
    default_value=-1
)

labels_from_ids = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        ids_tensor,
        labels_tensor
    ),
    default_value=""
)

def to_categorical(label):
    return tf.one_hot(
        ids_from_labels.lookup(label),
        depth=NUM_CLASSES
    )

In [4]:
def process_binary(file_path):
    label = tf.strings.split(file_path, os.sep)[-2]

    raw = tf.io.read_file(file_path)
    data = tf.io.decode_raw(raw, tf.float32)
    data = tf.reshape(data, [-1, landmarks_len])

    pose = tf.reshape(data[:, 0:132], [-1, 33, 4])
    # lh = tf.reshape(data[:, 132:195], [-1, 21, 3])
    # rh = tf.reshape(data[:, 195:258], [-1, 21, 3])
    
    face = tf.reshape(data[:, 132:1536], [-1, 468, 3])
    lh = tf.reshape(data[:, 1536:1599], [-1, 21, 3])
    rh = tf.reshape(data[:, 1599:1662], [-1, 21, 3])
    
    # without z
    new_pose = tf.concat((pose[:, :, :2], pose[:, :, -1:]), axis=2)
    new_lh = lh[:, :, :2]
    new_rh = rh[:, :, :2]

    return (new_pose, new_lh, new_rh), to_categorical(label)
    # return (pose, face, lh, rh), to_categorical(label)

In [5]:
from tensorflow import reduce_max, reduce_min

FRAMES = 64

def flatten(x):
    pose = tf.reshape(x[0], shape=[-1, 33*3])
    # face = tf.reshape(x[1], shape=[-1, 1404])
    lh = tf.reshape(x[1], shape=[-1, 21*2])
    rh = tf.reshape(x[2], shape=[-1, 21*2])
    return tf.concat([pose, lh, rh], axis=1)


def random_window(x):
    def pad(x):
        missing = FRAMES - size
        start_pad = tf.math.ceil(missing / 2)
        end_pad = tf.math.floor(missing / 2)
        return tf.concat([
            tf.tile([x[0]], [start_pad, 1]),
            x,
            tf.tile([x[-1]], [end_pad, 1])
        ], axis=0)

    def random_slice(x):
        i = tf.random.uniform(shape=(), maxval=size+1-FRAMES, dtype=tf.int32)
        return x[i: i+FRAMES]

    size = tf.shape(x)[0]
    print(size)
    return tf.cond(
        size < FRAMES,
        lambda: pad(x),
        lambda: random_slice(x)
    )
    
def calc_bounding(pose, lh, rh):
    max_x = reduce_max(tf.stack([reduce_max(pose[:, :, :1]), reduce_max(lh[:, :, :1]), reduce_max(rh[:, :, :1])], axis=0))
    min_x = reduce_min(tf.stack([reduce_min(pose[:, :, :1]), reduce_min(lh[:, :, :1]), reduce_min(rh[:, :, :1])], axis=0))
    
    max_y = reduce_max(tf.stack([reduce_max(pose[:, :, 1:2]), reduce_max(lh[:, :, 1:2]), reduce_max(rh[:, :, 1:2])], axis=0))
    min_y = reduce_min(tf.stack([reduce_min(pose[:, :, 1:2]), reduce_min(lh[:, :, 1:2]), reduce_min(rh[:, :, 1:2])], axis=0))

    window = tf.cast((max_x - min_x, max_y - min_y), dtype=tf.float32)
    mid = ((max_x + min_x)/2, (max_y + min_y)/2)
    return (window, mid)

def scale(x, factor):
    pose, lh, rh = x[0], x[1], x[2]
    window, mid = calc_bounding(pose, lh, rh)
    scale = factor * window
    pose_shape, lh_shape, rh_shape = tf.shape(pose), tf.shape(lh), tf.shape(rh)
    
    pose_center = tf.tile([[[mid[0], mid[1], 0]]], [pose_shape[0], pose_shape[1], 1])
    lh_center = tf.tile([[[mid[0], mid[1]]]], [lh_shape[0], lh_shape[1], 1])
    rh_center = tf.tile([[[mid[0], mid[1]]]], [rh_shape[0], rh_shape[1], 1])
    
    pose_scale = tf.tile([[[scale[0], scale[1], 1]]], [pose_shape[0], pose_shape[1], 1])
    lh_scale = tf.tile([[[scale[0], scale[1]]]], [lh_shape[0], lh_shape[1], 1])
    rh_scale = tf.tile([[[scale[0], scale[1]]]], [rh_shape[0], rh_shape[1], 1])
    
    scaled_pose = pose_center + (pose - pose_center) * pose_scale
    scaled_lh = lh_center + (lh - lh_center) * lh_scale
    scaled_rh = rh_center + (rh - rh_center) * rh_scale

    return (scaled_pose, scaled_lh, scaled_rh)


def random_translation(x):
    pose, lh, rh = x[0], x[1], x[2]
    magnitude = tf.random.uniform(shape=[2], minval=-0.25, maxval=0.25)
    pose_shape, lh_shape, rh_shape = tf.shape(pose), tf.shape(lh), tf.shape(rh)
    
    pose_trans = tf.tile([[[magnitude[0], magnitude[1], 0]]], [pose_shape[0], pose_shape[1], 1])
    lh_trans = tf.tile([[[magnitude[0], magnitude[1]]]], [lh_shape[0], lh_shape[1], 1])
    rh_trans = tf.tile([[[magnitude[0], magnitude[1]]]], [rh_shape[0], rh_shape[1], 1])

    return (pose+pose_trans, lh+lh_trans, rh+rh_trans)
    
def flip(x):
    pose, lh, rh = x[0], x[1], x[2]
    pose_shape, lh_shape, rh_shape = tf.shape(pose), tf.shape(lh), tf.shape(rh)
    
    pose_neg = tf.tile([[[-1.0, 1, 1]]], [pose_shape[0], pose_shape[1], 1])
    lh_neg = tf.tile([[[-1.0, 1]]], [lh_shape[0], lh_shape[1], 1])
    rh_neg = tf.tile([[[-1.0, 1]]], [rh_shape[0], rh_shape[1], 1])
    
    pose_trans = tf.tile([[[1.0, 0, 0]]], [pose_shape[0], pose_shape[1], 1])
    lh_trans = tf.tile([[[1.0, 0]]], [lh_shape[0], lh_shape[1], 1])
    rh_trans = tf.tile([[[1.0, 0]]], [rh_shape[0], rh_shape[1], 1])

    flipped_pose = pose_trans + pose * pose_neg
    flipped_lh = lh_trans + lh * lh_neg
    flipped_rh = rh_trans + rh * rh_neg
    
    return (flipped_pose, flipped_lh, flipped_rh)
    
def prepare(ds, shuffle=False, augment=False):
    if augment:
        ds = ds.map(lambda x, y: (random_translation(x), y), num_parallel_calls=tf.data.AUTOTUNE)
        ds = ds.map(lambda x, y: (scale(x, 0.1), y), num_parallel_calls=tf.data.AUTOTUNE)
        ds = ds.map(lambda x, y: (flip(x), y), num_parallel_calls=tf.data.AUTOTUNE)
               
    ds = ds.map(lambda x, y: (flatten(x), y), num_parallel_calls=tf.data.AUTOTUNE)

    ds = ds.map(lambda x, y: (random_window(x), y), num_parallel_calls=tf.data.AUTOTUNE)

    if shuffle:
        ds = ds.shuffle(1000, seed=SEED, reshuffle_each_iteration=False)
        
    ds = ds.batch(32)

    return ds.prefetch(buffer_size=tf.data.AUTOTUNE)

In [6]:
a = tf.data.Dataset.list_files(f'{data_folder}/*/*')
a = a.map(process_binary)
# a = a.map(lambda x, y: (concat_joints(x), y), num_parallel_calls=tf.data.AUTOTUNE)
# a = prepare(a)
a = a.map(lambda x, y: (flatten(x), y), num_parallel_calls=tf.data.AUTOTUNE)

a = a.map(lambda x, y: (random_window(x), y), num_parallel_calls=tf.data.AUTOTUNE)

a

Tensor("strided_slice:0", shape=(), dtype=int32)


<ParallelMapDataset shapes: ((None, 183), (10,)), types: (tf.float32, tf.float32)>

In [7]:
for i in a.take(1):
  print(i)

(<tf.Tensor: shape=(64, 183), dtype=float32, numpy=
array([[0.58130395, 0.300053  , 0.9995698 , ..., 0.        , 0.        ,
        0.        ],
       [0.58130395, 0.300053  , 0.9995698 , ..., 0.        , 0.        ,
        0.        ],
       [0.58130395, 0.300053  , 0.9995698 , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.567169  , 0.25922188, 0.99978614, ..., 0.        , 0.        ,
        0.        ],
       [0.567169  , 0.25922188, 0.99978614, ..., 0.        , 0.        ,
        0.        ],
       [0.567169  , 0.25922188, 0.99978614, ..., 0.        , 0.        ,
        0.        ]], dtype=float32)>, <tf.Tensor: shape=(10,), dtype=float32, numpy=array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.], dtype=float32)>)


In [6]:
def get_ds_split(ds, ds_size, train_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=1000):
  assert (train_split + test_split + val_split) == 1
  
  if shuffle:
    ds = ds.shuffle(shuffle_size, seed=SEED, reshuffle_each_iteration=False)
  
  train_size = int(train_split * ds_size)
  val_size = int(val_split * ds_size)
  
  train_ds = ds.take(train_size)
  val_ds = ds.skip(train_size).take(val_size)
  test_ds = ds.skip(train_size).skip(val_size)
  
  return train_ds, val_ds, test_ds

In [7]:
from sklearn.model_selection import train_test_split
import shutil

# Split dataset into folders
def split_dataset(train_size=0.8, val_size=0.2):
    filenames = tf.io.matching_files(f"{data_folder}/*/*")
    filenames = tf.random.shuffle(filenames, seed=SEED)

    video = filenames.numpy()
    label = tf.strings.split(filenames, os.sep)[:, 1:2].flat_values.numpy()

    video_train, video_test, label_train, label_test = train_test_split(video, label, train_size=train_size, test_size=val_size)

    shutil.rmtree('dataset', ignore_errors=True)
    
    for video_path, label in zip(video_train, label_train):
        dest = os.path.join(b'dataset', b'train', label)
        os.makedirs(dest, exist_ok=True)
        shutil.copy2(video_path, dest)

    for video_path, label in zip(video_test, label_test):
        dest = os.path.join(b'dataset', b'test', label)
        os.makedirs(dest, exist_ok=True)
        shutil.copy2(video_path, dest)

In [8]:
split_dataset()

In [9]:
train_ds = tf.data.Dataset.list_files('dataset/train/*/*')
train_ds = train_ds.map(process_binary)
train_ds = prepare(train_ds)

test_ds = tf.data.Dataset.list_files('dataset/test/*/*')
test_ds = test_ds.map(process_binary)
test_ds = prepare(test_ds)

Tensor("strided_slice:0", shape=(), dtype=int32)
Tensor("strided_slice:0", shape=(), dtype=int32)


In [10]:
# isAugment = False

# ds = tf.data.Dataset.list_files(f'{data_folder}/*/*')
# ds = ds.map(process_binary)
# train_ds, val_ds, test_ds = get_ds_split(ds, len(ds))

# train_ds = prepare(train_ds, augment=isAugment)
# val_ds = prepare(val_ds)
# test_ds = prepare(test_ds)

In [11]:
print(train_ds)
print(test_ds)
# print(len(train_ds), len(val_ds), len(test_ds))
print(len(train_ds), len(test_ds))

<PrefetchDataset shapes: ((None, None, 183), (None, 10)), types: (tf.float32, tf.float32)>
<PrefetchDataset shapes: ((None, None, 183), (None, 10)), types: (tf.float32, tf.float32)>
8 2


In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ReduceLROnPlateau
from tensorflow import keras
from wandb.keras import WandbCallback
import wandb

In [13]:
isAugment = False

In [None]:
wandb.init(
  project="HCN",
  entity="richardsonqiu",
  config={
    "input_shape": (FRAMES, 183),
    
    "lstm_1": 40,
    "layer_1": 40,
    "act_1": "relu",
    
    "lstm_2": 50,
    # "dropout_2": 0.1,
    "layer_2": 50,
    "act_2": "relu",
        
    "lstm_3": 80,
    "dropout_3": 0.2,
    
    "last_layer": NUM_CLASSES,
    "last_act": "softmax",
    
    "optimizer": "adam",
    "init_lr": 0.01,
    "loss": "categorical_crossentropy",
    "metric": "accuracy",
    "epoch": 500,
    "batch_size": 32,
    "data": "default",
    "landmarks": "pose, lh, rh",
    "landmarks_metadata": "without z",
    "augment": isAugment
    })
config = wandb.config

model = Sequential()
model.add(Bidirectional(LSTM(config.lstm_1, return_sequences=True), input_shape=(config.input_shape[0], config.input_shape[1])))
model.add(Dense(config.layer_1, activation=config.act_1))
model.add(Bidirectional(LSTM(config.lstm_2, return_sequences=True)))
model.add(Dense(config.layer_2, activation=config.act_2))
model.add(Bidirectional(LSTM(config.lstm_3, return_sequences=False, dropout=config.dropout_3)))
model.add(Dense(config.last_layer, activation=config.last_act))

In [19]:
# wandb.init(
#   project="bi-LSTM",
#   entity="richardsonqiu", 
#   config={
#     "input_shape": (FRAMES, 183),
    
#     "lstm_1": 40,
#     "layer_1": 40,
#     "act_1": "relu",
    
#     "lstm_2": 50,
#     # "dropout_2": 0.1,
#     "layer_2": 50,
#     "act_2": "relu",
        
#     "lstm_3": 80,
#     "dropout_3": 0.2,
    
#     "last_layer": NUM_CLASSES,
#     "last_act": "softmax",
    
#     "optimizer": "adam",
#     "init_lr": 0.01,
#     "loss": "categorical_crossentropy",
#     "metric": "accuracy",
#     "epoch": 500,
#     "batch_size": 32,
#     "data": "default",
#     "landmarks": "pose, lh, rh",
#     "landmarks_metadata": "without z",
#     "augment": isAugment
#     })
# config = wandb.config

# model = Sequential()
# model.add(Bidirectional(LSTM(config.lstm_1, return_sequences=True), input_shape=(config.input_shape[0], config.input_shape[1])))
# model.add(Dense(config.layer_1, activation=config.act_1))
# model.add(Bidirectional(LSTM(config.lstm_2, return_sequences=True)))
# model.add(Dense(config.layer_2, activation=config.act_2))
# model.add(Bidirectional(LSTM(config.lstm_3, return_sequences=False, dropout=config.dropout_3)))
# model.add(Dense(config.last_layer, activation=config.last_act))

VBox(children=(Label(value=' 17.01MB of 17.01MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
accuracy,▁▁▂▂▃▄▅▅▅▆▆▆▆▇▇▆▇▇▇▇▇█▇▇▇▇▇▇▇█▆▇█▇▇▇▆▇▇▇
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,█▇▇▆▆▅▄▄▄▃▃▃▂▂▂▂▂▁▂▂▂▁▂▁▂▂▁▁▁▁▁▂▁▁▂▂▂▂▂▂
lr,███▄▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▂▁▁▁▃▅▃▂▄▅▃▄▅▅▅█▆▇▅▅▃▅▃▆▃▃▅█▅▅▇▄▆▆▇█▅▆▇▆
val_loss,▃▂▄▅▄▃▄█▅▆▄▆▇▃▃▃▄▄▃▄▅▅▅▂▃▄▄▃▅▂▃▄▃▂▄▁▃▃▃▃

0,1
accuracy,0.38039
best_epoch,355.0
best_val_loss,2.12573
epoch,499.0
loss,1.63779
lr,0.0
val_accuracy,0.21875
val_loss,2.3643


[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [20]:
if config.optimizer == "adam":
  opt = keras.optimizers.Adam(learning_rate=config.init_lr)
elif config.optimizer == "sgd":
  opt = keras.optimizers.SGD(learning_rate=config.init_lr, nesterov=True)
  
model.compile(optimizer=opt, loss=config.loss, metrics=[config.metric])

In [21]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_3 (Bidirectio  (None, 64, 80)           71680     
 nal)                                                            
                                                                 
 dense_3 (Dense)             (None, 64, 40)            3240      
                                                                 
 bidirectional_4 (Bidirectio  (None, 64, 100)          36400     
 nal)                                                            
                                                                 
 dense_4 (Dense)             (None, 64, 50)            5050      
                                                                 
 bidirectional_5 (Bidirectio  (None, 160)              83840     
 nal)                                                            
                                                      

In [22]:
es_callback = EarlyStopping(monitor='val_loss', patience=20)
lr_callback = ReduceLROnPlateau(monitor='val_loss', patience=20, factor=0.5, min_lr=1e-6)
wandb_callback = WandbCallback(log_evaluation=True)

In [23]:
# history = model.fit(train_ds, validation_data=val_ds, epochs=config.epoch, callbacks=[lr_callback, wandb_callback])
history = model.fit(train_ds, validation_data=test_ds, epochs=config.epoch, callbacks=[lr_callback, wandb_callback])



Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [None]:
landmarks_len

258

In [49]:
FRAMES

64

In [42]:
# model.load_weights('models/chocolate-salad.h5')

## Evaluate Model

In [44]:
test_ds

<PrefetchDataset shapes: ((None, None, 183), (None, 10)), types: (tf.float32, tf.float32)>

In [52]:
y_pred = []  # store predicted labels
y_true = []  # store true labels

# iterate over the dataset
for image_batch, label_batch in test_ds:   # use dataset.unbatch() with repeat
   # append true labels
   y_true.append(label_batch)
   # compute predictions
   preds = model.predict(image_batch)
   # append predicted labels
   y_pred.append(np.argmax(preds, axis = - 1))

# convert the true and predicted labels into tensors
correct_labels = tf.argmax(tf.concat([item for item in y_true], axis = 0), axis=1)
predicted_labels = tf.concat([item for item in y_pred], axis = 0)

In [53]:
correct_labels

<tf.Tensor: shape=(33,), dtype=int64, numpy=
array([0, 4, 2, 0, 9, 3, 7, 4, 4, 2, 5, 7, 8, 9, 3, 7, 8, 2, 8, 8, 6, 4,
       8, 3, 0, 9, 2, 0, 7, 2, 7, 9, 3], dtype=int64)>

In [54]:
predicted_labels

<tf.Tensor: shape=(33,), dtype=int64, numpy=
array([3, 4, 2, 0, 9, 3, 7, 4, 4, 2, 9, 0, 8, 9, 3, 7, 8, 2, 8, 8, 6, 4,
       8, 3, 0, 9, 2, 0, 7, 9, 7, 9, 3], dtype=int64)>

In [55]:
from sklearn.metrics import classification_report

print(classification_report(correct_labels, predicted_labels))

              precision    recall  f1-score   support

           0       0.75      0.75      0.75         4
           2       1.00      0.80      0.89         5
           3       0.80      1.00      0.89         4
           4       1.00      1.00      1.00         4
           5       0.00      0.00      0.00         1
           6       1.00      1.00      1.00         1
           7       1.00      0.80      0.89         5
           8       1.00      1.00      1.00         5
           9       0.67      1.00      0.80         4

    accuracy                           0.88        33
   macro avg       0.80      0.82      0.80        33
weighted avg       0.87      0.88      0.87        33



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Test in Real Time

In [96]:
import mediapipe as mp
import matplotlib.pyplot as plt

mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results
  
def draw_styled_landmarks(image, results):
    # # Draw face connections
    # mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, 
    #                          mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
    #                          mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
    #                          ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [97]:
import cv2
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)

        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

In [70]:
# colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, labels, input_frame):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), (16,117,245), -1)
        cv2.putText(output_frame, labels[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [85]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*3)
    # face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*2)
    rh = np.array([[res.x, res.y] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*2)
    return np.concatenate([pose, lh, rh])

In [92]:
extract_keypoints(results)

array([ 3.77938807e-01,  6.12750232e-01,  9.99868512e-01,  4.06760544e-01,
        5.59190691e-01,  9.99771535e-01,  4.22686428e-01,  5.59526205e-01,
        9.99799371e-01,  4.33141530e-01,  5.60620964e-01,  9.99741375e-01,
        3.51988286e-01,  5.59091389e-01,  9.99775529e-01,  3.35909486e-01,
        5.59482276e-01,  9.99814749e-01,  3.21899831e-01,  5.61391056e-01,
        9.99787807e-01,  4.49979484e-01,  5.75336993e-01,  9.99755383e-01,
        3.10312897e-01,  5.82234025e-01,  9.99865532e-01,  4.10654008e-01,
        6.69661939e-01,  9.99866188e-01,  3.43995690e-01,  6.70501947e-01,
        9.99899507e-01,  5.67868054e-01,  8.57343435e-01,  9.98632193e-01,
        2.06030875e-01,  8.38915288e-01,  9.99098778e-01,  6.03285670e-01,
        1.02711034e+00,  2.05536246e-01,  6.01150170e-02,  9.84925926e-01,
        6.24212265e-01,  6.24025524e-01,  1.36145413e+00,  5.17988503e-02,
       -5.26338443e-02,  1.25665522e+00,  1.53916195e-01,  6.33318186e-01,
        1.43683290e+00,  

In [87]:
actions = labels

In [113]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.85

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-64:] # last 32 frames
        
        if len(sequence) == 64:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0] == np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 4: 
                sentence = sentence[-4:]

            # Viz probabilities
            image = prob_viz(res, actions, image)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti