## Reference

https://weekiat-lim.medium.com/hand-gesture-detection-sequence-recognition-7f3215f88dde

## Import Libraries

In [62]:
"""
    Standard Python Machine Learning Libraries, for array structure and dataset loading
"""
import pandas as pd
import numpy as np


"""
    MediaPipe - used for handlandmark detection
    OpenCv - Video Image Data Extraction
    JSON | Protobuf - Data Formating
"""
import mediapipe as mp
import cv2
import json
from google.protobuf.json_format import MessageToJson


"""
    Libraries for file name processing and preparation
"""
import random
import string
import shutil
import os


"""
    Libraries to keep sequence consistency
"""
from keras.preprocessing.sequence import pad_sequences
import math

"""
    Libraries for splitting training and testing data and also transform our label classes
"""

from keras.utils import to_categorical
from sklearn.model_selection import train_test_split


"""
    Neural Network Libraries
"""

from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense, BatchNormalization, Activation
from keras.callbacks import LearningRateScheduler

## Extract Features

In [63]:
"""
    Given the hand landmark positions transform our features so that we get distances between each hand landmark for training
"""
def distance_between(p1_loc, p2_loc):
    jsonObj = MessageToJson(results.multi_hand_landmarks[0])
    lmk = json.loads(jsonObj)['landmark']
    p1 = pd.DataFrame(lmk).to_numpy()[p1_loc]
    p2 = pd.DataFrame(lmk).to_numpy()[p2_loc]
    squared_dist = np.sum((p1-p2)**2, axis=0)
    return np.sqrt(squared_dist)

"""
    Utility function to calculate the distance between each hand landmark
"""
def landmark_to_dist_emb(results):
    jsonObj = MessageToJson(results.multi_hand_landmarks[0])
    lmk = json.loads(jsonObj)['landmark']

    emb = np.array([
        # thumb to finger tip
        distance_between(4, 8),
        distance_between(4, 12),
        distance_between(4, 16),
        distance_between(4, 20),
        # wrist to finger tip
        distance_between(4, 0),
        distance_between(8, 0),
        distance_between(12, 0),
        distance_between(16, 0),
        distance_between(20, 0),
        # tip to tip (specific to this application)
        distance_between(8, 12),
        distance_between(12, 16),
        # within finger joint (detect bending)
        distance_between(1, 4),
        distance_between(8, 5),
        distance_between(12, 9),
        distance_between(16, 13),
        distance_between(20, 17),
        # distance from each tip to thumb joint
        distance_between(2, 8),
        distance_between(2, 12),
        distance_between(2, 16),
        distance_between(2, 20)
    ])

    emb_norm = emb / np.linalg.norm(emb)
    return emb_norm

####

## Visualize and Test Hand Landmark Traking for MediaPipe using OpenCV

In [54]:
"""
    MediaPipe has many utility features, this will help us utilize the hand landmark detection.
"""
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

In [55]:
"""
  For testing purposes, Utilize OpenCV to display Hand Landmark recognition done by MediaPipe
"""

with mp_hands.Hands(max_num_hands=2,
                    min_detection_confidence=0.5,
                    min_tracking_confidence=0.5) as hands:
  while cap.isOpened():
    success, image = cap.read()
    if not success:
      print("Ignoring empty camera frame.")

    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = hands.process(image)

    # DETECT LANDMARKS
    if results.multi_hand_landmarks:

      #print(landmark_to_dist_emb(results).shape) process input
      jsonObj = MessageToJson(results.multi_hand_landmarks[0])
      lmk = json.loads(jsonObj)['landmark']
      
      #print(len(lmk))  # lmk = hand's landmark

# Draw the hand annotations on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if results.multi_hand_landmarks:
      for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
    cv2.imshow('MediaPipe Hands', image)
    if cv2.waitKey(5) & 0xFF == 27:
      break
cap.release()


In [4]:
"""
    Utility Function for UID generation and renaming of files
"""

def generate_random_uid():
    """Generate a random UID."""
    return ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))



"""
    Remake files for class labeling
"""
def rename_videos(source_directory, destination_directory, idx):
    """Rename videos in the specified directory."""
    for root, dirs, files in os.walk(source_directory):
        for file in files:
            if file.endswith(".MOV"):
                old_path = os.path.join(root, file)
                new_uid = generate_random_uid()
                new_name = f"{new_uid}_{idx}.MOV"
                new_path = os.path.join(destination_directory, new_name)

                shutil.copy(old_path, new_path)
                print(f"Renamed and copied: {old_path} to {new_path}")

""" 
    Update Target Classes Filenames and move them to a single directory for dataset processing
"""
# Specify the source directory containing the videos
source_directory = './dataset/clips/'

# Specify the destination directory for the renamed videos
destination_directory = './dataset/processed/'

# Create the destination directory if it doesn't exist
os.makedirs(destination_directory, exist_ok=True)

# Call the function to rename and copy the videos
targetSets = [85, 86, 89, 94]

for idx, target in enumerate(targetSets):
    print("Renaming Target Set: ", target)
    rename_videos(source_directory + str(target), destination_directory, idx)
    print("\n\n")


Renaming Target Set:  85
Renamed and copied: ./dataset/clips/85\11taoceh_0.MOV to ./dataset/processed/kjkvopt0_0.MOV
Renamed and copied: ./dataset/clips/85\4asdlwz1_0.MOV to ./dataset/processed/qtww2hem_0.MOV
Renamed and copied: ./dataset/clips/85\4wjsa5wq_0.MOV to ./dataset/processed/8i5ntven_0.MOV
Renamed and copied: ./dataset/clips/85\940bqr4z_0.MOV to ./dataset/processed/qmfbonj9_0.MOV
Renamed and copied: ./dataset/clips/85\9dppbec3_0.MOV to ./dataset/processed/a7kgnkqf_0.MOV
Renamed and copied: ./dataset/clips/85\f9r9bsva_0.MOV to ./dataset/processed/nyp3snlr_0.MOV
Renamed and copied: ./dataset/clips/85\fdpnld0t_0.MOV to ./dataset/processed/bbd6kbwj_0.MOV
Renamed and copied: ./dataset/clips/85\fx5jgfby_0.MOV to ./dataset/processed/5hqc7gj1_0.MOV
Renamed and copied: ./dataset/clips/85\gwa534to_0.MOV to ./dataset/processed/v3vnkow7_0.MOV
Renamed and copied: ./dataset/clips/85\i6641z1q_0.MOV to ./dataset/processed/hkvo9l68_0.MOV
Renamed and copied: ./dataset/clips/85\inl5n07c_0.MOV t

## Feature Extraction & Processing

In [64]:
"""
    Given all video data, analyze it using OpenCV and detect hand landmarks on the video, afterwards extraft important features such as distances between each segment.
"""

arr = os.listdir('./dataset/processed')
video_class_all = []
landmark_npy_all = []
handnn = mp.solutions.hands.Hands(
    max_num_hands=2, min_detection_confidence=0.6, min_tracking_confidence=0.6)

for idx, eachVideo in enumerate(arr):
    landmark_npy_single = []  # Reset for each video
    video = './dataset/processed/' + eachVideo
    cap = cv2.VideoCapture(video)
    video_class_all.append(int(video.split('_')[1].split('.')[0]))
    print(video)
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            break

        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = handnn.process(image)
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                landmark_npy_single.append(landmark_to_dist_emb(results))

    # Append landmarks for each video only once
    landmark_npy_all.append(landmark_npy_single)

    cap.release()

    if ((idx + 1) % 10) == 0:
        print(f'Finished for {(idx + 1)} videos')

print(f'Finished for total {len(arr)} videos. Completed.')

./dataset/processed/065kdgwe_1.MOV
./dataset/processed/0pfsgw9z_1.MOV
./dataset/processed/1nlndxyg_2.MOV
./dataset/processed/1qkjrxmx_1.MOV
./dataset/processed/1v4bggyw_2.MOV
./dataset/processed/27npt95d_1.MOV
./dataset/processed/34rd17y8_1.MOV
./dataset/processed/35z9bqff_3.MOV
./dataset/processed/3bpyl4rn_1.MOV
./dataset/processed/3n4kpfnk_3.MOV
Finished for 10 videos
./dataset/processed/3sa851bk_2.MOV
./dataset/processed/3sxbzdo5_0.MOV
./dataset/processed/42bqlsuv_3.MOV
./dataset/processed/4vf5637m_2.MOV
./dataset/processed/4x9hsurv_1.MOV
./dataset/processed/51ktgnv4_2.MOV
./dataset/processed/52c8ry5q_0.MOV
./dataset/processed/5hqc7gj1_0.MOV
./dataset/processed/5oqq3sxx_2.MOV
./dataset/processed/61bfyw5j_2.MOV
Finished for 20 videos
./dataset/processed/6imffdjk_1.MOV
./dataset/processed/73ud2nmr_1.MOV
./dataset/processed/87ka6p0q_2.MOV
./dataset/processed/8i5ntven_0.MOV
./dataset/processed/a37t1dqq_2.MOV
./dataset/processed/a7kgnkqf_0.MOV
./dataset/processed/aabz9a7a_3.MOV
./dataset

## Make sure Data Sequence is consistent

In [65]:
def skip_frame(landmark_npy_all, frame=50):
    new_lmk_array = []


    for each in landmark_npy_all:
        if len(each) <= frame:
        # if its less than frame, dont need to skip
            new_lmk_array.append(each)
        else:
        # skip frame by ceiling
            to_round = math.ceil(len(each)/frame)
            new_lmk_array.append(each[::to_round])
    return new_lmk_array

new_lmk_array = skip_frame(landmark_npy_all)
train_x = pad_sequences(new_lmk_array, padding='post', maxlen=10, dtype='float32')


## Prepare Training and Testing Data

In [69]:
classes = len(set(video_class_all))
feature_len = 20
max_len = 10

# hot encode output
train_y = to_categorical([i-1 for i in video_class_all], num_classes=classes)

print('Training y with shape of: ', train_y.shape)
print('Training x with shape of: ', train_x.shape)
X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, test_size=0.2)
print("----")
print("Shape of X_train: ", X_train.shape)
print("Shape of y_train: ", y_train.shape)
print("Shape of X_test: ", X_train.shape)
print("Shape of y_test: ", y_train.shape)

Training y with shape of:  (80, 4)
Training x with shape of:  (80, 10, 20)
----
Shape of X_train:  (64, 10, 20)
Shape of y_train:  (64, 4)
Shape of X_test:  (64, 10, 20)
Shape of y_test:  (64, 4)


## Build Model Architecture

In [70]:
model = Sequential()
model.add(LSTM(256, return_sequences=True, input_shape=(max_len, feature_len)))
model.add(Dropout(0.25))
model.add(LSTM(256, return_sequences=True))
model.add(Dropout(0.25))
model.add(LSTM(128, return_sequences=False))
model.add(Dense(64))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


def lrSchedule(epoch):
    lr = 0.001
    if epoch > 200:
        lr *= 0.0005
    elif epoch > 120:
        lr *= 0.005
    elif epoch > 50:
        lr *= 0.01
    elif epoch > 30:
        lr *= 0.1

    print('Learning rate: ', lr)
    return lr


LRScheduler = LearningRateScheduler(lrSchedule)
callbacks_list = [LRScheduler]

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 10, 256)           283648    
                                                                 
 dropout_2 (Dropout)         (None, 10, 256)           0         
                                                                 
 lstm_4 (LSTM)               (None, 10, 256)           525312    
                                                                 
 dropout_3 (Dropout)         (None, 10, 256)           0         
                                                                 
 lstm_5 (LSTM)               (None, 128)               197120    
                                                                 
 dense_2 (Dense)             (None, 64)                8256      
                                                                 
 batch_normalization_1 (Bat  (None, 64)               

## Train Model

In [71]:
verbose, epochs, batch_size = 1, 300, 8
model.fit(X_train, y_train, validation_data = (X_test, y_test), epochs=epochs, batch_size=batch_size, verbose=verbose, shuffle=True, callbacks=callbacks_list)

Learning rate:  0.001
Epoch 1/300












Learning rate:  0.001
Epoch 2/300
Learning rate:  0.001
Epoch 3/300
Learning rate:  0.001
Epoch 4/300
Learning rate:  0.001
Epoch 5/300
Learning rate:  0.001
Epoch 6/300
Learning rate:  0.001
Epoch 7/300
Learning rate:  0.001
Epoch 8/300
Learning rate:  0.001
Epoch 9/300
Learning rate:  0.001
Epoch 10/300
Learning rate:  0.001
Epoch 11/300
Learning rate:  0.001
Epoch 12/300
Learning rate:  0.001
Epoch 13/300
Learning rate:  0.001
Epoch 14/300
Learning rate:  0.001
Epoch 15/300
Learning rate:  0.001
Epoch 16/300
Learning rate:  0.001
Epoch 17/300
Learning rate:  0.001
Epoch 18/300
Learning rate:  0.001
Epoch 19/300
Learning rate:  0.001
Epoch 20/300
Learning rate:  0.001
Epoch 21/300
Learning rate:  0.001
Epoch 22/300
Learning rate:  0.001
Epoch 23/300
Learning rate:  0.001
Epoch 24/300
Learning rate:  0.001
Epoch 25/300
Learning rate:  0.001
Epoch 26/300
Learning rate:  0.001
Epoch 27/300
Learning rate:  0.001
Epoch 28/300
Learning rate:  0.001
Epoch 29/300
Learning rate:  0.001
Epoch 

<keras.src.callbacks.History at 0x201dd329450>

In [73]:
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
cap = cv2.VideoCapture(0)

# Use a list to store individual 1D arrays
sequential_list = []
with mp_hands.Hands(max_num_hands=2,
                    min_detection_confidence=0.5,
                    min_tracking_confidence=0.5) as hands:
    print(cap.isOpened())
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")

        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = hands.process(image)

        # DETECT LANDMARKS
        if results.multi_hand_landmarks:
            # Append the values to the list
            sequential_list.append(landmark_to_dist_emb(results))
            # print(np.array(sequential_list).shape)

            jsonObj = MessageToJson(results.multi_hand_landmarks[0])
            lmk = json.loads(jsonObj)['landmark']
        else:
            # print("Hand Gesture Recognition Interrupted.")
            sequential_list.clear()
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        cv2.imshow('MediaPipe Hands', image)

        # Check if the length of the list exceeds 10
        if len(sequential_list) >= 10:
            # Convert the list to a 3D NumPy array
            sequential = np.expand_dims(np.array(sequential_list), axis=0)
            threshold = 0.5  # You can adjust this threshold based on your needs

            # Classify based on the threshold
            prediction = np.argmax(model.predict(
                sequential)[0])
            if prediction == 0:
                print("Egg")
            elif prediction == 1:
                print("Chicken")
            elif prediction == 2:
                print("Crab")
            elif prediction == 3:
                print("Bread")
            # Clear the list
            sequential_list.clear()

        if cv2.waitKey(5) & 0xFF == 27:
            break

cap.release()
cv2.destroyAllWindows()


True
Egg
Egg
Bread
Egg
Egg
Egg
Egg
Egg
Egg
Egg
Egg
Bread
Bread
Egg
Bread
Chicken
Chicken
Chicken
Chicken
Chicken
Chicken
Chicken
Crab
Crab
Crab
Crab
Crab
Crab
Crab
Crab
Bread
Bread
Chicken
Egg
Egg
Egg


## Save Trained Model

In [74]:
model.save("fsl-pi.h5")

  saving_api.save_model(


## Load Trained Model [Run This For Testing and Deployment Purposes]

#### Libraries

In [75]:
from keras.models import load_model
import pandas as pd
import numpy as np
from google.protobuf.json_format import MessageToJson
import cv2
import mediapipe as mp
import json

#### Load Model

In [76]:
trained_model = load_model('./fsl-pi.h5')

#### Feature Extractor for when getting realtime input data

In [77]:
def distance_between(p1_loc, p2_loc):
    jsonObj = MessageToJson(results.multi_hand_landmarks[0])
    lmk = json.loads(jsonObj)['landmark']
    p1 = pd.DataFrame(lmk).to_numpy()[p1_loc]
    p2 = pd.DataFrame(lmk).to_numpy()[p2_loc]
    squared_dist = np.sum((p1-p2)**2, axis=0)
    return np.sqrt(squared_dist)


def landmark_to_dist_emb(results):
    jsonObj = MessageToJson(results.multi_hand_landmarks[0])
    lmk = json.loads(jsonObj)['landmark']

    emb = np.array([
        # thumb to finger tip
        distance_between(4, 8),
        distance_between(4, 12),
        distance_between(4, 16),
        distance_between(4, 20),
        # wrist to finger tip
        distance_between(4, 0),
        distance_between(8, 0),
        distance_between(12, 0),
        distance_between(16, 0),
        distance_between(20, 0),
        # tip to tip (specific to this application)
        distance_between(8, 12),
        distance_between(12, 16),
        # within finger joint (detect bending)
        distance_between(1, 4),
        distance_between(8, 5),
        distance_between(12, 9),
        distance_between(16, 13),
        distance_between(20, 17),
        # distance from each tip to thumb joint
        distance_between(2, 8),
        distance_between(2, 12),
        distance_between(2, 16),
        distance_between(2, 20)
    ])
    # use np normalize, as min_max may create confusion that the closest fingers has 0 distance
    emb_norm = emb / np.linalg.norm(emb)
    return emb_norm

## Deployment and Testing of Model

In [78]:
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
cap = cv2.VideoCapture(0)

# Use a list to store individual 1D arrays
sequential_list = []
with mp_hands.Hands(max_num_hands=2,
                    min_detection_confidence=0.5,
                    min_tracking_confidence=0.5) as hands:
    print(cap.isOpened())
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")

        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = hands.process(image)

        # DETECT LANDMARKS
        if results.multi_hand_landmarks:
            # Append the values to the list
            sequential_list.append(landmark_to_dist_emb(results))
            #print(np.array(sequential_list).shape)

            jsonObj = MessageToJson(results.multi_hand_landmarks[0])
            lmk = json.loads(jsonObj)['landmark']
        else:
            #print("Hand Gesture Recognition Interrupted.")
            sequential_list.clear()
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        cv2.imshow('MediaPipe Hands', image)

        # Check if the length of the list exceeds X Frames
        if len(sequential_list) >= 10:
            # Convert the list to a 3D NumPy array
            sequential = np.expand_dims(np.array(sequential_list), axis=0)
            # Assuming 'predictions' is the array [8.5479594e-05, 9.9991453e-01]
            threshold = 0.5  # You can adjust this threshold based on your needs

            # Classify based on the threshold
            prediction = np.argmax(trained_model.predict(
                sequential)[0])
            if prediction == 0:
                print("Egg")
            elif prediction == 1:
                print("Chicken")
            elif prediction == 2:
                print("Crab")
            elif prediction == 3:
                print("Bread")
            # Clear the list
            sequential_list.clear()

        if cv2.waitKey(5) & 0xFF == 27:
            break

cap.release()
cv2.destroyAllWindows()

True
Egg
Egg
Egg
Chicken
Egg
Bread
Bread
Bread
Bread
Bread
Chicken
Chicken
Chicken
Egg
Bread
Bread


## Model Metrics and Quantization

In [80]:
model = load_model('./fsl-pi.h5')
model_path = './fsl-pi.h5'

#### Model Size

In [81]:
import os

model_size_bytes = os.path.getsize(model_path)
model_size_kb = model_size_bytes / 1024
model_size_mb = model_size_kb / 1024

print(f"Model size: {model_size_mb:.2f} MB")

Model size: 11.68 MB


#### Convert Model into TFLite

In [82]:
import tensorflow as tf


model = tf.keras.models.load_model("fsl-pi.h5")
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
converter._experimental_lower_tensor_list_ops = False
tflite_model = converter.convert()
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

INFO:tensorflow:Assets written to: C:\Users\shuan\AppData\Local\Temp\tmpdk3vm9b9\assets


INFO:tensorflow:Assets written to: C:\Users\shuan\AppData\Local\Temp\tmpdk3vm9b9\assets


#### Check TFLite Model Size

In [83]:
import os

model_size_bytes = os.path.getsize('model.tflite')
model_size_kb = model_size_bytes / 1024
model_size_mb = model_size_kb / 1024

print(f"Model size: {model_size_mb:.2f} MB")

Model size: 3.90 MB


In [84]:
import os
import tensorflow as tf

original_model = tf.keras.models.load_model('fsl-pi.h5')

converter = tf.lite.TFLiteConverter.from_keras_model(original_model)

converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
converter._experimental_lower_tensor_list_ops = False
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_tflite_model = converter.convert()
with open('quantized_model.tflite', 'wb') as f:
    f.write(quantized_tflite_model)
quantized_model_size_bytes = os.path.getsize('quantized_model.tflite')
quantized_model_size_kb = quantized_model_size_bytes / 1024
quantized_model_size_mb = quantized_model_size_kb / 1024
print(f"Quantized Model size: {quantized_model_size_mb:.2f} MB")

INFO:tensorflow:Assets written to: C:\Users\shuan\AppData\Local\Temp\tmpl3o9j1yo\assets


INFO:tensorflow:Assets written to: C:\Users\shuan\AppData\Local\Temp\tmpl3o9j1yo\assets


Quantized Model size: 1.01 MB


#### Test Quantized Model

In [85]:
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='./quantized_model.tflite')
interpreter.allocate_tensors()

In [86]:

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
cap = cv2.VideoCapture(0)


sequential_list = []

with mp_hands.Hands(max_num_hands=2,
                    min_detection_confidence=0.5,
                    min_tracking_confidence=0.5) as hands:
    print(cap.isOpened())
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")

        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = hands.process(image)


        if results.multi_hand_landmarks:
            sequential_list.append(landmark_to_dist_emb(results))
            jsonObj = MessageToJson(results.multi_hand_landmarks[0])
            lmk = json.loads(jsonObj)['landmark']
        else:
            sequential_list.clear()

        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        cv2.imshow('MediaPipe Hands', image)

        if len(sequential_list) >= 10:
            sequential = np.expand_dims(
                np.array(sequential_list), axis=0).astype(np.float32)

            interpreter.set_tensor(interpreter.get_input_details()[
                                0]['index'], sequential)
            interpreter.invoke()
            output = interpreter.get_tensor(
                interpreter.get_output_details()[0]['index'])
            prediction = np.argmax(output[0])
            if prediction == 0:
                print("Egg")
            elif prediction == 1:
                print("Chicken")
            elif prediction == 2:
                print("Crab")
            elif prediction == 3:
                print("Bread")

            sequential_list.clear()

        if cv2.waitKey(5) & 0xFF == 27:
            break

cap.release()
cv2.destroyAllWindows()

True
Egg
Egg
Bread
Egg
Egg
Egg
Egg
Egg
Bread
Bread
Bread
Bread
Bread
Chicken
Chicken
Chicken
Chicken
Chicken
Chicken
Chicken
Chicken
Egg
Chicken
Chicken
Bread
Bread
Bread
Bread
Bread
Bread
Egg
Chicken
Bread
Bread
Crab
Crab
Bread
