In [1]:

import cv2
import numpy as np
import os

from matplotlib import pyplot as plt
import time
import mediapipe as mp
from IPython.display import display, Javascript, Image
from base64 import b64decode, b64encode
import PIL
import io
import html
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.models import load_model
from mediapipe.framework.formats import landmark_pb2
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [4]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONTOURS) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections

In [5]:
def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             )
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             )
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             )
    # Draw right hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             )

In [6]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

In [7]:
folder_path = './model-top-15'

h5_file = None
npy_file = None

for file in os.listdir(folder_path):
    if file.endswith('.h5') and not h5_file:
        h5_file = file
    elif file.endswith('.npy') and not npy_file:
        npy_file = file
    if h5_file and npy_file:
        break

if h5_file and npy_file:
    actions = np.load(os.path.join(folder_path, npy_file), allow_pickle=True)

    # model = load_model(os.path.join(folder_path, h5_file))
    num_classes = actions.shape[0]
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, activation='relu', input_shape=(10, 1662)))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    model.add(LSTM(256, return_sequences=False, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))
    optimizer = Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    
    model.load_weights(os.path.join(folder_path, h5_file))
    print("Model and actions loaded successfully.")
else:
    print("Required files not found in the folder.")

Model and actions loaded successfully.


In [9]:
from IPython.display import display, clear_output
cap = cv2.VideoCapture(0)

holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=0)

bbox = ''
count = 0

sequence = []
left_hand_located = []
right_hand_located = []
printed = False

count = 0
message = 'Loading...'

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame")
        break

    # ... [other code]
    image, results = mediapipe_detection(frame, holistic)
    left_hand_located.append(bool(results.left_hand_landmarks))
    left_hand_located = left_hand_located[-10:]
    right_hand_located.append(bool(results.right_hand_landmarks))
    right_hand_located = right_hand_located[-10:]
    all_left_hands_detected = all(left_hand_located)
    all_right_hands_detected = all(right_hand_located)
    keypoints = extract_keypoints(results)
    # keypoints = np.nan_to_num(keypoints)
    sequence.append(keypoints)
    sequence = sequence[-10:]

    if len(sequence) == 10:
        if (not (all_left_hands_detected or all_right_hands_detected)):
          message = "Not enough hand data"
        else:
            infer = np.nan_to_num(sequence)
            count += 1
            if (count > 5):
                predictions  = model.predict(np.expand_dims(infer, axis=0), verbose=0)[0]
                top_indices = predictions.argsort()[-3:][::-1]  # Indices of top 3 predictions
                top_predictions = [(actions[i], predictions[i]) for i in top_indices]  # (action, confidence)

                message = ''
                for action, confidence in top_predictions:
                    message += f'{action}: {confidence:.2f} | '  # Format the message
                count = 0

    draw_styled_landmarks(image, results)
    
    # Add text to the image
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    font_color = (255, 255, 255)
    line_type = 2
    position = (50, 50) 
    cv2.putText(image, message, position, font, font_scale, font_color, line_type)

    _, jpeg_image = cv2.imencode('.jpg', image)
    i = Image(data=jpeg_image.tobytes())
    display(i)
    clear_output(wait=True)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

KeyboardInterrupt: 