**IMPORT DEPENDENCIES**

In [None]:
import os
import cv2
import numpy as np
import mediapipe as mp

**DEFINE KEY FUNCTIONS**

In [None]:
# Mediapipe hands model: the solution module plus the drawing helpers/styles
# used by the hands versions of draw_landmarks() below.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

def mediapipe_detection(image, model):
    """Run a MediaPipe model on a BGR frame.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing ``process(rgb_image)``.

    Returns:
        A ``(bgr_image, results)`` tuple: a BGR copy of the frame produced by
        converting to RGB and back, and whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is marked read-only for the duration of inference
    # (presumably so MediaPipe can avoid copying it -- TODO confirm).
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

def draw_landmarks(image, results):
    """Draw every detected hand onto ``image`` in place.

    Args:
        image: Image the landmarks and connections are drawn on.
        results: Object with a ``multi_hand_landmarks`` attribute; it may be
            ``None`` or empty when no hand was detected.

    Returns:
        None. ``image`` is modified in place.
    """
    # Nothing to draw when no hand was found; iterating None would raise.
    if not results.multi_hand_landmarks:
        return

    for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())

def extract_keypoints(image, results):
    """Return the (x, y, z) coordinates of the first detected hand.

    Args:
        image: Unused; kept so existing callers keep working.
        results: Object with a ``multi_hand_landmarks`` attribute.

    Returns:
        ``None`` when ``results.multi_hand_landmarks`` is falsy. Otherwise an
        array with one ``[x, y, z]`` row per landmark of the first hand, or a
        ``(21, 3)`` array of zeros when that hand's landmark list is empty.
    """
    detected_hands = results.multi_hand_landmarks
    if not detected_hands:
        return None

    # Only the first hand is used, even if several were detected.
    first_hand = detected_hands[0]
    if not first_hand.landmark:
        return np.zeros((21, 3))

    rows = [[point.x, point.y, point.z] for point in first_hand.landmark]
    return np.array(rows)

In [None]:
# Mediapipe holistic model
# NOTE: mp_drawing is re-bound here to the same mp.solutions.drawing_utils
# module it is already bound to in the hands cell, so this cell can run alone.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

def mediapipe_detection(image, model):
    """Feed a BGR frame through ``model`` and hand back a BGR frame.

    Args:
        image: Frame in BGR channel order.
        model: Object whose ``process`` method accepts an RGB image.

    Returns:
        Tuple ``(image, results)`` where ``image`` is the frame after a
        BGR -> RGB -> BGR round trip and ``results`` is the value returned by
        ``model.process``.
    """
    as_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    as_rgb.flags.writeable = False  # read-only only while the model runs
    detection = model.process(as_rgb)
    as_rgb.flags.writeable = True
    return cv2.cvtColor(as_rgb, cv2.COLOR_RGB2BGR), detection

def draw_landmarks(image, results):
    """Draw face, pose and both hands from holistic ``results`` onto ``image``.

    Uses the default drawing specs of ``mp_drawing.draw_landmarks``.

    Args:
        image: Image that is drawn on in place.
        results: Object with ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks`` attributes.

    Returns:
        None.
    """
    # Newer MediaPipe releases no longer expose FACE_CONNECTIONS on the
    # holistic module; the face contour set is named FACEMESH_CONTOURS there.
    face_connections = getattr(mp_holistic, "FACE_CONNECTIONS", None)
    if face_connections is None:
        face_connections = mp_holistic.FACEMESH_CONTOURS

    mp_drawing.draw_landmarks(image, results.face_landmarks, face_connections)
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

def draw_styled_landmarks(image, results):
    """Draw holistic landmarks with a distinct colour scheme per body part.

    Args:
        image: Image that is drawn on in place.
        results: Object with ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks`` attributes.

    Returns:
        None.
    """
    spec = mp_drawing.DrawingSpec

    # Newer MediaPipe releases no longer expose FACE_CONNECTIONS on the
    # holistic module; the face contour set is named FACEMESH_CONTOURS there.
    face_connections = getattr(mp_holistic, "FACE_CONNECTIONS", None)
    if face_connections is None:
        face_connections = mp_holistic.FACEMESH_CONTOURS

    # (landmarks, connections, landmark spec, connection spec) per layer,
    # drawn in this order: face, pose, left hand, right hand.
    layers = (
        (
            results.face_landmarks,
            face_connections,
            spec(color=(80, 110, 10), thickness=1, circle_radius=1),
            # Channel values are 8-bit, so the maximum is 255 (was 256).
            spec(color=(80, 255, 121), thickness=1, circle_radius=1),
        ),
        (
            results.pose_landmarks,
            mp_holistic.POSE_CONNECTIONS,
            spec(color=(80, 22, 10), thickness=2, circle_radius=4),
            spec(color=(80, 44, 121), thickness=2, circle_radius=2),
        ),
        (
            results.left_hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            spec(color=(121, 22, 76), thickness=2, circle_radius=4),
            spec(color=(121, 44, 250), thickness=2, circle_radius=2),
        ),
        (
            results.right_hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            spec(color=(245, 117, 66), thickness=2, circle_radius=4),
            spec(color=(245, 66, 230), thickness=2, circle_radius=2),
        ),
    )

    for landmarks, connections, landmark_spec, connection_spec in layers:
        mp_drawing.draw_landmarks(
            image, landmarks, connections, landmark_spec, connection_spec)

def extract_keypoints(results):
    """Flatten holistic landmarks into a single 1-D feature vector.

    The vector is the concatenation, in this order, of pose
    (x, y, z, visibility per landmark), face, left hand and right hand
    (x, y, z per landmark). A missing group is replaced by zeros of length
    33*4, 468*3, 21*3 and 21*3 respectively.

    Args:
        results: Object with ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks`` attributes.

    Returns:
        1-D ``numpy`` array holding the concatenated values.
    """
    def _flatten(group, fields, fallback_len):
        # A falsy group means that part was not detected in this frame.
        if not group:
            return np.zeros(fallback_len)
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    parts = [
        _flatten(results.pose_landmarks, xyz + ("visibility",), 33 * 4),
        _flatten(results.face_landmarks, xyz, 468 * 3),
        _flatten(results.left_hand_landmarks, xyz, 21 * 3),
        _flatten(results.right_hand_landmarks, xyz, 21 * 3),
    ]
    return np.concatenate(parts)

**TEST STATIC MODEL**

In [None]:
from keras.models import load_model

# Live webcam test of the static (single-frame) classifier.
# This cell calls extract_keypoints(image, results) with two arguments, so the
# hands versions of mediapipe_detection / draw_landmarks / extract_keypoints
# must be the ones currently defined when it runs.

# Class labels are taken from the entry names in this folder (text before the
# first "_").
DATA_PATH = "./data/alphabets"

# Load trained model
model = load_model("models/alphabets.keras")
# A label is displayed only when the top prediction score exceeds this value.
threshold = 0.8

# Class labels
# NOTE(review): assumes the sorted label order matches the model's output
# index order -- confirm against the training notebook.
all_folders = os.listdir(DATA_PATH)
alphabets = sorted({f.split("_")[0] for f in all_folders})

cap = cv2.VideoCapture(0)
try:
    with mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            # A failed grab yields no frame; stop instead of passing None on.
            if not ret:
                break

            image, results = mediapipe_detection(frame, hands)

            if results.multi_hand_landmarks:
                draw_landmarks(image, results)
                keypoints = extract_keypoints(image, results)
                if keypoints is not None and keypoints.size > 0:
                    # Add a leading batch axis before predicting.
                    prediction = model.predict(np.expand_dims(keypoints, axis=0), verbose=0)
                    class_id = np.argmax(prediction)
                    confidence = np.max(prediction)

                    if confidence > threshold:
                        label = alphabets[class_id]
                        cv2.putText(image, f'{label} {int(confidence * 100)}%', (10, 70),
                                    cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)

            cv2.imshow('VSL Alphabets Recognition', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the camera and close the window even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

**TEST DYNAMIC MODEL**

In [None]:
from keras.models import load_model

# Live webcam test of the sequence classifier. Flow per round:
#   waiting  -> user presses 's'
#   countdown (``countdown`` seconds)
#   capture  -> collect SEQ_LENGTH frames that contain a hand, then predict
#   back to waiting, showing the last result until 's' is pressed again.
# This cell calls extract_keypoints(image, results) with two arguments, so the
# hands versions of the helper functions must be the ones currently defined.

# NOTE(review): the data folder and model file are the same ones used by the
# static cell -- confirm these are the intended paths for the dynamic model.
DATA_PATH = "./data/alphabets"

# Load trained model
model = load_model("models/alphabets.keras")
# A label is reported only when the top prediction score exceeds this value.
threshold = 0.8

# Class labels: text before the first "_" of each entry name in DATA_PATH.
all_folders = os.listdir(DATA_PATH)
actions = sorted({f.split("_")[0] for f in all_folders})

# Frame buffer
sequence = []
SEQ_LENGTH = 30

cap = cv2.VideoCapture(0)

try:
    with mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
        waiting = True
        start_recording = False
        countdown = 3
        countdown_start_time = None
        # True only until the first round starts; afterwards the waiting
        # screen shows the previous result instead of the prompt.
        show_waiting_status = True

        # Text of the most recent result, shown while waiting.
        last_label = None

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            image, results = mediapipe_detection(frame, hands)
            key = cv2.waitKey(10) & 0xFF

            # -------- WAITING STATE --------
            if waiting:
                if show_waiting_status:
                    cv2.putText(image, "Waiting... (press 's')", (20, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)

                if last_label is not None:
                    cv2.putText(image, last_label, (20, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3)

                if key == ord("s"):
                    waiting = False
                    show_waiting_status = False
                    sequence = []
                    start_recording = True
                    countdown_start_time = cv2.getTickCount()
                    last_label = None

            # -------- COUNTDOWN + CAPTURE STATE --------
            elif start_recording:
                # ensure countdown timer exists
                if countdown_start_time is None:
                    countdown_start_time = cv2.getTickCount()

                elapsed_time = (cv2.getTickCount() - countdown_start_time) / cv2.getTickFrequency()

                if elapsed_time < countdown:
                    number = countdown - int(elapsed_time)
                    cv2.putText(image, f"{number}", (20, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
                elif results.multi_hand_landmarks:
                    # Frames without a detected hand are skipped, not buffered.
                    draw_landmarks(image, results)
                    keypoints = extract_keypoints(image, results)

                    if keypoints is not None and keypoints.size > 0:
                        sequence.append(keypoints.flatten())

                        cv2.putText(image, f"Capturing... {len(sequence)}/{SEQ_LENGTH}", (20, 40),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                        if len(sequence) == SEQ_LENGTH:
                            # Predict on the buffered frames with a leading batch axis.
                            res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)
                            class_id = int(np.argmax(res))
                            confidence = float(np.max(res))

                            if confidence > threshold:
                                label = actions[class_id]
                                last_label = f'{label} {int(confidence * 100)}%'
                                result_color = (255, 0, 0)
                            else:
                                last_label = "No confident enough to guess"
                                result_color = (0, 0, 255)

                            # Shown on this frame; last_label keeps the result
                            # on screen during the following waiting state
                            # (previously it vanished after a single frame).
                            cv2.putText(image, last_label, (10, 120),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1, result_color, 3)

                            # Reset to waiting
                            waiting = True
                            start_recording = False
                            countdown_start_time = None
                            sequence = []

            cv2.imshow('VSL Recognition', image)
            if key == ord('q'):
                break
finally:
    # Release the camera and close the window even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
