In [None]:
from tensorflow.keras.models import load_model
# Location of the saved Keras model file on disk.
# NOTE(review): this is an absolute, machine-specific path — consider making it configurable.
model_load_path = "C:/projects/SLT_project_MP_images/model.keras"

# Load the saved model once; predict_real_time later calls lstm_model.predict on each full window.
lstm_model = load_model(model_load_path)

In [None]:
def process_multistream_features(frame_sequence):
    """Split a stacked landmark sequence into separate pose and hand streams.

    The second axis of ``frame_sequence`` is sliced into three contiguous
    ranges: indices 0-20 (left hand), 21-53 (pose) and 54 onward (right hand).
    The first and third axes are passed through unchanged.

    Args:
        frame_sequence: Indexable with three axes. Presumably shaped
            (frames, 75, 3) with rows ordered left hand, pose, right hand —
            TODO confirm against the code that builds it.

    Returns:
        Tuple ``(pose, left_hand, right_hand)`` of slices of the input.
    """
    # Row ranges along axis 1, listed in the order the streams are returned.
    pose_rows = slice(21, 54)
    left_rows = slice(None, 21)
    right_rows = slice(54, None)

    pose, left_hand, right_hand = (
        frame_sequence[:, rows, :] for rows in (pose_rows, left_rows, right_rows)
    )
    return pose, left_hand, right_hand

In [None]:
# Glosses (words) to recognise; each gloss's position in this list is its integer label.
target_glosses = ['any', 'thank you', 'bye', 'question']

# Lookup tables in both directions: gloss -> integer label, and integer label -> gloss.
folder_to_label = dict(zip(target_glosses, range(len(target_glosses))))
label_to_folder = dict(enumerate(target_glosses))

In [None]:
import mediapipe as mp
import cv2
import numpy as np
import os

# Short aliases for the MediaPipe hands and pose solution modules;
# extract_landmarks builds its Hands / Pose detectors from these.
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose

def _landmarks_to_array(landmark_list):
    """Convert a MediaPipe landmark list into an (N, 3) array of x, y, z values."""
    return np.array([[lm.x, lm.y, lm.z] for lm in landmark_list.landmark])


def extract_landmarks(image, hands=None, pose=None):
    """Detect hand and pose landmarks in a single BGR image.

    Args:
        image: Image in BGR channel order; it is converted to RGB before
            being handed to the detectors.
        hands: Optional hand detector exposing ``process(rgb_image)``. When
            omitted (or None), a temporary ``mp_hands.Hands`` in static-image
            mode is created for this call and closed afterwards. Passing a
            long-lived detector avoids rebuilding the model on every frame.
        pose: Optional pose detector exposing ``process(rgb_image)``. When
            omitted (or None), a temporary ``mp_pose.Pose`` in static-image
            mode is created for this call and closed afterwards.

    Returns:
        Tuple ``(all_landmarks, hand_count, pose_detected)``:
            all_landmarks: Rows stacked as left hand, pose, right hand. Any
                part that was not detected stays zero-filled (21, 33 and 21
                rows respectively), giving shape (75, 3) assuming MediaPipe
                returns 21 hand and 33 pose landmarks.
            hand_count: Number of hands reported by the hand detector.
            pose_detected: 1 if pose landmarks were found, otherwise 0.
    """
    from contextlib import ExitStack

    # Zero placeholders keep the output shape fixed when a part is not detected.
    left_hand_landmarks = np.zeros((21, 3))
    right_hand_landmarks = np.zeros((21, 3))
    pose_landmarks = np.zeros((33, 3))

    with ExitStack() as stack:
        # Only detectors created here are registered for cleanup; detectors
        # supplied by the caller are used as-is and left open for reuse.
        if hands is None:
            hands = stack.enter_context(
                mp_hands.Hands(static_image_mode=True, max_num_hands=2,
                               min_detection_confidence=0.5))
        if pose is None:
            pose = stack.enter_context(
                mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5))

        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Run both detectors on the same RGB frame.
        hand_results = hands.process(image_rgb)
        pose_results = pose.process(image_rgb)

        # Route each detected hand to the left or right slot by its handedness label.
        if hand_results.multi_hand_landmarks:
            for idx, hand_landmarks_set in enumerate(hand_results.multi_hand_landmarks):
                extracted = _landmarks_to_array(hand_landmarks_set)
                if hand_results.multi_handedness[idx].classification[0].label == 'Left':
                    left_hand_landmarks = extracted
                else:
                    right_hand_landmarks = extracted

        if pose_results.pose_landmarks:
            pose_landmarks = _landmarks_to_array(pose_results.pose_landmarks)

        # Row order (left hand, pose, right hand) must match the slicing done
        # by process_multistream_features.
        all_landmarks = np.vstack([left_hand_landmarks, pose_landmarks, right_hand_landmarks])
        hand_count = len(hand_results.multi_hand_landmarks) if hand_results.multi_hand_landmarks else 0
        pose_detected = 1 if pose_results.pose_landmarks else 0

        return all_landmarks, hand_count, pose_detected

In [None]:
# Number of buffered frames required before predict_real_time runs the model.
window_size = 30
# A prediction is reported only when its top score is strictly greater than this value.
confidence_threshold = 0.90
# Rolling buffer of per-frame landmark data; appended to and trimmed by predict_real_time.
sliding_window = []

def predict_real_time(frame):
    """Update the sliding window with one frame and classify it when full.

    Landmarks are extracted from ``frame`` and appended to the module-level
    ``sliding_window``, which is trimmed to the most recent ``window_size``
    entries. Once the window is full, the stacked landmarks are split into
    pose / left-hand / right-hand streams and passed to ``lstm_model``.

    Args:
        frame: Image forwarded to ``extract_landmarks`` (the capture loop
            passes frames read from ``cv2.VideoCapture``).

    Returns:
        The gloss from ``label_to_folder`` for the top-scoring class when its
        score is strictly greater than ``confidence_threshold``; otherwise
        the string "No Prediction" (also returned while the window is still
        filling).
    """
    global sliding_window

    # extract_landmarks returns (landmarks, hand_count, pose_detected). Only
    # the landmark array belongs in the window; storing the whole tuple makes
    # np.array(sliding_window) ragged and breaks the stream slicing below.
    # The detector arguments are passed as None; extract_landmarks constructs
    # its own MediaPipe detectors.
    landmarks, _hand_count, _pose_detected = extract_landmarks(frame, None, None)
    sliding_window.append(landmarks)

    # Drop the oldest frame once the buffer exceeds the window length.
    if len(sliding_window) > window_size:
        sliding_window.pop(0)

    # Only predict when enough frames are collected.
    if len(sliding_window) == window_size:
        pose, left, right = process_multistream_features(np.array(sliding_window))
        # Each stream gets a leading batch axis of size 1; verbose=0 keeps
        # Keras from printing a progress bar for every frame.
        predictions = lstm_model.predict(
            [pose[np.newaxis, :], left[np.newaxis, :], right[np.newaxis, :]],
            verbose=0,
        )

        # Top class index (as a plain int for the dict lookup) and its score.
        predicted_class = int(np.argmax(predictions, axis=1)[0])
        confidence = np.max(predictions)

        if confidence > confidence_threshold:
            print(f"Prediction: {label_to_folder[predicted_class]}, Confidence: {confidence:.2f}")
            return label_to_folder[predicted_class]

    return "No Prediction"

In [None]:
# Open capture device 0 and run the prediction loop until the stream ends
# or the user presses 'q'.
cap = cv2.VideoCapture(0)  # Webcam feed

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame could be read; stop the loop.
            break

        # Predict in real-time
        prediction = predict_real_time(frame)

        # Display predictions on the frame
        cv2.putText(frame, f"Prediction: {prediction}", (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.imshow("Sign Language Detection", frame)

        # Break loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the preview window, even if
    # prediction or drawing raises, so the capture device is not left open.
    cap.release()
    cv2.destroyAllWindows()