In [None]:
import cv2
import mediapipe as mp
import joblib
import numpy as np
from tensorflow.keras.models import load_model

# Load the trained BiLSTM classifier and the label encoder stored next to it.
model = load_model('../model/BiLSTM/model_bilstm.keras')
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load encoder files that come from a trusted source.
label_encoder = joblib.load('../model/BiLSTM/label_encoder.pkl')

# Raw BGR frames captured while a hand is visible.
# NOTE(review): nothing in this cell consumes or trims this list, so it grows
# for as long as a hand stays in view -- confirm who reads it and bound it.
frames_buffer = []

recognized_sign = ''

# Number of frames dropped after a hand first appears.
# NOTE(review): this name was used but never defined in the original cell
# (NameError on the first frame); 5 is a placeholder value -- tune as needed.
skip_frames = 5

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Raise instead of calling exit(): in a notebook exit() stops the kernel
    # rather than just aborting this cell.
    raise RuntimeError("Cannot open camera")

try:
    with mp.solutions.holistic.Holistic(min_detection_confidence=0.7, min_tracking_confidence=0.7) as holistic:
        hand_present = False
        skip_counter = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR frames; MediaPipe solutions expect RGB input.
            results = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # The results object itself is always truthy, so look at the hand
            # landmark fields to decide whether a hand is actually visible.
            hand_detected = (results.left_hand_landmarks is not None
                             or results.right_hand_landmarks is not None)

            if hand_detected:
                if not hand_present:
                    # First frame of a new appearance: it is buffered and the
                    # skip countdown starts with the following frame.
                    # NOTE(review): confirm that buffering this triggering
                    # frame before the skip window is intended.
                    hand_present = True
                    skip_counter = skip_frames
                    frames_buffer.append(frame)
                elif skip_counter > 0:
                    # Still inside the skip window: drop this frame.
                    skip_counter -= 1
                else:
                    frames_buffer.append(frame)
            else:
                # Hand left the view; the next detection starts a new appearance.
                hand_present = False

            # Show every frame and poll the keyboard on every iteration so the
            # preview stays live and 'q' works even when no hand is visible.
            cv2.imshow('Real-time Sign Prediction', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even on errors.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from collections import deque

# Load the trained BiLSTM model
model = tf.keras.models.load_model("../model/BiLSTM/model_bilstm.keras")

# Load the LabelEncoder (assumed to be a plain pickle file)
# NOTE(review): pickle.load can execute arbitrary code -- only load this file
# from a trusted source. The first cell reads the same path with joblib.load;
# confirm which serializer actually wrote the file.
import pickle
with open("../model/BiLSTM/label_encoder.pkl", "rb") as f:
    le = pickle.load(f)

# MediaPipe Hands in video (tracking) mode, limited to a single hand.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False,
                       max_num_hands=1,
                       min_detection_confidence=0.5,
                       min_tracking_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

# Rolling buffer holding the landmark vectors of the last 30 frames
sequence_length = 30
frame_buffer = deque(maxlen=sequence_length)

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail loudly instead of letting the capture loop exit silently on the
    # first failed read.
    hands.close()
    raise RuntimeError("Cannot open camera")

def extract_landmarks(results):
    """Flatten the first detected hand's landmarks into a 1-D array.

    Args:
        results: Object exposing ``multi_hand_landmarks``; in this notebook it
            is the value returned by ``hands.process(image)``.

    Returns:
        A 1-D ``np.ndarray`` laid out as ``x0, y0, z0, x1, y1, z1, ...`` for
        the first hand, or ``None`` when ``multi_hand_landmarks`` is empty or
        ``None``. (Presumably 63 values for a 21-landmark hand -- TODO confirm.)
    """
    detected_hands = results.multi_hand_landmarks
    if not detected_hands:
        return None

    # Only the first hand is used; any additional hands are ignored.
    points = detected_hands[0].landmark
    coords = np.array([(p.x, p.y, p.z) for p in points])
    return coords.ravel()

# Capture loop: track one hand, keep a rolling window of landmark vectors and
# classify the window with the BiLSTM model once it is full. Press 'x' to quit.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input; the array is marked read-only while it
        # is processed (as in MediaPipe's own examples), then converted back
        # to BGR for drawing and display with OpenCV.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = hands.process(image)
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        landmarks = extract_landmarks(results)
        if landmarks is not None:
            frame_buffer.append(landmarks)
            mp_drawing.draw_landmarks(image, results.multi_hand_landmarks[0], mp_hands.HAND_CONNECTIONS)

            # Once the buffer holds a full sequence, run a prediction.
            # NOTE(review): frames without a hand are simply not buffered, so
            # a window may span a gap in time -- confirm this is acceptable.
            if len(frame_buffer) == sequence_length:
                # Normalise relative to the first frame in the buffer.
                first_frame = frame_buffer[0]
                normalized_sequence = np.array(frame_buffer) - first_frame

                # Add a leading batch axis for the model.
                normalized_sequence = normalized_sequence[np.newaxis, ...]
                # verbose=0 keeps Keras from printing a progress bar per frame.
                pred_probs = model.predict(normalized_sequence, verbose=0)
                pred_label = le.inverse_transform([np.argmax(pred_probs)])[0]
                confidence = np.max(pred_probs)

                text = f"{pred_label} ({confidence:.2f})"
                cv2.putText(image, text, (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

        # Show every frame, not only frames with a prediction: otherwise no
        # window exists until the buffer fills, the preview freezes whenever
        # the hand leaves, and waitKey has no window to receive 'x' from.
        cv2.imshow('Hand Gesture Recognition', image)

        if cv2.waitKey(1) & 0xFF == ord('x'):
            break
finally:
    # Release the MediaPipe graph, the camera and the window even on errors.
    hands.close()
    cap.release()
    cv2.destroyAllWindows()
