In [10]:
!pip install mediapipe opencv-python tensorflow numpy


Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.12 -m pip install --upgrade pip[0m


In [13]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

# Wczytanie modelu
model = load_model('../model/2025-07-14/cnn_1d_model.keras')

# Wczytanie etykiet z pliku tekstowego
with open('../model/2025-07-14//labels.txt', 'r') as f:
    label_names = [line.strip() for line in f.readlines()]

# MediaPipe inicjalizacja
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(static_image_mode=False, model_complexity=1)
mp_drawing = mp.solutions.drawing_utils

# Bufor sekwencji
sequence = []
SEQ_LENGTH = 30
THRESHOLD = 0.8

# Normalizacja względem nadgarstka pierwszej klatki
def normalize_sequence(sequence_array):
    sequence_array = np.array(sequence_array)
    origin = sequence_array[0][0]  # Nadgarstek 1. klatki
    return sequence_array - origin

# Ekstrakcja 21 punktów prawej dłoni
def extract_hand_landmarks(results):
    if results.right_hand_landmarks:
        return np.array([[lm.x, lm.y, lm.z] for lm in results.right_hand_landmarks.landmark])
    return None

# Kamera
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = holistic.process(image_rgb)

    # Rysowanie dłoni
    mp_drawing.draw_landmarks(frame, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

    hand_landmarks = extract_hand_landmarks(results)

    if hand_landmarks is not None:
        sequence.append(hand_landmarks)

        if len(sequence) == SEQ_LENGTH:
            norm_seq = normalize_sequence(sequence)
            input_seq = norm_seq.reshape(1, SEQ_LENGTH, 63)

            pred = model.predict(input_seq)[0]
            max_prob = np.max(pred)
            label = label_names[np.argmax(pred)]

            if max_prob > THRESHOLD:
                cv2.putText(frame, f'{label} ({max_prob:.2f})', (10, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            sequence.pop(0)

    else:
        sequence = []

    cv2.imshow('Real-Time Sign Recognition (1DCNN)', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


I0000 00:00:1752488075.874602   19333 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1752488075.876785   23141 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.0.7), renderer: Mesa Intel(R) UHD Graphics 620 (KBL GT2)
W0000 00:00:1752488075.955480   23137 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1752488076.003272   23138 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1752488076.007339   23135 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1752488076.007619   23132 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:0

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22

In [19]:
import cv2
import mediapipe as mp
import numpy as np
import time
from tensorflow.keras.models import load_model

# Wczytanie modelu
model = load_model('../model/2025-07-14/cnn_1d_model.keras')

# Etykiety
with open('../model/2025-07-14/labels.txt', 'r') as f:
    label_names = [line.strip() for line in f.readlines()]

# MediaPipe
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(static_image_mode=False, model_complexity=1)
mp_drawing = mp.solutions.drawing_utils

# Parametry
sequence = []
recognized_text = ""
last_prediction_time = 0
cooldown_seconds = 2
SEQ_LENGTH = 30
THRESHOLD = 0.8

def normalize_sequence(sequence_array):
    sequence_array = np.array(sequence_array)
    origin = sequence_array[0][0]
    return sequence_array - origin

def extract_hand_landmarks(results):
    if results.right_hand_landmarks:
        return np.array([[lm.x, lm.y, lm.z] for lm in results.right_hand_landmarks.landmark])
    return None

# Kamera
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = holistic.process(image_rgb)
    mp_drawing.draw_landmarks(frame, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

    hand_landmarks = extract_hand_landmarks(results)

    current_time = time.time()
    time_since_last = current_time - last_prediction_time
    cooldown_remaining = max(0, cooldown_seconds - time_since_last)

    if hand_landmarks is not None:
        sequence.append(hand_landmarks)

        if len(sequence) == SEQ_LENGTH:
            norm_seq = normalize_sequence(sequence)
            input_seq = norm_seq.reshape(1, SEQ_LENGTH, 63)

            pred = model.predict(input_seq, verbose=0)[0]
            max_prob = np.max(pred)
            label = label_names[np.argmax(pred)]

            if max_prob > THRESHOLD and cooldown_remaining == 0:
                recognized_text += label
                last_prediction_time = current_time

            sequence.pop(0)
    else:
        sequence = []

    # Pasek cooldownu
    bar_x, bar_y = 10, 80
    bar_width, bar_height = 300, 20
    progress = int((1 - cooldown_remaining / cooldown_seconds) * bar_width)

    bar_color = (0, 255, 0) if cooldown_remaining == 0 else (0, 0, 255)
    cv2.rectangle(frame, (bar_x, bar_y), (bar_x + bar_width, bar_y + bar_height), (255, 255, 255), 2)
    cv2.rectangle(frame, (bar_x, bar_y), (bar_x + progress, bar_y + bar_height), bar_color, -1)
    cv2.putText(frame, f'Cooldown: {cooldown_remaining:.1f}s', (bar_x, bar_y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, bar_color, 2)

    # Napis
    cv2.putText(frame, f'Text: {recognized_text}', (10, 130),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)

    cv2.imshow('Real-Time Sign Recognition (Cooldown)', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


ValueError: The `function` of this `Lambda` layer is a Python lambda. Deserializing it is unsafe. If you trust the source of the config artifact, you can override this error by passing `safe_mode=False` to `from_config()`, or calling `keras.config.enable_unsafe_deserialization().