In [16]:
import cv2
import os
import numpy as np
import mediapipe as mp

In [17]:
# Short aliases for the two MediaPipe sub-modules used by the cells below:
# the drawing helpers and the holistic solution.
mp_drawing, mp_holistic = mp.solutions.drawing_utils, mp.solutions.holistic

In [18]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and return the frame together with the results.

    The frame is converted BGR -> RGB before ``model.process`` is called and
    converted back RGB -> BGR afterwards.

    Args:
        image: Frame handed to ``cv2.cvtColor`` with ``cv2.COLOR_BGR2RGB``.
        model: Object exposing a ``process(image)`` method.

    Returns:
        A ``(image, results)`` tuple: the frame after the colour round trip and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    detections = model.process(rgb_frame)
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, detections

def draw_landmarks(image, results):
    """Draw the pose and both hand landmark sets from ``results`` on ``image``.

    Each landmark attribute of ``results`` is passed, with its matching
    connection set from ``mp_holistic``, to ``mp_drawing.draw_landmarks``.
    Nothing is returned.
    """
    # (attribute on ``results``, connection constant on ``mp_holistic``),
    # listed in the order they are drawn.
    overlays = (
        ("pose_landmarks", "POSE_CONNECTIONS"),
        ("left_hand_landmarks", "HAND_CONNECTIONS"),
        ("right_hand_landmarks", "HAND_CONNECTIONS"),
    )
    for landmarks_attr, connections_attr in overlays:
        mp_drawing.draw_landmarks(
            image,
            getattr(results, landmarks_attr),
            getattr(mp_holistic, connections_attr),
        )

In [19]:
def _wrist_relative_coords(hand_landmarks):
    """Return one hand's (x, y, z) rows expressed relative to landmark 0.

    A falsy ``hand_landmarks`` (hand not available) yields a (21, 3) array of
    zeros instead.
    """
    if not hand_landmarks:
        return np.zeros((21, 3))
    coords = np.array([[point.x, point.y, point.z] for point in hand_landmarks.landmark])
    # Landmark 0 is the hand's reference point (the wrist); subtracting it
    # makes the coordinates independent of where the hand is in the frame.
    return coords - coords[0]


def extract_keypoints_normalized(results):
    """Build one flat keypoint vector for the left and right hand.

    Args:
        results: Object with ``left_hand_landmarks`` and
            ``right_hand_landmarks`` attributes; each is either falsy or has a
            ``landmark`` sequence whose items expose ``x``, ``y`` and ``z``.

    Returns:
        1-D NumPy array: the flattened wrist-relative coordinates of the left
        hand followed by those of the right hand. A missing hand contributes
        63 zeros.
    """
    left_coords = _wrist_relative_coords(results.left_hand_landmarks)
    right_coords = _wrist_relative_coords(results.right_hand_landmarks)
    return np.concatenate((left_coords.ravel(), right_coords.ravel()))

In [20]:
def calculate_shape_similarity(user_keypoints, reference_keypoints, threshold=0.05):
    """Compare per-finger hand shapes between a user and a reference pose.

    Both inputs hold 126 values: 21 (x, y, z) landmarks of the left hand
    followed by 21 landmarks of the right hand. For each hand the distance of
    every landmark to landmark 0 of that hand is computed. A finger's shape
    difference is the Euclidean norm of (user distances - reference distances)
    over that finger's four landmarks.

    Args:
        user_keypoints: Array-like containing exactly 126 numeric values; any
            shape that flattens to 126 (e.g. ``(126,)`` or ``(1, 126)``) or a
            plain list is accepted.
        reference_keypoints: Same layout as ``user_keypoints``.
        threshold: A finger is reported only when its shape difference is
            strictly greater than this value.

    Returns:
        A ``(score, errors)`` tuple.
        ``score`` is a float: ``1.0`` when no finger exceeds ``threshold``,
        otherwise ``1 - mean(d)`` where ``d`` are the shape differences of the
        reported fingers only (fingers within the threshold do not contribute,
        and the value is not clamped, so it can be negative).
        ``errors`` is a list of ``"<Hand> <Finger> Shape Error: <diff>"``
        strings, left hand first, fingers in thumb-to-pinky order.

    Raises:
        ValueError: If either input does not contain exactly 126 values.
    """
    landmarks_per_hand = 21
    coords_per_landmark = 3
    expected_size = 2 * landmarks_per_hand * coords_per_landmark

    # Landmark 0 is the per-hand reference point; each finger owns four
    # consecutive landmarks after it.
    finger_slices = {
        'Thumb': slice(1, 5),
        'Index': slice(5, 9),
        'Middle': slice(9, 13),
        'Ring': slice(13, 17),
        'Pinky': slice(17, 21),
    }

    def _split_hands(keypoints, name):
        """Return ``keypoints`` as a (2, 21, 3) array: [left hand, right hand]."""
        flat = np.asarray(keypoints).ravel()
        if flat.size != expected_size:
            raise ValueError(
                f"{name} must contain {expected_size} values "
                f"(2 hands x {landmarks_per_hand} landmarks x {coords_per_landmark} coordinates), "
                f"got {flat.size}"
            )
        return flat.reshape(2, landmarks_per_hand, coords_per_landmark)

    user_hands = _split_hands(user_keypoints, "user_keypoints")
    reference_hands = _split_hands(reference_keypoints, "reference_keypoints")

    errors = []
    shape_differences = []
    for hand, user_hand, ref_hand in zip(('Left', 'Right'), user_hands, reference_hands):
        # Distance of every landmark to landmark 0, computed once per hand and
        # then sliced per finger.
        user_distances = np.linalg.norm(user_hand - user_hand[0], axis=1)
        ref_distances = np.linalg.norm(ref_hand - ref_hand[0], axis=1)

        for finger, landmark_slice in finger_slices.items():
            shape_difference = np.linalg.norm(
                user_distances[landmark_slice] - ref_distances[landmark_slice]
            )
            if shape_difference > threshold:
                errors.append(f"{hand} {finger} Shape Error: {shape_difference:.2f}")
                shape_differences.append(shape_difference)

    if shape_differences:
        score = 1.0 - float(np.mean(shape_differences))
    else:
        score = 1.0
    return score, errors

In [None]:
KEYPOINTS_PATH = 'Data/colectedkeypoints'
CAMERA_INDEX = 1  # capture device index handed to cv2.VideoCapture

# Ask which sign to check; the answer names a sub-folder of KEYPOINTS_PATH.
label = input("Enter keypoints folder: ")

label_path = os.path.join(KEYPOINTS_PATH, label)

# Start from an explicit "nothing loaded" state so a reference left over from
# an earlier run of this cell is never reused for a different label.
reference_keypoints = None

if not os.path.exists(label_path):
    print(f"Path {label_path} does not exist.")
else:
    # os.listdir returns entries in arbitrary order; sort so that the file
    # chosen as reference is the same on every run.
    npy_files = sorted(name for name in os.listdir(label_path) if name.endswith('.npy'))
    if npy_files:
        # Only one array can serve as the reference: use the last file of the
        # sorted listing and load just that one.
        reference_keypoints = np.load(os.path.join(label_path, npy_files[-1]))
    else:
        print(f"No .npy files found in {label_path}.")

# Open the camera only after the prompt so the device is not held while
# waiting for user input.
cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
    print(f"Could not open camera {CAMERA_INDEX}.")

In [33]:
# Live practice loop: detect landmarks on every camera frame, compare the hand
# shapes with the loaded reference and overlay the per-finger errors.
# Press "q" in the preview window to stop.
try:
    if reference_keypoints is None:
        # Nothing to compare against, so do not start capturing frames.
        print("No reference keypoints loaded; nothing to compare against.")
    else:
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                image, results = mediapipe_detection(frame, holistic)
                draw_landmarks(image, results)

                # Keypoints of the user's hands in the current frame.
                user_keypoints = extract_keypoints_normalized(results)

                score, errors = calculate_shape_similarity(user_keypoints, reference_keypoints)

                # Score overlay is currently disabled:
                # cv2.putText(image, f"Score: {score:.2f}", (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # One text row per reported finger, starting at y=80 and
                # moving down 30 px per row.
                for row, error in enumerate(errors):
                    cv2.putText(image, error, (10, 80 + 30 * row), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

                cv2.imshow("Sign Language Practice", image)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
finally:
    # Always free the camera and close the preview window, even when
    # processing a frame raised an exception.
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1731068226.290906   11452 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1731068226.292052   18200 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: Mesa Intel(R) UHD Graphics (CML GT2)
W0000 00:00:1731068226.368478   18191 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731068226.393464   18191 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731068226.394993   18190 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731068226.395445   18195 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tens