1: Import thư viện

In [None]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.resnet50 import preprocess_input
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display, clear_output
import matplotlib.pyplot as plt

2: Khởi tạo MediaPipe và các thông số

In [None]:
# MediaPipe detectors, each limited to a single subject (one hand, one face)
# and configured with a minimum detection confidence of 0.7.
mp_hands = mp.solutions.hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.7,
)
mp_face_mesh = mp.solutions.face_mesh.FaceMesh(
    max_num_faces=1,
    min_detection_confidence=0.7,
)

# Absolute path of the trained Keras classifier on the author's machine.
MODEL_PATH = r"D:\file\project\Sign-Language-Classification-v1.0\train\vsl_cnn_pose_keras.h5"

# Class labels: the index of the model's strongest output is looked up here.
VIETNAMESE_ALPHABET = list("abcdđeghiklmnopqrstuvxy")

model = load_model(MODEL_PATH)

3: Hàm hỗ trợ (heatmap + hiển thị tiếng Việt)

In [None]:
def create_heatmap(landmarks, img_size=(224, 224), sigma=5):
    """Render normalized landmarks as a 3-channel Gaussian heatmap image.

    Args:
        landmarks: Flat sequence laid out as ``[x0, y0, z0, x1, y1, z1, ...]``.
            ``x`` and ``y`` are scaled by the image width and height to get
            pixel positions; ``z`` is ignored. A landmark whose ``x`` and
            ``y`` are both 0 is skipped, as is one that falls outside the
            image. A trailing value without a matching ``y`` is ignored.
        img_size: ``(height, width)`` of the output image.
        sigma: Standard deviation of each Gaussian blob in pixels (positive
            integer). Every blob is truncated to a square of half-width
            ``3 * sigma`` around its centre.

    Returns:
        ``uint8`` array of shape ``(height, width, 3)``. The accumulated
        blobs are scaled so the maximum is 255 and the same plane is copied
        into all three channels. With no drawable landmark the image is all
        zeros.

    Raises:
        ValueError: If ``sigma`` is not positive.
    """
    if sigma <= 0:
        raise ValueError(f"sigma must be positive, got {sigma!r}")

    height, width = img_size[0], img_size[1]
    radius = sigma * 3

    # The blob is identical for every landmark, so build it once and stamp it
    # with slice arithmetic instead of evaluating exp() pixel by pixel.
    offsets = np.arange(-radius, radius + 1)
    kernel = np.exp(
        -(offsets[:, None] ** 2 + offsets[None, :] ** 2) / (2 * sigma**2)
    )

    heatmap = np.zeros((height, width), dtype=np.float32)
    # Stop one short of the end so a dangling x without its y cannot raise
    # IndexError.
    for i in range(0, len(landmarks) - 1, 3):
        x, y = landmarks[i], landmarks[i + 1]
        if x == 0 and y == 0:
            continue
        x_px = int(x * width)
        y_px = int(y * height)
        if not (0 <= x_px < width and 0 <= y_px < height):
            continue

        # Clip the stamp to the image and take the matching kernel window.
        top, bottom = max(y_px - radius, 0), min(y_px + radius + 1, height)
        left, right = max(x_px - radius, 0), min(x_px + radius + 1, width)
        row0 = y_px - radius
        col0 = x_px - radius
        heatmap[top:bottom, left:right] += kernel[
            top - row0:bottom - row0, left - col0:right - col0
        ]

    peak = np.max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak
    heatmap = (heatmap * 255).astype(np.uint8)
    return np.stack([heatmap] * 3, axis=2)

# Fonts already resolved by _load_text_font, keyed by font size.
_FONT_CACHE = {}


def _load_text_font(font_size):
    """Return the font for ``font_size``, resolving it only on first use."""
    font = _FONT_CACHE.get(font_size)
    if font is None:
        try:
            font = ImageFont.truetype("arial.ttf", font_size)
        except IOError:
            print("Không tìm thấy font Arial, dùng mặc định.")
            font = ImageFont.load_default()
        _FONT_CACHE[font_size] = font
    return font


def draw_vietnamese_text(image, text, position, font_size=30, color=(0, 255, 0)):
    """Draw ``text`` on a BGR image using Pillow and return a new BGR image.

    The frame is converted from BGR to RGB, drawn on with Pillow and
    converted back to BGR before being returned.

    Args:
        image: Frame in OpenCV BGR channel order.
        text: String to draw.
        position: ``(x, y)`` anchor passed straight to ``ImageDraw.text``.
        font_size: Point size requested for the Arial font.
        color: Fill colour. It is applied while the image is in RGB order,
            so it is interpreted as ``(R, G, B)``.

    Returns:
        A new BGR array with the text rendered on it.

    The font is loaded once per ``font_size`` and reused on later calls, so
    the per-frame caller neither re-reads the font file nor repeats the
    "font not found" message on every frame. If ``arial.ttf`` cannot be
    opened, Pillow's default font is used instead.
    """
    image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(image_pil)
    draw.text(position, text, font=_load_text_font(font_size), fill=color)
    return cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)

4: Chạy webcam và hiển thị ảnh trong notebook

In [None]:
# Live recognition loop: read webcam frames, extract hand and face landmarks
# with MediaPipe, rasterize them into a heatmap, classify the heatmap with
# the loaded model and show the annotated frame inline in the notebook.
cap = cv2.VideoCapture(0)

# Face-mesh landmark indices whose coordinates are appended to the hand
# landmarks. Defined once here because the list never changes between frames.
key_points = [10, 152, 234, 454, 33, 263, 1, 61, 291, 199]

print("Nhấn Ctrl+C để dừng.")
try:
    while True:
        success, image = cap.read()
        if not success:
            print("Không thể đọc frame.")
            break

        # Mirror the frame horizontally; MediaPipe is fed RGB.
        image = cv2.flip(image, 1)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Hand detection: flatten every landmark to x, y, z.
        hands_results = mp_hands.process(image_rgb)
        hand_landmarks = []
        if hands_results.multi_hand_landmarks:
            for hand_landmark in hands_results.multi_hand_landmarks:
                for landmark in hand_landmark.landmark:
                    hand_landmarks.extend([landmark.x, landmark.y, landmark.z])

        # Face detection: keep only the selected key points.
        face_results = mp_face_mesh.process(image_rgb)
        face_landmarks = []
        if face_results.multi_face_landmarks:
            for face_landmark in face_results.multi_face_landmarks:
                for idx in key_points:
                    landmark = face_landmark.landmark[idx]
                    face_landmarks.extend([landmark.x, landmark.y, landmark.z])

        all_landmarks = hand_landmarks + face_landmarks

        # Classify only when exactly 93 values are present: the 30 values of
        # the 10 face key points plus 63 hand values (MediaPipe reports 21
        # landmarks per hand, 3 coordinates each).
        if len(all_landmarks) == 93:
            heatmap = create_heatmap(all_landmarks)
            heatmap_input = np.expand_dims(heatmap, axis=0)
            heatmap_input = preprocess_input(heatmap_input)

            # verbose=0 keeps Keras from printing a progress bar per frame.
            predictions = model.predict(heatmap_input, verbose=0)
            predicted_class = np.argmax(predictions, axis=1)[0]
            predicted_letter = VIETNAMESE_ALPHABET[predicted_class]
            confidence = np.max(predictions) * 100

            text = f'Letter: {predicted_letter} ({confidence:.2f}%)'
            image = draw_vietnamese_text(image, text, position=(10, 30), font_size=30)

        # Display in the notebook, replacing the previous frame's output.
        clear_output(wait=True)
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()

except KeyboardInterrupt:
    print("Dừng camera.")
finally:
    # Release the camera on every exit path (normal break, Ctrl+C or an
    # unexpected error) so the device is not left locked.
    cap.release()
    cv2.destroyAllWindows()
