### Dự đoán trạng thái, lỗi sai của động tác Lunge

In [1]:
# Thêm autoreload vào để tự động reload lại module nếu có thay đổi code trong module
%load_ext autoreload
%autoreload 2

import mediapipe as mp
import numpy as np
import pandas as pd
import cv2
import warnings
warnings.filterwarnings('ignore')

import os, sys
sys.path.append(os.path.abspath(".."))
from utils.common import load_model

# Drawing helpers
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

### Thực hiện việc dự đoán với các model Scikit learn có độ chính xác cao nhất

In [2]:
# Load model
RF_model = load_model('./best_models/RF_model.pkl')
XGB_model = load_model('./best_models/XGB.pkl')

RF_model.set_params(verbose=0)
XGB_model.set_params(verbose=0)

# Load input scaler
input_scaler = load_model("./best_models/input_scaler.pkl")

### Các landmarks quan trọng
![image.png](attachment:image.png)

In [3]:
IMPORTANT_LMS = [
    "NOSE",
    "LEFT_SHOULDER",
    "RIGHT_SHOULDER",
    "LEFT_ELBOW",
    "RIGHT_ELBOW",
    "LEFT_WRIST",
    "RIGHT_WRIST",
    "LEFT_HIP",
    "RIGHT_HIP",
]

# Tạo các cột cho dữ liệu đầu vào
HEADERS = ["label"]
for landmark in IMPORTANT_LMS:
    for dim in ['x', 'y', 'z']:
        HEADERS.append(f"{landmark.lower()}_{dim}")

In [4]:
import os
import soundfile as sf

# Khởi tạo dictionary để lưu các đối tượng audio
error_types_audio = {}

# Thư mục chứa các file âm thanh
folder_path = "audios"
current_path = os.getcwd()

# Duyệt qua các file trong thư mục
for filename in os.listdir(folder_path):
    file_path = os.path.join(current_path, folder_path, filename)
    if os.path.isfile(file_path):
        data, samplerate = sf.read(file_path)
        filename = filename.replace(".wav", "")
        error_types_audio[filename] = (data, samplerate)

In [5]:
def extract_and_recalculate_landmarks(pose_landmarks):
    """
    Tịnh tiến thân người vào giữa bức hình, đồng thời dời lại trục toạ độ
    """
    hip_center_x = float((pose_landmarks[23].x + pose_landmarks[24].x) / 2)
    hip_center_y = float((pose_landmarks[23].y + pose_landmarks[24].y) / 2)

    new_center = (0.5, 0.5)
    delta_x = new_center[0] - hip_center_x
    delta_y = new_center[1] - hip_center_y

    # Khởi tạo mảng NumPy với kích thước đã biết trước
    data = np.zeros((len(IMPORTANT_LMS), 3))

    for idx, landmark in enumerate(IMPORTANT_LMS):
        key_point_id = mp_pose.PoseLandmark[landmark].value
        key_point = pose_landmarks[key_point_id]
        data[idx] = [key_point.x + delta_x - 0.5, key_point.y + delta_y - 0.5, key_point.z]

    return data.flatten().tolist()

def rescale_frame(frame, percent=50):
    '''
    Rescale a frame to a certain percentage compare to its original frame
    '''
    width = int(frame.shape[1] * percent/ 100)
    height = int(frame.shape[0] * percent/ 100)
    dim = (width, height)
    return cv2.resize(frame, dim, interpolation =cv2.INTER_AREA)

In [6]:
def get_class(encode_label: float):
    """
    Chuyển một label được encode thành class tương ứng
    """
    return {
        0: "Down",
        1: "Middle",
        2: "Up"
    }.get(encode_label)

### Dùng phương pháp hình học để xác định lỗi sai

In [14]:
import math
def calculate_angle(a, b, c, size_of_image):
    # Lấy tọa độ của 3 điểm
    a = (a[0] * size_of_image[0], a[1] * size_of_image[1])
    b = (b[0] * size_of_image[0], b[1] * size_of_image[1])
    c = (c[0] * size_of_image[0], c[1] * size_of_image[1])

    # Tính góc giữa 3 điểm
    ba_vector = [a[0] - b[0], a[1] - b[1]]
    bc_vector = [c[0] - b[0], c[1] - b[1]]
    ba_length = math.sqrt(ba_vector[0] ** 2 + ba_vector[1] ** 2)
    bc_length = math.sqrt(bc_vector[0] ** 2 + bc_vector[1] ** 2)

    return math.degrees(math.acos((ba_vector[0] * bc_vector[0] + ba_vector[1] * bc_vector[1]) / (ba_length * bc_length)))    


In [44]:
def rescale_frame(frame, percent=50):
    '''
    Rescale a frame to a certain percentage compare to its original frame
    '''
    width = int(frame.shape[1] * percent/ 100)
    height = int(frame.shape[0] * percent/ 100)
    dim = (width, height)
    return cv2.resize(frame, dim, interpolation =cv2.INTER_AREA)

def get_image_size(image):
    return image.shape[1], image.shape[0]

def define_errors(key_points, image_size):
    errors = []
    left_shoulder = [key_points[11].x, key_points[11].y]
    right_shoulder = [key_points[12].x, key_points[12].y]
    left_hip = [key_points[23].x, key_points[23].y]
    right_hip = [key_points[24].x, key_points[24].y]
    left_knee = [key_points[25].x, key_points[25].y]
    right_knee = [key_points[26].x, key_points[26].y]
    left_elbow = [key_points[13].x, key_points[13].y]
    right_elbow = [key_points[14].x, key_points[14].y]
    left_wrist = [key_points[15].x, key_points[15].y]
    right_wrist = [key_points[16].x, key_points[16].y]
    left_index = [key_points[19].x, key_points[19].y]
    right_index = [key_points[20].x, key_points[20].y]

    # Góc giữa vector vai, hông và đầu gối
    angle = calculate_angle(left_shoulder, left_hip, left_knee, image_size)
    angle = max(angle, calculate_angle(right_shoulder, right_hip, right_knee, image_size))
    if angle < 170:
        errors.append("body not straight")

    # Góc giữa vai, khuỷu tay và hông
    angle = calculate_angle(left_shoulder, left_hip, left_elbow, image_size)
    angle = max(angle, calculate_angle(right_shoulder, right_hip, right_elbow, image_size))
    if angle > 25:
        errors.append("arm not straight")

    # Góc giữa khuỷu tay, cổ tay và ngón tay trỏ
    angle = calculate_angle(left_elbow, left_wrist, left_index, image_size)
    angle = 180 - max(angle, calculate_angle(right_elbow, right_wrist, right_index, image_size))
    if angle >= 50:
        errors.append("wrist not straight")

    if errors == []:
        return "None"
    else:
        return ", ".join(errors)

In [42]:
def determine_stage(key_points, image_size):
    # Dựa vào góc giữa khuỷu tay, cổ tay và vai để xác định giai đoạn của động tác Bicep Curl
    left_shoulder = [key_points[11].x, key_points[11].y]
    right_shoulder = [key_points[12].x, key_points[12].y]
    left_elbow = [key_points[13].x, key_points[13].y]
    right_elbow = [key_points[14].x, key_points[14].y]
    left_wrist = [key_points[15].x, key_points[15].y]
    right_wrist = [key_points[16].x, key_points[16].y]

    # Tính góc giữa 2 vector
    left_angle = calculate_angle(left_shoulder, left_elbow, left_wrist, image_size)
    right_angle = calculate_angle(right_shoulder, right_elbow, right_wrist, image_size)
    
    if abs(left_angle - right_angle) < 10:
        angle = max(left_angle, right_angle)
    else:
        angle = min(left_angle, right_angle)
    print(left_angle, right_angle)

    if angle > 160:
        return "Down"
    elif angle > 90:
        return "Middle"
    else:
        return "Up"

### Detection

In [10]:
VIDEO_TEST = "./manh.mp4"

In [11]:
import threading
import sounddevice as sd

is_playing = False

def play_audio(data, samplerate):
    global is_playing
    is_playing = True
    sd.play(data, samplerate)
    sd.wait()
    is_playing = False

# Hàm bắt đầu một luồng để phát âm thanh
def start_audio_thread(data, samplerate):
    threading.Thread(target=play_audio, args=(data, samplerate,), daemon=True).start()

In [None]:
import copy

cap = cv2.VideoCapture(VIDEO_TEST)
current_stage = "Unknown"
prediction_probability_threshold = 0.55

# Số frame được bỏ qua
frame_skip = 1
frame_count = 0

# Số rep tập được
counter = 0
direction = "Up"

with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    while cap.isOpened():
        ret, image = cap.read()

        if not ret:
            print("Ignoring empty camera frame.")
            break
        
        frame_count += 1
        
        # Bỏ qua frame nếu không phải frame được xử lý
        if frame_count % frame_skip != 0:
            continue

        image = rescale_frame(image, percent=60)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = pose.process(image)

        if not results.pose_landmarks:
            print("No human found")
            continue

        initial_pose_landmarks = copy.deepcopy(results.pose_landmarks)
        image.flags.writeable = True

        # Cần khôi phục lại màu gốc của ảnh
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Draw landmarks and connections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, 
                                  mp_pose.POSE_CONNECTIONS, 
                                  mp_drawing.DrawingSpec(color=(244, 117, 66), thickness=2, circle_radius=2), 
                                  mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=1))
        
        # Get landmarks
        try:
            errors = define_errors(results.pose_landmarks.landmark)
            key_points = extract_and_recalculate_landmarks(results.pose_landmarks.landmark)
            X = pd.DataFrame([key_points], columns=HEADERS[1:])
            X = input_scaler.transform(X)

            predicted_stage = RF_model.predict(X)[0]
            predicted_stage = get_class(predicted_stage)
            prediction_probability_max = RF_model.predict_proba(X)[0].max()

            errors_list = errors.split(", ")
            if len(errors_list) == 1:
                error_types = errors_list[0].replace(" ", "_")
            elif len(errors_list) == 2:
                error_types = "_".join(errors_list).replace(" ", "_").replace("_not_straight", "", 1)
            else:
                error_types = "_".join([errors_list[0], errors_list[1]]).replace(" ", "_").replace("_not_straight", "", 1)

            if errors != "None" and error_types in error_types_audio and not is_playing:
                start_audio_thread(*error_types_audio[error_types])

            if current_stage == "Up" and predicted_stage == "Middle":
                direction = "Down"
            elif current_stage == "Middle" and predicted_stage == "Down" and direction == "Down":
                counter += 1
                direction = "Up"
            current_stage = predicted_stage

            cv2.rectangle(image, (0, 0), (image.shape[1], 60), (245, 117, 16), -1)            
            cv2.putText(image, "STAGE, REP", (30, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            if predicted_stage == "Up":
                cv2.putText(image, f"{predicted_stage}, {counter}", (40, 45), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 
                        (255, 255, 255), 2, cv2.LINE_AA)
            else:
                cv2.putText(image, f"{predicted_stage}, {counter}", (20, 45), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 
                        (255, 255, 255), 2, cv2.LINE_AA)

            cv2.putText(image, "ERRORS", (220, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

            # Hiển thị các lỗi theo hàng dọc
            y_position = 45
            for error in errors_list:
                cv2.putText(image, error, (220, y_position), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 150, 255), 2, cv2.LINE_AA)
                y_position += 30

        except Exception as e:
            print(f"Error: {e}")

        cv2.imshow("CV2", image)
        
        # Nhấn q để thoát
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

In [12]:
def detection_without_model():
    cap = cv2.VideoCapture(VIDEO_TEST)
    current_stage = "Unknown"

    # Số frame được bỏ qua
    frame_skip = 1
    frame_count = 0

    # Số rep tập được
    counter = 0
    direction = "Up"

    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, image = cap.read()

            if not ret:
                print("Ignoring empty camera frame.")
                break
            
            frame_count += 1
            
            # Bỏ qua frame nếu không phải frame được xử lý
            if frame_count % frame_skip != 0:
                continue

            image = rescale_frame(image, percent=60)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = pose.process(image)

            if not results.pose_landmarks:
                print("No human found")
                continue

            image.flags.writeable = True

            # Cần khôi phục lại màu gốc của ảnh
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Draw landmarks and connections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, 
                                    mp_pose.POSE_CONNECTIONS, 
                                    mp_drawing.DrawingSpec(color=(244, 117, 66), thickness=2, circle_radius=2), 
                                    mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=1))
            
            # Get landmarks
            try:
                image_size = get_image_size(image)
                errors = define_errors(results.pose_landmarks.landmark, image_size)
                predicted_stage = determine_stage(results.pose_landmarks.landmark, image_size)

                errors_list = errors.split(", ")
                if len(errors_list) == 1:
                    error_types = errors_list[0].replace(" ", "_")
                elif len(errors_list) == 2:
                    error_types = "_".join(errors_list).replace(" ", "_").replace("_not_straight", "", 1)
                else:
                    error_types = "_".join([errors_list[0], errors_list[1]]).replace(" ", "_").replace("_not_straight", "", 1)

                if errors != "None" and error_types in error_types_audio and not is_playing:
                    start_audio_thread(*error_types_audio[error_types])

                if current_stage == "Up" and predicted_stage == "Middle":
                    direction = "Down"
                elif current_stage == "Middle" and predicted_stage == "Down" and direction == "Down":
                    counter += 1
                    direction = "Up"
                current_stage = predicted_stage

                cv2.rectangle(image, (0, 0), (image.shape[1], 60), (245, 117, 16), -1)            
                cv2.putText(image, "STAGE, REP", (30, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                if predicted_stage == "Up":
                    cv2.putText(image, f"{predicted_stage}, {counter}", (40, 45), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 
                            (255, 255, 255), 2, cv2.LINE_AA)
                else:
                    cv2.putText(image, f"{predicted_stage}, {counter}", (20, 45), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 
                            (255, 255, 255), 2, cv2.LINE_AA)

                cv2.putText(image, "ERRORS", (220, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

                # Hiển thị các lỗi theo hàng dọc
                y_position = 45
                for error in errors_list:
                    cv2.putText(image, error, (220, y_position), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 150, 255), 2, cv2.LINE_AA)
                    y_position += 30

            except Exception as e:
                print(f"Error: {e}")

            cv2.imshow("CV2", image)
            
            # Nhấn q để thoát
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()

In [45]:
detection_without_model()

169.17484133650677 172.79896844526652
169.2910929931474 169.7664167085642
169.77374779127058 168.19011288236712
169.40284873211192 166.78865885596227
168.4668579919925 165.6522003208095
167.68615686207326 165.52114361451734
167.75780455131058 164.01109653755495
167.66708305810468 164.1569714879959
167.5361950985263 164.11613599445207
167.29314519497294 163.53720395218573
166.57273541571067 163.4170122130585
165.67402275337847 163.19869106791197
165.324870153712 163.40802973913415
164.50518218273115 166.05409807834246
163.66381945887358 164.96071278549113
162.84259436921576 164.39735187845432
162.6720245181335 164.44600237375587
162.22884589601276 162.28162053059728
159.95828755659008 161.55517555711285
158.2805950566665 160.99131195955832
157.51699960257494 161.04946353975492
156.6635140641032 160.97036561191473
156.22492264231423 160.8794368333252
156.73758968420017 161.07707622749442
155.722235221755 160.0158072889273
154.14251886268525 158.66356600648874
152.55898611112607 158.44273