In [2]:
from mediapipe import solutions as mp_solutions
import cv2
import numpy as np
import mediapipe as mp

mp_pose = mp.solutions.pose
mp_drawing = mp_solutions.drawing_utils

def draw_pose_on_image(image_path):
    """Detect a pose in a still image and display it with the skeleton drawn.

    The image is loaded with OpenCV, passed through MediaPipe Pose in
    static-image mode, and shown in a window titled "Pose Landmarks". The
    call blocks until a key is pressed and then closes every OpenCV window.
    When no pose is detected the image is shown without an overlay.

    Args:
        image_path: Path of the image file to load.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising;
    # stop here with a clear message instead of failing inside cvtColor.
    if image is None:
        raise FileNotFoundError(f"Image not found: {image_path}")

    with mp_pose.Pose(static_image_mode=True) as pose:
        # The frame is converted from BGR to RGB before being processed.
        results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if results.pose_landmarks:
            # Draws directly onto `image` (the BGR copy that gets displayed).
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        try:
            cv2.imshow("Pose Landmarks", image)
            cv2.waitKey(0)  # 0 = wait indefinitely for a key press
        finally:
            # Close the preview window even if showing/waiting is interrupted.
            cv2.destroyAllWindows()
# Preview the detected skeleton on both reference images, one window at a time.
for reference_image in ('pose_start.JPG', 'pose_end.JPG'):
    draw_pose_on_image(reference_image)

In [3]:
import cv2
import numpy as np
import mediapipe as mp

mp_pose = mp.solutions.pose

def extract_reference_pose(image_path):
    """Return the 3D world landmarks MediaPipe Pose detects in an image.

    Args:
        image_path: Path of the reference image to load.

    Returns:
        A NumPy array with one ``[x, y, z]`` row per detected world landmark
        (shape ``(33, 3)`` for the reference images used in this notebook).

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
        ValueError: If no pose world landmarks are detected.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        raise FileNotFoundError(f"Image not found: {image_path}")

    with mp_pose.Pose(static_image_mode=True) as detector:
        # The image is converted from BGR to RGB before being processed.
        detection = detector.process(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
        world = detection.pose_world_landmarks
        if world:
            # Stack the per-landmark coordinates into a 2-D array.
            return np.array([[pt.x, pt.y, pt.z] for pt in world.landmark])
        raise ValueError("Pose landmarks not detected in the image.")
# Reference skeletons for the start and end of the motion.
reference_pose_start = extract_reference_pose('pose_start.JPG')
reference_pose_end = extract_reference_pose('pose_end.JPG')

# Sanity check: print both array shapes on one line.
print(*(pose.shape for pose in (reference_pose_start, reference_pose_end)))

(33, 3) (33, 3)


In [5]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd

mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

def compute_similarity(pose1, pose2):
    """Score how closely ``pose1`` matches the reference ``pose2``.

    The score is ``1 - ||pose1 - pose2|| / ||pose2||`` clamped below at 0,
    so identical poses score 1.0 and the score drops as the landmark
    arrays diverge.

    Args:
        pose1: Candidate pose as an array of shape ``(33, 3)``.
        pose2: Reference pose as an array of shape ``(33, 3)``.

    Returns:
        A non-negative similarity score; ``0.0`` when either input does not
        have shape ``(33, 3)``.
    """
    expected_shape = (33, 3)
    if not (pose1.shape == expected_shape and pose2.shape == expected_shape):
        return 0.0

    deviation = np.linalg.norm(pose1 - pose2)
    reference_norm = np.linalg.norm(pose2)
    # The small epsilon keeps the division defined for an all-zero reference.
    score = 1 - deviation / (reference_norm + 1e-5)
    return max(0.0, score)

def extract_pose_landmarks(results):
    """Convert a pose result's world landmarks into a NumPy array.

    Args:
        results: Object exposing ``pose_world_landmarks``, whose ``landmark``
            items carry ``x``, ``y`` and ``z`` attributes.

    Returns:
        An array with one ``[x, y, z]`` row per landmark, or ``None`` when
        ``results.pose_world_landmarks`` is empty/missing.
    """
    world = results.pose_world_landmarks
    if world:
        return np.array([[point.x, point.y, point.z] for point in world.landmark])
    return None

def generate_pose_data(input_path, reference_pose_start, reference_pose_end, output_path):
    """Annotate a video with pose landmarks and per-frame similarity scores.

    Every frame of ``input_path`` is run through MediaPipe Pose. Detected
    landmarks are drawn on the frame, the frame's 3D world landmarks are
    compared against both reference poses with ``compute_similarity``, and
    the two scores are written onto the frame. Each frame is shown in a
    "Pose Comparison" window and appended to the output video. Pressing
    ESC in the preview window stops processing early.

    Args:
        input_path: Path of the video to analyse.
        reference_pose_start: Reference pose array for the start position.
        reference_pose_end: Reference pose array for the end position.
        output_path: Path of the annotated video to write (``mp4v`` codec).

    Returns:
        A list of ``(sim_start, sim_end)`` tuples, one per frame in which
        world landmarks were detected.

    Raises:
        IOError: If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(input_path)
    # Fail loudly instead of silently producing an empty result and an
    # unusable output file when the input cannot be opened.
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Cannot open video: {input_path}")

    out = None
    history = []
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        # The reported frame rate can be 0 (or NaN) when the backend does not
        # know it; fall back to a sane default so the writer stays valid.
        if not fps > 0:
            fps = 30.0

        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
        with mp_pose.Pose(static_image_mode=False) as pose:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Convert the frame from BGR to RGB before processing.
                image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = pose.process(image_rgb)

                if results.pose_landmarks:
                    mp_drawing.draw_landmarks(
                        frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

                # Get the 3D pose information for this frame.
                landmarks_3d = extract_pose_landmarks(results)
                if landmarks_3d is not None:
                    sim_start = compute_similarity(landmarks_3d, reference_pose_start)
                    sim_end = compute_similarity(landmarks_3d, reference_pose_end)
                    history.append((sim_start, sim_end))

                    # Overlay the similarity scores on the frame.
                    cv2.putText(frame, f"Start Pose Sim: {sim_start:.2f}", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
                    cv2.putText(frame, f"End Pose Sim: {sim_end:.2f}", (10, 60),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 128, 255), 2)

                # Show the annotated frame and append it to the output video.
                cv2.imshow("Pose Comparison", frame)
                out.write(frame)
                if cv2.waitKey(1) & 0xFF == 27:  # 27 = ESC key
                    break
    finally:
        # Always release the capture, the writer and the preview window,
        # even if processing raised part-way through the video.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
    return history

# Process the pitch video and collect per-frame similarity scores.
history = generate_pose_data(
    input_path='pitch_0001.mp4',
    reference_pose_start=reference_pose_start,
    reference_pose_end=reference_pose_end,
    output_path='pitch_0001_detect.mp4',
)
# One row per frame with detected landmarks, one column per reference pose.
df = pd.DataFrame(data=history, columns=['sim_start', 'sim_end'])


In [6]:
# Highest similarity reached in any frame, for each reference pose.
df.max(axis=0)

sim_start    0.913429
sim_end      0.793367
dtype: float64