In [1]:
!pip install opencv-python mediapipe


Defaulting to user installation because normal site-packages is not writeable
Collecting opencv-python
  Downloading opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting mediapipe
  Downloading mediapipe-0.10.18-cp312-cp312-win_amd64.whl.metadata (9.9 kB)
Collecting absl-py (from mediapipe)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting flatbuffers>=2.0 (from mediapipe)
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting jax (from mediapipe)
  Downloading jax-0.4.35-py3-none-any.whl.metadata (22 kB)
Collecting jaxlib (from mediapipe)
  Downloading jaxlib-0.4.35-cp312-cp312-win_amd64.whl.metadata (1.0 kB)
Collecting numpy>=1.21.2 (from opencv-python)
  Downloading numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
Collecting opencv-contrib-python (from mediapipe)
  Downloading opencv_contrib_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  


[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
print(os.listdir("/content/"))

FileNotFoundError: [WinError 3] 系統找不到指定的路徑。: '/content/'

In [None]:
import cv2
import mediapipe as mp
import numpy as np
from google.colab.patches import cv2_imshow
import time

# 初始化 MediaPipe 手部辨識和姿勢偵測
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# 設定手部辨識和姿勢偵測的參數
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5)
pose = mp_pose.Pose(static_image_mode=False, model_complexity=1, enable_segmentation=True, min_detection_confidence=0.5)

def process_image(image_path, per_frame_output=True):
    # 讀取圖片
    image = cv2.imread(image_path)
    if image is None:
        print(f"無法讀取圖片，請確認路徑正確：{image_path}")
        return

    # 將影像從 BGR 轉換為 RGB
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # 手部偵測
    hand_results = hands.process(image_rgb)

    # 上半身骨架偵測
    pose_results = pose.process(image_rgb)

    # 只有當偵測到手部關鍵點時才儲存座標
    if hand_results.multi_hand_landmarks:
        frame_data = []

        # 繪製手部關節並儲存座標
        for hand_landmarks in hand_results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            for landmark in hand_landmarks.landmark:
                frame_data.append([landmark.x, landmark.y, landmark.z])

        # 繪製上半身骨架（排除臉部）並儲存座標
        if pose_results.pose_landmarks:
            upper_body_indices = list(range(11, 23))
            mp_drawing.draw_landmarks(image, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
            for i in upper_body_indices:
                landmark = pose_results.pose_landmarks.landmark[i]
                frame_data.append([landmark.x, landmark.y, landmark.z])

        frame_matrix = np.array(frame_data)
        if per_frame_output:
            print("單一幀資料矩陣:\n", frame_matrix)
        else:
            return frame_matrix

    # 顯示影像
    cv2_imshow(image)

def process_video(video_path, per_frame_output=True):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"無法讀取影片，請確認路徑正確：{video_path}")
        return

    all_frames_data = []
    frame_id = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("影片播放完畢或無法讀取影格")
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        hand_results = hands.process(frame_rgb)
        pose_results = pose.process(frame_rgb)

        # 只有當偵測到手部關鍵點時才記錄該幀數據
        if hand_results.multi_hand_landmarks:
            frame_data = []

            # 繪製手部關節並儲存座標
            for hand_landmarks in hand_results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                for landmark in hand_landmarks.landmark:
                    frame_data.extend([landmark.x, landmark.y, landmark.z])

            # 繪製上半身骨架（排除臉部）並儲存座標
            if pose_results.pose_landmarks:
                upper_body_indices = list(range(11, 23))
                mp_drawing.draw_landmarks(frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
                for i in upper_body_indices:
                    landmark = pose_results.pose_landmarks.landmark[i]
                    frame_data.extend([landmark.x, landmark.y, landmark.z])

            if per_frame_output:
                frame_matrix = np.array(frame_data).reshape(-1, 3)
                print(f"幀 {frame_id} 資料矩陣:\n", frame_matrix)
            else:
                all_frames_data.append([frame_id] + frame_data)

        frame_id += 1
        cv2_imshow(frame)
        time.sleep(0.033)

    cap.release()

    if not per_frame_output:
        max_length = max(len(data) for data in all_frames_data)
        all_frames_data_padded = [np.pad(data, (0, max_length - len(data)), 'constant') for data in all_frames_data]
        all_frames_matrix = np.array(all_frames_data_padded)
        print("全部幀數資料矩陣:\n", all_frames_matrix)
        return all_frames_matrix


# 主程式入口
choice = input("請選擇輸入方式：\n1. 單一圖片\n2. 影片\n輸入您的選擇 (1/2): ")
output_mode = input("請選擇輸出模式：\n1. 每幀輸出\n2. 全部幀數一起輸出\n輸入您的選擇 (1/2): ")
per_frame_output = (output_mode == '1')

if choice == '1':
    image_path = input("請輸入圖片路徑: ")
    process_image(image_path, per_frame_output=per_frame_output)
elif choice == '2':
    video_path = input("請輸入影片路徑: ")
    process_video(video_path, per_frame_output=per_frame_output)
else:
    print("無效的選擇")


KeyboardInterrupt: Interrupted by user

In [None]:
import cv2
import mediapipe as mp
import numpy as np
from google.colab.patches import cv2_imshow
import time

# 初始化 MediaPipe 手部辨識和姿勢偵測
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# 設定手部辨識和姿勢偵測的參數
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5)
pose = mp_pose.Pose(static_image_mode=False, model_complexity=1, enable_segmentation=True, min_detection_confidence=0.5)

#def normalize_z(z, z_min, z_max):
#    """將 z 座標進行最小-最大正規化，縮放至 [0, 1]"""
#    return (z - z_min) / (z_max - z_min) if z_max > z_min else 0.5

def process_video(video_path, per_frame_output=True):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"無法讀取影片，請確認路徑正確：{video_path}")
        return

    all_frames_data = []
    frame_id = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("影片播放完畢或無法讀取影格")
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        hand_results = hands.process(frame_rgb)
        pose_results = pose.process(frame_rgb)

        # 檢查是否偵測到手部，若有，繼續處理該幀
        if hand_results.multi_hand_landmarks:
            frame_data = []

            # 取得該幀所有 z 座標，準備進行正規化
            all_z_values = [
                landmark.z for hand_landmarks in hand_results.multi_hand_landmarks
                for landmark in hand_landmarks.landmark
            ] + [
                landmark.z for i in range(11, 23) if pose_results.pose_landmarks
                for landmark in [pose_results.pose_landmarks.landmark[i]]
            ]

            z_min, z_max = min(all_z_values, default=0), max(all_z_values, default=1)

            # 繪製手部關節並儲存座標，z 座標正規化
            for hand_landmarks in hand_results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                for landmark in hand_landmarks.landmark:
                    frame_data.extend([landmark.x, landmark.y, landmark.z])

            # 繪製上半身骨架（去掉臉部）並儲存座標，z 座標正規化
            if pose_results.pose_landmarks:
                upper_body_indices = list(range(11, 23))
                mp_drawing.draw_landmarks(frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
                for i in upper_body_indices:
                    landmark = pose_results.pose_landmarks.landmark[i]
                    frame_data.extend([landmark.x, landmark.y, landmark.z])

            # 將結果輸出
            if per_frame_output:
                frame_matrix = np.array(frame_data).reshape(-1, 3)
                print(f"幀 {frame_id} 資料矩陣:\n", frame_matrix)
            else:
                all_frames_data.append([frame_id] + frame_data)


        frame_id += 1
        cv2_imshow(frame)
        time.sleep(0.033)

    cap.release()

    if not per_frame_output:
        max_length = max(len(data) for data in all_frames_data)
        all_frames_data_padded = [np.pad(data, (0, max_length - len(data)), 'constant') for data in all_frames_data]
        all_frames_matrix = np.array(all_frames_data_padded)
        print("全部幀數資料矩陣:\n", all_frames_matrix)

        #
        np.savetxt('large_array.txt', all_frames_matrix, fmt='%f')  #矩陣印到別處顯示  /#####
        return all_frames_matrix


# 主程式入口
output_mode = input("請選擇輸出模式：\n1. 每幀輸出\n2. 全部幀數一起輸出\n輸入您的選擇 (1/2): ")
per_frame_output = (output_mode == '1')

video_path = input("請輸入影片路徑: ")
process_video(video_path, per_frame_output=per_frame_output)
