In [3]:
#!pip install opencv-python dlib face_recognition numpy pillow
#!pip install dlib face_recognition opencv-python pillow
#!pip install -q ultralytics
#!pip install opencv-python
#!pip install numpy
#!pip install mediapipe
#!pip install --upgrade mediapipe

In [2]:
logging.getLogger("ultralytics").setLevel(logging.ERROR)

class ComprehensiveRecognitionSystem:
    def __init__(self):
        """初始化系統"""
        self._init_directories()
        self._init_face_models()
        self._init_pose_model()
        self._init_hand_model()
        
    def _init_directories(self):
        """創建必要的目錄"""
        self.train_dir = "training_data"
        self.model_dir = "models"
        self.temp_dir = "temp"
        
        for directory in [self.train_dir, self.model_dir, self.temp_dir]:
            Path(directory).mkdir(exist_ok=True)
    
    def _init_face_models(self):
        """初始化臉部辨識模型"""
        # 初始化 dlib 模型
        self.face_detector = dlib.get_frontal_face_detector()
        
        # 檢查 shape_predictor 模型是否存在
        shape_predictor_path = r"C:\Users\User\project\shape_predictor_5_face_landmarks.dat"
        if not os.path.exists(shape_predictor_path):
            print(f"警告：找不到 {shape_predictor_path}，部分臉部對齊功能將不可用")
            self.shape_predictor = None
        else:
            self.shape_predictor = dlib.shape_predictor(shape_predictor_path)
            
        # 檢查臉部識別模型是否存在
        face_rec_model_path = r"C:\Users\User\project\dlib_face_recognition_resnet_model_v1.dat"
        if not os.path.exists(face_rec_model_path):
            print(f"警告：找不到 {face_rec_model_path}，部分臉部識別功能將不可用")
            self.face_recognizer = None
        else:
            self.face_recognizer = dlib.face_recognition_model_v1(face_rec_model_path)
        
        # OpenCV 的人臉檢測器作為備用
        self.haar_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        self.profile_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_profileface.xml')
        
        # 載入已知人臉編碼
        self.known_face_encodings = []
        self.known_face_names = []
        self.load_face_model()
    
    def _init_pose_model(self):
        """初始化 YOLO 姿態檢測模型"""
        try:
            self.pose_model = YOLO("yolo11n-pose.pt")
            print("YOLO 姿態檢測模型已成功載入")
        except Exception as e:
            print(f"警告：無法載入 YOLO 姿態檢測模型：{e}")
            self.pose_model = None
    
    def _init_hand_model(self):
        """初始化 MediaPipe 手勢檢測模型"""
        try:
            # 檢查 MediaPipe 手部檢測模型是否存在
            model_path = r"C:\Users\User\project\hand_landmarker.task"
            if not os.path.exists(model_path):
                print(f"警告：找不到 {model_path}，手勢辨識功能將不可用")
                self.hand_detector = None
            else:
                base_options = python.BaseOptions(model_asset_path=model_path)
                options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=2)
                self.hand_detector = vision.HandLandmarker.create_from_options(options)
                print("MediaPipe 手部檢測模型已成功載入")
        except Exception as e:
            print(f"警告：無法初始化 MediaPipe 手部檢測模型：{e}")
            self.hand_detector = None
        
        # 用於跟蹤手腕位置的字典
        self.prev_wrist_x = {}
    
    # ================ 臉部辨識功能 ================
    
    def load_face_model(self):
        """載入先前訓練過的臉部辨識模型"""
        model_path = os.path.join(self.model_dir, "face_encodings.pickle")
        if os.path.exists(model_path):
            with open(model_path, "rb") as f:
                data = pickle.load(f)
                self.known_face_encodings = data["encodings"]
                self.known_face_names = data["names"]
            print(f"臉部辨識模型載入成功，包含 {len(self.known_face_names)} 個人物")
        else:
            print("沒有找到現有臉部辨識模型，需要先訓練")
    
    def add_person(self, person_name, images_folder):
        """添加新人臉到辨識系統"""
        if not os.path.exists(images_folder):
            print(f"錯誤：找不到資料夾 {images_folder}")
            return False
        
        image_paths = [os.path.join(images_folder, f) for f in os.listdir(images_folder) 
                       if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        
        if not image_paths:
            print(f"錯誤：資料夾 {images_folder} 中沒有圖片檔案")
            return False
        
        face_encodings = []
        successful_images = 0
        
        print(f"開始處理 {person_name} 的 {len(image_paths)} 張圖片...")
        
        for img_path in image_paths:
            try:
                # 使用 PIL 載入圖片（更穩定）
                pil_image = Image.open(img_path)
                # 轉換為 RGB（移除 alpha 通道如果有的話）
                if pil_image.mode != 'RGB':
                    pil_image = pil_image.convert('RGB')
                # 轉換為 numpy 數組給 face_recognition 使用
                img = np.array(pil_image)
                
                # 檢測人臉 - 使用 face_recognition 庫（基於 dlib）
                face_locations = face_recognition.face_locations(img, model="hog")
                
                # 如果找不到人臉，嘗試使用 OpenCV 的正面和側面檢測器
                if not face_locations:
                    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
                    # 先檢測正面
                    faces = self.haar_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
                    if len(faces) == 0:
                        # 再檢測側面
                        faces = self.profile_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
                    
                    if len(faces) > 0:
                        # 轉換 OpenCV 坐標到 face_recognition 格式 (top, right, bottom, left)
                        x, y, w, h = faces[0]
                        face_locations = [(y, x+w, y+h, x)]
                
                # 如果還是找不到人臉
                if not face_locations:
                    print(f"警告：無法在 {img_path} 中檢測到人臉")
                    continue
                
                # 計算人臉特徵編碼
                encodings = face_recognition.face_encodings(img, face_locations)
                
                if encodings:
                    face_encodings.extend(encodings)
                    successful_images += 1
                    print(f"成功處理: {img_path}")
                else:
                    print(f"警告：無法在 {img_path} 中創建有效的人臉編碼")
            
            except Exception as e:
                print(f"處理 {img_path} 時發生錯誤: {e}")
        
        print(f"成功處理 {successful_images}/{len(image_paths)} 張圖片")
        
        # 如果有成功處理的圖片，將他們加入到模型中
        if face_encodings:
            self.known_face_encodings.extend(face_encodings)
            self.known_face_names.extend([person_name] * len(face_encodings))
            
            # 儲存模型
            self.save_face_model()
            return True
        else:
            print(f"錯誤：無法為 {person_name} 創建任何有效的人臉編碼")
            return False
    
    def save_face_model(self):
        """保存臉部辨識模型到檔案"""
        data = {
            "encodings": self.known_face_encodings,
            "names": self.known_face_names
        }
        
        with open(os.path.join(self.model_dir, "face_encodings.pickle"), "wb") as f:
            pickle.dump(data, f)
        
        print(f"臉部辨識模型已保存，包含 {len(self.known_face_names)} 個人臉")
    
    # ================ 手勢識別功能 ================
    
    def is_ok_sign(self, landmarks):
        """檢測是否為 OK 手勢"""
        thumb_tip = np.array([landmarks[4].x, landmarks[4].y])
        index_tip = np.array([landmarks[8].x, landmarks[8].y])
        distance = np.linalg.norm(thumb_tip - index_tip)
        middle_tip = landmarks[12].y
        ring_tip = landmarks[16].y
        pinky_tip = landmarks[20].y
        palm_base = landmarks[0].y
        return distance < 0.05 and middle_tip < palm_base and ring_tip < palm_base and pinky_tip < palm_base
    
    def is_peace_sign(self, landmarks):
        """檢測是否為 V 手勢"""
        def is_extended(pip, tip):
            return tip.y < pip.y
        return (
            is_extended(landmarks[6], landmarks[8]) and
            is_extended(landmarks[10], landmarks[12]) and
            landmarks[16].y > landmarks[14].y and
            landmarks[20].y > landmarks[18].y
        )
    
    def is_open_palm(self, landmarks):
        """檢測是否為張開手掌"""
        return all(landmarks[tip].y < landmarks[tip - 2].y for tip in [8, 12, 16, 20])
    
    def is_fist(self, landmarks):
        """檢測是否為握拳"""
        wrist = np.array([landmarks[0].x, landmarks[0].y])
        closed_fingers = [np.linalg.norm(wrist - np.array([landmarks[i].x, landmarks[i].y])) < 0.1 for i in [8, 12, 16, 20]]
        return all(closed_fingers)
    
    def is_clapping(self, hands):
        """檢測是否為拍手"""
        if len(hands) != 2:
            return False
        p1 = np.array([hands[0][5].x, hands[0][5].y])
        p2 = np.array([hands[1][5].x, hands[1][5].y])
        return np.linalg.norm(p1 - p2) < 0.08
    
    def is_raising_hand(self, wrist, shoulder):
        """檢測是否為舉手"""
        if np.all(wrist == 0) or np.all(shoulder == 0):
            return False
        
        y_diff = shoulder[1] - wrist[1]
        x_diff = abs(wrist[0] - shoulder[0])
        return y_diff > 20 and x_diff < 200
    
    def is_squatting(self, hip, knee):
        """檢測是否為蹲下"""
        if np.all(hip == 0) or np.all(knee == 0):
            return False
        return (knee[1] - hip[1]) < 40
    
    def is_waving_hand(self, track_id, wrist):
        """檢測是否為揮手"""
        if np.all(wrist == 0):
            return False
        x_now = wrist[0]
        waving = False
        if track_id in self.prev_wrist_x:
            diff = abs(x_now - self.prev_wrist_x[track_id])
            if diff > 20:
                waving = True
        self.prev_wrist_x[track_id] = x_now
        return waving
    
    # ================ 畫面處理功能 ================
    
    def draw_hand_landmarks(self, image, detection_result, thickness=3):
        """繪製手部標記點和手勢"""
        annotated_image = image.copy()
        hands = detection_result.hand_landmarks if detection_result else []
        clap_shown = self.is_clapping(hands) if hands else False

        for landmarks in hands or []:
            gesture = ""
            if self.is_ok_sign(landmarks):
                gesture = "OK Sign"
            elif self.is_peace_sign(landmarks):
                gesture = "Peace Sign"
            elif self.is_open_palm(landmarks):
                gesture = "Open Palm"
            elif self.is_fist(landmarks):
                gesture = "Fist"
            elif clap_shown:
                gesture = "Clap"

            for landmark in landmarks:
                x, y = int(landmark.x * image.shape[1]), int(landmark.y * image.shape[0])
                cv2.circle(annotated_image, (x, y), 7, (0, 0, 255), -1)
            
            connections = mp.solutions.hands.HAND_CONNECTIONS
            for connection in connections:
                start_idx, end_idx = connection
                start = landmarks[start_idx]
                end = landmarks[end_idx]
                start_point = (int(start.x * image.shape[1]), int(start.y * image.shape[0]))
                end_point = (int(end.x * image.shape[1]), int(end.y * image.shape[0]))
                cv2.line(annotated_image, start_point, end_point, (0, 255, 0), thickness)

            if gesture and not clap_shown:
                x, y = int(landmarks[0].x * image.shape[1]), int(landmarks[0].y * image.shape[0])
                cv2.putText(annotated_image, gesture, (x, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 0), 2)

        if clap_shown:
            cv2.putText(annotated_image, "Clap", (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 255), 3)

        return annotated_image
    
    # ================ 主要運行功能 ================
    
    def recognize_face(self, image, tolerance=0.6):
        """在圖像中識別人臉"""
        if not self.known_face_encodings:
            print("錯誤：臉部辨識模型尚未訓練，請先添加人物")
            return image
        
        # 檢測所有人臉
        face_locations = face_recognition.face_locations(image, model="hog")
        
        # 如果找不到人臉，使用 OpenCV 嘗試檢測正面和側面
        if not face_locations:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
            # 檢測正面
            frontal_faces = self.haar_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
            for (x, y, w, h) in frontal_faces:
                face_locations.append((y, x+w, y+h, x))
        
            # 檢測側面
            profile_faces = self.profile_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
            for (x, y, w, h) in profile_faces:
                # 檢查是否與已檢測的面孔重疊
                is_new = True
                for (top, right, bottom, left) in face_locations:
                    # 如果中心點落在已有的人臉框內，則不添加
                    center_x, center_y = x + w//2, y + h//2
                    if left <= center_x <= right and top <= center_y <= bottom:
                        is_new = False
                        break
                if is_new:
                    face_locations.append((y, x+w, y+h, x))
        
        # 如果有檢測到人臉，進行識別
        if face_locations:
            face_encodings = face_recognition.face_encodings(image, face_locations)
            
            # 辨識每個人臉
            for (top, right, bottom, left), face_encoding in zip(face_locations, face_encodings):
                # 與所有已知人臉比對
                matches = face_recognition.compare_faces(self.known_face_encodings, face_encoding, tolerance=tolerance)
                name = "未知"
                
                # 如果有匹配項，使用距離最近的那個
                if True in matches:
                    face_distances = face_recognition.face_distance(self.known_face_encodings, face_encoding)
                    best_match_index = np.argmin(face_distances)
                    confidence = 1 - face_distances[best_match_index]
                    if matches[best_match_index]:
                        name = f"{self.known_face_names[best_match_index]} ({confidence:.2f})"
                
                # 繪製人臉框和名字
                cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)
                cv2.rectangle(image, (left, bottom - 35), (right, bottom), (0, 255, 0), cv2.FILLED)
                cv2.putText(image, name, (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)
        
        return image
    
    def process_frame(self, frame):
        """處理單一幀，應用所有識別功能"""
        original_frame = frame.copy()
        results = None
        
        # 1. 使用 YOLO 進行姿態識別
        if self.pose_model:
            try:
                results = self.pose_model.track(frame, persist=True, tracker="botsort.yaml")
                
                if results:
                    for r in results:
                        annotated = r.plot()
                        
                        if hasattr(r, "keypoints") and r.keypoints is not None:
                            kps = r.keypoints.xy.numpy()
                            if len(kps) > 0:
                                for idx, kp in enumerate(kps):
                                    track_id = int(r.id[idx]) if hasattr(r, "id") and r.id is not None else idx
                                    
                                    # 提取關鍵點
                                    lw = kp[9]; rw = kp[10]
                                    lsh = kp[5]; rsh = kp[6]
                                    lhip = kp[11]; rhip = kp[12]
                                    lknee = kp[13]; rknee = kp[14]
                                    
                                    # 檢測姿態
                                    left_up = self.is_raising_hand(lw, lsh)
                                    right_up = self.is_raising_hand(rw, rsh)
                                    squat_left = self.is_squatting(lhip, lknee)
                                    squat_right = self.is_squatting(rhip, rknee)
                                    wave_left = self.is_waving_hand(f"{track_id}_L", lw)
                                    wave_right = self.is_waving_hand(f"{track_id}_R", rw)
                                    
                                    # 顯示結果
                                    text = ""
                                    if left_up and right_up:
                                        text = "both hands "
                                    elif left_up:
                                        text = "left hand "
                                    elif right_up:
                                        text = "right hand "
                                    
                                    if squat_left and squat_right:
                                        text += "knees down "
                                    if wave_left or wave_right:
                                        text += "waving "
                                    
                                    if text:
                                        x, y = int(kp[0][0]), int(kp[0][1])
                                        cv2.putText(annotated, f"ID {track_id} {text}", 
                                                   (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
                        
                        frame = annotated.copy()
            except Exception as e:
                print(f"YOLO 姿態識別錯誤: {e}")
        
        # 2. 使用 MediaPipe 進行手勢識別
        if self.hand_detector:
            try:
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
                detection_result = self.hand_detector.detect(mp_image)
                
                if detection_result:
                    frame = self.draw_hand_landmarks(frame, detection_result)
            except Exception as e:
                print(f"MediaPipe 手勢識別錯誤: {e}")
        
        # 3. 使用 face_recognition 進行臉部辨識
        try:
            frame = self.recognize_face(frame)
        except Exception as e:
            print(f"臉部辨識錯誤: {e}")
        
        return frame
    
    def process_image(self, image_path):
        """處理單張圖片"""
        try:
            # 載入圖片
            pil_image = Image.open(image_path)
            if pil_image.mode != 'RGB':
                pil_image = pil_image.convert('RGB')
            
            # 轉換為 numpy 數組
            image = np.array(pil_image)
            
            # 處理圖片
            result_img = self.process_frame(image)
            
            # 儲存結果
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            result_path = os.path.join(self.temp_dir, f"result_{timestamp}.jpg")
            
            # 將 numpy 數組轉回 PIL 圖像
            result_pil = Image.fromarray(result_img)
            
            # 保存圖片
            result_pil.save(result_path, quality=95)
            print(f"結果已保存至 {result_path}")
            
            return result_path
        except Exception as e:
            print(f"處理圖片時發生錯誤: {e}")
            return None
    
    def process_video(self, video_path, output_path=None, process_every_n_frames=1):
        """處理影片"""
        # 開啟影片
        cap = cv2.VideoCapture(video_path)
        
        if not cap.isOpened():
            print(f"錯誤：無法開啟影片檔案 {video_path}")
            return None
        
        # 獲取影片屬性
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        
        print(f"影片資訊：")
        print(f"- 解析度：{width}x{height}")
        print(f"- FPS：{fps}")
        print(f"- 總幀數：{total_frames}")
        print(f"- 時長：{total_frames/fps:.2f} 秒")
        
        # 如果需要儲存輸出影片
        out = None
        if output_path:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        
        # 處理影片
        frame_count = 0
        recognition_stats = {}
        
        print("開始處理影片...")
        
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                
                # 顯示進度
                if frame_count % 30 == 0:
                    progress = (frame_count / total_frames) * 100
                    print(f"處理進度：{progress:.1f}% ({frame_count}/{total_frames} 幀)")
                
                # 只處理每第 n 幀以提高效能
                if frame_count % process_every_n_frames == 0:
                    # 處理幀
                    processed_frame = self.process_frame(frame)
                    
                    # 在畫面左上角顯示幀數和時間
                    time_text = f"幀：{frame_count} | 時間：{frame_count/fps:.2f}s"
                    cv2.putText(processed_frame, time_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
                    
                    # 如果需要儲存輸出影片
                    if out:
                        out.write(processed_frame)
                    
                    # 顯示結果
                    cv2.imshow('影片處理', cv2.resize(processed_frame, (900, 680)))
                    if cv2.waitKey(1) & 0xFF == ord(' '):
                        print("使用者中斷處理")
                        break
                
                frame_count += 1
                
        finally:
            # 釋放資源
            cap.release()
            if out:
                out.release()
            cv2.destroyAllWindows()
        
        print(f"\n影片處理完成！")
        if output_path:
            print(f"輸出影片已儲存至：{output_path}")
        
        return recognition_stats
    
    def process_webcam(self, camera_id=0):
        """從網路攝影機進行即時識別"""
        # 開啟攝影機
        cap = cv2.VideoCapture(camera_id)
        
        if not cap.isOpened():
            print("錯誤：無法開啟攝影機")
            return
        
        print("開始即時辨識。按空白鍵結束。")
        
        try:
            while True:
                # 讀取一幀
                ret, frame = cap.read()
                
                if not ret:
                    print("無法從攝影機獲取畫面")
                    break
                
                # 處理幀
                processed_frame = self.process_frame(frame)
                
                # 顯示結果
                cv2.imshow('即時辨識', processed_frame)
                
                if cv2.waitKey(1) & 0xFF == ord(' '):
                    break
                
        finally:
            # 釋放資源
            cap.release()
            cv2.destroyAllWindows()


# 主程序
if __name__ == "__main__":
    # 初始化系統
    system = ComprehensiveRecognitionSystem()
    
    # 顯示功能選單
    print("\n==== 綜合人體姿態、手勢和臉部辨識系統 ====")
    print("1. 添加新人物到臉部辨識系統")
    print("2. 處理單張圖片")
    print("3. 處理影片")
    print("4. 從攝影機即時辨識")
    print("0. 退出")
    
    choice = input("\n請選擇功能 (0-4): ")
    
    if choice == "1":
        person_name = input("請輸入人物名稱: ")
        images_folder = input("請輸入包含該人物照片的資料夾路徑: ")
        system.add_person(person_name, images_folder)
        
    elif choice == "2":
        image_path = input("請輸入圖片路徑: ")
        system.process_image(image_path)
        
    elif choice == "3":
        video_path

臉部辨識模型載入成功，包含 69 個人物
YOLO 姿態檢測模型已成功載入
MediaPipe 手部檢測模型已成功載入

==== 綜合人體姿態、手勢和臉部辨識系統 ====
1. 添加新人物到臉部辨識系統
2. 處理單張圖片
3. 處理影片
4. 從攝影機即時辨識
0. 退出


In [4]:
import os
import cv2
import dlib
import face_recognition
from ultralytics import YOLO
import numpy as np
import logging
from pathlib import Path
import pickle
from PIL import Image
from datetime import datetime
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision


logging.getLogger("ultralytics").setLevel(logging.ERROR)

class VideoRecognitionSystem:
    def __init__(self):
        """初始化系統"""
        self._init_directories()
        self._init_face_models()
        self._init_pose_model()
        self._init_hand_detector()  
        
    """創建必要的目錄"""
    def _init_directories(self):
        self.model_dir = "models"
        self.temp_dir = "temp"
        
        for directory in [self.model_dir, self.temp_dir]:
            Path(directory).mkdir(exist_ok=True)
    
    """初始化手勢檢測器"""
    def _init_hand_detector(self):
        
        try:
            base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
            options = vision.HandLandmarkerOptions(base_options=base_options,
                                                  num_hands=4,
                                                  min_hand_detection_confidence=0.5,
                                                  min_hand_presence_confidence=0.5,
                                                  min_tracking_confidence=0.5)
            self.hand_detector = vision.HandLandmarker.create_from_options(options)
            print("MediaPipe 手勢檢測器已成功載入")
        except Exception as e:
            print(f"警告：無法載入 MediaPipe 手勢檢測器：{e}")
            # Fallback to a simple placeholder if loading fails
            self.hand_detector = type('', (), {
                'detect': lambda x: type('', (), {'hand_landmarks': []})()
            })()
    
    """初始化臉部辨識模型"""
    def _init_face_models(self):
        # 初始化 dlib 模型
        self.face_detector = dlib.get_frontal_face_detector()
        
        # 檢查模型是否存在
        shape_predictor_path = r"C:\Users\User\project\shape_predictor_5_face_landmarks.dat"
        if not os.path.exists(shape_predictor_path):
            print(f"警告：找不到 {shape_predictor_path}，部分臉部對齊功能將不可用")
            self.shape_predictor = None
        else:
            self.shape_predictor = dlib.shape_predictor(shape_predictor_path)
            
        
        face_rec_model_path = r"C:\Users\User\project\dlib_face_recognition_resnet_model_v1.dat"
        if not os.path.exists(face_rec_model_path):
            print(f"警告：找不到 {face_rec_model_path}，部分臉部識別功能將不可用")
            self.face_recognizer = None
        else:
            self.face_recognizer = dlib.face_recognition_model_v1(face_rec_model_path)
        
        # Opencv2 的人臉檢測器作為備用
        self.haar_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        self.profile_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_profileface.xml')
        
        # 載入已知人臉編碼
        self.known_face_encodings = []
        self.known_face_names = []
        self.load_face_model()
    

    """初始化 YOLO 動作檢測模型"""
    def _init_pose_model(self):
        try:
            self.pose_model = YOLO("yolov8n-pose.pt")
            print("YOLO 姿態檢測模型已成功載入")
        except Exception as e:
            print(f"警告：無法載入 YOLO 姿態檢測模型：{e}")
            self.pose_model = None
        
        # 用於跟蹤手腕位置的字典
        self.prev_wrist_x = {}
    

    """載入先前訓練過的臉部辨識模型"""
    def load_face_model(self):
        model_path = os.path.join(self.model_dir, "face_encodings.pickle")
        if os.path.exists(model_path):
            with open(model_path, "rb") as f:
                data = pickle.load(f)
                self.known_face_encodings = data["encodings"]
                self.known_face_names = data["names"]
            print(f"臉部辨識模型載入成功，包含 {len(self.known_face_names)} 個人物")
        else:
            print("沒有找到現有臉部辨識模型，臉部辨識功能將無法識別特定人物")
    
    # ================ YOLO動作辨識 ================
    
    """舉手"""
    def is_raising_hand(self, wrist, shoulder):
        if np.all(wrist == 0) or np.all(shoulder == 0):
            return False
        
        y_diff = shoulder[1] - wrist[1]
        x_diff = abs(wrist[0] - shoulder[0])
        return y_diff > 20 and x_diff < 200
    
    """蹲下"""
    def is_squatting(self, hip, knee):
        if np.all(hip == 0) or np.all(knee == 0):
            return False
        return (knee[1] - hip[1]) < 40
    
    """揮手"""
    def is_waving_hand(self, track_id, wrist):
        if np.all(wrist == 0):
            return False
        x_now = wrist[0]
        waving = False
        if track_id in self.prev_wrist_x:
            diff = abs(x_now - self.prev_wrist_x[track_id])
            if diff > 20:
                waving = True
        self.prev_wrist_x[track_id] = x_now
        return waving
    
    

    # ================ Mediapipe手勢辨識 ================

    """檢測OK手勢"""
    @staticmethod
    def is_ok_sign(landmarks):
        thumb_tip = np.array([landmarks[4].x, landmarks[4].y])
        index_tip = np.array([landmarks[8].x, landmarks[8].y])
        distance = np.linalg.norm(thumb_tip - index_tip)
        middle_tip = landmarks[12].y
        ring_tip = landmarks[16].y
        pinky_tip = landmarks[20].y
        palm_base = landmarks[0].y
        return distance < 0.05 and middle_tip < palm_base and ring_tip < palm_base and pinky_tip < palm_base
    
    """Peace手勢"""
    @staticmethod
    def is_peace_sign(landmarks):
        def is_extended(pip, tip):
            return tip.y < pip.y
        return (
            is_extended(landmarks[6], landmarks[8]) and
            is_extended(landmarks[10], landmarks[12]) and
            landmarks[16].y > landmarks[14].y and
            landmarks[20].y > landmarks[18].y
        )
    
    """張手手勢"""
    @staticmethod
    def is_open_palm(landmarks):
        return all(landmarks[tip].y < landmarks[tip - 2].y for tip in [8, 12, 16, 20])

    """檢測握拳手勢"""
    @staticmethod
    def is_fist(landmarks):
        wrist = np.array([landmarks[0].x, landmarks[0].y])
        closed_fingers = [np.linalg.norm(wrist - np.array([landmarks[i].x, landmarks[i].y])) < 0.1 for i in [8, 12, 16, 20]]
        return all(closed_fingers)

    """檢測拍手手勢"""
    @staticmethod
    def is_clapping(hands):
        if len(hands) != 2:
            return False
        p1 = np.array([hands[0][5].x, hands[0][5].y])
        p2 = np.array([hands[1][5].x, hands[1][5].y])
        return np.linalg.norm(p1 - p2) < 0.08
    
    # ================繪圖與手勢顯示 ===============
    def draw_landmarks_on_image(self, image, detection_result, thickness=3):
        annotated_image = image.copy()
        hands = detection_result.hand_landmarks
        
        # 修正：正確檢查是否有手部並檢測拍手
        clap_shown = False
        if hands and len(hands) >= 2:
            clap_shown = self.is_clapping(hands)

        for landmarks in hands or []:
            gesture = ""
            # 修正：移除不必要的 self 參數
            if self.is_ok_sign(landmarks):
                gesture = "OK Sign"
            elif self.is_peace_sign(landmarks):
                gesture = "Peace Sign"
            elif self.is_open_palm(landmarks):
                gesture = "Open Palm"
            elif self.is_fist(landmarks):
                gesture = "Fist"
            elif clap_shown:
                gesture = "Clap"

            for landmark in landmarks:
                x, y = int(landmark.x * image.shape[1]), int(landmark.y * image.shape[0])
                cv2.circle(annotated_image, (x, y), 7, (0, 0, 255), -1)
            connections = mp.solutions.hands.HAND_CONNECTIONS

            for connection in connections:
                start_idx, end_idx = connection
                start = landmarks[start_idx]
                end = landmarks[end_idx]
                start_point = (int(start.x * image.shape[1]), int(start.y * image.shape[0]))
                end_point = (int(end.x * image.shape[1]), int(end.y * image.shape[0]))
                cv2.line(annotated_image, start_point, end_point, (0, 255, 0), thickness)

            if gesture and not clap_shown:
                x, y = int(landmarks[0].x * image.shape[1]), int(landmarks[0].y * image.shape[0])
                cv2.putText(annotated_image, gesture, (x, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 0), 2)

        if clap_shown:
            cv2.putText(annotated_image, "Clap", (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 255), 3)

        return annotated_image
    
    # ================ 臉部辨識功能 ================
    
    """在圖像中識別人臉"""
    def recognize_face(self, image, tolerance=0.6):
        if not self.known_face_encodings:
            # 只檢測人臉位置但不進行識別
            face_locations = face_recognition.face_locations(image, model="hog")
            
            # 如果找不到人臉，使用 Opencv2 嘗試檢測正面和側面
            if not face_locations:
                gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
                # 檢測正面
                frontal_faces = self.haar_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
                for (x, y, w, h) in frontal_faces:
                    face_locations.append((y, x+w, y+h, x))
            
                # 檢測側面
                profile_faces = self.profile_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
                for (x, y, w, h) in profile_faces:
                    # 檢查是否與已檢測的面孔重疊
                    is_new = True
                    for (top, right, bottom, left) in face_locations:
                        # 如果中心點落在已有的人臉框內，則不添加
                        center_x, center_y = x + w//2, y + h//2
                        if left <= center_x <= right and top <= center_y <= bottom:
                            is_new = False
                            break
                    if is_new:
                        face_locations.append((y, x+w, y+h, x))
            
            # 繪製人臉框
            for (top, right, bottom, left) in face_locations:
                cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)
                cv2.putText(image, "人臉", (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)
        else:
            # 檢測所有人臉
            face_locations = face_recognition.face_locations(image, model="hog")
            
            # 如果找不到人臉，使用 Opencv2 嘗試檢測正面和側面
            if not face_locations:
                gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
                # 檢測正面
                frontal_faces = self.haar_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
                for (x, y, w, h) in frontal_faces:
                    face_locations.append((y, x+w, y+h, x))
            
                # 檢測側面
                profile_faces = self.profile_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
                for (x, y, w, h) in profile_faces:
                    # 檢查是否與已檢測的面孔重疊
                    is_new = True
                    for (top, right, bottom, left) in face_locations:
                        # 如果中心點落在已有的人臉框內，則不添加
                        center_x, center_y = x + w//2, y + h//2
                        if left <= center_x <= right and top <= center_y <= bottom:
                            is_new = False
                            break
                    if is_new:
                        face_locations.append((y, x+w, y+h, x))
            
            # 如果有檢測到人臉，進行識別
            if face_locations:
                face_encodings = face_recognition.face_encodings(image, face_locations)
                
                # 辨識每個人臉
                for (top, right, bottom, left), face_encoding in zip(face_locations, face_encodings):
                    # 與所有已知人臉比對
                    matches = face_recognition.compare_faces(self.known_face_encodings, face_encoding, tolerance=tolerance)
                    name = "未知"
                    
                    # 如果有匹配項，使用距離最近的那個
                    if True in matches:
                        face_distances = face_recognition.face_distance(self.known_face_encodings, face_encoding)
                        best_match_index = np.argmin(face_distances)
                        confidence = 1 - face_distances[best_match_index]
                        if matches[best_match_index]:
                            name = f"{self.known_face_names[best_match_index]} ({confidence:.2f})"
                    
                    # 繪製人臉框和名字
                    cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)
                    cv2.rectangle(image, (left, bottom - 35), (right, bottom), (0, 255, 0), cv2.FILLED)
                    cv2.putText(image, name, (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)
        
        return image
    
    def process_frame(self, frame):
        """處理單一幀，應用所有識別功能"""
        original_frame = frame.copy()
        results = None

        # 處理手勢識別
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
        hand_result = self.hand_detector.detect(mp_image)
        frame = self.draw_landmarks_on_image(frame, hand_result)
        
        # 1. 使用 YOLO 進行姿態識別
        if self.pose_model:
            try:
                results = self.pose_model.track(frame, persist=True, tracker="botsort.yaml")
                
                if results:
                    for r in results:
                        annotated = r.plot()
                        
                        if hasattr(r, "keypoints") and r.keypoints is not None:
                            kps = r.keypoints.xy.numpy()
                            if len(kps) > 0:
                                for idx, kp in enumerate(kps):
                                    track_id = int(r.id[idx]) if hasattr(r, "id") and r.id is not None else idx
                                    
                                    # 提取關鍵點
                                    lw = kp[9]; rw = kp[10]
                                    lsh = kp[5]; rsh = kp[6]
                                    lhip = kp[11]; rhip = kp[12]
                                    lknee = kp[13]; rknee = kp[14]
                                    
                                    # 檢測姿態
                                    left_up = self.is_raising_hand(lw, lsh)
                                    right_up = self.is_raising_hand(rw, rsh)
                                    squat_left = self.is_squatting(lhip, lknee)
                                    squat_right = self.is_squatting(rhip, rknee)
                                    wave_left = self.is_waving_hand(f"{track_id}_L", lw)
                                    wave_right = self.is_waving_hand(f"{track_id}_R", rw)
                                    
                                    # 顯示結果
                                    text = ""
                                    if left_up and right_up:
                                        text = "both hands "
                                    elif left_up:
                                        text = "left hand "
                                    elif right_up:
                                        text = "right hand "
                                    
                                    if squat_left and squat_right:
                                        text += "knees down "
                                    if wave_left or wave_right:
                                        text += "waving "
                                    
                                    if text:
                                        x, y = int(kp[0][0]), int(kp[0][1])
                                        cv2.putText(annotated, f"ID {track_id} {text}", 
                                                   (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
                        
                        frame = annotated.copy()
            except Exception as e:
                print(f"YOLO 姿態識別錯誤: {e}")
        
        # 2. 使用 face_recognition 進行臉部辨識
        try:
            frame = self.recognize_face(frame)
        except Exception as e:
            print(f"臉部辨識錯誤: {e}")
        
        return frame
    
    def process_video(self, video_path, output_path=None, process_every_n_frames=1):
        """處理影片"""
        # 開啟影片
        cap = cv2.VideoCapture(video_path)
        
        if not cap.isOpened():
            print(f"錯誤：無法開啟影片檔案 {video_path}")
            return None
        
        # 獲取影片屬性
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        
        print(f"影片資訊：")
        print(f"- 解析度：{width}x{height}")
        print(f"- FPS:{fps}")
        print(f"- 總幀數：{total_frames}")
        print(f"- 時長：{total_frames/fps:.2f} 秒")
        
        # 如果需要儲存輸出影片
        out = None
        if output_path:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        
        # 處理影片
        frame_count = 0
        
        print("開始處理影片...")
        
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                
                # 顯示進度
                if frame_count % 30 == 0:
                    progress = (frame_count / total_frames) * 100
                    print(f"處理進度：{progress:.1f}% ({frame_count}/{total_frames} 幀)")
                
                # 只處理每第 n 幀以提高效能
                if frame_count % process_every_n_frames == 0:
                    # 處理幀
                    processed_frame = self.process_frame(frame)
                    
                    # 在畫面左上角顯示幀數和時間
                    time_text = f"幀：{frame_count} | 時間：{frame_count/fps:.2f}s"
                    cv2.putText(processed_frame, time_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
                    
                    # 如果需要儲存輸出影片
                    if out:
                        out.write(processed_frame)
                    
                    # 顯示結果
                    cv2.imshow('影片處理', cv2.resize(processed_frame, (900, 680)))
                    if cv2.waitKey(1) & 0xFF == ord(' '):
                        print("使用者中斷處理")
                        break
                
                frame_count += 1
                
        finally:
            # 釋放資源
            cap.release()
            if out:
                out.release()
            cv2.destroyAllWindows()
        
        print(f"\n影片處理完成!")
        if output_path:
            print(f"輸出影片已儲存至：{output_path}")
    
    def process_webcam(self, camera_id=0):
        """從網路攝影機進行即時識別"""
        # 開啟攝影機
        cap = cv2.VideoCapture(camera_id)
        
        if not cap.isOpened():
            print("錯誤：無法開啟攝影機")
            return
        
        print("開始即時辨識。按空白鍵結束。")
        
        try:
            while True:
                # 讀取一幀
                ret, frame = cap.read()
                
                if not ret:
                    print("無法從攝影機獲取畫面")
                    break
                
                # 處理幀
                processed_frame = self.process_frame(frame)
                
                # 顯示結果
                cv2.imshow('即時辨識', processed_frame)
                
                if cv2.waitKey(1) & 0xFF == ord(' '):
                    break
                
        finally:
            # 釋放資源
            cap.release()
            cv2.destroyAllWindows()


# 主程序
if __name__ == "__main__":
    # 初始化系統
    system = VideoRecognitionSystem()
    
    # 顯示功能選單
    print("\n==== 影片人臉和姿態辨識系統 ====")
    print("1. 處理影片")
    print("2. 從攝影機即時辨識")
    print("0. 退出")
    
    choice = input("\n請選擇功能 (0-2): ")
    
    if choice == "1":
        video_path = input("請輸入影片路徑: ")
        output_path = input("請輸入輸出影片路徑 (留空則不儲存): ")
        if not output_path:
            output_path = None
        process_every = int(input("每隔幾幀處理一次 (建議: 1-5, 數字越大效能越佳但精度較低): ") or "1")
        system.process_video(video_path, output_path, process_every)
        
    elif choice == "2":
        camera_id = int(input("請輸入攝影機 ID (預設 0): ") or "0")
        system.process_webcam(camera_id)
        
    else:
        print("感謝使用！")

臉部辨識模型載入成功，包含 69 個人物
YOLO 姿態檢測模型已成功載入
MediaPipe 手勢檢測器已成功載入

==== 影片人臉和姿態辨識系統 ====
1. 處理影片
2. 從攝影機即時辨識
0. 退出
影片資訊：
- 解析度：1920x1080
- FPS:29
- 總幀數：79
- 時長：2.72 秒
開始處理影片...
處理進度：0.0% (0/79 幀)

影片處理完成!
輸出影片已儲存至：C:\Users\User\OneDrive\圖片\相機相簿\WIN_20250520_21_21_07_Pro.mp4


In [20]:
import os
import cv2
import dlib
import face_recognition
from ultralytics import YOLO
import numpy as np
import logging
from pathlib import Path
import pickle
from PIL import Image
from datetime import datetime
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import time
import traceback

# Suppress unnecessarily verbose ultralytics logs
logging.getLogger("ultralytics").setLevel(logging.ERROR)

class VideoRecognitionSystem:
    
    """初始化系統"""
    def __init__(self):
        self._init_directories()
        self._init_face_models()
        self._init_pose_model()
        self._init_hand_detector()  
        
    """創建必要的目錄"""
    def _init_directories(self):
        self.model_dir = "models"
        self.temp_dir = "temp"
        
        for directory in [self.model_dir, self.temp_dir]:
            Path(directory).mkdir(exist_ok=True)
    
    """初始化手勢檢測器"""
    def _init_hand_detector(self):
        try:
            # 修復：使用正確的MediaPipe手部檢測器初始化方式
            self.mp_hands = mp.solutions.hands
            self.hands = self.mp_hands.Hands(
                static_image_mode=False,
                max_num_hands=2,
                min_detection_confidence=0.3,
                min_tracking_confidence=0.3
            )
            self.mp_drawing = mp.solutions.drawing_utils
            print("MediaPipe 手勢檢測器已成功載入")
        except Exception as e:
            print(f"警告：無法載入 MediaPipe 手勢檢測器：{e}")
            self.hands = None
            self.mp_hands = None
            self.mp_drawing = None
    
    """初始化臉部辨識模型"""
    def _init_face_models(self):
        # 初始化 dlib 模型
        self.face_detector = dlib.get_frontal_face_detector()
        
        # 檢查模型是否存在
        shape_predictor_path = os.path.join(self.model_dir, "shape_predictor_5_face_landmarks.dat")
        if not os.path.exists(shape_predictor_path):
            print(f"警告：找不到 {shape_predictor_path}，部分臉部對齊功能將不可用")
            self.shape_predictor = None
        else:
            self.shape_predictor = dlib.shape_predictor(shape_predictor_path)
            
        face_rec_model_path = os.path.join(self.model_dir, "dlib_face_recognition_resnet_model_v1.dat")
        if not os.path.exists(face_rec_model_path):
            print(f"警告：找不到 {face_rec_model_path}，部分臉部識別功能將不可用")
            self.face_recognizer = None
        else:
            self.face_recognizer = dlib.face_recognition_model_v1(face_rec_model_path)
        
        # OpenCV 的人臉檢測器作為備用
        self.haar_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        self.profile_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_profileface.xml')
        
        # 載入已知人臉編碼
        self.known_face_encodings = []
        self.known_face_names = []
        self.load_face_model()

    """初始化 YOLO 模型"""
    def _init_pose_model(self):
        try:
            self.pose_model = YOLO("yolov8n-pose.pt")
            print("YOLO 姿態檢測模型已成功載入")
        except Exception as e:
            print(f"警告：無法載入 YOLO 姿態檢測模型：{e}")
            self.pose_model = None
        
        # 用於跟蹤手腕位置的字典 - 分別追蹤左右手
        self.prev_left_wrist_x = {}
        self.prev_right_wrist_x = {}
        self.wrist_position_history = {}  # 用於更穩定的揮手檢測
    
    """載入先前訓練過的臉部辨識模型"""
    def load_face_model(self):
        model_path = os.path.join(self.model_dir, "face_encodings.pickle")
        if os.path.exists(model_path):
            with open(model_path, "rb") as f:
                data = pickle.load(f)
                self.known_face_encodings = data["encodings"]
                self.known_face_names = data["names"]
            print(f"臉部辨識模型載入成功，包含 {len(self.known_face_names)} 個人物")
        else:
            print("沒有找到現有臉部辨識模型，臉部辨識功能將無法識別特定人物")
    
    # ================ 動作辨識功能 ================
    
    """舉手檢測"""
    def is_raising_hand(self, wrist, shoulder, elbow=None):
        if wrist is None or shoulder is None or len(wrist) < 2 or len(shoulder) < 2:
            return False
            
        if np.all(wrist == 0) or np.all(shoulder == 0):
            return False
        
        # 手腕必須比肩膀高
        y_diff = shoulder[1] - wrist[1]
        
        # 檢查手腕是否在肩膀附近的合理範圍內
        x_diff = abs(wrist[0] - shoulder[0])
        
        # 更寬鬆的條件來檢測舉手
        is_above_shoulder = y_diff > 15  # 降低閾值
        is_reasonable_distance = x_diff < 300  # 增加允許的橫向距離
        
        # 如果有肘部信息，加入肘部檢查
        if elbow is not None and len(elbow) >= 2 and not np.all(elbow == 0):
            # 手腕應該比肘部高（或接近）
            elbow_check = (elbow[1] - wrist[1]) >= -20  # 允許手腕稍低於肘部
            return is_above_shoulder and is_reasonable_distance and elbow_check
        
        return is_above_shoulder and is_reasonable_distance
    
    """蹲下檢測"""
    def is_squatting(self, hip, knee):
        if hip is None or knee is None or len(hip) < 2 or len(knee) < 2:
            return False
            
        if np.all(hip == 0) or np.all(knee == 0):
            return False
        return (knee[1] - hip[1]) < 50  # 調整閾值
    
    """揮手檢測"""
    def is_waving_hand(self, track_id, wrist, side='left'):
        if wrist is None or len(wrist) < 2 or np.all(wrist == 0):
            return False
        
        x_now = wrist[0]
        waving = False
        
        # 根據左右手選擇對應的歷史記錄
        if side == 'left':
            prev_dict = self.prev_left_wrist_x
        else:
            prev_dict = self.prev_right_wrist_x
        
        # 初始化位置歷史
        if track_id not in self.wrist_position_history:
            self.wrist_position_history[track_id] = {'left': [], 'right': []}
        
        # 記錄位置歷史（保留最近5個位置）
        history = self.wrist_position_history[track_id][side]
        history.append(x_now)
        if len(history) > 5:
            history.pop(0)
        
        # 檢查是否有足夠的歷史記錄來判斷揮手
        if len(history) >= 3:
            # 計算位置變化
            recent_changes = [abs(history[i] - history[i-1]) for i in range(1, len(history))]
            avg_change = sum(recent_changes) / len(recent_changes)
            
            # 如果平均變化超過閾值，認為是揮手
            if avg_change > 15:
                waving = True
        
        # 保持原有的簡單檢測作為備用
        if track_id in prev_dict:
            diff = abs(x_now - prev_dict[track_id])
            if diff > 25:  # 調整閾值
                waving = True
        
        prev_dict[track_id] = x_now
        return waving
    
    """ok手勢"""
    @staticmethod
    def is_ok_sign(landmarks):
        if not landmarks or len(landmarks) < 21:
            return False
        try:
            thumb_tip = np.array([landmarks[4].x, landmarks[4].y])
            index_tip = np.array([landmarks[8].x, landmarks[8].y])
            distance = np.linalg.norm(thumb_tip - index_tip)
            middle_tip = landmarks[12].y
            ring_tip = landmarks[16].y
            pinky_tip = landmarks[20].y
            palm_base = landmarks[0].y
            return distance < 0.05 and middle_tip < palm_base and ring_tip < palm_base and pinky_tip < palm_base
        except (IndexError, AttributeError):
            return False
    
    """peace手勢"""
    @staticmethod
    def is_peace_sign(landmarks):
        if not landmarks or len(landmarks) < 21:
            return False
        try:
            def is_extended(pip, tip):
                return tip.y < pip.y
            return (
                is_extended(landmarks[6], landmarks[8]) and
                is_extended(landmarks[10], landmarks[12]) and
                landmarks[16].y > landmarks[14].y and
                landmarks[20].y > landmarks[18].y
            )
        except (IndexError, AttributeError):
            return False
    
    """open_palm手勢"""
    @staticmethod
    def is_open_palm(landmarks):
        if not landmarks or len(landmarks) < 21:
            return False
        try:
            return all(landmarks[tip].y < landmarks[tip - 2].y for tip in [8, 12, 16, 20])
        except (IndexError, AttributeError):
            return False

    """握拳手勢"""
    @staticmethod
    def is_fist(landmarks):
        if not landmarks or len(landmarks) < 21:
            return False
        try:
            wrist = np.array([landmarks[0].x, landmarks[0].y])
            closed_fingers = [np.linalg.norm(wrist - np.array([landmarks[i].x, landmarks[i].y])) < 0.1 for i in [8, 12, 16, 20]]
            return all(closed_fingers)
        except (IndexError, AttributeError):
            return False

    """clap手勢"""
    @staticmethod
    def is_clapping(hands):
        if not hands or len(hands) != 2:
            return False
        try:
            p1 = np.array([hands[0][5].x, hands[0][5].y])
            p2 = np.array([hands[1][5].x, hands[1][5].y])
            return np.linalg.norm(p1 - p2) < 0.08
        except (IndexError, AttributeError):
            return False
    
    # ================修復的繪圖與手勢顯示 ===============

    """繪製手部標記並識別手勢"""
    def draw_landmarks_on_image(self, image, results):
        if image is None or self.hands is None:
            return image
        
        annotated_image = image.copy()
        
        if results.multi_hand_landmarks:
            clap_shown = False
            if len(results.multi_hand_landmarks) >= 2:
                clap_shown = self.is_clapping(results.multi_hand_landmarks)

            for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                gesture = ""
                hand_label = ""
                
                # 獲取手部標籤（左手或右手）
                if results.multi_handedness and idx < len(results.multi_handedness):
                    hand_label = results.multi_handedness[idx].classification[0].label
                
                # 檢測手勢
                if self.is_ok_sign(hand_landmarks.landmark):
                    gesture = f"OK Sign"
                elif self.is_peace_sign(hand_landmarks.landmark):
                    gesture = f"Peace Sign"
                elif self.is_open_palm(hand_landmarks.landmark):
                    gesture = f"Open Palm"
                elif self.is_fist(hand_landmarks.landmark):
                    gesture = f"Fist"
                elif clap_shown:
                    gesture = "Clap"

                try:
                    # 繪製手部標記
                    self.mp_drawing.draw_landmarks(
                        annotated_image, hand_landmarks, self.mp_hands.HAND_CONNECTIONS,
                        self.mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=2),
                        self.mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2)
                    )

                    # 顯示手勢名稱和手部標籤
                    if gesture and not clap_shown:
                        x = int(hand_landmarks.landmark[0].x * image.shape[1])
                        y = int(hand_landmarks.landmark[0].y * image.shape[0])
                        cv2.putText(annotated_image, gesture, (x, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
                    elif hand_label:
                        x = int(hand_landmarks.landmark[0].x * image.shape[1])
                        y = int(hand_landmarks.landmark[0].y * image.shape[0])
                        cv2.putText(annotated_image, f"{hand_label} Hand", (x, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                        
                except (IndexError, AttributeError, ValueError) as e:
                    print(f"繪製手部標記時發生錯誤: {e}")
                    continue

            # 如果檢測到拍手，顯示在畫面頂部
            if clap_shown:
                cv2.putText(annotated_image, "Clap", (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 255), 3)

        return annotated_image
    
    # ================ 臉部辨識功能 ================
    """圖像中識別人臉"""
    def recognize_face(self, image, tolerance=0.6):
        if image is None or image.size == 0:
            return image
        
        output_image = image.copy()
        
        try:
            face_locations = face_recognition.face_locations(image, model="hog")
            
            if not face_locations:
                if len(image.shape) == 2:
                    gray = image
                else:
                    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                
                frontal_faces = self.haar_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
                for (x, y, w, h) in frontal_faces:
                    face_locations.append((y, x+w, y+h, x))
            
                profile_faces = self.profile_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
                for (x, y, w, h) in profile_faces:
                    is_new = True
                    for (top, right, bottom, left) in face_locations:
                        center_x, center_y = x + w//2, y + h//2
                        if left <= center_x <= right and top <= center_y <= bottom:
                            is_new = False
                            break
                    if is_new:
                        face_locations.append((y, x+w, y+h, x))
            
            if not self.known_face_encodings:
                for (top, right, bottom, left) in face_locations:
                    cv2.rectangle(output_image, (left, top), (right, bottom), (0, 255, 0), 2)
                    cv2.putText(output_image, "人臉", (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)
            else:
                if face_locations:
                    face_encodings = face_recognition.face_encodings(image, face_locations)
                    
                    for idx, ((top, right, bottom, left), face_encoding) in enumerate(zip(face_locations, face_encodings)):
                        matches = face_recognition.compare_faces(self.known_face_encodings, face_encoding, tolerance=tolerance)
                        name = "未知"
                        
                        if True in matches:
                            face_distances = face_recognition.face_distance(self.known_face_encodings, face_encoding)
                            best_match_index = np.argmin(face_distances)
                            confidence = 1 - face_distances[best_match_index]
                            if matches[best_match_index]:
                                name = f"{self.known_face_names[best_match_index]} ({confidence:.2f})"
                        
                        cv2.rectangle(output_image, (left, top), (right, bottom), (0, 255, 0), 2)
                        cv2.rectangle(output_image, (left, bottom - 35), (right, bottom), (0, 255, 0), cv2.FILLED)
                        cv2.putText(output_image, name, (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)
        except Exception as e:
            print(f"臉部辨識錯誤: {e}")
            return image
        
        return output_image
    
    def process_frame(self, frame):
        """處理單一幀，應用所有識別功能"""
        if frame is None or frame.size == 0:
            print("收到空幀，跳過處理")
            return np.zeros((480, 640, 3), dtype=np.uint8)
        
        try:
            processed_frame = frame.copy()
            
            # 1. 處理手勢識別
            if self.hands is not None:
                try:
                    rgb_frame = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
                    hand_results = self.hands.process(rgb_frame)
                    processed_frame = self.draw_landmarks_on_image(processed_frame, hand_results)
                except Exception as e:
                    print(f"手勢識別錯誤: {e}")
            
            # 2. 使用 YOLO 進行姿態識別
            if self.pose_model:
                try:
                    results = self.pose_model.track(processed_frame, persist=True, tracker="botsort.yaml", verbose=False)
                    
                    if results and len(results) > 0:
                        for r in results:
                            try:
                                annotated = r.plot()
                                
                                if hasattr(r, "keypoints") and r.keypoints is not None and r.keypoints.xy is not None:
                                    kps = r.keypoints.xy.cpu().numpy() if hasattr(r.keypoints.xy, 'cpu') else r.keypoints.xy.numpy()
                                    
                                    if len(kps) > 0:
                                        for idx, kp in enumerate(kps):
                                            # 安全地獲取 track_id
                                            track_id = idx  # 預設值
                                            if hasattr(r, "id") and r.id is not None:
                                                try:
                                                    ids = r.id.cpu().numpy() if hasattr(r.id, 'cpu') else r.id.numpy()
                                                    if idx < len(ids):
                                                        track_id = int(ids[idx])
                                                except:
                                                    track_id = idx
                                            
                                            # YOLO Pose 關鍵點索引檢查
                                            if len(kp) >= 17:  # 確保有足夠的關鍵點
                                                # 提取關鍵點並檢查有效性
                                                def get_keypoint(kp, index):
                                                    if index < len(kp) and len(kp[index]) >= 2:
                                                        return kp[index]
                                                    return np.array([0, 0])
                                                
                                                left_wrist = get_keypoint(kp, 9)      # 左手腕
                                                right_wrist = get_keypoint(kp, 10)    # 右手腕
                                                left_shoulder = get_keypoint(kp, 5)   # 左肩
                                                right_shoulder = get_keypoint(kp, 6)  # 右肩
                                                left_elbow = get_keypoint(kp, 7)      # 左肘
                                                right_elbow = get_keypoint(kp, 8)     # 右肘
                                                left_hip = get_keypoint(kp, 11)       # 左髖
                                                right_hip = get_keypoint(kp, 12)      # 右髖
                                                left_knee = get_keypoint(kp, 13)      # 左膝
                                                right_knee = get_keypoint(kp, 14)     # 右膝
                                                
                                                # 檢測姿態
                                                left_hand_up = self.is_raising_hand(left_wrist, left_shoulder, left_elbow)
                                                right_hand_up = self.is_raising_hand(right_wrist, right_shoulder, right_elbow)
                                                squat_left = self.is_squatting(left_hip, left_knee)
                                                squat_right = self.is_squatting(right_hip, right_knee)
                                                wave_left = self.is_waving_hand(track_id, left_wrist, 'left')
                                                wave_right = self.is_waving_hand(track_id, right_wrist, 'right')
                                                
                                                # 顯示結果
                                                actions = []
                                                if left_hand_up and right_hand_up:
                                                    actions.append("both hands up")
                                                elif left_hand_up:
                                                    actions.append("left hand up")
                                                elif right_hand_up:
                                                    actions.append("right hand up")
                                                
                                                if squat_left and squat_right:
                                                    actions.append("squatting")
                                                
                                                if wave_left:
                                                    actions.append("waving left")
                                                if wave_right:
                                                    actions.append("waving right")
                                                
                                                # 在畫面上顯示動作
                                                if actions:
                                                    # 找一個合適的位置顯示文字
                                                    text_x, text_y = 50, 50 + idx * 30  # 預設位置，每個人物錯開
                                                    
                                                    if len(kp) > 0 and len(kp[0]) >= 2 and not np.all(kp[0] == 0):
                                                        text_x, text_y = int(kp[0][0]), int(kp[0][1]) - 30
                                                    elif not np.all(left_shoulder == 0):
                                                        text_x, text_y = int(left_shoulder[0]), int(left_shoulder[1]) - 30
                                                    elif not np.all(right_shoulder == 0):
                                                        text_x, text_y = int(right_shoulder[0]), int(right_shoulder[1]) - 30
                                                    
                                                    action_text = f"ID {track_id}: {', '.join(actions)}"
                                                    cv2.putText(annotated, action_text, 
                                                              (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
                                
                                processed_frame = annotated.copy()
                            except Exception as e:
                                print(f"YOLO 結果處理錯誤: {str(e)}")
                                continue
                except Exception as e:
                    print(f"YOLO 姿態識別錯誤: {str(e)}")
            
            # 3. 使用 face_recognition 進行臉部辨識
            try:
                processed_frame = self.recognize_face(processed_frame)
            except Exception as e:
                print(f"臉部辨識錯誤: {str(e)}")
            
            return processed_frame
        
        except Exception as e:
            print(f"幀處理錯誤: {str(e)}")
            traceback.print_exc()
            if frame is not None and frame.size > 0:
                return frame
            else:
                return np.zeros((480, 640, 3), dtype=np.uint8)
    
    def process_video(self, video_path, output_path=None, process_every_n_frames=1):
        """處理影片"""
        if not os.path.exists(video_path):
            print(f"錯誤：影片檔案不存在 - {video_path}")
            return None
        
        cap = cv2.VideoCapture(video_path)
        
        if not cap.isOpened():
            print(f"錯誤：無法開啟影片檔案 {video_path}")
            return None
        
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        
        if fps <= 0:
            print("警告：無法獲取正確的 FPS，使用預設值 30")
            fps = 30
        if width <= 0 or height <= 0:
            print("警告：無法獲取正確的解析度，使用預設值 640x480")
            width, height = 640, 480
        if total_frames <= 0:
            print("警告：無法獲取正確的總幀數")
            total_frames = 1000
        
        print(f"影片資訊：")
        print(f"- 解析度：{width}x{height}")
        print(f"- FPS:{fps}")
        print(f"- 總幀數：{total_frames}")
        print(f"- 時長：{total_frames/fps:.2f} 秒")
        
        out = None
        if output_path:
            output_dir = r'C:\Users\User\project\a'
    
        if not os.path.exists(output_dir):
             os.makedirs(output_dir)

        # 加上檔案名稱
        output_path = os.path.join(output_dir, "result.mp4")
    
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    
        if not out.isOpened():
            print(f"錯誤：無法建立輸出影片檔案 {output_path}")
            out = None

        frame_count = 0

        
        print("開始處理影片...")
        
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                
                # 顯示進度
                if frame_count % 30 == 0:
                    progress = (frame_count / total_frames) * 100
                    print(f"處理進度：{progress:.1f}% ({frame_count}/{total_frames} 幀)")
                
                # 只處理每第 n 幀以提高效能
                if frame_count % process_every_n_frames == 0:
                    # 處理幀
                    processed_frame = self.process_frame(frame)
                    
                    # 在畫面左上角顯示幀數和時間
                    time_text = f"幀：{frame_count} | 時間：{frame_count/fps:.2f}s"
                    cv2.putText(processed_frame, time_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
                    
                    # 如果需要儲存輸出影片
                    if out:
                        out.write(processed_frame)
                    
                    # 顯示結果
                    cv2.imshow('影片處理', cv2.resize(processed_frame, (900, 680)))
                    if cv2.waitKey(1) & 0xFF == ord(' '):
                        print("使用者中斷處理")
                        break
                
                frame_count += 1
                
        finally:
            # 釋放資源
            cap.release()
            if out:
                out.release()
            cv2.destroyAllWindows()
        
        print(f"\n影片處理完成!")
        if output_path:
            print(f"輸出影片已儲存至：{output_path}")
    
    def process_webcam(self, camera_id=0):
        """從網路攝影機進行即時識別"""
        # 開啟攝影機
        cap = cv2.VideoCapture(camera_id)
        
        if not cap.isOpened():
            print("錯誤：無法開啟攝影機")
            return
        
        print("開始即時辨識。按空白鍵結束。")
        
        try:
            while True:
                # 讀取一幀
                ret, frame = cap.read()
                
                if not ret:
                    print("無法從攝影機獲取畫面")
                    break
                
                # 處理幀
                processed_frame = self.process_frame(frame)
                
                # 顯示結果
                cv2.imshow('即時辨識', processed_frame)
                
                if cv2.waitKey(1) & 0xFF == ord(' '):
                    break
                
        finally:
            # 釋放資源
            cap.release()
            cv2.destroyAllWindows()

# 主程序
if __name__ == "__main__":
    # 初始化系統
    system = VideoRecognitionSystem()
    
    # 顯示功能選單
    print("\n==== 影片人臉和姿態辨識系統 ====")
    print("1. 處理影片")
    print("2. 從攝影機即時辨識")
    print("0. 退出")
    
    choice = input("\n請選擇功能 (0-2): ")
    
    if choice == "1":
        video_path = input("請輸入影片路徑: ")
        output_path = input("請輸入輸出影片路徑 (留空則不儲存): ")
        if not output_path:
            output_path = None
        process_every = int(input("每隔幾幀處理一次 (建議: 1-5, 數字越大效能越佳但精度較低): ") or "1")
        system.process_video(video_path, output_path, process_every)
        
    elif choice == "2":
        camera_id = int(input("請輸入攝影機 ID (預設 0): ") or "0")
        system.process_webcam(camera_id)
        
    else:
        print("感謝使用！")

警告：找不到 models\shape_predictor_5_face_landmarks.dat，部分臉部對齊功能將不可用
警告：找不到 models\dlib_face_recognition_resnet_model_v1.dat，部分臉部識別功能將不可用
臉部辨識模型載入成功，包含 69 個人物
YOLO 姿態檢測模型已成功載入
MediaPipe 手勢檢測器已成功載入

==== 影片人臉和姿態辨識系統 ====
1. 處理影片
2. 從攝影機即時辨識
0. 退出
開始即時辨識。按空白鍵結束。
