### 프로젝트 2
[주제]
yolo-pose를 사용하여 사람 동작 탐지

[구현 기능]
1. 웹캠 열기
2. 화면내 사람의 자세를 분석하는 YOLO 모델 적용
3. 손을 들었는지 확인 -> 들었으면 알림 표시
4. 손을 들었으면 -> 무슨 손을 들었는지 표시

[추가 기능]
1. OX 맞추기
    > O: 양 손목의 좌표가 어느 범주내에서 유지될 때, 손목이 머리보다 높다
    > X: 왼쪽 손목이 오른쪽 어깨보다 오른쪽, 오른쪽 손목이 왼쪽 어깨보다 왼쪽에 있을 때

In [1]:
# 라이브러리 불러오기
import cv2
import numpy as np
from collections import deque
from ultralytics import YOLO
import time

In [5]:
# Yolo-pose 모델 불러오기
model = YOLO('yolov8n-pose.pt')
model.info()

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-pose.pt to 'yolov8n-pose.pt': 100%|██████████| 6.52M/6.52M [00:00<00:00, 8.19MB/s]


YOLOv8n-pose summary: 144 layers, 3,295,470 parameters, 0 gradients, 9.3 GFLOPs


(144, 3295470, 0, 9.2959952)

In [2]:
# 보조 함수
def angle_deg(a, b, c, eps=1e-6):
    """세 점 a-b-c가 이루는 각(b를 꼭짓점) [deg]"""
    a, b, c = np.array(a, float), np.array(b, float), np.array(c, float)
    ab, cb = a-b, c-b
    cosv = np.dot(ab, cb) / (np.linalg.norm(ab)*np.linalg.norm(cb) + eps)
    cosv = np.clip(cosv, -1.0, 1.0)
    return float(np.degrees(np.arccos(cosv)))

In [2]:
import cv2
import numpy as np
from collections import deque
from ultralytics import YOLO
import time

MODEL_WEIGHTS = "yolov8n-pose.pt"
CONF_THRESH = 0.5          # 탐지 신뢰도(박스)
KP_CONF_THRESH = 0.4       # 키포인트 신뢰도
STABILITY_WINDOW = 10      # O 판정을 위한 최근 프레임 개수
STABILITY_TOL_PX = 20      # 손목이 '작은 범위'에 있다고 볼 픽셀 허용치(표준편차 상한)
TEXT_THICK = 2
TEXT_SCALE = 0.7
LINE_THICK = 2

# COCO 17 keypoints 이름 (Ultralytics YOLO-Pose 기준)
KP_NAMES = [
    "nose", "left_eye", "right_eye", "left_ear", "right_ear",
    "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
    "left_wrist", "right_wrist", "left_hip", "right_hip",
    "left_knee", "right_knee", "left_ankle", "right_ankle"
]
IDX = {name: i for i, name in enumerate(KP_NAMES)}

# 손목 위치 안정성 체크용 버퍼
wrist_hist = {
    "left": deque(maxlen=STABILITY_WINDOW),
    "right": deque(maxlen=STABILITY_WINDOW)
}

def draw_text(img, text, org, color=(0, 255, 0), scale=TEXT_SCALE, thick=TEXT_THICK, bg=True):
    if bg:
        (w, h), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scale, thick)
        x, y = org
        cv2.rectangle(img, (x, y - h - baseline - 4), (x + w + 6, y + 4), (0, 0, 0), -1)
    cv2.putText(img, text, org, cv2.FONT_HERSHEY_SIMPLEX, scale, color, thick, cv2.LINE_AA)

def get_head_y(kps_xy, kps_conf):
    candidates = []
    for name in ["nose", "left_eye", "right_eye"]:
        i = IDX[name]
        if kps_conf[i] >= KP_CONF_THRESH:
            candidates.append(kps_xy[i, 1])
    return min(candidates) if candidates else None

def is_hand_raised(kps_xy, kps_conf, side, margin=0):
    wrist_i = IDX[f"{side}_wrist"]
    shoulder_i = IDX[f"{side}_shoulder"]
    if kps_conf[wrist_i] < KP_CONF_THRESH or kps_conf[shoulder_i] < KP_CONF_THRESH:
        return False

    wrist_y = kps_xy[wrist_i, 1]
    shoulder_y = kps_xy[shoulder_i, 1]
    if wrist_y + margin < shoulder_y:  # 높이 비교 (y는 작을수록 위)
        return True
    return False

def which_hand_raised(kps_xy, kps_conf):
    left = is_hand_raised(kps_xy, kps_conf, "left")
    right = is_hand_raised(kps_xy, kps_conf, "right")
    if left and right:
        return "both"
    if left:
        return "left"
    if right:
        return "right"
    return "none"

def update_wrist_history(kps_xy, kps_conf):
    for side in ["left", "right"]:
        i = IDX[f"{side}_wrist"]
        if kps_conf[i] >= KP_CONF_THRESH:
            wrist_hist[side].append(tuple(kps_xy[i]))
        else:
            # 유효치 없으면 빈 프레임 채우지 않음
            pass

def wrists_stable_and_above_head(kps_xy, kps_conf):
    """O 조건: 양 손목이 머리보다 높고, 최근 STABILITY_WINDOW 프레임에서 위치 변동이 작음"""
    head_y = get_head_y(kps_xy, kps_conf)
    if head_y is None:
        return False

    # 현재 프레임에서 두 손목이 머리보다 높아야 함
    lw_i, rw_i = IDX["left_wrist"], IDX["right_wrist"]
    if not (kps_conf[lw_i] >= KP_CONF_THRESH and kps_conf[rw_i] >= KP_CONF_THRESH):
        return False
    if not (kps_xy[lw_i, 1] < head_y and kps_xy[rw_i, 1] < head_y):
        return False

    # 최근 프레임에서 손목 위치의 표준편차가 작아야 함 (작은 범주 내 유지)
    ok = True
    for side in ["left", "right"]:
        if len(wrist_hist[side]) < max(3, STABILITY_WINDOW // 2):  # 너무 짧으면 안정 판정 보류
            ok = False
            break
        arr = np.array(wrist_hist[side])  # (N,2)
        std_xy = arr.std(axis=0)          # (2,)
        if std_xy[0] > STABILITY_TOL_PX or std_xy[1] > STABILITY_TOL_PX:
            ok = False
            break
    return ok

def is_X_pose(kps_xy, kps_conf):
    lw_i, rw_i = IDX["left_wrist"], IDX["right_wrist"]
    rs_i, ls_i = IDX["right_shoulder"], IDX["left_shoulder"]
    needed = [lw_i, rw_i, rs_i, ls_i]
    if any(kps_conf[i] < KP_CONF_THRESH for i in needed):
        return False
    left_wrist_x = kps_xy[lw_i, 0]
    right_wrist_x = kps_xy[rw_i, 0]
    right_shoulder_x = kps_xy[rs_i, 0]
    left_shoulder_x = kps_xy[ls_i, 0]
    return (left_wrist_x > right_shoulder_x) and (right_wrist_x < left_shoulder_x)

def pick_target_person(results):
    if results.keypoints is None or len(results.keypoints) == 0:
        return None, None

    # keypoints
    kps_xy_all = results.keypoints.xy  # (n,17,2) tensor-like
    kps_conf_all = results.keypoints.conf  # (n,17) conf

    # boxes로 가장 큰 사람 선택
    idx = 0
    if results.boxes is not None and len(results.boxes) > 0:
        areas = []
        for b in results.boxes.xyxy:
            x1, y1, x2, y2 = map(float, b)
            areas.append((x2 - x1) * (y2 - y1))
        idx = int(np.argmax(areas))

    kps_xy = np.array(kps_xy_all[idx].cpu())
    kps_conf = np.array(kps_conf_all[idx].cpu())
    return kps_xy, kps_conf

def main():
    model = YOLO(MODEL_WEIGHTS)

    cap = cv2.VideoCapture(0)  # 필요시 인덱스 변경
    if not cap.isOpened():
        raise RuntimeError("웹캠을 열 수 없습니다. VideoCapture 인덱스를 확인하세요.")

    fps_hist = deque(maxlen=30)
    last_time = time.time()

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # YOLO 추론
        results = model(frame, verbose=False, conf=CONF_THRESH)[0]

        # 키포인트/박스 시각화 프레임
        vis = results.plot()  # 기본 시각화

        # 타깃 한 명 선택 후 로직 적용
        kps_xy, kps_conf = pick_target_person(results)
        status_text = "No person"
        hand_text = ""
        ox_text = ""

        if kps_xy is not None:
            # 손 듦/어느 손인지
            hand_state = which_hand_raised(kps_xy, kps_conf)
            if hand_state == "both":
                status_text = "Up"
                hand_text = "Both"
            elif hand_state == "left":
                status_text = "Up"
                hand_text = "Left"
            elif hand_state == "right":
                status_text = "Up"
                hand_text = "Right"
            else:
                status_text = "Down"
                hand_text = ""

            # OX 로직
            update_wrist_history(kps_xy, kps_conf)
            if wrists_stable_and_above_head(kps_xy, kps_conf):
                ox_text = "O"
                ox_color = (0, 255, 0)
            elif is_X_pose(kps_xy, kps_conf):
                ox_text = "X"
                ox_color = (0, 0, 255)
            else:
                ox_text = ""

            # 텍스트 표시
            draw_text(vis, f"State: {status_text}", (20, 40), (0, 255, 255))
            if hand_text:
                draw_text(vis, f"Hands: {hand_text}", (20, 80), (255, 255, 0))
            if ox_text:
                draw_text(vis, f"O or X: {ox_text}", (20, 120), ox_color)

        # FPS 표시
        now = time.time()
        fps = 1.0 / max(1e-6, (now - last_time))
        last_time = now
        fps_hist.append(fps)
        avg_fps = sum(fps_hist) / len(fps_hist)
        draw_text(vis, f"FPS: {avg_fps:.1f}", (20, vis.shape[0] - 20), (200, 200, 200), scale=0.6, thick=1)

        cv2.imshow("YOLO-Pose - Hand Raise & OX", vis)
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # ESC
            break

    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()


  kps_xy = np.array(kps_xy_all[idx].cpu())
  kps_conf = np.array(kps_conf_all[idx].cpu())
