In [1]:
import cv2
import numpy as np
import json
import os
from typing import List, Tuple


In [2]:
def make_windows(features: np.ndarray,
                 labels: np.ndarray,
                 window_size: int = 30,
                 stride: int = 5):
    """
    Cut a per-frame feature sequence into fixed-length sliding windows.

    Every window is labelled with the label of its centre frame, i.e. the
    frame at offset ``window_size // 2`` inside the window.

    Args:
        features: per-frame features, shape (T, D).
        labels: per-frame labels, shape (T,). If ``features`` and ``labels``
            differ in length, both are truncated to the shorter one.
        window_size: number of frames per window; must be >= 1.
        stride: step (in frames) between consecutive window starts;
            must be >= 1.

    Returns:
        ``(X, y)`` where ``X`` has shape (N, window_size, D) and ``y`` has
        shape (N,) with dtype int64. ``(None, None)`` is returned when the
        (truncated) sequence is shorter than ``window_size`` and no window
        fits.

    Raises:
        ValueError: if ``window_size`` or ``stride`` is smaller than 1.
    """
    # Reject nonsensical sizes up front: a zero window would otherwise fail
    # with an unrelated IndexError, and a negative stride would silently
    # look like "sequence too short".
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    # Align both sequences to the shorter of the two.
    T = min(len(features), len(labels))
    features = features[:T]
    labels = labels[:T]

    # Empty range when T < window_size.
    starts = range(0, T - window_size + 1, stride)
    if len(starts) == 0:
        return None, None

    center = window_size // 2
    X = np.stack([features[s:s + window_size] for s in starts], axis=0)
    y = np.array([labels[s + center] for s in starts], dtype=np.int64)
    return X, y


In [6]:
def extract_mediapipe_pose_seq(video_path: str) -> np.ndarray:
    """
    Extract a pose sequence from every frame of a video with MediaPipe Pose.

    Each frame becomes one row holding [x, y, z, visibility] for each of the
    33 pose landmarks, flattened in landmark order. Frames in which no pose
    is detected contribute an all-zero row so the output stays aligned with
    the frame index.

    Args:
        video_path: path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        float32 array of shape (T, 33*4) = (T, 132), where T is the number of
        frames that could be read. When no frame can be read the result has
        shape (0, 132).
    """
    # Imported lazily so the rest of the notebook works without MediaPipe.
    import mediapipe as mp

    num_landmarks = 33
    values_per_landmark = 4  # x, y, z, visibility
    feat_dim = num_landmarks * values_per_landmark

    mp_pose = mp.solutions.pose
    cap = cv2.VideoCapture(video_path)

    seq = []

    try:
        with mp_pose.Pose(
            static_image_mode=False,
            model_complexity=1,
            enable_segmentation=False,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        ) as pose:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV decodes to BGR; the frame is converted to RGB
                # before being passed to MediaPipe.
                image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                result = pose.process(image_rgb)

                if result.pose_landmarks:
                    vec = []
                    for p in result.pose_landmarks.landmark:
                        vec.extend([p.x, p.y, p.z, p.visibility])
                else:
                    # No detection: keep a placeholder row for this frame.
                    vec = [0.0] * feat_dim
                seq.append(vec)
    finally:
        # Release the capture handle even if decoding or inference raises.
        cap.release()

    if not seq:
        # np.array([]) would have shape (0,); keep the documented 2-D layout.
        return np.zeros((0, feat_dim), dtype=np.float32)

    return np.array(seq, dtype=np.float32)  # (T, 132)


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleTCN(nn.Module):
    """
    Small temporal-convolution classifier.

    Two 1-D convolutions (kernel 3, padding 1) with ReLU are applied along
    the time axis, the result is averaged over time, and a linear layer maps
    the pooled features to ``num_classes`` logits.

    Args:
        in_dim: number of features per time step.
        hidden: number of channels produced by both convolutions.
        num_classes: number of output logits.
    """

    def __init__(self, in_dim: int, hidden: int = 128, num_classes: int = 2):
        super().__init__()
        # kernel_size=3 with padding=1 leaves the temporal length unchanged.
        conv_opts = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv1d(in_dim, hidden, **conv_opts)
        self.conv2 = nn.Conv1d(hidden, hidden, **conv_opts)
        self.fc = nn.Linear(hidden, num_classes)

    def forward(self, x):
        """Map a batch of shape (B, T, F) to logits of shape (B, num_classes)."""
        # Conv1d expects channels before time: (B, T, F) -> (B, F, T).
        feats = x.transpose(1, 2)
        for conv in (self.conv1, self.conv2):
            feats = F.relu(conv(feats))
        # Global average pooling over the time axis -> (B, hidden).
        pooled = feats.mean(dim=-1)
        return self.fc(pooled)


In [8]:
def run_mediapipe_tcn(
    video_path: str = "sample_720p_15fps.mp4",
    label_path: str = "labels.npy",
    window_size: int = 30,
    stride: int = 5,
):
    """
    Smoke-test the MediaPipe Pose + SimpleTCN pipeline on one video.

    Loads per-frame labels, extracts the pose sequence from the video, cuts
    both into sliding windows and pushes the windows through a freshly
    initialised (untrained) SimpleTCN once. Progress and tensor shapes are
    printed; nothing is returned.

    Args:
        video_path: video file to read.
        label_path: ``.npy`` file with the per-frame labels.
        window_size: frames per sliding window.
        stride: step between consecutive windows.
    """
    print("=== [1] MediaPipe Pose + TCN Prototype ===")

    # Both input files must exist before any heavy work starts.
    inputs_present = all(os.path.exists(p) for p in (video_path, label_path))
    if not inputs_present:
        print("[ERROR] video 또는 labels.npy가 없습니다.")
        return

    frame_labels = np.load(label_path)  # (T,)

    # 1) Extract the pose sequence.
    pose_features = extract_mediapipe_pose_seq(video_path)  # (T, 132)
    print("pose_seq shape:", pose_features.shape, "labels shape:", frame_labels.shape)

    # 2) Sliding windows.
    windows, targets = make_windows(
        pose_features, frame_labels, window_size=window_size, stride=stride
    )
    if windows is None:
        print("[ERROR] 윈도우가 생성되지 않았습니다.")
        return

    print("X shape:", windows.shape, "y shape:", targets.shape)

    # 3) Convert to PyTorch tensors.
    window_batch = torch.tensor(windows, dtype=torch.float32)  # (N, T, F)
    target_batch = torch.tensor(targets, dtype=torch.long)     # (N,) -- not used by the forward pass below

    # 4) Build the model and run a single dummy forward pass.
    net = SimpleTCN(in_dim=window_batch.shape[-1], hidden=128, num_classes=2)

    with torch.no_grad():
        logits = net(window_batch)  # (N, 2)
    print("logits shape:", logits.shape)

    print("MediaPipe+TCN 프로토타입 forward 통과 완료.")


In [9]:
if __name__ == "__main__":
    # Entry point: comment/uncomment the calls below to choose which
    # prototype runs.

    # 1) MediaPipe Pose + TCN
    run_mediapipe_tcn()

    # 2) YOLOv8n-Pose + TCN
    # run_yolo_tcn()

    # 3) RGB 3D-CNN (MoViNet-like)
    # run_rgb_3dcnn()


=== [1] MediaPipe Pose + TCN Prototype ===


ImportError: 

IMPORTANT: PLEASE READ THIS FOR ADVICE ON HOW TO SOLVE THIS ISSUE!

Importing the numpy C-extensions failed. This error can happen for
many reasons, often due to issues with your setup or how NumPy was
installed.

We have compiled some common reasons and troubleshooting tips at:

    https://numpy.org/devdocs/user/troubleshooting-importerror.html

Please note and check the following:

  * The Python version is: Python3.12 from "c:\Users\user\miniconda3\envs\py312\python.exe"
  * The NumPy version is: "2.1.3"

and make sure that they are the versions you expect.
Please carefully study the documentation linked above for further help.

Original error was: DLL load failed while importing _multiarray_umath: 지정된 모듈을 찾을 수 없습니다.
