In [6]:
import os
import numpy as np
import pandas as pd

# Label column names read from both the GT and the prediction CSVs.
# Used as the default `classes` argument of the functions below; the order of
# this list fixes the column order of the label arrays built from it.
CLASSES = ["A", "S", "D"]

def _label_columns_to_int(frame, classes, role, path):
    """Extract the label columns of `frame` as an int array, rejecting NaN/inf."""
    values = frame[classes].to_numpy(dtype=float)
    bad_rows = np.flatnonzero(~np.isfinite(values).all(axis=1))
    if bad_rows.size:
        # Casting NaN/inf to int yields arbitrary integers instead of failing,
        # which would silently corrupt every metric computed downstream.
        raise ValueError(
            f"Non-finite label values in {role} file {path}: "
            f"{bad_rows.size} row(s), first at row index {int(bad_rows[0])}"
        )
    # Float labels (e.g. 1.0 / 0.0) are truncated to int, as before.
    return values.astype(int)


def load_labels_pair(pred_path: str, gt_path: str, classes=None, align: str = "head"):
    """
    Load the predicted and ground-truth label CSVs of a single video.

    Parameters
    ----------
    pred_path : str
        Prediction CSV; must contain every column named in `classes`.
    gt_path : str
        Ground-truth CSV; must contain every column named in `classes`.
    classes : sequence of str, optional
        Label column names, in the column order of the returned arrays.
        Defaults to the module-level ``CLASSES``, looked up at call time.
    align : {"head", "tail"}, default "head"
        Which end to keep when the two files differ in length. "head" keeps
        the first ``min(len)`` rows of each file (the original behaviour);
        "tail" keeps the last ``min(len)`` rows of each file.
        NOTE(review): "head" assumes row i of both files describes the same
        frame. If the shorter file is missing frames at its start rather than
        its end, "tail" is the matching choice -- confirm against the
        prediction pipeline.

    Returns
    -------
    (y_true, y_pred) : tuple of np.ndarray
        Two int arrays of shape (n, len(classes)), n being the shorter length.

    Raises
    ------
    ValueError
        If a label column is missing from either file, if a label value is
        NaN/inf, or if `align` is not "head" or "tail".
    """
    if classes is None:
        # Late binding: pick up the current CLASSES instead of whatever list
        # existed when this function was defined.
        classes = CLASSES
    classes = list(classes)

    if align not in ("head", "tail"):
        raise ValueError(f"align must be 'head' or 'tail', got {align!r}")

    gt = pd.read_csv(gt_path)
    pred = pd.read_csv(pred_path)

    missing_gt = [c for c in classes if c not in gt.columns]
    missing_pred = [c for c in classes if c not in pred.columns]
    if missing_gt or missing_pred:
        raise ValueError(
            f"Missing label columns. "
            f"gt missing={missing_gt}, pred missing={missing_pred}"
        )

    # GT may be stored as float while pred is int, so both are cast to int.
    y_true = _label_columns_to_int(gt, classes, "gt", gt_path)
    y_pred = _label_columns_to_int(pred, classes, "pred", pred_path)

    n = min(len(y_true), len(y_pred))
    if len(y_true) != len(y_pred):
        kept = "first" if align == "head" else "last"
        print(
            f"[WARN] Length mismatch: gt={len(y_true)}, pred={len(y_pred)}; "
            f"using {kept} {n} frames"
        )

    if align == "tail":
        # Slice from an explicit start index: `arr[-n:]` would return the
        # whole array when n == 0.
        return y_true[len(y_true) - n:], y_pred[len(y_pred) - n:]
    return y_true[:n], y_pred[:n]


def _safe_ratio(numerator, denominator) -> float:
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def _scores_from_counts(tp: int, tn: int, fp: int, fn: int) -> dict:
    """Derive precision / recall / F1 / accuracy from confusion counts."""
    return dict(
        precision=_safe_ratio(tp, tp + fp),
        recall=_safe_ratio(tp, tp + fn),
        # Equivalent to 2PR / (P + R), written on counts so no epsilon is needed.
        f1=_safe_ratio(2 * tp, 2 * tp + fp + fn),
        acc=_safe_ratio(tp + tn, tp + tn + fp + fn),
    )


def compute_frame_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    classes=None,
    ignore_all_zero: bool = False,
):
    """
    Compute frame-level multi-label metrics for binary label arrays.

    Parameters
    ----------
    y_true, y_pred : array-like of shape (N, C)
        0/1 labels, one row per frame and one column per entry of `classes`.
        Values are cast to int before use.
    classes : sequence of str, optional
        Class names, one per column. Defaults to the module-level ``CLASSES``,
        looked up at call time.
    ignore_all_zero : bool, default False
        If True, only frames whose ground-truth row contains at least one 1
        are evaluated.

    Returns
    -------
    stats : dict
        Per class: tp, tn, fp, fn, precision, recall, f1, acc and support
        (number of evaluated frames whose ground truth is 1).
    overall : dict
        micro_precision, micro_recall, micro_f1, micro_acc (counts pooled over
        all classes), exact_frame_acc (fraction of frames where every class
        matches) and n_frames (number of evaluated frames).

    Any ratio whose denominator is zero -- including exact_frame_acc when no
    frame is evaluated -- is reported as 0.0.

    Raises
    ------
    ValueError
        If the arrays are not 2-D, differ in shape, do not have one column per
        class, or contain values other than 0 and 1.
    """
    if classes is None:
        # Late binding: use the current CLASSES rather than the list that
        # existed when this function was defined.
        classes = CLASSES
    classes = list(classes)

    y_true = np.asarray(y_true).astype(int)
    y_pred = np.asarray(y_pred).astype(int)

    if y_true.ndim != 2 or y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must be 2-D arrays of identical shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )
    if y_true.shape[1] != len(classes):
        raise ValueError(
            f"Expected {len(classes)} label columns (one per class), "
            f"got {y_true.shape[1]}"
        )
    for arr_name, arr in (("y_true", y_true), ("y_pred", y_pred)):
        # Any other value would fall into none of TP/TN/FP/FN and silently
        # shrink the denominators.
        if not np.isin(arr, (0, 1)).all():
            raise ValueError(f"{arr_name} must contain only 0/1 labels")

    # Optionally keep only frames with at least one positive GT label.
    if ignore_all_zero:
        mask = y_true.sum(axis=1) > 0
        y_true = y_true[mask]
        y_pred = y_pred[mask]

    n_frames = int(len(y_true))

    stats = {}
    pooled = dict(tp=0, tn=0, fp=0, fn=0)
    for idx, cls in enumerate(classes):
        gt_pos = y_true[:, idx] == 1
        pr_pos = y_pred[:, idx] == 1

        counts = dict(
            tp=int((gt_pos & pr_pos).sum()),
            tn=int((~gt_pos & ~pr_pos).sum()),
            fp=int((~gt_pos & pr_pos).sum()),
            fn=int((gt_pos & ~pr_pos).sum()),
        )
        for key, value in counts.items():
            pooled[key] += value

        stats[cls] = dict(
            **counts,
            **_scores_from_counts(**counts),
            support=int(gt_pos.sum()),  # frames whose GT is 1 for this class
        )

    # Micro average: pool the confusion counts of all classes.
    micro = _scores_from_counts(**pooled)

    # Fraction of frames where every class is predicted correctly.
    if n_frames:
        exact_match = float((y_true == y_pred).all(axis=1).mean())
    else:
        # mean() of an empty array would be NaN (and emit a RuntimeWarning).
        exact_match = 0.0

    overall = dict(
        micro_precision=micro["precision"],
        micro_recall=micro["recall"],
        micro_f1=micro["f1"],
        micro_acc=micro["acc"],
        exact_frame_acc=exact_match,
        n_frames=n_frames,
    )
    return stats, overall


In [7]:
def evaluate_from_gt_folder(
    gt_root: str,
    pred_root: str,
    classes=None,
    ignore_all_zero: bool = False,
    gt_suffix: str = "_lange",
    pred_prefix: str = "yolo_to_tcn_",
) -> pd.DataFrame:
    """
    Evaluate every ground-truth CSV in `gt_root` against its prediction CSV.

    GT files are visited in sorted name order, so the row order of the result
    is the same on every run. For each ``<video><gt_suffix>.csv`` the matching
    prediction is ``<pred_prefix><video>.csv`` inside `pred_root`.

    With the default naming:
        GT   : video_normal_new_001_lange.csv
        Pred : yolo_to_tcn_video_normal_new_001.csv

    Parameters
    ----------
    gt_root : str
        Folder containing the ground-truth CSVs.
    pred_root : str
        Folder containing the prediction CSVs.
    classes : sequence of str, optional
        Label column names. Defaults to the module-level ``CLASSES``, looked up
        at call time so that re-running the cell defining ``CLASSES`` takes
        effect without re-defining this function.
    ignore_all_zero : bool, default False
        Forwarded to ``compute_frame_metrics``.
    gt_suffix : str, default "_lange"
        Suffix (before ".csv") a GT file name must end with.
    pred_prefix : str, default "yolo_to_tcn_"
        Prefix of prediction file names.

    Returns
    -------
    pd.DataFrame
        One row per evaluated video plus, when at least one video was
        evaluated, a final row with video == "mean" holding the column means.
        Files that are not CSVs are ignored; GT files with an unexpected name
        or without a prediction file are skipped with a printed warning.
        Errors raised while loading or scoring a pair are not caught here.
    """
    if classes is None:
        classes = CLASSES
    classes = list(classes)

    rows = []

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for fname in sorted(os.listdir(gt_root)):
        if not fname.lower().endswith(".csv"):
            continue

        base = os.path.splitext(fname)[0]  # e.g. video_normal_new_001_lange

        if not base.endswith(gt_suffix):
            # Name does not follow the GT convention: skip it.
            print(f"[WARN] Unexpected GT name (skip): {fname}")
            continue

        # Explicit end index so that an empty suffix keeps the whole name
        # (base[:-0] would be an empty string).
        video_base = base[: len(base) - len(gt_suffix)]
        gt_path = os.path.join(gt_root, fname)

        pred_name = f"{pred_prefix}{video_base}.csv"
        pred_path = os.path.join(pred_root, pred_name)

        if not os.path.exists(pred_path):
            print(f"[WARN] Pred not found for GT {fname} (expected {pred_name})")
            continue

        # Load the label pair and score it.
        y_true, y_pred = load_labels_pair(pred_path, gt_path, classes)
        per_cls, overall = compute_frame_metrics(
            y_true, y_pred, classes, ignore_all_zero=ignore_all_zero
        )

        row = {
            "video": video_base,
            "n_frames_eval": overall["n_frames"],
            "exact_frame_acc": overall["exact_frame_acc"],
            "micro_precision": overall["micro_precision"],
            "micro_recall": overall["micro_recall"],
            "micro_f1": overall["micro_f1"],
            "micro_acc": overall["micro_acc"],
        }

        for cls in classes:
            row[f"{cls}_precision"] = per_cls[cls]["precision"]
            row[f"{cls}_recall"]    = per_cls[cls]["recall"]
            row[f"{cls}_f1"]        = per_cls[cls]["f1"]
            row[f"{cls}_support"]   = per_cls[cls]["support"]

        rows.append(row)

    df = pd.DataFrame(rows)

    # Append the unweighted per-column mean over videos as a final row.
    if not df.empty:
        column_means = df.drop(columns="video").mean()
        mean_row = {"video": "mean", **column_means.to_dict()}
        df = pd.concat([df, pd.DataFrame([mean_row])], ignore_index=True)

    return df


In [8]:
if __name__ == "__main__":
    # Build the GT path from its components: the previous raw string used a
    # backslash separator, which only resolves as a nested folder on Windows.
    # os.path.join yields the same string there and a valid path elsewhere.
    gt_root   = os.path.join("test_in_model", "test_csv")
    pred_root = "test_pred"

    # 1) Evaluate over all frames.
    df_all = evaluate_from_gt_folder(
        gt_root, pred_root,
        ignore_all_zero=False
    )
    print("=== 전체 프레임 기준 ===")
    print(df_all.round(3))

    # 2) Evaluate only frames where at least one GT label is 1.
    df_event = evaluate_from_gt_folder(
        gt_root, pred_root,
        ignore_all_zero=True
    )
    print("\n=== 이벤트 프레임만 기준 ===")
    print(df_event.round(3))


[WARN] Pred not found for GT video_ idle_001_lange.csv (expected yolo_to_tcn_video_ idle_001.csv)
[WARN] Length mismatch: gt=216, pred=187; using first 187 frames
[WARN] Length mismatch: gt=245, pred=216; using first 216 frames
[WARN] Length mismatch: gt=308, pred=279; using first 279 frames
[WARN] Length mismatch: gt=253, pred=224; using first 224 frames
[WARN] Length mismatch: gt=252, pred=223; using first 223 frames
[WARN] Length mismatch: gt=205, pred=176; using first 176 frames
[WARN] Length mismatch: gt=481, pred=452; using first 452 frames
[WARN] Length mismatch: gt=346, pred=317; using first 317 frames
[WARN] Length mismatch: gt=264, pred=235; using first 235 frames
=== 전체 프레임 기준 ===
                    video  n_frames_eval  exact_frame_acc  micro_precision  \
0  video_missing1_new_001        187.000            0.791            0.745   
1  video_missing1_new_002        216.000            0.634            0.619   
2  video_missing1_new_003        279.000            0.692        