In [27]:
import argparse
import os
import re
import sys
from typing import List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [28]:
# Location of the scenario summary CSV. When the SCENARIO_SUMMARY_CSV
# environment variable is set to a non-empty value it takes precedence, so the
# notebook can run on another machine without editing this cell; otherwise the
# original machine-specific default is used.
csv_path = (
    os.environ.get("SCENARIO_SUMMARY_CSV")
    or "C:/Users/user/Desktop/parkinsons/runs/scenario_summary.csv"
)

In [23]:
# ---------- robust helpers ----------
def get_series(df: pd.DataFrame, col: str) -> pd.Series:
    """Return column ``col`` of ``df`` as a Series, even if the label is duplicated.

    When several columns share the label, the first (left-most) one is returned.

    Args:
        df: Frame to read from.
        col: Column label to look up.

    Returns:
        The selected column as a ``pd.Series``.

    Raises:
        KeyError: If ``col`` is not a column of ``df`` (raised by ``get_loc``).
    """
    loc = df.columns.get_loc(col)
    if isinstance(loc, (slice, range)):
        # Contiguous run of duplicate labels: use its first position.
        pos = loc.start if loc.start is not None else 0
    elif isinstance(loc, np.ndarray):
        if loc.dtype == bool:
            # Non-contiguous duplicates come back as a boolean mask; the column
            # position is the index of the first True, not the mask value itself.
            pos = int(np.flatnonzero(loc)[0])
        else:
            pos = int(loc[0])
    else:
        pos = int(loc)
    # Integer-position selection always yields a single Series.
    return df.iloc[:, pos]

def pick_auc_macro_col(df: pd.DataFrame) -> str:
    """Pick the column that holds the macro-averaged AUC.

    Lookup order:
      1. Exact, well-known names, starting with ``auc_macro``.
      2. The first string column label that contains both "auc" and "macro"
         (case-insensitive).

    Args:
        df: Frame whose columns are searched.

    Returns:
        The label of the selected column.

    Raises:
        KeyError: If no column matches either rule.
    """
    known_names = (
        "auc_macro",
        "auc_macro_mean",
        "auc_mean_macro",
        "roc_auc_macro",
        "auc_macro_score",
    )
    for name in known_names:
        if name in df.columns:
            return name

    for label in df.columns:
        # Non-string labels (ints, tuples, ...) cannot be fed to re.search and
        # are never metric names, so they are skipped instead of raising TypeError.
        if not isinstance(label, str):
            continue
        if re.search(r"auc", label, re.I) and re.search(r"macro", label, re.I):
            return label

    raise KeyError("AUC 매크로 계열 컬럼을 찾지 못했습니다. (예: auc_macro, auc_macro_mean)")

def decide_color_by(df: pd.DataFrame, forced: Optional[str] = None) -> str:
    """Choose the column used to colour the bars.

    Precedence: an explicitly requested column (``forced``), then ``group``,
    then ``scenario``.

    Args:
        df: Frame whose columns are inspected.
        forced: Column requested by the caller; a falsy value means "auto".

    Returns:
        The name of the column to colour by.

    Raises:
        KeyError: If ``forced`` is given but absent, or if neither fallback
            column exists.
    """
    available = df.columns

    if not forced:
        # Automatic mode: first fallback that exists wins.
        for fallback in ("group", "scenario"):
            if fallback in available:
                return fallback
        raise KeyError("색상 구분용 컬럼을 찾지 못했습니다. 'group' 또는 'scenario' 필요")

    if forced in available:
        return forced
    raise KeyError(f"'--by {forced}' 컬럼이 없습니다. 현재 컬럼: {list(df.columns)}")

def make_color_map(categories: List[str], cmap_name: str = "tab20") -> dict:
    """Assign one colour per distinct category, in order of first appearance.

    Args:
        categories: Category labels; repeats are allowed.
        cmap_name: Name of the matplotlib colormap to draw colours from.

    Returns:
        Dict mapping each distinct category to the colour returned by the
        colormap for its index; indices wrap around after ``cmap.N`` entries.
    """
    palette = plt.get_cmap(cmap_name)
    assigned = {}
    for category in categories:
        if category not in assigned:
            # len(assigned) is this category's rank among the distinct values.
            assigned[category] = palette(len(assigned) % palette.N)
    return assigned

def plot_barh_hd(
    df: pd.DataFrame,
    y_col: str,
    x_col: str,
    color_by: str,
    title: str,
    out_path: str,
    cmap_name: str = "tab20",
    width_in: float = 16.0,
    dpi: int = 450,
    headroom: float = 0.18,  # fraction of the data span kept free beyond the bars
) -> None:
    """Save a high-resolution horizontal bar chart, one bar per row of ``df``.

    Rows with a missing ``y_col`` or ``x_col`` value are dropped, the rest are
    sorted by ``x_col`` in descending order (largest bar on top), coloured by
    ``color_by`` and annotated with their value to three decimals.

    Args:
        df: Source frame.
        y_col: Column providing the bar labels.
        x_col: Column providing the bar lengths (numeric).
        color_by: Column whose values decide the bar colour and legend entries.
        title: Figure title.
        out_path: Path of the image file to write.
        cmap_name: Matplotlib colormap used for the categories.
        width_in: Figure width in inches; the height grows with the bar count.
        dpi: Resolution used when saving.
        headroom: Extra x-axis room, as a fraction of the data span, so the
            value labels do not run into the axes border.

    Raises:
        KeyError: If one of the columns is missing from ``df``.
        ValueError: If no row has both a label and a value.
    """
    d = pd.DataFrame(
        {
            "y": get_series(df, y_col),
            "x": get_series(df, x_col),
            "c": get_series(df, color_by),
        }
    ).dropna(subset=["y", "x"])
    d = d.sort_values("x", ascending=False)

    if d.empty:
        # Fail before a figure is created; otherwise the NaN maximum would
        # surface later as an obscure axis-limit error.
        raise ValueError(
            f"그릴 데이터가 없습니다: '{y_col}' / '{x_col}' 컬럼에 유효한 값이 없습니다."
        )

    values = d["x"].to_numpy(float)
    bar_labels = [str(v) for v in d["y"]]
    # Explicit positions keep one bar per row even when labels repeat or are numeric.
    positions = list(range(len(values)))

    # The colour map is keyed by the string form of each category, so the
    # per-bar lookup must use that same string form (raw ints/NaN would miss).
    categories = [str(v) for v in d["c"]]
    color_map = make_color_map(categories, cmap_name=cmap_name)
    colors = [color_map[cat] for cat in categories]

    fig_h = max(3.5, 0.6 * len(values) + 1.0)
    fig, ax = plt.subplots(figsize=(width_in, fig_h))
    try:
        bars = ax.barh(positions, values, color=colors, edgecolor="black", linewidth=0.5)
        ax.set_yticks(positions)
        ax.set_yticklabels(bar_labels)
        ax.set_xlabel(x_col)
        ax.set_ylabel(y_col)
        ax.set_title(title)

        # Widen the x range so value labels fit. The axis always includes 0;
        # for all-positive data this is (0, max * (1 + headroom)).
        lo = min(0.0, float(values.min()))
        hi = max(0.0, float(values.max()))
        span = (hi - lo) or 1.0  # all-zero data still gets a visible axis
        pad = span * headroom
        ax.set_xlim(lo - pad if lo < 0 else lo, hi + pad)

        ax.grid(axis="x", linestyle="--", alpha=0.35)
        ax.invert_yaxis()

        # Value labels just beyond the end of each bar.
        xmin, xmax = ax.get_xlim()
        offset = (xmax - xmin) * 0.01
        for rect, v in zip(bars, values):
            ymid = rect.get_y() + rect.get_height() / 2
            if v < 0:
                ax.text(v - offset, ymid, f"{v:.3f}", va="center", ha="right")
            else:
                ax.text(v + offset, ymid, f"{v:.3f}", va="center", ha="left")

        # Legend: one square marker per category, placed outside the axes.
        handles = [
            plt.Line2D([0], [0], marker="s", linestyle="", color=col, markersize=8)
            for col in color_map.values()
        ]
        ax.legend(
            handles,
            list(color_map.keys()),
            title=color_by,
            bbox_to_anchor=(1.02, 1),
            loc="upper left",
            frameon=False,
        )

        fig.tight_layout()
        fig.savefig(out_path, dpi=dpi, bbox_inches="tight")
    finally:
        # Always release the figure, including when drawing or saving fails.
        plt.close(fig)

In [26]:
def main(csv_path: str, by: Optional[str], cmap_name: str, width_in: float, dpi: int, headroom: float):
    """Read the summary CSV and write one bar chart per metric.

    Charts are produced for ``acc_mean``, ``f1_mean`` and the detected macro-AUC
    column, in that order, and saved as ``barh_<metric>_hd.png`` in the current
    working directory.

    Args:
        csv_path: Path of the CSV to read.
        by: Column to colour by, or ``None`` to choose automatically.
        cmap_name: Matplotlib colormap name.
        width_in: Figure width in inches.
        dpi: Resolution of the saved images.
        headroom: Extra x-axis room passed through to the plotting function.

    Raises:
        KeyError: If ``scenario``, a metric column, or a colour column is missing.
    """
    df = pd.read_csv(csv_path)
    if "scenario" not in df.columns:
        raise KeyError("필수 컬럼 'scenario'가 없습니다.")

    auc_col = pick_auc_macro_col(df)
    metrics = ["acc_mean", "f1_mean", auc_col]

    # Validate every metric up front so nothing is written on a partial input.
    absent = [metric for metric in metrics if metric not in df.columns]
    if absent:
        raise KeyError(f"다음 컬럼을 찾지 못했습니다: {absent}")

    color_by = decide_color_by(df, forced=by)
    plot_options = dict(cmap_name=cmap_name, width_in=width_in, dpi=dpi, headroom=headroom)

    for metric in metrics:
        plot_barh_hd(
            df,
            "scenario",
            metric,
            color_by,
            f"{metric} by scenario",
            f"barh_{metric}_hd.png",
            **plot_options,
        )

# Entry point: parse the command-line options, check the CSV exists, then plot.
if __name__ == "__main__":
    # Use parse_known_args so the -f argument injected by Jupyter/VS Code is ignored
    parser = argparse.ArgumentParser()
    parser.add_argument("--csv", default=csv_path, help="CSV 경로 (기본: scenario_summary.csv)")
    parser.add_argument("--by", default=None, help="색상 구분 컬럼 (기본: group→scenario 자동)")
    parser.add_argument("--cmap", default="tab20", help="색상 팔레트 (예: tab20, Set3, tab10)")
    parser.add_argument("--width", type=float, default=16.0, help="가로 폭(인치)")
    parser.add_argument("--dpi", type=int, default=450, help="저장 DPI")
    parser.add_argument("--headroom", type=float, default=0.18, help="x축 우측 여백 비율(0~1)")
    # Unrecognised arguments are collected in _unknown and deliberately discarded.
    args, _unknown = parser.parse_known_args()

    # Fail early with a clear message instead of letting main() fail on the read.
    if not os.path.exists(args.csv):
        raise FileNotFoundError(f"CSV 파일을 찾지 못했습니다: {args.csv}")

    main(args.csv, by=args.by, cmap_name=args.cmap, width_in=args.width, dpi=args.dpi, headroom=args.headroom)