In [5]:
# filename: ev_parking_predict.py
# -*- coding: utf-8 -*-
# 要件:
# - 共通モデル(単一) + 個別モデル(hashvinごと) の学習/評価を実行
# - 特徴量に charge_cluster_id を必須で使用
# - charge_start_time をそのまま(datetime)投入 / 周期特徴へ加工 の両方を実験
# - 多クラス評価: accuracy / top-3 accuracy / log_loss / macro_f1 + 混同行列保存
# - CSV未指定ならサンプルデータを自動生成（列名は本案件準拠で上書き）
#
# 依存: pip install autogluon.tabular==1.* scikit-learn pandas numpy matplotlib

import argparse
import os
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import (
    accuracy_score, f1_score, log_loss, confusion_matrix
)

from autogluon.tabular import TabularDataset, TabularPredictor


# ---------------------------
# ユーティリティ
# ---------------------------
def ensure_categorical(df, cols):
    """Cast the listed columns of ``df`` to ``str`` in place and return ``df``.

    Names in ``cols`` that are not columns of ``df`` are ignored. The input
    frame itself is modified; the return value is the same object.
    """
    present = [name for name in cols if name in df.columns]
    for name in present:
        df[name] = df[name].astype(str)
    return df

def make_time_features(df, dt_col, mode="raw"):
    """Return a copy of ``df`` with ``dt_col`` parsed to datetime, plus the time feature names.

    mode:
        - 'raw': the datetime column itself is the only time feature
          (derivation is left to the downstream model pipeline)
        - 'engineered': adds weekday / hour and their sin-cos cyclic encodings

    Returns:
        (frame, feature_names) where ``frame`` is a new DataFrame and
        ``feature_names`` lists the time-related columns to feed the model.

    Raises:
        ValueError: if ``dt_col`` is missing or ``mode`` is not recognised.
    """
    if dt_col not in df.columns:
        raise ValueError(f"{dt_col} がありません")

    frame = df.copy()
    frame[dt_col] = pd.to_datetime(frame[dt_col])

    if mode == "raw":
        return frame, [dt_col]
    if mode != "engineered":
        raise ValueError("modeは 'raw' か 'engineered' を指定してください")

    frame["weekday"] = frame[dt_col].dt.weekday  # 0=Mon..6=Sun
    frame["hour"] = frame[dt_col].dt.hour

    # Cyclic encodings: 24-hour period for the hour, 7-day period for the weekday.
    for prefix, source, period in (("hour", "hour", 24.0), ("wday", "weekday", 7.0)):
        frame[f"{prefix}_sin"] = np.sin(2*np.pi*frame[source]/period)
        frame[f"{prefix}_cos"] = np.cos(2*np.pi*frame[source]/period)

    return frame, ["weekday", "hour", "hour_sin", "hour_cos", "wday_sin", "wday_cos"]


def top_k_accuracy(y_true, proba_df, k=3):
    """Return the fraction of samples whose true label is among the k most probable classes.

    Args:
        y_true: iterable of true labels, one per row of ``proba_df``.
        proba_df: DataFrame whose columns are class labels and whose rows are
            per-sample class probabilities.
        k: how many top-ranked classes count as a hit. Values larger than the
            number of columns behave like "all columns".

    Returns:
        The hit rate as a float; NaN when there are no samples.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        # A slice of [:, -0:] would select every column and report a perfect score.
        raise ValueError("kは1以上を指定してください")

    labels = list(y_true)
    if not labels:
        return float("nan")

    column_pos = {c: i for i, c in enumerate(proba_df.columns)}
    # A label with no probability column can never be ranked, so it is mapped
    # to -1 (never produced by argsort) and counts as a miss instead of raising.
    true_idx = np.array([column_pos.get(y, -1) for y in labels], dtype=np.intp)
    topk = proba_df.values.argsort(axis=1)[:, -k:]
    hit = (topk == true_idx.reshape(-1, 1)).any(axis=1)
    return float(hit.mean())


def eval_multiclass(y_true, y_pred, proba_df):
    """Compute accuracy, macro F1, log loss and top-3 accuracy for one prediction set.

    Args:
        y_true: true labels.
        y_pred: predicted labels, aligned with ``y_true``.
        proba_df: DataFrame of class probabilities; columns are class labels,
            rows are aligned with ``y_true``.

    Returns:
        dict with keys ``accuracy``, ``macro_f1``, ``log_loss``, ``top3_acc``.
        ``log_loss`` is NaN when it cannot be computed.
    """
    metrics = {
        "accuracy": accuracy_score(y_true, y_pred),
        "macro_f1": f1_score(y_true, y_pred, average="macro"),
    }

    # log_loss is best-effort: it needs probabilities and can reject degenerate
    # inputs, so a failure is reported as NaN rather than aborting the run.
    try:
        # scikit-learn assumes the probability columns follow the sorted label
        # order, so reorder explicitly instead of trusting the frame's order.
        ordered = sorted(proba_df.columns)
        metrics["log_loss"] = log_loss(y_true, proba_df[ordered].values, labels=ordered)
    except Exception:
        metrics["log_loss"] = np.nan

    metrics["top3_acc"] = top_k_accuracy(y_true, proba_df, k=3)
    return metrics


def plot_confmat(cm, classes, title, outpath):
    """Draw confusion matrix ``cm`` as a heatmap and save it to ``outpath``.

    ``classes`` supplies the tick labels for both axes (rows are the true
    class, columns the predicted class). A new figure is opened for the plot
    and closed again after saving.
    """
    positions = np.arange(len(classes))

    plt.figure()
    heatmap = plt.imshow(cm, interpolation="nearest")
    plt.title(title)
    plt.colorbar(heatmap)

    # x axis = predicted class, y axis = true class
    plt.xticks(positions, classes, rotation=45, ha="right")
    plt.yticks(positions, classes)
    plt.ylabel("True")
    plt.xlabel("Predicted")

    plt.tight_layout()
    plt.savefig(outpath, bbox_inches="tight")
    plt.close()


# ---------------------------
# 学習/評価 ルーチン
# ---------------------------
def train_eval_global(df, feature_cols, label, exp_name, out_dir):
    """Fit a single shared model on all rows and score it (hashvin is passed in as a feature).

    There is no hold-out split here: the metrics are computed on the same rows
    the model was trained on, so they are optimistic.

    Returns:
        (predictor, metrics) where ``metrics`` is the dict from ``eval_multiclass``.
        A confusion-matrix PNG is written into ``out_dir`` as a side effect.
    """
    train_frame = TabularDataset(df[feature_cols + [label]])
    model_dir = os.path.join(out_dir, f"predictor_{exp_name}_global")

    predictor = TabularPredictor(
        label=label,
        problem_type="multiclass",
        eval_metric="accuracy",
        path=model_dir,
    )
    predictor = predictor.fit(train_data=train_frame, presets="best_quality")

    # Re-score on the training rows; labels are compared as strings.
    y_true = train_frame[label].astype(str).tolist()
    y_pred = predictor.predict(train_frame[feature_cols]).astype(str).tolist()
    proba = predictor.predict_proba(train_frame[feature_cols])

    metrics = eval_multiclass(y_true, y_pred, proba)

    # Confusion matrix over the classes the model produces probabilities for.
    class_names = sorted(proba.columns.tolist())
    matrix = confusion_matrix(y_true, y_pred, labels=class_names)
    figure_path = os.path.join(out_dir, f"confmat_global_{exp_name}.png")
    plot_confmat(matrix, class_names, f"Confusion Matrix (GLOBAL, {exp_name})", figure_path)

    return predictor, metrics


def _nan_weighted_mean(values, weights):
    """Weighted mean of ``values`` that ignores NaN entries; NaN if none remain."""
    vals = np.asarray(values, dtype=float)
    wts = np.asarray(weights, dtype=float)
    valid = ~np.isnan(vals)
    if not valid.any() or wts[valid].sum() == 0:
        return float("nan")
    return float(np.average(vals[valid], weights=wts[valid]))


def train_eval_per_hashvin(df, feature_cols, label, exp_name, out_dir, min_rows=10):
    """Fit one independent model per hashvin and report per-vehicle and aggregate metrics.

    As in the global routine there is no hold-out split: every model is scored
    on its own training rows.

    Args:
        df: input frame; must contain a ``hashvin`` column, ``feature_cols`` and ``label``.
        feature_cols: list of feature column names (hashvin itself is not needed,
            it is constant inside each group).
        label: name of the target column.
        exp_name: experiment tag used in output paths and plot titles.
        out_dir: directory receiving predictor folders and confusion-matrix PNGs.
        min_rows: vehicles with fewer rows than this are skipped (default 10).

    Returns:
        (per_models, per_df, agg_metrics, agg_weighted):
        - per_models: dict hashvin -> fitted predictor
        - per_df: one row of metrics per trained hashvin (with sample count ``n``)
        - agg_metrics: metrics recomputed over all trained vehicles' rows concatenated
        - agg_weighted: sample-count-weighted mean of the per-vehicle metrics
    """
    metric_names = ("accuracy", "macro_f1", "log_loss", "top3_acc")
    results = []
    per_models = {}
    all_true, all_pred, all_proba_rows = [], [], []
    # Every model's probabilities are aligned to this common, sorted class list
    # so that they can be stacked into one array for the pooled evaluation.
    proba_cols_union = sorted(df[label].astype(str).unique())
    use_cols = feature_cols + [label]

    for hv, sub in df.groupby("hashvin"):
        # Check the size before building the dataset, and say so when skipping.
        if len(sub) < min_rows:
            print(f"[INFO] hashvin={hv}: データ件数 {len(sub)} < {min_rows} のため個別モデルをスキップします。")
            continue

        data = TabularDataset(sub[use_cols])
        predictor = TabularPredictor(
            label=label, problem_type="multiclass",
            eval_metric="accuracy",
            path=os.path.join(out_dir, f"predictor_{exp_name}_hv_{hv}")
        ).fit(
            train_data=data,
            presets="medium_quality"
        )
        y_true = data[label].astype(str).tolist()
        y_pred = predictor.predict(data[feature_cols]).astype(str).tolist()
        proba = predictor.predict_proba(data[feature_cols])

        # Labels are compared as strings everywhere else, so name the columns
        # the same way, then add a zero column for every class this vehicle's
        # model does not output.
        proba.columns = [str(c) for c in proba.columns]
        proba = proba.reindex(columns=proba_cols_union, fill_value=0.0)

        metrics = eval_multiclass(y_true, y_pred, proba)
        results.append({"hashvin": hv, **metrics, "n": len(y_true)})
        per_models[hv] = predictor

        # Keep the rows for the pooled evaluation below.
        all_true.extend(y_true)
        all_pred.extend(y_pred)
        all_proba_rows.append(proba.values)

        # Per-vehicle confusion matrix.
        cm = confusion_matrix(y_true, y_pred, labels=proba_cols_union)
        plot_confmat(cm, proba_cols_union,
                     f"Confusion Matrix (PER-HV {hv}, {exp_name})",
                     os.path.join(out_dir, f"confmat_per_{exp_name}_{hv}.png"))

    # Pooled evaluation over the concatenated rows of all trained vehicles.
    if all_true:
        all_proba_df = pd.DataFrame(np.vstack(all_proba_rows), columns=proba_cols_union)
        agg_metrics = eval_multiclass(all_true, all_pred, all_proba_df)
    else:
        agg_metrics = dict.fromkeys(metric_names, np.nan)

    per_df = pd.DataFrame(results)
    if per_df.empty:
        agg_weighted = dict.fromkeys(metric_names, np.nan)
    else:
        # Sample-count-weighted mean. A NaN metric (log_loss that could not be
        # computed) is left out rather than counted as 0, because 0 is a
        # perfect log loss and would bias the average.
        agg_weighted = {
            name: _nan_weighted_mean(per_df[name], per_df["n"])
            for name in metric_names
        }

    return per_models, per_df, agg_metrics, agg_weighted


# ---------------------------
# サンプルデータ生成（CSV未指定時）
# ---------------------------
def synthesize_sample(n_hashvin=5, seed=42):
    """Generate a reproducible synthetic data set shaped like the PoC data.

    Generation rules:
    - hashvin: ``n_hashvin`` vehicles (H001, H002, ...), 60-219 rows each
    - charge_cluster_id: 8 values (C01..C08), drawn with fixed weights
    - inactive_cluster_id: 6 values (P01..P06) - the target
    - charge_start_time: random timestamps within 90 days from 2025-06-01
    - the label distribution depends on (hashvin, charge_cluster_id, hour, weekday)

    The biases are derived from list positions, not from ``hash()``: string
    hashes are salted per interpreter process, which would make the output
    differ between runs even with the same ``seed``.

    Args:
        n_hashvin: number of vehicles to simulate.
        seed: seed for the NumPy random generator.

    Returns:
        DataFrame with columns hashvin, charge_cluster_id, inactive_cluster_id,
        charge_start_time (the columns are present even when no rows are generated).
    """
    rng = np.random.default_rng(seed)
    hashvins = [f"H{d:03d}" for d in range(1, n_hashvin + 1)]
    charge_clusters = [f"C{c:02d}" for c in range(1, 9)]
    inactive_clusters = [f"P{p:02d}" for p in range(1, 7)]
    n_labels = len(inactive_clusters)

    charge_pos = {name: pos for pos, name in enumerate(charge_clusters)}
    charge_weights = np.array([3, 3, 2, 2, 2, 2, 1, 1]) / 16

    rows = []
    t0 = datetime(2025, 6, 1, 0, 0, 0)
    for hv_pos, hv in enumerate(hashvins):
        n_rows = int(rng.integers(60, 220))  # vary the amount of data per vehicle
        for _ in range(n_rows):
            cc = str(rng.choice(charge_clusters, p=charge_weights))
            dt = t0 + timedelta(days=int(rng.integers(0, 90)),
                                hours=int(rng.integers(0, 24)),
                                minutes=int(rng.integers(0, 60)))
            hour = dt.hour
            wday = dt.weekday()

            # Start from random scores, then boost some labels with plausible rules.
            base = rng.random(n_labels)
            # charge location x hour of day favours one label
            base[(charge_pos[cc] + hour) % n_labels] += 1.5
            # weekday lunchtime favours P01/P02, weekend evening favours P04
            if wday < 5 and 11 <= hour <= 14:
                base[0] += 1.0
                base[1] += 0.8
            if wday >= 5 and 17 <= hour <= 21:
                base[3] += 1.2
            # vehicle-specific preference
            base[hv_pos % n_labels] += 1.0

            prob = base / base.sum()
            y = str(rng.choice(inactive_clusters, p=prob))

            rows.append({
                "hashvin": hv,
                "charge_cluster_id": cc,
                "inactive_cluster_id": y,
                "charge_start_time": dt,
            })

    return pd.DataFrame(
        rows,
        columns=["hashvin", "charge_cluster_id", "inactive_cluster_id", "charge_start_time"],
    )


# ---------------------------
# メイン
# ---------------------------
def main(argv=None):
    """Run the global vs. per-hashvin experiments and write the result files.

    For each time-feature variant (raw datetime / engineered features) this
    trains the shared model and the per-vehicle models, collects their metrics
    and writes ``summary_metrics.csv`` (and ``per_hashvin_metrics.csv`` when at
    least one per-vehicle model was trained) into the output directory.

    Args:
        argv: optional list of command-line arguments. ``None`` makes argparse
            read ``sys.argv[1:]``.

    Raises:
        ValueError: if the given CSV lacks one of the required columns.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--csv", type=str, default=None, help="入力CSVへのパス（列名は案件準拠）")
    parser.add_argument("--out", type=str, default="outputs", help="結果出力ディレクトリ")
    args = parser.parse_args(argv)

    os.makedirs(args.out, exist_ok=True)

    label = "inactive_cluster_id"
    time_col = "charge_start_time"

    # 1) Load the data, or generate a sample when no CSV is given.
    if args.csv is None:
        print("[INFO] CSV未指定のためサンプルデータを生成します。")
        df = synthesize_sample(n_hashvin=5, seed=42)
    else:
        df = pd.read_csv(args.csv)
        needed = ["hashvin", "charge_cluster_id", label, time_col]
        missing = [c for c in needed if c not in df.columns]
        if missing:
            raise ValueError(f"必要列が不足: {missing}")
        # Parse timestamps up front so a malformed CSV fails before any training.
        df[time_col] = pd.to_datetime(df[time_col])

    # 2) IDs and the label are treated as categorical strings.
    df = ensure_categorical(df, ["hashvin", "charge_cluster_id", label])

    # 3) Two experiment conditions: raw datetime vs. engineered time features.
    experiments = [
        {"name": "time_raw", "mode": "raw"},
        {"name": "time_engineered", "mode": "engineered"},
    ]

    summary_rows = []
    per_detail_frames = []

    for exp in experiments:
        exp_name = exp["name"]
        mode = exp["mode"]
        print(f"\n===== Experiment: {exp_name} ({mode}) =====")

        # raw -> time_feats == [time_col]; engineered -> the derived columns
        df_feat, time_feats = make_time_features(df, time_col, mode=mode)

        # --- shared model: hashvin is one of the features ---
        features_global = ["hashvin", "charge_cluster_id"] + time_feats
        _, global_metrics = train_eval_global(
            df_feat, features_global, label, exp_name, args.out
        )
        summary_rows.append({"setting": f"GLOBAL_{exp_name}", **global_metrics})

        # --- per-vehicle models: hashvin is constant per model, so it is dropped ---
        features_per = ["charge_cluster_id"] + time_feats
        _, per_df, agg_metrics, agg_weighted = train_eval_per_hashvin(
            df_feat, features_per, label, exp_name, args.out
        )
        # An empty result (every vehicle skipped) has no metric columns; keeping
        # it would make the column selection below fail with a KeyError.
        if not per_df.empty:
            per_df["setting"] = f"PER_{exp_name}"
            per_detail_frames.append(per_df)

        summary_rows.append({
            "setting": f"PER_{exp_name}_ALL-CONCAT",  # re-evaluated on all rows pooled
            **agg_metrics
        })
        summary_rows.append({
            "setting": f"PER_{exp_name}_WEIGHTED",    # sample-count-weighted mean
            **agg_weighted
        })

    # 4) Write the summaries.
    summary = pd.DataFrame(summary_rows)
    summary = summary[["setting", "accuracy", "top3_acc", "macro_f1", "log_loss"]]
    summary = summary.sort_values("setting")
    summary_path = os.path.join(args.out, "summary_metrics.csv")
    summary.to_csv(summary_path, index=False)
    print("\n=== SUMMARY (CSV saved) ===")
    print(summary)

    if per_detail_frames:
        per_detail = pd.concat(per_detail_frames, ignore_index=True)
        per_detail = per_detail[["setting", "hashvin", "n", "accuracy", "top3_acc", "macro_f1", "log_loss"]]
        per_detail_path = os.path.join(args.out, "per_hashvin_metrics.csv")
        per_detail.to_csv(per_detail_path, index=False)
        print("\n=== PER-HASHVIN METRICS (CSV saved) ===")
        print(per_detail.head())

    print(f"\nOutputs saved in: {os.path.abspath(args.out)}")
    print(" - summary_metrics.csv : グローバル/個別 × 時間特徴(生/加工)の比較")
    print(" - per_hashvin_metrics.csv : 個別モデルの詳細（hashvin別）")
    print(" - confmat_*.png : 混同行列（グローバル/個別）")

if __name__ == "__main__":
    import sys
    # A Jupyter kernel is started with its own command-line arguments, which
    # argparse would reject, so they are dropped - but only inside a kernel.
    # On a normal command-line run --csv / --out must reach main() untouched.
    if "ipykernel" in sys.modules:
        sys.argv = [sys.argv[0]]
    main()



Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.12.10
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          16
Memory Avail:       19.67 GB / 31.17 GB (63.1%)
Disk Space Avail:   833.37 GB / 930.73 GB (89.5%)
Presets specified: ['best_quality']
Using hyperparameters preset: hyperparameters='zeroshot'
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack 

[INFO] CSV未指定のためサンプルデータを生成します。

===== Experiment: time_raw (raw) =====


Beginning AutoGluon training ... Time limit = 900s
AutoGluon will save models to "c:\workspace\src\kaggle\ml-study\EV-Battery-Parking-Degradation-Mitigation\train\outputs\predictor_time_raw_global\ds_sub_fit\sub_fit_ho"
Train Data Rows:    415
Train Data Columns: 3
Label Column:       inactive_cluster_id
Problem Type:       multiclass
Preprocessing data ...
Train Data Class Count: 6
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    20136.99 MB
	Train Data (Original)  Memory Usage: 0.05 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting CategoryFeatureGenerator...
			Fitting CategoryMemoryMinimizeFeatureGenerator...
		Fitting DatetimeFeatureGenerator

KeyboardInterrupt: 