# 充電→長時間放置 ランキング予測 (要件対応版)

hashvin ごとに統計テーブルを構築し、AutoGluon でランキング学習・評価・推論を行います。
各セルの役割が分かりやすいよう、手順ごとに Markdown を挿入しています。


## 1. ライブラリ読み込み
必要なユーティリティを `ranking` モジュール（`train` ディレクトリ内）から読み込みます。


In [1]:
# ライブラリ読み込み
from pathlib import Path
import pandas as pd

from ranking import (
    RankingConfig,
    RankingPipeline,
    UserDataBuilder,
    build_training_table,
    load_sessions,
)


## 2. 設定値の確認
直近重みや候補数など、要件ベースのデフォルト値を `RankingConfig` で設定します。


In [2]:
# Ranking configuration (adjust the values as needed).
# The keyword arguments are grouped by apparent theme; the grouping is inferred
# from the parameter names only -- TODO confirm against RankingConfig.
TOP_K = 3  # number of candidates to present at inference time
config = RankingConfig(
    # history window / recency weighting (presumably) -- verify
    window_days=90,
    halflife_days=30,
    use_decay_weight=True,
    alpha_smooth=0.5,
    kernel_sigma_hour=2.0,
    # candidate-set sizes (presumably) -- verify
    k_candidates=12,
    m_routine=8,
    n_charge_prior=8,
    l_nearby=4,
    nearby_radius_km=1.0,
    # scoring weights (presumably) -- verify
    lambda_start=0.7,
    w_routine=1.0,
    w_charge=1.0,
    gamma_distance=0.05,
    # evaluation and training settings
    topk_eval=[1, 3, 5],
    random_seed=42,
    time_limit=180,
    ag_presets='medium_quality_faster_train',
)
config  # last top-level expression: the notebook echoes the resulting config


RankingConfig(window_days=90, halflife_days=30, use_decay_weight=True, alpha_smooth=0.5, k_candidates=12, m_routine=8, n_charge_prior=8, l_nearby=4, nearby_radius_km=1.0, lambda_start=0.7, w_routine=1.0, w_charge=1.0, gamma_distance=0.05, kernel_sigma_hour=2.0, topk_eval=[1, 3, 5], random_seed=42, time_limit=180, ag_presets='medium_quality_faster_train', result_root=WindowsPath('result'))

## 3. セッションデータの読み込み
サンプルCSVを読み込み、共通前処理（時刻の正規化・派生列付与）を行います。


In [3]:
# Load the session CSV through the project loader and preview the first rows.
# The path is relative to the notebook's working directory.
DATA_PATH = Path('..') / 'eda' / 'ev_sessions_test_augmented_v2.csv'
sessions = load_sessions(DATA_PATH)
sessions.head()


Unnamed: 0,hashvin,session_cluster,session_type,start_time,end_time,duration_minutes,start_soc,end_soc,change_soc,start_lat,start_lon,end_lat,end_lon,weekday,start_hour,date,is_long_park
0,hv_0001_demo,I_101,inactive,2025-08-31 20:30:00,2025-09-01 07:30:00,660.0,80.0,79.5,-0.5,35.68,139.76,35.68,139.76,6,20,2025-08-31,True
1,hv_0001_demo,I_202,inactive,2025-09-01 09:00:00,2025-09-01 17:30:00,510.0,79.5,78.6,-0.9,35.69,139.7,35.69,139.7,0,9,2025-09-01,True
2,hv_0001_demo,I_303,inactive,2025-09-01 18:00:00,2025-09-01 18:39:00,39.0,78.6,78.4,-0.2,35.66,139.75,35.66,139.75,0,18,2025-09-01,False
3,hv_0001_demo,I_101,inactive,2025-09-01 20:30:00,2025-09-02 07:30:00,660.0,78.4,78.2,-0.2,35.68,139.76,35.68,139.76,0,20,2025-09-01,True
4,hv_0001_demo,I_202,inactive,2025-09-02 09:00:00,2025-09-02 17:30:00,510.0,78.2,77.2,-1.0,35.69,139.7,35.69,139.7,1,9,2025-09-02,True


## 4. hashvin ごとの学習と評価
`RankingPipeline.fit_all()` で統計テーブル構築→特徴量化→AutoGluon 学習→評価指標算出を行います。


In [4]:
# Run training and evaluation for every hashvin, then tabulate the metrics.
pipeline = RankingPipeline(sessions, config)
metrics_by_user = pipeline.fit_all()
# Transpose so that each hashvin becomes a row and each metric a column.
metrics_df = pd.DataFrame(metrics_by_user).transpose()
metrics_df


  current = start.floor("H")
No path specified. Models will be saved in: "AutogluonModels\ag-20251015_150005"
Preset alias specified: 'medium_quality_faster_train' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.12.10
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26200
CPU Count:          16
Memory Avail:       18.09 GB / 31.17 GB (58.1%)
Disk Space Avail:   831.33 GB / 930.73 GB (89.3%)
Presets specified: ['medium_quality_faster_train']
Using hyperparameters preset: hyperparameters='default'
Beginning AutoGluon training ... Time limit = 180s
AutoGluon will save models to "c:\workspace\src\kaggle\ml-study\EV-Battery-Parking-Degradation-Mitigation\train\AutogluonModels\ag-20251015_150005"
Train Data Rows:    257
Train Data Columns: 28
Tuning Data Rows:    65
Tuning Data Columns: 28
Label Column:       label
Problem Type:       binary
Preprocessing data ...
Selected class <--> label mapping:  class

Unnamed: 0,Hit@1,NDCG@1,Hit@3,NDCG@3,Hit@5,NDCG@5,MRR
hv_0001_demo,0.708333,0.708333,0.75,0.734622,0.75,0.734622,0.729167


## 5. 中間テーブルの確認
学習済みのユーザーを1件取り出し、リンクテーブルや統計値、特徴量テーブルを確認します。


In [5]:
# Pick one trained user model and preview its intermediate tables.
if not pipeline.user_models:
    # Nothing was trained, so there is nothing to inspect.
    print('学習対象のユーザーが見つかりませんでした。')
else:
    # Use the first key yielded when iterating over the trained models.
    sample_hashvin = next(iter(pipeline.user_models))
    model = pipeline.get_user_model(sample_hashvin)
    user_data = model.user_data
    print('hashvin:', sample_hashvin)
    # Explicit display() calls are required: expressions nested in a branch
    # are not rendered automatically by the notebook.
    display(user_data.links.head())
    display(user_data.presence.head())
    display(user_data.start_prob.head())
    display(model.training_table.head())


hashvin: hv_0001_demo


Unnamed: 0,hashvin,event_id,weekday,charge_cluster,charge_start_time,charge_start_hour,charge_end_time,charge_end_lat,charge_end_lon,park_cluster,...,park_start_lon,gap_minutes,dist_charge_to_park_km,age_days,weight_time,start_soc,end_soc,time_since_last_charge_min,soc_drop_since_prev,prev_charge_cluster
0,hv_0001_demo,1,1,C_505,2025-09-02 18:30:00,18,2025-09-02 19:12:00,35.65,139.74,I_101,...,139.76,78.0,3.793725,119.940972,0.0,74.8,95.0,,,
1,hv_0001_demo,2,2,C_505,2025-09-03 18:45:00,18,2025-09-03 19:29:00,35.65,139.74,I_101,...,139.76,61.0,3.793725,118.930556,0.0,90.5,95.0,1413.0,4.5,C_505
2,hv_0001_demo,3,3,C_606,2025-09-04 18:30:00,18,2025-09-04 19:18:00,35.705,139.72,I_101,...,139.76,72.0,4.558132,117.940972,0.0,86.6,95.0,1381.0,8.4,C_505
3,hv_0001_demo,4,4,C_505,2025-09-05 18:30:00,18,2025-09-05 19:31:00,35.65,139.74,I_101,...,139.76,59.0,3.793725,116.940972,0.0,88.6,95.0,1392.0,6.4,C_606
4,hv_0001_demo,5,5,C_606,2025-09-06 14:30:00,14,2025-09-06 15:12:00,35.705,139.72,I_101,...,139.76,318.0,4.558132,116.107639,0.0,91.0,95.0,1139.0,4.0,C_505


Unnamed: 0,weekday,hour,cluster,presence_weight,presence_prob,long_park_ratio


Unnamed: 0,weekday,hour,cluster,start_weight,start_prob
0,0,6,I_202,0.581806,0.193809
1,0,7,I_201,0.495639,0.161197
2,0,7,I_202,0.680883,0.191189
3,0,8,I_201,0.72695,0.159334
4,0,8,I_202,1.55162,0.266426


Unnamed: 0,hashvin,event_id,candidate_cluster,label,weekday,charge_cluster,charge_start_hour,weight_time,age_days,presence_prob,...,candidate_score,start_soc,time_since_last_charge_min,soc_drop_since_prev,prev_charge_cluster,prev_same_candidate,sin_hour,cos_hour,sin_week,cos_week
0,hv_0001_demo,188,I_103,0,6,C_607,19,0.935536,2.884028,0.0,...,-0.154208,93.16,,0.32,C_606,0,-0.965926,0.258819,-0.781831,0.62349
1,hv_0001_demo,188,I_102,0,6,C_607,19,0.935536,2.884028,0.0,...,-0.091833,93.16,,0.32,C_606,0,-0.965926,0.258819,-0.781831,0.62349
2,hv_0001_demo,188,I_100,0,6,C_607,19,0.935536,2.884028,0.0,...,-0.141522,93.16,,0.32,C_606,0,-0.965926,0.258819,-0.781831,0.62349
3,hv_0001_demo,188,I_101,0,6,C_607,19,0.935536,2.884028,0.0,...,-0.099685,93.16,,0.32,C_606,0,-0.965926,0.258819,-0.781831,0.62349
4,hv_0001_demo,187,I_100,0,2,C_606,21,0.853908,6.835417,0.0,...,-0.069497,85.68,,7.01,C_504,0,-0.707107,0.707107,0.974928,-0.222521


## 6. Top-k 推論の実行
学習済みモデルを使って各イベントの上位候補クラスタを表示します。


In [6]:
# Run top-k candidate inference with the trained models and preview the result.
if pipeline.user_models:
    topk_df = pipeline.predict_all(top_k=TOP_K)
    # A bare expression inside an `if` body is never rendered by the notebook
    # (only a cell's last top-level expression is), so display it explicitly.
    display(topk_df.head())
else:
    print('推論対象のモデルがありません。')
