In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import brier_score_loss

# Load the M and W regular-season compact result files and the Stage 1 submission
DATA_PATH = "./data/"
results_M_df, results_W_df = (
    pd.read_csv(DATA_PATH + results_file)
    for results_file in (
        "MRegularSeasonCompactResults.csv",
        "WRegularSeasonCompactResults.csv",
    )
)
# Stack both result frames row-wise (row labels of each frame are kept as-is)
results_df = pd.concat([results_M_df, results_W_df], axis=0)
predictions_df = pd.read_csv("./MMLM_m3_output/" + "submissionStage1.csv")

# Show which columns each frame provides
print("試合結果のカラム:", results_df.columns)
print("予測結果のカラム:", predictions_df.columns)

# Build a per-game key and a binary target for every result row.
# The key has the form "<Season>_<lower TeamID>_<higher TeamID>": the two team
# IDs are always written in ascending order, regardless of which team won.
# Column-wise operations are used instead of a row-wise apply(axis=1), which
# would construct a Series and call a Python lambda for every single game.
team_ids = results_df[['WTeamID', 'LTeamID']]
results_df['GameID'] = (
    results_df['Season'].astype(str)
    + "_" + team_ids.min(axis=1).astype(str)
    + "_" + team_ids.max(axis=1).astype(str)
)

# Target is 1 when the lower-ID team (the first team in GameID) won,
# i.e. when WTeamID < LTeamID, and 0 when the higher-ID team won.
# NOTE(review): this presumes 'pred' is the win probability of the lower-ID
# team — confirm against the submission format.
results_df['Result'] = (results_df['WTeamID'] < results_df['LTeamID']).astype(int)

# Rename the submission's key column so it matches the results frame
predictions_df = predictions_df.rename(columns={'ID': 'GameID'})

# Keep only the games that appear in both frames, joined on the shared key
merged_df = pd.merge(results_df, predictions_df, how='inner', on='GameID')
# Constrain the predictions to the closed interval [0, 1] before scoring
merged_df['pred'] = merged_df['pred'].clip(lower=0, upper=1)

# Score the clipped predictions against the binary targets
brier_score = brier_score_loss(merged_df['Result'], merged_df['pred'])

print(f"Brier Score: {brier_score}")


試合結果のカラム: Index(['Season', 'DayNum', 'WTeamID', 'WScore', 'LTeamID', 'LScore', 'WLoc',
       'NumOT'],
      dtype='object')
予測結果のカラム: Index(['ID', 'pred'], dtype='object')
Brier Score: 0.31067868796250325


In [None]:
# Check that the predictions used for scoring were clipped correctly.
# The clip is applied to merged_df['pred'] (the column that is scored), not to
# predictions_df['pred'], so the clipped range must be read from merged_df.
# The raw range is printed separately for reference only.
scored_pred = merged_df['pred']
raw_pred = predictions_df['pred']

print(f"予測値の最大値: {scored_pred.max()}")
print(f"予測値の最小値: {scored_pred.min()}")
print(f"クリップ前の予測値の範囲: [{raw_pred.min()}, {raw_pred.max()}]")

# Fail loudly if any scored prediction lies outside [0, 1] (NaN also fails)
assert scored_pred.between(0, 1).all(), "クリップ後の予測値が [0, 1] の範囲外です"

print(predictions_df[['GameID', 'pred']].head(10))  # sample of the raw predictions


予測値の最大値: 1.0138072034831735
予測値の最小値: -0.0127849271919331
           GameID      pred
0  2021_1101_1102  0.319430
1  2021_1101_1103  0.253045
2  2021_1101_1104  0.282571
3  2021_1101_1105  0.320934
4  2021_1101_1106  0.319781
5  2021_1101_1107  0.314741
6  2021_1101_1108  0.319123
7  2021_1101_1110  0.320276
8  2021_1101_1111  0.390357
9  2021_1101_1112  0.289523
