In [72]:
#Simple starter notebook for March Mania 2025
#https://www.kaggle.com/code/paultimothymooney/simple-starter-notebook-for-march-mania-2025

import numpy as np
import pandas as pd
from sklearn.metrics import brier_score_loss, mean_squared_error

In [None]:
#データの準備

DATA_PATH = "./data/"
w_seed = pd.read_csv(DATA_PATH + "WNCAATourneySeeds.csv")
m_seed = pd.read_csv(DATA_PATH + "MNCAATourneySeeds.csv")
seed_df = pd.concat([m_seed, w_seed], axis=0).fillna(0.05)

submission_df = pd.read_csv(DATA_PATH + "SampleSubmissionStage1.csv")
submission_df2 = pd.read_csv(DATA_PATH + "SampleSubmissionStage2.csv")

In [74]:
seed_df.head()

Unnamed: 0,Season,Seed,TeamID
0,1985,W01,1207
1,1985,W02,1210
2,1985,W03,1228
3,1985,W04,1260
4,1985,W05,1374


In [75]:
submission_df.head()

Unnamed: 0,ID,Pred
0,2021_1101_1102,0.5
1,2021_1101_1103,0.5
2,2021_1101_1104,0.5
3,2021_1101_1105,0.5
4,2021_1101_1106,0.5


In [76]:
# データの前処理(ゲーム情報とチームランキングを抽出する)

# defセクション
#年とチームIDを抽出する
def extract_game_info(id_str):
    parts = id_str.split("_")
    year = int(parts[0])
    teamID1 = int(parts[1])
    teamID2 = int(parts[2])
    return year, teamID1, teamID2

# シードとシード順位を抽出する
def extract_seed_value(seed_str):
    try:
        return int(seed_str[1:])
    except ValueError:
        return 16
    
# mainセクション(stage1)
submission_df[['Season', 'TeamID1', 'TeamID2']] = submission_df['ID'].apply(extract_game_info).tolist()
seed_df['SeedValue'] = seed_df['Seed'].apply(extract_seed_value)

#2つのデータをマージする(TeamID1)
submission_df = pd.merge(submission_df, seed_df[['Season','TeamID', 'SeedValue']], left_on=['Season', 'TeamID1'], right_on=['Season', 'TeamID'], how='left')
submission_df = submission_df.rename(columns={'SeedValue': 'SeedValue1'}).drop(columns=['TeamID'])

#2つのデータをマージする(TeamID2)
submission_df = pd.merge(submission_df, seed_df[['Season', 'TeamID', 'SeedValue']], left_on=['Season', 'TeamID2'], right_on=['Season', 'TeamID'], how='left')
submission_df = submission_df.rename(columns={'SeedValue': 'SeedValue2'}).drop(columns=['TeamID'])

In [78]:
# mainセクション(stage2)\
# シードが発表されるまでは Season+1 で適応する
seed_df['Season'] = seed_df['Season'] + 1
submission_df2[['Season', 'TeamID1', 'TeamID2']] = submission_df2['ID'].apply(extract_game_info).tolist()
submission_df2 = pd.merge(submission_df2, seed_df[['Season', 'TeamID', 'SeedValue']], left_on=['Season', 'TeamID1'], right_on=['Season', 'TeamID'], how='left')
submission_df2 = submission_df2.rename(columns={'SeedValue': 'SeedValue1'}).drop(columns=['TeamID'])
submission_df2 = pd.merge(submission_df2, seed_df[['Season', 'TeamID', 'SeedValue']], left_on=['Season', 'TeamID2'], right_on=['Season', 'TeamID'], how='left')
submission_df2 = submission_df2.rename(columns={'SeedValue': 'SeedValue2'}).drop(columns=['TeamID'])

In [79]:
#予測を行う(stage1)
submission_df['SeedDiff'] = submission_df['SeedValue1'] - submission_df['SeedValue2']
submission_df['Pred'] = 0.5 + (0.03 * submission_df['SeedDiff'])

#必要ない列を削減
submission_df = submission_df[['ID', 'Pred']].fillna(0.5)

submission_df.head()

Unnamed: 0,ID,Pred
0,2021_1101_1102,0.5
1,2021_1101_1103,0.5
2,2021_1101_1104,0.86
3,2021_1101_1105,0.5
4,2021_1101_1106,0.5


In [80]:
#予測を行う(stage2)
submission_df2['SeedDiff'] = submission_df2['SeedValue1'] - submission_df2['SeedValue2']
submission_df2['Pred'] = 0.5 + (0.03 * submission_df2['SeedDiff'])

#必要ない列を削減
submission_df2 = submission_df2[['ID', 'Pred']].fillna(0.5)

submission_df2.head()

Unnamed: 0,ID,Pred
0,2025_1101_1102,0.5
1,2025_1101_1103,0.5
2,2025_1101_1104,0.5
3,2025_1101_1105,0.5
4,2025_1101_1106,0.5


In [83]:
# ステータスを確認
stats = submission_df.iloc[:, 1].describe()
print(stats)

stats2 = submission_df2.iloc[:, 1].describe()
print(stats2)

count    507108.000000
mean          0.499938
std           0.039466
min           0.050000
25%           0.500000
50%           0.500000
75%           0.500000
max           0.950000
Name: Pred, dtype: float64
count    131407.000000
mean          0.499864
std           0.039344
min           0.050000
25%           0.500000
50%           0.500000
75%           0.500000
max           0.950000
Name: Pred, dtype: float64


In [82]:
submission_df.to_csv('./MMLM_m2_output/submissionStage1.csv', index=False)
submission_df2.to_csv('./MMLM_m2_output/submissionStage2.csv', index=False)

In [84]:
# Create a dataframe of ground truth values
solution_df = submission_df.copy()
solution_df['Pred'] = 1

# Now calculate the Brier score
y_true = solution_df['Pred']
y_pred = submission_df['Pred']
brier_score = brier_score_loss(y_true, y_pred)
print(f"Brier Score: {brier_score}")

Brier Score: 0.2516196940296741
