# March Madness 2025

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import os
from sklearn.model_selection import train_test_split
import random
from data import Data, STATS_COLUMNS
import model
import elo

torch.manual_seed(20250222)
random.seed(20250222)
np.random.seed(20250222)

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


## Hypothesis
Each team can be modeled by x hidden features. In each game, these hidden features interact in a nonlinear fashion to determine the outcome of the game

## Preparing the data
Load the data

In [2]:
# Build the project's Data wrapper, then show summary statistics for its games table.
dataset = Data(
    batch_size=100,
)

dataset.games.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,...,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0
mean,2015.575896,70.371624,2101.677721,74.190697,2099.32368,61.205668,0.061738,26.180557,57.072441,6.919323,...,19.266279,11.437051,16.819468,10.801152,21.937435,11.218625,15.191485,6.454031,2.849489,18.836014
std,6.073611,36.089915,986.770174,11.403172,989.989621,11.373177,0.287218,4.809783,7.820486,3.168482,...,6.327385,5.238806,6.985267,4.417465,4.706791,3.764948,5.029754,2.984269,2.036512,4.586616
min,2003.0,0.0,1101.0,30.0,1101.0,11.0,0.0,9.0,26.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0
25%,2011.0,38.0,1261.0,66.0,1254.0,54.0,0.0,23.0,52.0,5.0,...,15.0,8.0,12.0,8.0,19.0,9.0,12.0,4.0,1.0,16.0
50%,2016.0,74.0,1413.0,74.0,1407.0,61.0,0.0,26.0,57.0,7.0,...,19.0,11.0,16.0,10.0,22.0,11.0,15.0,6.0,3.0,19.0
75%,2021.0,101.0,3245.0,81.0,3246.0,69.0,0.0,29.0,62.0,9.0,...,23.0,15.0,21.0,14.0,25.0,14.0,18.0,8.0,4.0,22.0
max,2025.0,132.0,3480.0,149.0,3480.0,144.0,6.0,58.0,113.0,30.0,...,80.0,48.0,65.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


The x's will be the indexes of the two team IDs and program IDs; the y's will include an indicator of who won and the game stats.

## Predicting Stats
The first model we build will predict game statistics. There is more information encapsulated in the box scores than simply wins and losses so we will use this to build up embeddings that are predictive about how the games will play out

In [3]:
# Train/validation loaders for the stats-prediction stage (output_stats=True),
# using the "stats" cache key and train_size=0.99.
stats_train_loader, stats_validation_loader = dataset.train_test_data(
    output_stats=True,
    cache="stats",
    train_size=0.99,
)

Loading cached data


In [4]:
# Stats model: embedding tables are sized from the dataset's program and team
# tables, and the module is moved onto the selected device.
stats_model = model.StatsModel(
    program_embedding=512,
    team_embedding=512,
    num_programs=len(dataset.programs),
    num_teams=len(dataset.teams),
    model_sizes=(512, 256),
    dropout=0.5,
).to(device)

In [5]:
# Fit the stats model; use_cache=True lets the helper reuse a previous run
# (the recorded output below reads "Loading from cache").
model.train(
    stats_train_loader,
    stats_validation_loader,
    stats_model,
    name="stats_model",
    learning_rate=0.001,
    use_cache=True,
)

Loading from cache


## The Model
Now we use the embeddings developed in the first model to build a model that predicts which team will win, which is, in the end, what we are trying to do.

In [6]:
# Train/validation loaders for the win/loss stage (output_stats=False),
# using the "result" cache key and its own split seed.
result_train_loader, result_validation_loader = dataset.train_test_data(
    output_stats=False,
    cache="result",
    train_size=0.99,
    seed=20250320,
)

Loading cached data


In [7]:
# The result model reuses the matchup component trained inside stats_model.
result_model = model.Model(
    matchup=stats_model.matchup,
    model_sizes=(384, 32),
    dropout=0.5,
).to(device)
# Freeze the shared matchup component before training the new head
# (presumably stops its weights from updating — confirm in model.py).
result_model.matchup.freeze()

## Training the model

Train the model

In [8]:
# Train the win/loss model on regular-season games.
model.train(
    result_train_loader,
    result_validation_loader,
    result_model,
    name="result_model",
    learning_rate=0.0001,
)

Epoch   0: Train Loss=0.24881859, Test Loss=0.24883938
Epoch   1: Train Loss=0.16938005, Test Loss=0.17200699
Epoch   2: Train Loss=0.16415690, Test Loss=0.16759954
Epoch   3: Train Loss=0.16248913, Test Loss=0.16621562
Epoch   4: Train Loss=0.16414274, Test Loss=0.16760223
Epoch   5: Train Loss=0.16271739, Test Loss=0.16623589
Epoch   6: Train Loss=0.16380225, Test Loss=0.16746641
Epoch   7: Train Loss=0.16083402, Test Loss=0.16491942
Epoch   8: Train Loss=0.16227809, Test Loss=0.16604713
Epoch   9: Train Loss=0.16250004, Test Loss=0.16647970
Epoch  10: Train Loss=0.16230731, Test Loss=0.16640287
Epoch  11: Train Loss=0.16291618, Test Loss=0.16710496
Epoch  12: Train Loss=0.16210578, Test Loss=0.16664009
Best Loss: 0.164919


In [9]:
# Report accuracy and loss on the held-out regular-season split.
model.print_results(
    result_validation_loader,
    result_model,
    label="Validation Results",
)

Validation Results: Accuracy=75.42, Loss=0.164919


With this model we can predict the outcome of about three quarters of regular season games.

## Load the tourney data to test with

In [10]:
# Build a dataset from every tournament game for evaluation.
tourney_dataset = dataset.gen_dataset(dataset.tourney)
# This loader is only used for scoring and inspection, never for training, so
# it is not shuffled: evaluation stays deterministic and no torch RNG state is
# consumed just to reorder rows.
tourney_loader = DataLoader(tourney_dataset, batch_size=500, shuffle=False)

In [11]:
# Score the regular-season model on all tournament games.
model.print_results(
    tourney_loader,
    result_model,
    label="Tournament Results",
)

Tournament Results: Accuracy=73.77, Loss=0.169402


When it comes to tournament results we do a little worse. The lower accuracy is likely due to tournament teams having increased parity.

In [12]:
# Held-out "Stage 1" evaluation set: tournament games selected with after=2021.
stage1_loader = dataset.tourney_data(after=2021)
# Plain string literal: the label has no placeholders, so no f-prefix is needed.
model.print_results(stage1_loader, result_model, label="Stage 1")

Stage 1: Accuracy=71.94, Loss=0.176030


Train with early tournament data

In [13]:
# Fine-tuning data: tournament games from seasons before 2021 only.
# Named early_tourney_df so it is not confused with the prediction frame that
# a later cell stores in `tourney_df`.
early_tourney_df = dataset.tourney[dataset.tourney.Season < 2021]

# Pin random_state so the 80/20 split is the same on every run, regardless of
# how much global NumPy RNG state earlier cells happened to consume.
tourney_train_df, tourney_validation_df = train_test_split(
    early_tourney_df, train_size=0.8, random_state=20250222
)
tourney_train_data = dataset.gen_dataset(tourney_train_df)
tourney_validation_data = dataset.gen_dataset(tourney_validation_df)

tourney_train_loader = DataLoader(tourney_train_data, batch_size=500)
tourney_validation_loader = DataLoader(tourney_validation_data, batch_size=500)

In [14]:
# Freeze the matchup component again before fine-tuning on tournament games.
# It was already frozen when result_model was built; repeating the call keeps
# this section runnable on its own.
result_model.matchup.freeze()

In [15]:
# Fine-tune on pre-2021 tournament games with a learning rate 10x smaller than
# the one used for the regular-season fit.
model.train(
    tourney_train_loader,
    tourney_validation_loader,
    result_model,
    name="tuned_model",
    learning_rate=0.00001,
)

Epoch   0: Train Loss=0.16485613, Test Loss=0.17763302
Epoch   1: Train Loss=0.16484500, Test Loss=0.17761223
Epoch   2: Train Loss=0.16482299, Test Loss=0.17757453
Epoch   3: Train Loss=0.16479456, Test Loss=0.17753238
Epoch   4: Train Loss=0.16476800, Test Loss=0.17749139
Epoch   5: Train Loss=0.16474275, Test Loss=0.17745272
Epoch   6: Train Loss=0.16472036, Test Loss=0.17742105
Epoch   7: Train Loss=0.16470042, Test Loss=0.17739547
Epoch   8: Train Loss=0.16468275, Test Loss=0.17737264
Epoch   9: Train Loss=0.16466596, Test Loss=0.17735222
Epoch  10: Train Loss=0.16465084, Test Loss=0.17733451
Epoch  11: Train Loss=0.16463645, Test Loss=0.17731892
Epoch  12: Train Loss=0.16462488, Test Loss=0.17729958
Epoch  13: Train Loss=0.16461429, Test Loss=0.17728349
Epoch  14: Train Loss=0.16460346, Test Loss=0.17727338
Epoch  15: Train Loss=0.16459321, Test Loss=0.17726260
Epoch  16: Train Loss=0.16458453, Test Loss=0.17725259
Epoch  17: Train Loss=0.16457638, Test Loss=0.17724358
Epoch  18:

In [16]:
# Re-score the Stage 1 set after fine-tuning. The label is a plain string
# literal because it has no placeholders.
model.print_results(stage1_loader, result_model, label="Stage 1")

Stage 1: Accuracy=71.56, Loss=0.176010


### Performance by year


In [17]:
# Score the network on each tournament season separately, in order of first appearance.
tourney_seasons = dataset.tourney.Season.unique()
for season in tourney_seasons:
    loader = dataset.tourney_data(year=season)
    model.print_results(
        loader,
        result_model,
        label=f"{season} Tournament",
    )

2003 Tournament: Accuracy=75.78, Loss=0.166361
2004 Tournament: Accuracy=70.31, Loss=0.183144
2005 Tournament: Accuracy=75.00, Loss=0.174305
2006 Tournament: Accuracy=66.41, Loss=0.202345
2007 Tournament: Accuracy=73.44, Loss=0.176992
2008 Tournament: Accuracy=79.69, Loss=0.167135
2009 Tournament: Accuracy=75.78, Loss=0.152880
2010 Tournament: Accuracy=70.87, Loss=0.172585
2011 Tournament: Accuracy=74.23, Loss=0.168109
2012 Tournament: Accuracy=76.15, Loss=0.158513
2013 Tournament: Accuracy=70.77, Loss=0.176340
2014 Tournament: Accuracy=74.23, Loss=0.161917
2015 Tournament: Accuracy=78.46, Loss=0.142205
2016 Tournament: Accuracy=74.62, Loss=0.169670
2017 Tournament: Accuracy=79.23, Loss=0.153604
2018 Tournament: Accuracy=68.85, Loss=0.187690
2019 Tournament: Accuracy=75.38, Loss=0.150027
2021 Tournament: Accuracy=71.71, Loss=0.179726
2022 Tournament: Accuracy=74.63, Loss=0.170372
2023 Tournament: Accuracy=69.03, Loss=0.187680
2024 Tournament: Accuracy=70.90, Loss=0.166402


Breaking out by league

In [18]:
# Same per-season scoring, further split by each league value present that season.
for season in dataset.tourney.Season.unique():
    season_games = dataset.tourney.loc[dataset.tourney.Season == season]
    for league in season_games.League.unique():
        loader = dataset.tourney_data(year=season, league=league)
        model.print_results(
            loader,
            result_model,
            label=f"{season} {league} Tournament",
        )

2003 M Tournament: Accuracy=75.78, Loss=0.166361
2004 M Tournament: Accuracy=70.31, Loss=0.183144
2005 M Tournament: Accuracy=75.00, Loss=0.174305
2006 M Tournament: Accuracy=66.41, Loss=0.202345
2007 M Tournament: Accuracy=73.44, Loss=0.176992
2008 M Tournament: Accuracy=79.69, Loss=0.167135
2009 M Tournament: Accuracy=75.78, Loss=0.152880
2010 M Tournament: Accuracy=67.97, Loss=0.201566
2010 W Tournament: Accuracy=73.81, Loss=0.143143
2011 M Tournament: Accuracy=67.91, Loss=0.214703
2011 W Tournament: Accuracy=80.95, Loss=0.118558
2012 M Tournament: Accuracy=70.15, Loss=0.187894
2012 W Tournament: Accuracy=82.54, Loss=0.127267
2013 M Tournament: Accuracy=65.67, Loss=0.202788
2013 W Tournament: Accuracy=76.19, Loss=0.148213
2014 M Tournament: Accuracy=66.42, Loss=0.200409
2014 W Tournament: Accuracy=82.54, Loss=0.120982
2015 M Tournament: Accuracy=74.63, Loss=0.160870
2015 W Tournament: Accuracy=82.54, Loss=0.122355
2016 M Tournament: Accuracy=73.88, Loss=0.185277
2016 W Tournament: A

## Inspect the model
First what are the sizes of the smallest input and output weights

In [19]:
# For each layer: take the largest absolute weight along axis 0 of the weight
# matrix, then report the smallest of those per-column maxima.
for layer_label, layer in (
    ("Program embedding", result_model.matchup.program_embedding),
    ("Team embedding", result_model.matchup.team_embedding),
    ("FC", result_model.fc3),
):
    column_peaks = layer.state_dict()['weight'].abs().max(axis=0).values
    print(f"{layer_label} min: {column_peaks.min().item():>8f}")

Program embedding min: 2.373395
Team embedding min: 3.467458
FC min: 0.014881


Calculate the average gradient for each input feature

In [20]:
# Undo the earlier freeze() before feature evaluation
# (presumably so gradients can flow back to the embeddings — confirm in model.py).
result_model.matchup.unfreeze()

In [21]:
# Per-feature weights over all tournament games; the markdown above describes
# these as average gradients per input feature (see model.feature_eval).
program_weights, team_weights, stats_weights = model.feature_eval(result_model, tourney_loader)

In [22]:
# Total absolute weight for the program and team embedding inputs.
program_weights.abs().sum().item(), team_weights.abs().sum().item()

(0.0002926667220890522, 0.0002671529073268175)

In [23]:
# The first three entries of stats_weights are printed as Year, Game and League, in that order.
for position, field_name in enumerate(("Year", "Game", "League")):
    print(f"{field_name}:\t{stats_weights[position]:>4f}")

Year:	-0.000059
Game:	-0.000263
League:	-0.008712


## Save the model

In [24]:
# Persist only the parameters (state_dict) to model.pth in the working directory.
torch.save(result_model.state_dict(), 'model.pth')

## Moderated model

Moderate a model by pushing it towards 0.5

In [25]:
# Wrap the network in ModeratedModel with factor 0.75 (the markdown above says
# this pushes predictions towards 0.5), then score it on Stage 1.
moderated = model.ModeratedModel(
    result_model,
    0.75,
)
model.print_results(
    stage1_loader,
    moderated,
    label="Moderated Model",
)

Moderated Model: Accuracy=71.56, Loss=0.181021


In [26]:
# Score the moderated model on each tournament season.
for season in dataset.tourney.Season.unique():
    # Pass the season as year=..., matching the other per-season loops in this
    # notebook instead of relying on positional argument order.
    loader = dataset.tourney_data(year=season)
    model.print_results(loader, moderated, label=f"{season} Tournament")

2003 Tournament: Accuracy=75.78, Loss=0.175315
2004 Tournament: Accuracy=70.31, Loss=0.188149
2005 Tournament: Accuracy=75.00, Loss=0.180127
2006 Tournament: Accuracy=66.41, Loss=0.203929
2007 Tournament: Accuracy=73.44, Loss=0.184766
2008 Tournament: Accuracy=79.69, Loss=0.176976
2009 Tournament: Accuracy=75.78, Loss=0.165873
2010 Tournament: Accuracy=70.87, Loss=0.178294
2011 Tournament: Accuracy=74.23, Loss=0.173875
2012 Tournament: Accuracy=76.15, Loss=0.168252
2013 Tournament: Accuracy=70.77, Loss=0.181384
2014 Tournament: Accuracy=74.23, Loss=0.169867
2015 Tournament: Accuracy=78.46, Loss=0.154851
2016 Tournament: Accuracy=74.62, Loss=0.175418
2017 Tournament: Accuracy=79.23, Loss=0.162967
2018 Tournament: Accuracy=68.85, Loss=0.187796
2019 Tournament: Accuracy=75.38, Loss=0.160498
2021 Tournament: Accuracy=71.71, Loss=0.182769
2022 Tournament: Accuracy=74.63, Loss=0.177792
2023 Tournament: Accuracy=69.03, Loss=0.190468
2024 Tournament: Accuracy=70.90, Loss=0.173118


It underperforms even in years where there were upsets.

## Dig into 2023 results

In [27]:
# 2023 tournament games, selected with the same year= keyword used elsewhere.
loader = dataset.tourney_data(year=2023)

# Use the underlying tensors directly so the whole season is predicted in one call.
x, y = loader.dataset.tensors

# Inference only: switch dropout off with eval() and skip autograd bookkeeping,
# so the predictions are deterministic and no graph is kept alive.
result_model.eval()
with torch.no_grad():
    preds = result_model(x.to(device))

In [28]:
# Map the program indexes in columns 0 and 2 of x to TeamIDs once, then reuse
# them for both the id and the name columns instead of repeating the lookup.
winner_ids = [dataset.programs.loc[i].TeamID for i in x[:, 0].tolist()]
loser_ids = [dataset.programs.loc[i].TeamID for i in x[:, 2].tolist()]

t_2023 = pd.DataFrame({'winner_name': [dataset.all_teams.loc[team_id].TeamName for team_id in winner_ids],
                       'loser_name': [dataset.all_teams.loc[team_id].TeamName for team_id in loser_ids],
                       'winner': winner_ids,
                       'loser': loser_ids,
                       'actual': y.reshape(-1),
                       'predicted': preds.cpu().detach().numpy().reshape(-1)})
# Keep the first 67 rows — NOTE(review): presumably the men's games listed
# winner-first; confirm against the row order produced by dataset.tourney_data.
# .copy() makes this an independent frame, so adding the 'Upset' column later
# does not raise SettingWithCopyWarning.
t_2023 = t_2023.iloc[:67].copy()

In [29]:
# Games where the actual winner was given less than a 50% chance, least likely first.
(
    t_2023
    .loc[t_2023.predicted < 0.5]
    .sort_values(by='predicted')
)

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted
23,F Dickinson,Purdue,1192,1345,1.0,0.014612
15,Princeton,Arizona,1343,1112,1.0,0.100831
8,Furman,Virginia,1202,1438,1.0,0.191977
53,FL Atlantic,Tennessee,1194,1397,1.0,0.268666
39,Princeton,Missouri,1343,1281,1.0,0.282443
58,San Diego St,Alabama,1361,1104,1.0,0.2922
57,Miami FL,Houston,1274,1222,1.0,0.318686
50,Michigan St,Marquette,1277,1266,1.0,0.366621
14,Penn St,Texas A&M,1336,1401,1.0,0.37588
37,Arkansas,Kansas,1116,1242,1.0,0.392336


The biggest story of this season was the huge upsets in the first round. Purdue was a number 1 seed and lost, which I gave only about a 1% chance of happening. Arizona, a number 2 seed, and Virginia, a number 4 seed, also lost, which I gave about 10% and 19% chances of happening respectively.

In [30]:
# Flag each 2023 game using the project's dataset.upset(season, winner, loser) helper.
t_2023['Upset'] = [
    dataset.upset(2023, winner_id, loser_id)
    for winner_id, loser_id in zip(t_2023.winner, t_2023.loser)
]

In [31]:
# Mean predicted win probability for the actual winner across flagged upsets.
t_2023.loc[t_2023.Upset, 'predicted'].mean()

np.float64(0.36752991425369863)

On average the upsets had a 37% chance of happening

In [32]:
# Upsets the model called correctly (winner given at least 50%), most confident first.
(
    t_2023
    .loc[t_2023.Upset & (t_2023.predicted >= 0.5)]
    .sort_values(by='predicted', ascending=False)
)

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset
45,Creighton,Baylor,1166,1124,1.0,0.56527,True
60,Connecticut,Gonzaga,1163,1211,1.0,0.562127,True


I correctly predicted 2 upsets, though both were between closely ranked teams

In [33]:
# Non-upsets where the model nevertheless favoured the loser, worst miss first.
(
    t_2023
    .loc[~t_2023.Upset & (t_2023.predicted < 0.5)]
    .sort_values(by='predicted')
)

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset
63,San Diego St,Creighton,1361,1166,1.0,0.395532,False
12,Missouri,Utah St,1281,1429,1.0,0.43732,False
0,Pittsburgh,Mississippi St,1338,1280,1.0,0.453678,False
2,Arizona St,Nevada,1113,1305,1.0,0.454863,False
59,Texas,Xavier,1400,1462,1.0,0.476817,False
48,Kansas St,Kentucky,1243,1246,1.0,0.489333,False
5,Arkansas,Illinois,1116,1228,1.0,0.48983,False


I also incorrectly predicted 7 upsets

Looking at all the tourneys

In [34]:
x, y = tourney_loader.dataset.tensors
# Inference only: switch dropout off with eval() and skip autograd bookkeeping.
result_model.eval()
with torch.no_grad():
    preds = result_model(x.to(device))

# Map the program indexes in columns 0 and 2 of x to TeamIDs once and reuse them.
winner_ids = [dataset.programs.loc[i].TeamID for i in x[:, 0].tolist()]
loser_ids = [dataset.programs.loc[i].TeamID for i in x[:, 2].tolist()]

tourney_df = pd.DataFrame({'season': x[:,4].tolist(),
                           'winner_name': [dataset.all_teams.loc[team_id].TeamName for team_id in winner_ids],
                           'loser_name': [dataset.all_teams.loc[team_id].TeamName for team_id in loser_ids],
                           'winner': winner_ids,
                           'loser': loser_ids,
                           'actual': y.reshape([-1]),
                           'predicted': np.array(preds.tolist()).reshape([-1])})
# Keep only rows labelled 1.0. .copy() makes the filtered frame independent, so
# the column assignment below does not raise SettingWithCopyWarning.
tourney_df = tourney_df[tourney_df.actual == 1.0].copy()
tourney_df['Upset'] = [dataset.upset(season, winner, loser) for (winner, loser, season)
                       in zip(tourney_df['winner'], tourney_df['loser'], tourney_df['season'])]

In [35]:
# Count upsets since 2021 that the model called correctly. Only the row count
# is needed, so the frame is not sorted first.
len(tourney_df[tourney_df.Upset & (tourney_df.predicted >= 0.5) & (tourney_df.season > 2020)])

23

In [36]:
# Count non-upsets since 2021 where the model favoured the loser. Only the row
# count is needed, so the frame is not sorted first.
len(tourney_df[~tourney_df.Upset & (tourney_df.predicted < 0.5) & (tourney_df.season > 2020)])

44

Since 2021 I predicted 23 upsets correctly, and 44 incorrectly

## Predicting by seeds
What if I predict just using the seeds?

In [37]:
# Odds keyed by seed difference, built with before=2021 so the later seasons
# stay held out. NOTE(review): the exact meaning of each value (which side's
# probability) is defined in Data.odds_by_seed_diff — confirm there.
odds = dataset.odds_by_seed_diff(before=2021)

In [38]:
# Mean of the squared seed-difference odds over games selected with after=2021.
# A SeedDiff missing from `odds` raises KeyError.
(
    dataset.tourney_df(after=2021)
    .SeedDiff
    .map(lambda seed_diff: odds[seed_diff] ** 2)
    .mean()
)

np.float64(0.18527460145235355)

This results in a test Brier score of about 0.185.

## Using Elo
I can also compute Elo rankings

In [39]:
# Elo baseline built from the same dataset with k=50, scored on Stage 1.
elo_model = elo.EloModel(
    dataset,
    k=50,
)
model.print_results(
    stage1_loader,
    elo_model,
    label="Elo Model",
)

Elo Model: Accuracy=73.45, Loss=0.178886


It is similar to our model

## Hybrid Model
Building a model using the neural net and seeds

In [40]:
# Seed-only baseline, then the three candidate models side by side on Stage 1.
seed_model = model.SeedModel(dataset)
for candidate, candidate_label in (
    (seed_model, "Seed Model"),
    (elo_model, "Elo Model"),
    (result_model, "Base NN"),
):
    model.print_results(stage1_loader, candidate, label=candidate_label)

Seed Model: Accuracy=72.03, Loss=0.184022
Elo Model: Accuracy=73.45, Loss=0.178886
Base NN: Accuracy=71.56, Loss=0.176010


In [41]:
# Combine the network, seed and Elo models; the second list pairs one weight
# with each model in the same order (0.7 / 0.2 / 0.1).
hybrid = model.HybridModel(
    [result_model, seed_model, elo_model],
    [0.7, 0.2, 0.1],
)

model.print_results(
    stage1_loader,
    hybrid,
    label="Hybrid Model",
)

Hybrid Model: Accuracy=73.45, Loss=0.174766


The hybrid model has a lower loss than every individual model, and matches the best individual accuracy (the Elo model's).

In [42]:
# Score the hybrid on every (season, league) pair for 2021-2024, M before W within a season.
season_league_pairs = [(yr, lg) for yr in range(2021, 2025) for lg in ['M', 'W']]
for season, league in season_league_pairs:
    loader = dataset.tourney_data(year=season, league=league)
    model.print_results(
        loader,
        hybrid,
        label=f"{season} {league} Tournament",
    )

2021 M Tournament: Accuracy=68.94, Loss=0.209908
2021 W Tournament: Accuracy=77.78, Loss=0.149517
2022 M Tournament: Accuracy=71.64, Loss=0.201336
2022 W Tournament: Accuracy=78.36, Loss=0.141725
2023 M Tournament: Accuracy=69.40, Loss=0.203989
2023 W Tournament: Accuracy=76.12, Loss=0.166270
2024 M Tournament: Accuracy=64.93, Loss=0.193466
2024 W Tournament: Accuracy=80.60, Loss=0.130932


## Generating the submission file
### Phase 2

Write the results

In [43]:
# Generate the submission from the hybrid model; fname names the output file.
model.gen_submission(hybrid, dataset, fname="hybrid.csv")

In [44]:
# Generate a second submission from the network alone, for comparison with the hybrid.
model.gen_submission(result_model, dataset, fname="base.csv")

## Generate a bracket

With the hybrid model

In [45]:
# Bracket target used by the bracket and tiebreaker cells below.
# The loops earlier in the notebook rebind `season` and `league`, so this cell
# must run before them.
season, league = 2025, 'M'

In [46]:
# Generate the bracket with the hybrid model and join team names onto the
# 'Winner' column, keeping three columns.
bracket = (
    model.gen_bracket(dataset, season, league, hybrid)
    .join(dataset.all_teams, on='Winner')
    [['Winner', 'TeamName', 'P']]
)
# Lift pandas' display limits for this one print so every row is shown.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(bracket)

      Winner        TeamName         P
Slot                                  
R1W1    1181            Duke  0.989060
R1W2    1104         Alabama  0.897577
R1W3    1458       Wisconsin  0.911944
R1W4    1112         Arizona  0.863336
R1W5    1332          Oregon  0.764633
R1W6    1140             BYU  0.558921
R1W7    1388    St Mary's CA  0.659181
R1W8    1280  Mississippi St  0.512260
R1X1    1222         Houston  0.982011
R1X2    1397       Tennessee  0.936173
R1X3    1246        Kentucky  0.864018
R1X4    1345          Purdue  0.819319
R1X5    1155         Clemson  0.804404
R1X6    1228        Illinois  0.649813
R1X7    1417            UCLA  0.577307
R1X8    1211         Gonzaga  0.622220
R1Y1    1120          Auburn  0.991866
R1Y2    1277     Michigan St  0.940208
R1Y3    1235         Iowa St  0.859171
R1Y4    1401       Texas A&M  0.758289
R1Y5    1276        Michigan  0.656759
R1Y6    1314  North Carolina  0.545538
R1Y7    1266       Marquette  0.583146
R1Y8    1257      Louisvi

In [47]:
# Tiebreaker value for the hybrid bracket, computed with the stats model.
# NOTE(review): presumably a predicted score total — confirm in model.tiebreaker.
model.tiebreaker(bracket, stats_model, dataset, season, league)

140.15078814263353

And with the base model

In [48]:
# Same bracket, generated with the network alone. Uses the shared `season` and
# `league` variables rather than hard-coded values, so this bracket and the
# tiebreaker call that follows always describe the same tournament.
base_bracket = model.gen_bracket(dataset, season, league, result_model).join(dataset.all_teams, on='Winner')[['Winner', 'TeamName', 'P']]
# Lift pandas' display limits for this one print so every row is shown.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(base_bracket)

      Winner        TeamName         P
Slot                                  
R1W1    1181            Duke  0.990473
R1W2    1104         Alabama  0.875621
R1W3    1458       Wisconsin  0.905333
R1W4    1112         Arizona  0.869298
R1W5    1332          Oregon  0.762818
R1W6    1140             BYU  0.517452
R1W7    1388    St Mary's CA  0.650988
R1W8    1280  Mississippi St  0.511514
R1X1    1222         Houston  0.979262
R1X2    1397       Tennessee  0.927253
R1X3    1246        Kentucky  0.839157
R1X4    1345          Purdue  0.801925
R1X5    1155         Clemson  0.809260
R1X6    1228        Illinois  0.660618
R1X7    1417            UCLA  0.559506
R1X8    1211         Gonzaga  0.634360
R1Y1    1120          Auburn  0.993405
R1Y2    1277     Michigan St  0.934021
R1Y3    1235         Iowa St  0.828153
R1Y4    1401       Texas A&M  0.726061
R1Y5    1276        Michigan  0.613051
R1Y6    1314  North Carolina  0.603335
R1Y7    1266       Marquette  0.563105
R1Y8    1257      Louisvi

In [49]:
# Tiebreaker value for the base-model bracket, using the same stats model.
model.tiebreaker(base_bracket, stats_model, dataset, season, league)

140.15078814263353