# March Madness 2025

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import os
from sklearn.model_selection import train_test_split
import random
from data import Data, STATS_COLUMNS
import model
import elo

torch.manual_seed(20250222)
random.seed(20250222)

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


## Hypothesis
Each team can be modeled by x hidden features. In each game, these hidden features interact in a nonlinear fashion to determine the outcome of the game

## Preparing the data
Load the data

In [2]:
dataset = Data(batch_size=100)

dataset.games.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,...,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0,200590.0
mean,2015.575896,70.371624,2101.677721,74.190697,2099.32368,61.205668,0.061738,26.180557,57.072441,6.919323,...,19.266279,11.437051,16.819468,10.801152,21.937435,11.218625,15.191485,6.454031,2.849489,18.836014
std,6.073611,36.089915,986.770174,11.403172,989.989621,11.373177,0.287218,4.809783,7.820486,3.168482,...,6.327385,5.238806,6.985267,4.417465,4.706791,3.764948,5.029754,2.984269,2.036512,4.586616
min,2003.0,0.0,1101.0,30.0,1101.0,11.0,0.0,9.0,26.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0
25%,2011.0,38.0,1261.0,66.0,1254.0,54.0,0.0,23.0,52.0,5.0,...,15.0,8.0,12.0,8.0,19.0,9.0,12.0,4.0,1.0,16.0
50%,2016.0,74.0,1413.0,74.0,1407.0,61.0,0.0,26.0,57.0,7.0,...,19.0,11.0,16.0,10.0,22.0,11.0,15.0,6.0,3.0,19.0
75%,2021.0,101.0,3245.0,81.0,3246.0,69.0,0.0,29.0,62.0,9.0,...,23.0,15.0,21.0,14.0,25.0,14.0,18.0,8.0,4.0,22.0
max,2025.0,132.0,3480.0,149.0,3480.0,144.0,6.0,58.0,113.0,30.0,...,80.0,48.0,65.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


The x's will be the indexes of two team IDs and program IDs, the y's will include and indicator of who won at the game stats

## Predicting Stats
The first model we build will predict game statistics. There is more information encapsulated in the box scores than simply wins and losses so we will use this to build up embeddings that are predictive about how the games will play out

In [3]:
stats_train_loader, stats_validation_loader = dataset.train_test_data(output_stats=True, cache="stats", train_size=0.95)

Loading cached data


In [4]:
stats_model = model.StatsModel(program_embedding=512, team_embedding=512,
                               num_programs=len(dataset.programs), num_teams=len(dataset.teams),
                               model_sizes=(512,256), dropout=0.5).to(device)

In [5]:
model.train(stats_train_loader, stats_validation_loader, stats_model, name="stats_model", learning_rate=0.001, use_cache=True)

Loading from cache


## The Model
Now we use the embeddings developed in the first model to build a model to predict which team will win, which is the thing we are, in the end trying to do.

In [6]:
result_train_loader, result_validation_loader = dataset.train_test_data(output_stats=False, cache="result", train_size=0.95)

Loading cached data


In [7]:
result_model = model.Model(matchup=stats_model.matchup, model_sizes=(384,64), dropout=0.5).to(device)
result_model.matchup.freeze()

## Training the model

Train the model

In [8]:
model.train(result_train_loader, result_validation_loader, result_model, name="result_model", learning_rate=0.0001)

Epoch   0: Train Loss=0.24645815, Test Loss=0.24647877
Epoch   1: Train Loss=0.16389753, Test Loss=0.16502651
Epoch   2: Train Loss=0.16217688, Test Loss=0.16368773
Epoch   3: Train Loss=0.16268911, Test Loss=0.16427240
Epoch   4: Train Loss=0.16198963, Test Loss=0.16372778
Epoch   5: Train Loss=0.16061829, Test Loss=0.16257826
Epoch   6: Train Loss=0.16087948, Test Loss=0.16281307
Epoch   7: Train Loss=0.16183196, Test Loss=0.16368645
Epoch   8: Train Loss=0.16120144, Test Loss=0.16311883
Epoch   9: Train Loss=0.16078741, Test Loss=0.16269528
Epoch  10: Train Loss=0.16148565, Test Loss=0.16338576
Best Loss: 0.162578


In [9]:
model.print_results(result_validation_loader, result_model, label="Validation Results")

Validation Results: Accuracy=75.62, Loss=0.162578


With this model we can predict the output of about three quarters of regular season games.

## Load the tourney data to test with

In [10]:
tourney_dataset = dataset.gen_dataset(dataset.tourney)
tourney_loader = DataLoader(tourney_dataset, batch_size=500, shuffle=True)

In [11]:
model.print_results(tourney_loader, result_model, label="Tournament Results")

Tournament Results: Accuracy=72.78, Loss=0.171288


When it comes to tournament results we get a little worse. The lower result is likely due to teams having increased pairity.

Train with early torney data

In [12]:
tourney_df = dataset.tourney[dataset.tourney.Season < 2021]

tourney_train_df, tourney_validation_df = train_test_split(tourney_df, train_size=0.8)
tourney_train_data = dataset.gen_dataset(tourney_train_df)
tourney_validation_data = dataset.gen_dataset(tourney_validation_df)

tourney_train_loader = DataLoader(tourney_train_data, batch_size=500)
tourney_validation_loader = DataLoader(tourney_validation_data, batch_size=500)

In [13]:
result_model.matchup.freeze()

In [14]:
model.train(tourney_train_loader, tourney_validation_loader, result_model, name="tuned_model", learning_rate=0.00001)

Epoch   0: Train Loss=0.16586681, Test Loss=0.18129527
Epoch   1: Train Loss=0.16581146, Test Loss=0.18121712
Epoch   2: Train Loss=0.16577924, Test Loss=0.18117880
Epoch   3: Train Loss=0.16575364, Test Loss=0.18114707
Epoch   4: Train Loss=0.16573700, Test Loss=0.18112787
Epoch   5: Train Loss=0.16572109, Test Loss=0.18110848
Epoch   6: Train Loss=0.16570259, Test Loss=0.18108504
Epoch   7: Train Loss=0.16568509, Test Loss=0.18106083
Epoch   8: Train Loss=0.16567214, Test Loss=0.18105192
Epoch   9: Train Loss=0.16566105, Test Loss=0.18104877
Epoch  10: Train Loss=0.16564726, Test Loss=0.18104728
Epoch  11: Train Loss=0.16563574, Test Loss=0.18103908
Epoch  12: Train Loss=0.16562064, Test Loss=0.18102608
Epoch  13: Train Loss=0.16560981, Test Loss=0.18101622
Epoch  14: Train Loss=0.16560472, Test Loss=0.18100452
Epoch  15: Train Loss=0.16559920, Test Loss=0.18099654
Epoch  16: Train Loss=0.16559600, Test Loss=0.18100029
Epoch  17: Train Loss=0.16558699, Test Loss=0.18099919
Epoch  18:

### Performance by year


In [15]:
for season in dataset.tourney.Season.unique():
    loader = dataset.tourney_data(year=season)
    model.print_results(loader, result_model, label=f"{season} Tournament")

2003 Tournament: Accuracy=72.66, Loss=0.174623
2004 Tournament: Accuracy=71.88, Loss=0.181018
2005 Tournament: Accuracy=75.00, Loss=0.175301
2006 Tournament: Accuracy=69.53, Loss=0.204250
2007 Tournament: Accuracy=76.56, Loss=0.172825
2008 Tournament: Accuracy=75.78, Loss=0.159102
2009 Tournament: Accuracy=75.78, Loss=0.164108
2010 Tournament: Accuracy=68.90, Loss=0.173332
2011 Tournament: Accuracy=70.77, Loss=0.181222
2012 Tournament: Accuracy=73.46, Loss=0.156405
2013 Tournament: Accuracy=69.62, Loss=0.181220
2014 Tournament: Accuracy=76.92, Loss=0.163540
2015 Tournament: Accuracy=79.23, Loss=0.147210
2016 Tournament: Accuracy=73.85, Loss=0.166702
2017 Tournament: Accuracy=76.92, Loss=0.154626
2018 Tournament: Accuracy=68.85, Loss=0.185407
2019 Tournament: Accuracy=75.77, Loss=0.152379
2021 Tournament: Accuracy=72.09, Loss=0.179424
2022 Tournament: Accuracy=71.27, Loss=0.175001
2023 Tournament: Accuracy=70.90, Loss=0.189781
2024 Tournament: Accuracy=71.64, Loss=0.165432


In [16]:
stage1_loader = dataset.tourney_data(after=2021)
model.print_results(stage1_loader, result_model, label=f"Stage 1")

Stage 1: Accuracy=71.47, Loss=0.177390


Breaking out by league

In [17]:
for season in dataset.tourney.Season.unique():
    for league in dataset.tourney[dataset.tourney.Season == season].League.unique():
        loader = dataset.tourney_data(year=season, league=league)
        model.print_results(loader, result_model, label=f"{season} {league} Tournament")

2003 M Tournament: Accuracy=72.66, Loss=0.174623
2004 M Tournament: Accuracy=71.88, Loss=0.181018
2005 M Tournament: Accuracy=75.00, Loss=0.175301
2006 M Tournament: Accuracy=69.53, Loss=0.204250
2007 M Tournament: Accuracy=76.56, Loss=0.172825
2008 M Tournament: Accuracy=75.78, Loss=0.159102
2009 M Tournament: Accuracy=75.78, Loss=0.164108
2010 M Tournament: Accuracy=67.19, Loss=0.196024
2010 W Tournament: Accuracy=70.63, Loss=0.150280
2011 M Tournament: Accuracy=64.18, Loss=0.224343
2011 W Tournament: Accuracy=77.78, Loss=0.135363
2012 M Tournament: Accuracy=65.67, Loss=0.192004
2012 W Tournament: Accuracy=81.75, Loss=0.118547
2013 M Tournament: Accuracy=61.19, Loss=0.210685
2013 W Tournament: Accuracy=78.57, Loss=0.149884
2014 M Tournament: Accuracy=70.15, Loss=0.202009
2014 W Tournament: Accuracy=84.13, Loss=0.122629
2015 M Tournament: Accuracy=75.37, Loss=0.167038
2015 W Tournament: Accuracy=83.33, Loss=0.126124
2016 M Tournament: Accuracy=71.64, Loss=0.185261
2016 W Tournament: A

## Inspect the model
First what are the sizes of the smallest input and output weights

In [18]:
print(f"Program embedding min: {result_model.matchup.program_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"Team embedding min: {result_model.matchup.team_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"FC min: {result_model.fc3.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")

Program embedding min: 2.366919
Team embedding min: 3.500687
FC min: 0.009287


Calculate the average gradient for each input feature

In [19]:
result_model.matchup.unfreeze()

In [20]:
program_weights, team_weights, stats_weights = model.feature_eval(result_model, tourney_loader)

In [21]:
program_weights.abs().sum().item(), team_weights.abs().sum().item()

(0.00022815859119873494, 0.00021092827955726534)

In [22]:
print(f"Year:\t{stats_weights[0]:>4f}")
print(f"Game:\t{stats_weights[1]:>4f}")
print(f"League:\t{stats_weights[2]:>4f}")

Year:	-0.000010
Game:	0.000128
League:	-0.000848


## Save the model

In [23]:
torch.save(result_model.state_dict(), 'model.pth')

## Moderated model

Moderate a model by pushing it towards 0.5

In [24]:
moderated = model.ModeratedModel(result_model, 0.75)
model.print_results(stage1_loader, moderated, label="Moderated Model")

Moderated Model: Accuracy=71.47, Loss=0.181154


In [25]:
for season in dataset.tourney.Season.unique():
    loader = dataset.tourney_data(season)
    model.print_results(loader, moderated, label=f"{season} Tournament")

2003 Tournament: Accuracy=72.66, Loss=0.179916
2004 Tournament: Accuracy=71.88, Loss=0.186354
2005 Tournament: Accuracy=75.00, Loss=0.179608
2006 Tournament: Accuracy=69.53, Loss=0.204703
2007 Tournament: Accuracy=76.56, Loss=0.180054
2008 Tournament: Accuracy=75.78, Loss=0.169629
2009 Tournament: Accuracy=75.78, Loss=0.173000
2010 Tournament: Accuracy=68.90, Loss=0.177550
2011 Tournament: Accuracy=70.77, Loss=0.182567
2012 Tournament: Accuracy=73.46, Loss=0.166207
2013 Tournament: Accuracy=69.62, Loss=0.184319
2014 Tournament: Accuracy=76.92, Loss=0.170762
2015 Tournament: Accuracy=79.23, Loss=0.158054
2016 Tournament: Accuracy=73.85, Loss=0.172638
2017 Tournament: Accuracy=76.92, Loss=0.162714
2018 Tournament: Accuracy=68.85, Loss=0.185586
2019 Tournament: Accuracy=75.77, Loss=0.161370
2021 Tournament: Accuracy=72.09, Loss=0.181804
2022 Tournament: Accuracy=71.27, Loss=0.179606
2023 Tournament: Accuracy=70.90, Loss=0.191933
2024 Tournament: Accuracy=71.64, Loss=0.171298


It underperforms even in years where there were upsets.

## Dig into 2023 results

In [26]:
loader = dataset.tourney_data(2023)

x, y = loader.dataset.tensors

preds = result_model(x.to(device))

In [27]:
t_2023 = pd.DataFrame({'winner_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                       'loser_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                       'winner': [dataset.programs.loc[i].TeamID for i in x[:,0].tolist()],
                       'loser': [dataset.programs.loc[i].TeamID for i in x[:,2].tolist()],
                       'actual': y.reshape(-1),
                       'predicted': preds.cpu().detach().numpy().reshape(-1)}).iloc[:67]

In [28]:
t_2023[t_2023.predicted < 0.5].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted
23,F Dickinson,Purdue,1192,1345,1.0,0.017767
15,Princeton,Arizona,1343,1112,1.0,0.105397
8,Furman,Virginia,1202,1438,1.0,0.200058
58,San Diego St,Alabama,1361,1104,1.0,0.22838
39,Princeton,Missouri,1343,1281,1.0,0.271892
53,FL Atlantic,Tennessee,1194,1397,1.0,0.288231
57,Miami FL,Houston,1274,1222,1.0,0.304308
50,Michigan St,Marquette,1277,1266,1.0,0.345838
14,Penn St,Texas A&M,1336,1401,1.0,0.359226
62,Miami FL,Texas,1274,1400,1.0,0.381015


The biggest thing in this season were the huge upsets in the first round. Purdue was a number one seed and lost which I only gave a 1% chance to happen. Arizona and Virginia were number 2 seeds and lost which I gave 13% and 17% chances of happening respectively.

In [29]:
t_2023['Upset'] = [dataset.upset(2023, winner, loser) for (winner, loser) in zip(t_2023['winner'], t_2023['loser'])]

In [30]:
t_2023[t_2023.Upset].predicted.mean()

np.float64(0.3520376150629206)

On average the upsets had a 37% chance of happening

In [31]:
t_2023[t_2023.Upset & (t_2023.predicted >= 0.5)].sort_values('predicted', ascending=False)

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset
45,Creighton,Baylor,1166,1124,1.0,0.526578,True
49,Miami FL,Indiana,1274,1231,1.0,0.51471,True
60,Connecticut,Gonzaga,1163,1211,1.0,0.5121,True
24,FL Atlantic,Memphis,1194,1272,1.0,0.504377,True


I correctly predicted 4 upsets, though all were closely ranked

In [32]:
t_2023[~t_2023.Upset & (t_2023.predicted < 0.5)].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset
12,Missouri,Utah St,1281,1429,1.0,0.399621,False
63,San Diego St,Creighton,1361,1166,1.0,0.403161,False
48,Kansas St,Kentucky,1243,1246,1.0,0.460557,False
5,Arkansas,Illinois,1116,1228,1.0,0.461863,False
11,Maryland,West Virginia,1268,1452,1.0,0.463619,False
31,Michigan St,USC,1277,1425,1.0,0.483931,False
0,Pittsburgh,Mississippi St,1338,1280,1.0,0.491536,False


I also incorrectly predicted 5 upsets

Looking at all the tourneys

In [33]:
x, y = tourney_loader.dataset.tensors
preds = result_model(x.to(device))
tourney_df = pd.DataFrame({'season': x[:,4].tolist(),
                           'winner_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                           'loser_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                           'winner': [dataset.programs.loc[i].TeamID for i in x[:,0].tolist()],
                           'loser': [dataset.programs.loc[i].TeamID for i in x[:,2].tolist()],
                           'actual': y.reshape([-1]),
                           'predicted': np.array(preds.tolist()).reshape([-1])})
tourney_df = tourney_df[tourney_df.actual == 1.0]
tourney_df['Upset'] = [dataset.upset(season, winner, loser) for (winner, loser, season)
                       in zip(tourney_df['winner'], tourney_df['loser'], tourney_df['season'])]

In [34]:
len(tourney_df[tourney_df.Upset & (tourney_df.predicted >= 0.5) & (tourney_df.season > 2020)].sort_values('predicted', ascending=False))

19

In [35]:
len(tourney_df[~tourney_df.Upset & (tourney_df.predicted < 0.5) & (tourney_df.season > 2020)].sort_values('predicted'))

49

Overall I predicted 25 upsets correctly, and 36 incorrectly

## Predicting by seeds
What if I predict just using the seeds?

In [36]:
odds = dataset.odds_by_seed_diff(before=2021)

In [37]:
dataset.tourney_df(after=2021).SeedDiff.map(lambda x: odds[x]**2).mean()

np.float64(0.18527460145235355)

This results in a test Brier score of about 0.185.

## Using Elo
I can also compute Elo rankings

In [38]:
elo_model = elo.EloModel(dataset, k=50)
model.print_results(stage1_loader, elo_model, label="Elo Model")

Elo Model: Accuracy=73.45, Loss=0.178886


It is similar to our model

## Hybrid Model
Building a model using the neural net and seeds

In [39]:
seed_model = model.SeedModel(dataset)
model.print_results(stage1_loader, seed_model, label="Seed Model")
model.print_results(stage1_loader, elo_model, label="Elo Model")
model.print_results(stage1_loader, result_model, label="Base NN")

Seed Model: Accuracy=72.03, Loss=0.184022
Elo Model: Accuracy=73.45, Loss=0.178886
Base NN: Accuracy=71.47, Loss=0.177390


In [40]:
hybrid = model.HybridModel([result_model, seed_model, elo_model], [0.7, 0.2, 0.1])

model.print_results(stage1_loader, hybrid, label="Hybrid Model")

Hybrid Model: Accuracy=72.03, Loss=0.175847


They hybrid model outperforms all individual models

In [41]:
for season in range(2021, 2025):
    for league in ['M', 'W']:
        loader = dataset.tourney_data(year=season, league=league)
        model.print_results(loader, hybrid, label=f"{season} {league} Tournament")

2021 M Tournament: Accuracy=66.67, Loss=0.211632
2021 W Tournament: Accuracy=76.98, Loss=0.147820
2022 M Tournament: Accuracy=69.40, Loss=0.206912
2022 W Tournament: Accuracy=76.12, Loss=0.142186
2023 M Tournament: Accuracy=64.93, Loss=0.207645
2023 W Tournament: Accuracy=76.87, Loss=0.166644
2024 M Tournament: Accuracy=66.42, Loss=0.189656
2024 W Tournament: Accuracy=79.10, Loss=0.133139


## Generating the submission file
### Phase 2

Write the results

In [42]:
model.gen_submission(hybrid, dataset, fname="hybrid.csv")

In [43]:
model.gen_submission(result_model, dataset, fname="base.csv")

## Generate a bracket

With the hybrid model

In [44]:
season, league = 2025, 'M'

In [45]:
bracket = model.gen_bracket(dataset, season, league, hybrid).join(dataset.all_teams, on='Winner')[['Winner', 'TeamName', 'P']]
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    print(bracket)

      Winner        TeamName         P
Slot                                  
R1W1    1181            Duke  0.988825
R1W2    1104         Alabama  0.938463
R1W3    1458       Wisconsin  0.941850
R1W4    1112         Arizona  0.896375
R1W5    1332          Oregon  0.788635
R1W6    1140             BYU  0.519889
R1W7    1388    St Mary's CA  0.638715
R1W8    1124          Baylor  0.567744
R1X1    1222         Houston  0.981787
R1X2    1397       Tennessee  0.954737
R1X3    1246        Kentucky  0.895621
R1X4    1345          Purdue  0.855055
R1X5    1155         Clemson  0.826530
R1X6    1228        Illinois  0.630975
R1X7    1417            UCLA  0.626464
R1X8    1211         Gonzaga  0.660496
R1Y1    1120          Auburn  0.990251
R1Y2    1277     Michigan St  0.949246
R1Y3    1235         Iowa St  0.862782
R1Y4    1401       Texas A&M  0.743958
R1Y5    1276        Michigan  0.711028
R1Y6    1314  North Carolina  0.571387
R1Y7    1266       Marquette  0.597772
R1Y8    1257      Louisvi

In [46]:
model.tiebreaker(bracket, stats_model, dataset, season, league)

142.70235603997725

And with the base model

In [47]:
base_bracket = model.gen_bracket(dataset, 2025, 'M', result_model).join(dataset.all_teams, on='Winner')[['Winner', 'TeamName', 'P']]
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    print(base_bracket)

      Winner        TeamName         P
Slot                                  
R1W1    1181            Duke  0.990137
R1W2    1104         Alabama  0.934030
R1W3    1458       Wisconsin  0.948057
R1W4    1112         Arizona  0.916496
R1W5    1332          Oregon  0.797107
R1W6    1433             VCU  0.538308
R1W7    1388    St Mary's CA  0.621751
R1W8    1124          Baylor  0.602779
R1X1    1222         Houston  0.978942
R1X2    1397       Tennessee  0.953775
R1X3    1246        Kentucky  0.884303
R1X4    1345          Purdue  0.852976
R1X5    1155         Clemson  0.840869
R1X6    1228        Illinois  0.633706
R1X7    1417            UCLA  0.629732
R1X8    1211         Gonzaga  0.689040
R1Y1    1120          Auburn  0.991098
R1Y2    1277     Michigan St  0.946932
R1Y3    1235         Iowa St  0.833312
R1Y4    1401       Texas A&M  0.705587
R1Y5    1276        Michigan  0.690578
R1Y6    1314  North Carolina  0.640262
R1Y7    1266       Marquette  0.583999
R1Y8    1257      Louisvi

In [48]:
model.tiebreaker(base_bracket, stats_model, dataset, season, league)

142.70235603997725