# March Madness 2025

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import os
from sklearn.model_selection import train_test_split
import random
from data import Data, STATS_COLUMNS
from model import *

torch.manual_seed(20250222)
random.seed(20250222)

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


## Hypothesis
Each team can be modeled by x hidden features. In each game, these hidden features interact in a nonlinear fashion to determine the outcome of the game

## Preparing the data
Load the data

In [2]:
dataset = Data()

dataset.games.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,...,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0
mean,2015.470621,69.843291,2099.847868,74.183169,2097.450588,61.187026,0.061787,26.176339,57.063405,6.912005,...,19.248818,11.436922,16.826656,10.826832,21.949363,11.217125,15.21463,6.453946,2.848942,18.853504
std,6.024751,35.933736,986.382716,11.406085,989.676138,11.373007,0.287403,4.811306,7.828931,3.16658,...,6.325219,5.239163,6.987616,4.418293,4.708807,3.765042,5.028571,2.985335,2.037092,4.587468
min,2003.0,0.0,1101.0,30.0,1101.0,11.0,0.0,9.0,26.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0
25%,2011.0,37.0,1260.0,66.0,1253.0,53.0,0.0,23.0,52.0,5.0,...,15.0,8.0,12.0,8.0,19.0,9.0,12.0,4.0,1.0,16.0
50%,2016.0,73.0,1413.0,74.0,1407.0,61.0,0.0,26.0,57.0,7.0,...,19.0,11.0,16.0,10.0,22.0,11.0,15.0,6.0,3.0,19.0
75%,2020.0,101.0,3244.0,81.0,3245.0,69.0,0.0,29.0,62.0,9.0,...,23.0,15.0,21.0,14.0,25.0,14.0,18.0,8.0,4.0,22.0
max,2025.0,132.0,3480.0,149.0,3480.0,144.0,6.0,58.0,113.0,30.0,...,80.0,48.0,65.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


The x's will be the indexes of two team IDs and program IDs, the y's will include and indicator of who won at the game stats

In [3]:
train_loader, validation_loader = dataset.train_test_data()

Loading cached data


## The Model
Define the model. Combine the embeddings for the two teams, go to a hidden layer, and then output to a prediction if the first team won

In [4]:
model = Model(embedding_sizes=[16, 2048], model_sizes=(128,128), dropout=0.1, dataset=dataset).to(device)

## Training the model

Train the model

In [5]:
train(train_loader, validation_loader, model, learning_rate=0.0005)

Initial: Accuracy: 50.00%, Stats loss: 796.786234 Result loss: 0.290508
Epoch 0
Train: Accuracy: 62.80%, Stats loss: 41.190527 Result loss: 0.222592
Test: Accuracy: 62.84%, Stats loss: 40.886882 Result loss: 0.222199
Epoch 1
Train: Accuracy: 67.15%, Stats loss: 41.419481 Result loss: 0.204245
Test: Accuracy: 67.00%, Stats loss: 41.238840 Result loss: 0.204092
Epoch 2
Train: Accuracy: 66.90%, Stats loss: 40.021307 Result loss: 0.204525
Test: Accuracy: 66.90%, Stats loss: 40.054462 Result loss: 0.204485
Epoch 3
Train: Accuracy: 66.58%, Stats loss: 47.653535 Result loss: 0.205660
Test: Accuracy: 66.65%, Stats loss: 47.649410 Result loss: 0.205361
Epoch 4
Train: Accuracy: 67.21%, Stats loss: 47.443626 Result loss: 0.203406
Test: Accuracy: 66.91%, Stats loss: 47.577053 Result loss: 0.203691
Epoch 5
Train: Accuracy: 67.36%, Stats loss: 48.851467 Result loss: 0.202474
Test: Accuracy: 67.23%, Stats loss: 49.187479 Result loss: 0.202332
Epoch 6
Train: Accuracy: 67.44%, Stats loss: 49.761065 Res

Fine tune with only the result

In [6]:
train(train_loader, validation_loader, model, learning_rate=0.0001, full_loss=False)

Initial: Accuracy: 73.69%, Stats loss: 43.135108 Result loss: 0.173434
Epoch 0
Train: Accuracy: 76.59%, Stats loss: 112.084718 Result loss: 0.156642
Test: Accuracy: 73.80%, Stats loss: 113.076810 Result loss: 0.173338
Epoch 1
Train: Accuracy: 76.77%, Stats loss: 168.187405 Result loss: 0.155803
Test: Accuracy: 73.71%, Stats loss: 168.907313 Result loss: 0.173208
Epoch 2
Train: Accuracy: 76.88%, Stats loss: 219.522373 Result loss: 0.155272
Test: Accuracy: 73.52%, Stats loss: 220.029674 Result loss: 0.173545
Epoch 3
Train: Accuracy: 77.01%, Stats loss: 271.363147 Result loss: 0.154431
Test: Accuracy: 73.63%, Stats loss: 271.771777 Result loss: 0.174077
Epoch 4
Train: Accuracy: 77.12%, Stats loss: 297.926009 Result loss: 0.154052
Test: Accuracy: 73.70%, Stats loss: 298.290706 Result loss: 0.173789
Epoch 5
Train: Accuracy: 77.25%, Stats loss: 331.561792 Result loss: 0.153179
Test: Accuracy: 73.68%, Stats loss: 331.729578 Result loss: 0.173879
Epoch 6
Train: Accuracy: 77.33%, Stats loss: 34

With this model we can predict the output of about three quarters of regular season games.

## Load the tourney data to test with

In [7]:
tourney_dataset = dataset.gen_dataset(dataset.tourney)
tourney_loader = DataLoader(tourney_dataset, batch_size=500, shuffle=True)

In [8]:
test(tourney_loader, model, device, label="Tourney")

Tourney: Accuracy: 73.70%, Stats loss: 172.518689 Result loss: 0.170433


0.17043262693136313

When it comes to tournament results we get a little worse. The lower result is likely due to teams having increased pairity.

Train with early torney data

In [9]:
tourney_df = dataset.tourney[dataset.tourney.Season < 2021]

tourney_train_df, tourney_validation_df = train_test_split(tourney_df, train_size=0.8)
tourney_train_data = dataset.gen_dataset(tourney_train_df)
tourney_validation_data = dataset.gen_dataset(tourney_validation_df)

tourney_train_loader = DataLoader(tourney_train_data, batch_size=500)
tourney_validation_loader = DataLoader(tourney_validation_data, batch_size=500)

In [10]:
for param in model.team_embedding.parameters():
    param.requires_grad=False
for param in model.program_embedding.parameters():
    param.requires_grad=False

In [11]:
train(tourney_train_loader, tourney_validation_loader, model, learning_rate=0.0001, full_loss=False)

Initial: Accuracy: 72.06%, Stats loss: 168.088686 Result loss: 0.173256
Epoch 0
Train: Accuracy: 73.28%, Stats loss: 179.410581 Result loss: 0.170822
Test: Accuracy: 71.92%, Stats loss: 171.577583 Result loss: 0.177126
Epoch 1
Train: Accuracy: 73.64%, Stats loss: 181.078691 Result loss: 0.168291
Test: Accuracy: 72.06%, Stats loss: 173.218670 Result loss: 0.175324
Epoch 2
Train: Accuracy: 74.79%, Stats loss: 181.523042 Result loss: 0.165711
Test: Accuracy: 71.92%, Stats loss: 173.667225 Result loss: 0.173508
Epoch 3
Train: Accuracy: 75.00%, Stats loss: 181.505157 Result loss: 0.164447
Test: Accuracy: 72.21%, Stats loss: 173.659497 Result loss: 0.172923
Epoch 4
Train: Accuracy: 75.25%, Stats loss: 181.321761 Result loss: 0.164002
Test: Accuracy: 72.35%, Stats loss: 173.489685 Result loss: 0.173019
Epoch 5
Train: Accuracy: 75.64%, Stats loss: 181.277495 Result loss: 0.163787
Test: Accuracy: 72.49%, Stats loss: 173.455593 Result loss: 0.173312
Epoch 6
Train: Accuracy: 75.79%, Stats loss: 1

### Performance by year


In [12]:
for season in dataset.tourney.Season.unique():
    loader = dataset.tourney_data(year=season)
    test(loader, model, device, label=f"{season} Tournament")

2003 Tournament: Accuracy: 75.00%, Stats loss: 187.536896 Result loss: 0.177186
2004 Tournament: Accuracy: 64.84%, Stats loss: 180.405032 Result loss: 0.187859
2005 Tournament: Accuracy: 77.34%, Stats loss: 186.615882 Result loss: 0.165995
2006 Tournament: Accuracy: 68.75%, Stats loss: 178.120440 Result loss: 0.208336
2007 Tournament: Accuracy: 75.78%, Stats loss: 187.861479 Result loss: 0.159984
2008 Tournament: Accuracy: 77.34%, Stats loss: 186.278016 Result loss: 0.159429
2009 Tournament: Accuracy: 71.09%, Stats loss: 189.830288 Result loss: 0.168506
2010 Tournament: Accuracy: 75.20%, Stats loss: 174.879282 Result loss: 0.167447
2011 Tournament: Accuracy: 70.00%, Stats loss: 168.158943 Result loss: 0.172318
2012 Tournament: Accuracy: 77.69%, Stats loss: 169.856426 Result loss: 0.152798
2013 Tournament: Accuracy: 71.15%, Stats loss: 173.461048 Result loss: 0.182032
2014 Tournament: Accuracy: 76.54%, Stats loss: 170.790335 Result loss: 0.157908
2015 Tournament: Accuracy: 77.69%, Stats

In [13]:
stage1_loader = dataset.tourney_data(after=2021)
test(stage1_loader, model, device=device, label=f"Stage 1")

Stage 1: Accuracy: 72.32%, Stats loss: 172.305863 Result loss: 0.177956


0.17795590977595196

Breaking out by league

In [14]:
for season in dataset.tourney.Season.unique():
    for league in dataset.tourney[dataset.tourney.Season == season].League.unique():
        loader = dataset.tourney_data(year=season, league=league)
        test(loader, model, device, label=f"{season} {league} Tournament")

2003 M Tournament: Accuracy: 75.00%, Stats loss: 187.536896 Result loss: 0.177186
2004 M Tournament: Accuracy: 64.84%, Stats loss: 180.405032 Result loss: 0.187859
2005 M Tournament: Accuracy: 77.34%, Stats loss: 186.615882 Result loss: 0.165995
2006 M Tournament: Accuracy: 68.75%, Stats loss: 178.120440 Result loss: 0.208336
2007 M Tournament: Accuracy: 75.78%, Stats loss: 187.861479 Result loss: 0.159984
2008 M Tournament: Accuracy: 77.34%, Stats loss: 186.278016 Result loss: 0.159429
2009 M Tournament: Accuracy: 71.09%, Stats loss: 189.830288 Result loss: 0.168506
2010 M Tournament: Accuracy: 75.00%, Stats loss: 178.063221 Result loss: 0.190535
2010 W Tournament: Accuracy: 75.40%, Stats loss: 171.644804 Result loss: 0.143992
2011 M Tournament: Accuracy: 64.93%, Stats loss: 169.489887 Result loss: 0.218611
2011 W Tournament: Accuracy: 75.40%, Stats loss: 166.743494 Result loss: 0.123085
2012 M Tournament: Accuracy: 73.13%, Stats loss: 162.140174 Result loss: 0.186295
2012 W Tournamen

## Inspect the model
First what are the sizes of the smallest input and output weights

In [15]:
print(f"Program embedding min: {model.program_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"Team embedding min: {model.team_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"FC min: {model.result_fc.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")

Program embedding min: 2.502787
Team embedding min: 3.764464
FC min: 0.000026


Calculate the average gradient for each input feature

In [16]:
for param in model.team_embedding.parameters():
    param.requires_grad=True
for param in model.program_embedding.parameters():
    param.requires_grad=True

In [17]:
program_weights, team_weights, stats_weights = feature_eval(model, tourney_loader)

In [18]:
program_weights.abs().sum().item(), team_weights.abs().sum().item()

(0.018737902864813805, 0.005341209005564451)

In [19]:
print(f"Year:\t{stats_weights[0]:>4f}")
print(f"Game:\t{stats_weights[1]:>4f}")
print(f"League:\t{stats_weights[2]:>4f}")

Year:	0.034320
Game:	-0.031676
League:	-0.078706


## Generating the submission file
### Phase 2

Write the results

In [20]:
odds = model_odds(dataset, 2025, 'M', model)

In [21]:
gen_submission(model, dataset)

## Save the model

In [22]:
torch.save(model.state_dict(), 'model.pth')

## Moderated model

Moderate a model by pushing it towards 0.5

In [23]:
moderated = ModeratedModel(model, 0.75)

In [24]:
for season in dataset.tourney.Season.unique():
    loader = dataset.tourney_data(season)
    test(loader, moderated, label=f"{season} Tournament")

2003 Tournament: Accuracy: 75.00%, Stats loss: 301.376594 Result loss: 0.177186
2004 Tournament: Accuracy: 64.84%, Stats loss: 290.868257 Result loss: 0.187859
2005 Tournament: Accuracy: 77.34%, Stats loss: 297.318479 Result loss: 0.165995
2006 Tournament: Accuracy: 68.75%, Stats loss: 283.473422 Result loss: 0.208336
2007 Tournament: Accuracy: 75.78%, Stats loss: 297.432305 Result loss: 0.159984
2008 Tournament: Accuracy: 77.34%, Stats loss: 296.645031 Result loss: 0.159429
2009 Tournament: Accuracy: 71.09%, Stats loss: 303.969298 Result loss: 0.168506
2010 Tournament: Accuracy: 75.20%, Stats loss: 282.131481 Result loss: 0.167447
2011 Tournament: Accuracy: 70.00%, Stats loss: 273.145139 Result loss: 0.172318
2012 Tournament: Accuracy: 77.69%, Stats loss: 272.809321 Result loss: 0.152798
2013 Tournament: Accuracy: 71.15%, Stats loss: 277.463082 Result loss: 0.182032
2014 Tournament: Accuracy: 76.54%, Stats loss: 278.978014 Result loss: 0.157908
2015 Tournament: Accuracy: 77.69%, Stats

## Dig into 2023 results

In [25]:
loader = dataset.tourney_data(2023)

x, y = loader.dataset.tensors

preds = model(x.to(device))

In [26]:
t_2023 = pd.DataFrame({'winner_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                       'loser_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                       'winner': [dataset.programs.loc[i].TeamID for i in x[:,0].tolist()],
                       'loser': [dataset.programs.loc[i].TeamID for i in x[:,2].tolist()],
                       'actual': y[:,0].reshape([-1]),
                       'predicted': np.array(preds[0].tolist()).reshape([-1])}).iloc[:67]

In [27]:
t_2023[t_2023.predicted < 0.5].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted
23,F Dickinson,Purdue,1192,1345,1.0,0.003878
15,Princeton,Arizona,1343,1112,1.0,0.073796
8,Furman,Virginia,1202,1438,1.0,0.120435
53,FL Atlantic,Tennessee,1194,1397,1.0,0.136621
39,Princeton,Missouri,1343,1281,1.0,0.215313
57,Miami FL,Houston,1274,1222,1.0,0.22985
37,Arkansas,Kansas,1116,1242,1.0,0.284695
58,San Diego St,Alabama,1361,1104,1.0,0.312686
61,FL Atlantic,Kansas St,1194,1243,1.0,0.332678
5,Arkansas,Illinois,1116,1228,1.0,0.346291


The biggest thing in this season were the huge upsets in the first round. Purdue was a number one seed and lost which I only gave a .4% chance to happen. Arizona and Virginia were number 2 seeds and lost which I gave 7% and 15% chances of happening respectively.

In [28]:
t_2023['Upset'] = [dataset.upset(2023, winner, loser) for (winner, loser) in zip(t_2023['winner'], t_2023['loser'])]

In [29]:
t_2023[t_2023.Upset].predicted.mean()

np.float64(0.323662156589116)

On average the upsets had a 32% chance of happening

In [30]:
t_2023[t_2023.Upset & (t_2023.predicted >= 0.5)].sort_values('predicted', ascending=False)

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset
60,Connecticut,Gonzaga,1163,1211,1.0,0.666928,True
14,Penn St,Texas A&M,1336,1401,1.0,0.586965,True


I correctly predicted 2 upsets, though all were closely ranked

In [31]:
t_2023[~t_2023.Upset & (t_2023.predicted < 0.5)].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset
5,Arkansas,Illinois,1116,1228,1.0,0.346291,False
48,Kansas St,Kentucky,1243,1246,1.0,0.378901,False
0,Pittsburgh,Mississippi St,1338,1280,1.0,0.388889,False
63,San Diego St,Creighton,1361,1166,1.0,0.415698,False
11,Maryland,West Virginia,1268,1452,1.0,0.456647,False
12,Missouri,Utah St,1281,1429,1.0,0.499771,False


I also incorrectly predicted 4 upsets

Looking at all the tourneys

In [32]:
x, y = tourney_loader.dataset.tensors
preds = model(x.to(device))
tourney_df = pd.DataFrame({'season': x[:,4].tolist(),
                           'winner_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                           'loser_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                           'winner': [dataset.programs.loc[i].TeamID for i in x[:,0].tolist()],
                           'loser': [dataset.programs.loc[i].TeamID for i in x[:,2].tolist()],
                           'actual': y[:,0].reshape([-1]),
                           'predicted': np.array(preds[0].tolist()).reshape([-1])})
tourney_df = tourney_df[tourney_df.actual == 1.0]
tourney_df['Upset'] = [dataset.upset(season, winner, loser) for (winner, loser, season)
                       in zip(tourney_df['winner'], tourney_df['loser'], tourney_df['season'])]

In [33]:
len(tourney_df[tourney_df.Upset & (tourney_df.predicted >= 0.5) & (tourney_df.season > 2020)].sort_values('predicted', ascending=False))

25

In [34]:
len(tourney_df[~tourney_df.Upset & (tourney_df.predicted < 0.5) & (tourney_df.season > 2020)].sort_values('predicted'))

40

Overall I predicted 19 upsets correctly, and 30 incorrectly

## Predicting by seeds
What if I predict just using the seeds?

In [35]:
odds = dataset.odds_by_seed_diff(before=2021)

In [36]:
dataset.tourney_df(after=2021).SeedDiff.map(lambda x: odds[x]**2).mean()

np.float64(0.18527460145235355)

This results in a test Brier score of about 0.185.

## Hybrid Model
Building a model using the neural net and seeds

In [37]:
seed_model = SeedModel(dataset, after=2021)
test(stage1_loader, seed_model, label=f"Seeds")

Seeds: Accuracy: 72.22%, Stats loss: 821.501009 Result loss: 0.182851


0.1828505115610905

In [38]:
test(stage1_loader, model, label="NN")

NN: Accuracy: 72.32%, Stats loss: 172.305863 Result loss: 0.177956


0.17795590977595196

In [39]:
hybrid = HybridModel([model, seed_model], [0.8, 0.2])

In [40]:
test(stage1_loader, hybrid, label=f"Hybrid")

Hybrid: Accuracy: 72.79%, Stats loss: 821.501009 Result loss: 0.176025


0.17602514206476164

They hybrid model outperforms both individual models

In [41]:
for season in range(2021, 2025):
    for league in ['M', 'W']:
        loader = dataset.tourney_data(season, league)
        test(loader, hybrid, label=f"{season} {league} Tournament")

2021 M Tournament: Accuracy: 70.54%, Stats loss: 799.078627 Result loss: 0.182122
2021 W Tournament: Accuracy: 70.54%, Stats loss: 799.078627 Result loss: 0.182122
2022 M Tournament: Accuracy: 75.00%, Stats loss: 813.262527 Result loss: 0.174047
2022 W Tournament: Accuracy: 75.00%, Stats loss: 813.262527 Result loss: 0.174047
2023 M Tournament: Accuracy: 70.15%, Stats loss: 815.193230 Result loss: 0.189847
2023 W Tournament: Accuracy: 70.15%, Stats loss: 815.193230 Result loss: 0.189847
2024 M Tournament: Accuracy: 75.37%, Stats loss: 857.632996 Result loss: 0.158311
2024 W Tournament: Accuracy: 75.37%, Stats loss: 857.632996 Result loss: 0.158311


## Generate a bracket

In [42]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    print(gen_bracket(dataset, 2024, 'M', hybrid).join(dataset.all_teams, on='Winner')[['Winner', 'TeamName']])

      Winner        TeamName
Slot                        
R1W1    1163     Connecticut
R1W2    1235         Iowa St
R1W3    1228        Illinois
R1W4    1120          Auburn
R1W5    1361    San Diego St
R1W6    1140             BYU
R1W7    1450   Washington St
R1W8    1321    Northwestern
R1X1    1314  North Carolina
R1X2    1112         Arizona
R1X3    1124          Baylor
R1X4    1104         Alabama
R1X5    1388    St Mary's CA
R1X6    1155         Clemson
R1X7    1173          Dayton
R1X8    1277     Michigan St
R1Y1    1345          Purdue
R1Y2    1397       Tennessee
R1Y3    1166       Creighton
R1Y4    1242          Kansas
R1Y5    1211         Gonzaga
R1Y6    1332          Oregon
R1Y7    1438        Virginia
R1Y8    1395             TCU
R1Z1    1222         Houston
R1Z2    1266       Marquette
R1Z3    1246        Kentucky
R1Z4    1181            Duke
R1Z5    1458       Wisconsin
R1Z6    1403      Texas Tech
R1Z7    1196         Florida
R1Z8    1304        Nebraska
R2W1    1163  