# March Madness 2025

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import os
from sklearn.model_selection import train_test_split
import random
from data import Data, STATS_COLUMNS
from model import *

torch.manual_seed(20250222)
random.seed(20250222)

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


## Hypothesis
Each team can be modeled by x hidden features. In each game, these hidden features interact in a nonlinear fashion to determine the outcome of the game

## Preparing the data
Load the data

In [2]:
dataset = Data()

dataset.games.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,...,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0
mean,2015.470621,69.843291,2099.847868,74.183169,2097.450588,61.187026,0.061787,26.176339,57.063405,6.912005,...,19.248818,11.436922,16.826656,10.826832,21.949363,11.217125,15.21463,6.453946,2.848942,18.853504
std,6.024751,35.933736,986.382716,11.406085,989.676138,11.373007,0.287403,4.811306,7.828931,3.16658,...,6.325219,5.239163,6.987616,4.418293,4.708807,3.765042,5.028571,2.985335,2.037092,4.587468
min,2003.0,0.0,1101.0,30.0,1101.0,11.0,0.0,9.0,26.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0
25%,2011.0,37.0,1260.0,66.0,1253.0,53.0,0.0,23.0,52.0,5.0,...,15.0,8.0,12.0,8.0,19.0,9.0,12.0,4.0,1.0,16.0
50%,2016.0,73.0,1413.0,74.0,1407.0,61.0,0.0,26.0,57.0,7.0,...,19.0,11.0,16.0,10.0,22.0,11.0,15.0,6.0,3.0,19.0
75%,2020.0,101.0,3244.0,81.0,3245.0,69.0,0.0,29.0,62.0,9.0,...,23.0,15.0,21.0,14.0,25.0,14.0,18.0,8.0,4.0,22.0
max,2025.0,132.0,3480.0,149.0,3480.0,144.0,6.0,58.0,113.0,30.0,...,80.0,48.0,65.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


The x's will be the indexes of two team IDs and program IDs, the y's will include and indicator of who won at the game stats

In [3]:
train_loader, validation_loader = dataset.train_test_data()

Loading cached data


## The Model
Define the model. Combine the embeddings for the two teams, go to a hidden layer, and then output to a prediction if the first team won

In [4]:
model = Model(embedding_sizes=[32, 1024], model_sizes=(256,256), dropout=0.5, dataset=dataset).to(device)

## Training the model

Train the model

In [5]:
train(train_loader, validation_loader, model, streak=10, learning_rate=0.001)

Initial: Accuracy: 49.97%, Stats loss: 779.717531 Result loss: 0.251805
Epoch 0
Train: Accuracy: 50.04%, Stats loss: 332.950312 Result loss: 0.249938
Test: Accuracy: 50.09%, Stats loss: 331.385604 Result loss: 0.249928
Epoch 1
Train: Accuracy: 52.84%, Stats loss: 318.473992 Result loss: 0.249037
Test: Accuracy: 52.65%, Stats loss: 316.938637 Result loss: 0.249111
Epoch 2
Train: Accuracy: 51.73%, Stats loss: 272.182236 Result loss: 0.246814
Test: Accuracy: 51.61%, Stats loss: 270.828944 Result loss: 0.247011
Epoch 3
Train: Accuracy: 63.56%, Stats loss: 226.443790 Result loss: 0.236886
Test: Accuracy: 63.44%, Stats loss: 225.220446 Result loss: 0.236973
Epoch 4
Train: Accuracy: 65.63%, Stats loss: 117.833823 Result loss: 0.224032
Test: Accuracy: 65.73%, Stats loss: 117.058685 Result loss: 0.224130
Epoch 5
Train: Accuracy: 64.78%, Stats loss: 37.641530 Result loss: 0.219517
Test: Accuracy: 64.89%, Stats loss: 37.468539 Result loss: 0.219800
Epoch 6
Train: Accuracy: 65.76%, Stats loss: 36.

Fine tune with only the result

In [6]:
train(train_loader, validation_loader, model, learning_rate=0.0001, full_loss=False)

Initial: Accuracy: 73.74%, Stats loss: 30.818408 Result loss: 0.175855
Epoch 0
Train: Accuracy: 75.71%, Stats loss: 31.431244 Result loss: 0.167611
Test: Accuracy: 73.80%, Stats loss: 32.049383 Result loss: 0.176447
Epoch 1
Train: Accuracy: 75.74%, Stats loss: 32.496431 Result loss: 0.166384
Test: Accuracy: 73.78%, Stats loss: 33.129390 Result loss: 0.175567
Epoch 2
Train: Accuracy: 75.77%, Stats loss: 34.107860 Result loss: 0.167503
Test: Accuracy: 73.67%, Stats loss: 34.765274 Result loss: 0.176390
Epoch 3
Train: Accuracy: 75.70%, Stats loss: 35.136596 Result loss: 0.165988
Test: Accuracy: 73.62%, Stats loss: 35.791058 Result loss: 0.175521
Epoch 4
Train: Accuracy: 75.84%, Stats loss: 36.935512 Result loss: 0.164271
Test: Accuracy: 73.62%, Stats loss: 37.619871 Result loss: 0.174215
Epoch 5
Train: Accuracy: 75.98%, Stats loss: 37.586101 Result loss: 0.164268
Test: Accuracy: 73.94%, Stats loss: 38.257915 Result loss: 0.174189
Epoch 6
Train: Accuracy: 75.79%, Stats loss: 39.113754 Resu

With this model we can predict the output of about three quarters of regular season games.

## Load the tourney data to test with

In [7]:
tourney_dataset = dataset.gen_dataset(dataset.tourney)
tourney_loader = DataLoader(tourney_dataset, batch_size=500, shuffle=True)

In [8]:
test(tourney_loader, model, device, label="Tourney")

Tourney: Accuracy: 74.43%, Stats loss: 53.046376 Result loss: 0.170006


0.170006353825528

When it comes to tournament results we get a little worse. The lower result is likely due to teams having increased pairity.

Train with early torney data

In [9]:
tourney_df = dataset.tourney[dataset.tourney.Season < 2021]

tourney_train_df, tourney_validation_df = train_test_split(tourney_df, train_size=0.8)
tourney_train_data = dataset.gen_dataset(tourney_train_df)
tourney_validation_data = dataset.gen_dataset(tourney_validation_df)

tourney_train_loader = DataLoader(tourney_train_data, batch_size=500)
tourney_validation_loader = DataLoader(tourney_validation_data, batch_size=500)

In [10]:
for param in model.team_embedding.parameters():
    param.requires_grad=False
for param in model.program_embedding.parameters():
    param.requires_grad=False

In [11]:
train(tourney_train_loader, tourney_validation_loader, model, learning_rate=0.0001, full_loss=False)

Initial: Accuracy: 72.78%, Stats loss: 50.980049 Result loss: 0.176675
Epoch 0
Train: Accuracy: 74.32%, Stats loss: 54.820784 Result loss: 0.167484
Test: Accuracy: 71.06%, Stats loss: 51.735402 Result loss: 0.178341
Epoch 1
Train: Accuracy: 74.36%, Stats loss: 54.400267 Result loss: 0.166984
Test: Accuracy: 71.63%, Stats loss: 51.347660 Result loss: 0.177814
Epoch 2
Train: Accuracy: 75.04%, Stats loss: 53.846538 Result loss: 0.166395
Test: Accuracy: 72.49%, Stats loss: 50.835494 Result loss: 0.177212
Epoch 3
Train: Accuracy: 75.14%, Stats loss: 53.425259 Result loss: 0.166242
Test: Accuracy: 72.49%, Stats loss: 50.445406 Result loss: 0.177014
Epoch 4
Train: Accuracy: 75.36%, Stats loss: 53.082672 Result loss: 0.166420
Test: Accuracy: 72.21%, Stats loss: 50.135898 Result loss: 0.177196
Best Loss: 0.176675


### Performance by year


In [12]:
for season in dataset.tourney.Season.unique():
    loader = dataset.tourney_data(year=season)
    test(loader, model, device, label=f"{season} Tournament")

2003 Tournament: Accuracy: 73.44%, Stats loss: 41.515386 Result loss: 0.175513
2004 Tournament: Accuracy: 67.97%, Stats loss: 42.141219 Result loss: 0.193112
2005 Tournament: Accuracy: 77.34%, Stats loss: 49.714191 Result loss: 0.170388
2006 Tournament: Accuracy: 68.75%, Stats loss: 44.408278 Result loss: 0.203433
2007 Tournament: Accuracy: 76.56%, Stats loss: 41.584393 Result loss: 0.169004
2008 Tournament: Accuracy: 79.69%, Stats loss: 41.974294 Result loss: 0.167370
2009 Tournament: Accuracy: 75.78%, Stats loss: 38.860264 Result loss: 0.160685
2010 Tournament: Accuracy: 72.44%, Stats loss: 55.417293 Result loss: 0.170627
2011 Tournament: Accuracy: 72.69%, Stats loss: 59.221989 Result loss: 0.171243
2012 Tournament: Accuracy: 77.69%, Stats loss: 52.910825 Result loss: 0.152537
2013 Tournament: Accuracy: 73.08%, Stats loss: 56.071231 Result loss: 0.180318
2014 Tournament: Accuracy: 74.62%, Stats loss: 56.349579 Result loss: 0.167210
2015 Tournament: Accuracy: 80.38%, Stats loss: 65.25

In [13]:
stage1_loader = dataset.tourney_data(after=2021)
test(stage1_loader, model, device=device, label=f"Stage 1")

Stage 1: Accuracy: 72.98%, Stats loss: 51.808393 Result loss: 0.176454


0.17645428783175324

Breaking out by league

In [14]:
for season in dataset.tourney.Season.unique():
    for league in dataset.tourney[dataset.tourney.Season == season].League.unique():
        loader = dataset.tourney_data(year=season, league=league)
        test(loader, model, device, label=f"{season} {league} Tournament")

2003 M Tournament: Accuracy: 73.44%, Stats loss: 41.515386 Result loss: 0.175513
2004 M Tournament: Accuracy: 67.97%, Stats loss: 42.141219 Result loss: 0.193112
2005 M Tournament: Accuracy: 77.34%, Stats loss: 49.714191 Result loss: 0.170388
2006 M Tournament: Accuracy: 68.75%, Stats loss: 44.408278 Result loss: 0.203433
2007 M Tournament: Accuracy: 76.56%, Stats loss: 41.584393 Result loss: 0.169004
2008 M Tournament: Accuracy: 79.69%, Stats loss: 41.974294 Result loss: 0.167370
2009 M Tournament: Accuracy: 75.78%, Stats loss: 38.860264 Result loss: 0.160685
2010 M Tournament: Accuracy: 67.97%, Stats loss: 41.640482 Result loss: 0.199143
2010 W Tournament: Accuracy: 76.98%, Stats loss: 69.412784 Result loss: 0.141658
2011 M Tournament: Accuracy: 67.16%, Stats loss: 41.044958 Result loss: 0.212341
2011 W Tournament: Accuracy: 78.57%, Stats loss: 78.553117 Result loss: 0.127536
2012 M Tournament: Accuracy: 70.90%, Stats loss: 33.141167 Result loss: 0.185668
2012 W Tournament: Accuracy:

## Inspect the model
First what are the sizes of the smallest input and output weights

In [15]:
print(f"Program embedding min: {model.program_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"Team embedding min: {model.team_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"FC min: {model.result_fc.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")

Program embedding min: 2.469010
Team embedding min: 3.898871
FC min: 0.000035


Calculate the average gradient for each input feature

In [16]:
for param in model.team_embedding.parameters():
    param.requires_grad=True
for param in model.program_embedding.parameters():
    param.requires_grad=True

In [17]:
program_weights, team_weights, stats_weights = feature_eval(model, tourney_loader)

In [18]:
program_weights.abs().sum().item(), team_weights.abs().sum().item()

(0.00707292091101408, 0.002075385767966509)

In [19]:
print(f"Year:\t{stats_weights[0]:>4f}")
print(f"Game:\t{stats_weights[1]:>4f}")
print(f"League:\t{stats_weights[2]:>4f}")

Year:	-0.000641
Game:	-0.001509
League:	0.161647


## Generating the submission file
### Phase 2

Write the results

In [20]:
odds = model_odds(dataset, 2025, 'M', model)

In [21]:
gen_submission(model, dataset)

## Save the model

In [22]:
torch.save(model.state_dict(), 'model.pth')

## Moderated model

Moderate a model by pushing it towards 0.5

In [23]:
moderated = ModeratedModel(model, 0.75)

In [24]:
for season in dataset.tourney.Season.unique():
    loader = dataset.tourney_data(season)
    test(loader, moderated, label=f"{season} Tournament")

2003 Tournament: Accuracy: 73.44%, Stats loss: 99.852550 Result loss: 0.175513
2004 Tournament: Accuracy: 67.97%, Stats loss: 92.603881 Result loss: 0.193112
2005 Tournament: Accuracy: 77.34%, Stats loss: 101.214382 Result loss: 0.170388
2006 Tournament: Accuracy: 68.75%, Stats loss: 95.639798 Result loss: 0.203433
2007 Tournament: Accuracy: 76.56%, Stats loss: 99.113837 Result loss: 0.169004
2008 Tournament: Accuracy: 79.69%, Stats loss: 94.479729 Result loss: 0.167370
2009 Tournament: Accuracy: 75.78%, Stats loss: 98.087652 Result loss: 0.160685
2010 Tournament: Accuracy: 72.44%, Stats loss: 93.392190 Result loss: 0.170627
2011 Tournament: Accuracy: 72.69%, Stats loss: 92.231896 Result loss: 0.171243
2012 Tournament: Accuracy: 77.69%, Stats loss: 89.471828 Result loss: 0.152537
2013 Tournament: Accuracy: 73.08%, Stats loss: 91.853646 Result loss: 0.180318
2014 Tournament: Accuracy: 74.62%, Stats loss: 96.466534 Result loss: 0.167210
2015 Tournament: Accuracy: 80.38%, Stats loss: 101.

## Dig into 2023 results

In [25]:
loader = dataset.tourney_data(2023)

x, y = loader.dataset.tensors

preds = model(x.to(device))

In [26]:
t_2023 = pd.DataFrame({'winner_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                       'loser_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                       'winner': [dataset.programs.loc[i].TeamID for i in x[:,0].tolist()],
                       'loser': [dataset.programs.loc[i].TeamID for i in x[:,2].tolist()],
                       'actual': y[:,0].reshape([-1]),
                       'predicted': np.array(preds[0].tolist()).reshape([-1])}).iloc[:67]

In [27]:
t_2023[t_2023.predicted < 0.5].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted
23,F Dickinson,Purdue,1192,1345,1.0,0.052146
15,Princeton,Arizona,1343,1112,1.0,0.154607
8,Furman,Virginia,1202,1438,1.0,0.154886
53,FL Atlantic,Tennessee,1194,1397,1.0,0.282018
39,Princeton,Missouri,1343,1281,1.0,0.28648
37,Arkansas,Kansas,1116,1242,1.0,0.310562
58,San Diego St,Alabama,1361,1104,1.0,0.313391
57,Miami FL,Houston,1274,1222,1.0,0.360942
63,San Diego St,Creighton,1361,1166,1.0,0.370548
32,Pittsburgh,Iowa St,1338,1235,1.0,0.391482


The biggest thing in this season were the huge upsets in the first round. Purdue was a number one seed and lost which I only gave a .4% chance to happen. Arizona and Virginia were number 2 seeds and lost which I gave 7% and 15% chances of happening respectively.

In [28]:
t_2023['Upset'] = [dataset.upset(2023, winner, loser) for (winner, loser) in zip(t_2023['winner'], t_2023['loser'])]

In [29]:
t_2023[t_2023.Upset].predicted.mean()

np.float64(0.3718946044226497)

On average the upsets had a 32% chance of happening

In [30]:
t_2023[t_2023.Upset & (t_2023.predicted >= 0.5)].sort_values('predicted', ascending=False)

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset
45,Creighton,Baylor,1166,1124,1.0,0.539444,True
60,Connecticut,Gonzaga,1163,1211,1.0,0.538088,True


I correctly predicted 2 upsets, though all were closely ranked

In [31]:
t_2023[~t_2023.Upset & (t_2023.predicted < 0.5)].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset
63,San Diego St,Creighton,1361,1166,1.0,0.370548,False
5,Arkansas,Illinois,1116,1228,1.0,0.457721,False
12,Missouri,Utah St,1281,1429,1.0,0.472966,False
59,Texas,Xavier,1400,1462,1.0,0.475819,False
48,Kansas St,Kentucky,1243,1246,1.0,0.479989,False
11,Maryland,West Virginia,1268,1452,1.0,0.481532,False
28,Kentucky,Providence,1246,1344,1.0,0.485881,False


I also incorrectly predicted 4 upsets

Looking at all the tourneys

In [32]:
x, y = tourney_loader.dataset.tensors
preds = model(x.to(device))
tourney_df = pd.DataFrame({'season': x[:,4].tolist(),
                           'winner_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                           'loser_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                           'winner': [dataset.programs.loc[i].TeamID for i in x[:,0].tolist()],
                           'loser': [dataset.programs.loc[i].TeamID for i in x[:,2].tolist()],
                           'actual': y[:,0].reshape([-1]),
                           'predicted': np.array(preds[0].tolist()).reshape([-1])})
tourney_df = tourney_df[tourney_df.actual == 1.0]
tourney_df['Upset'] = [dataset.upset(season, winner, loser) for (winner, loser, season)
                       in zip(tourney_df['winner'], tourney_df['loser'], tourney_df['season'])]

In [33]:
len(tourney_df[tourney_df.Upset & (tourney_df.predicted >= 0.5) & (tourney_df.season > 2020)].sort_values('predicted', ascending=False))

24

In [34]:
len(tourney_df[~tourney_df.Upset & (tourney_df.predicted < 0.5) & (tourney_df.season > 2020)].sort_values('predicted'))

42

Overall I predicted 19 upsets correctly, and 30 incorrectly

## Predicting by seeds
What if I predict just using the seeds?

In [35]:
odds = dataset.odds_by_seed_diff(before=2021)

In [36]:
dataset.tourney_df(after=2021).SeedDiff.map(lambda x: odds[x]**2).mean()

np.float64(0.18527460145235355)

This results in a test Brier score of about 0.185.

## Hybrid Model
Building a model using the neural net and seeds

In [37]:
seed_model = SeedModel(dataset, after=2021)
test(stage1_loader, seed_model, label=f"Seeds")

Seeds: Accuracy: 72.22%, Stats loss: 821.501009 Result loss: 0.182851


0.1828505115610905

In [38]:
test(stage1_loader, model, label="NN")

NN: Accuracy: 72.98%, Stats loss: 51.808393 Result loss: 0.176454


0.17645428783175324

In [39]:
hybrid = HybridModel([model, seed_model], [0.8, 0.2])

In [40]:
test(stage1_loader, hybrid, label=f"Hybrid")

Hybrid: Accuracy: 74.01%, Stats loss: 821.501009 Result loss: 0.175478


0.1754780095065946

They hybrid model outperforms both individual models

In [41]:
for season in range(2021, 2025):
    for league in ['M', 'W']:
        loader = dataset.tourney_data(season, league)
        test(loader, hybrid, label=f"{season} {league} Tournament")

2021 M Tournament: Accuracy: 71.71%, Stats loss: 799.078627 Result loss: 0.183947
2021 W Tournament: Accuracy: 71.71%, Stats loss: 799.078627 Result loss: 0.183947
2022 M Tournament: Accuracy: 75.37%, Stats loss: 813.262527 Result loss: 0.170425
2022 W Tournament: Accuracy: 75.37%, Stats loss: 813.262527 Result loss: 0.170425
2023 M Tournament: Accuracy: 72.01%, Stats loss: 815.193230 Result loss: 0.184606
2023 W Tournament: Accuracy: 72.01%, Stats loss: 815.193230 Result loss: 0.184606
2024 M Tournament: Accuracy: 76.87%, Stats loss: 857.632996 Result loss: 0.163250
2024 W Tournament: Accuracy: 76.87%, Stats loss: 857.632996 Result loss: 0.163250


## Generate a bracket

In [42]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    print(gen_bracket(dataset, 2024, 'M', hybrid).join(dataset.all_teams, on='Winner')[['Winner', 'TeamName']])

      Winner        TeamName
Slot                        
R1W1    1163     Connecticut
R1W2    1235         Iowa St
R1W3    1228        Illinois
R1W4    1120          Auburn
R1W5    1361    San Diego St
R1W6    1140             BYU
R1W7    1450   Washington St
R1W8    1321    Northwestern
R1X1    1314  North Carolina
R1X2    1112         Arizona
R1X3    1124          Baylor
R1X4    1104         Alabama
R1X5    1388    St Mary's CA
R1X6    1155         Clemson
R1X7    1173          Dayton
R1X8    1277     Michigan St
R1Y1    1345          Purdue
R1Y2    1397       Tennessee
R1Y3    1166       Creighton
R1Y4    1242          Kansas
R1Y5    1211         Gonzaga
R1Y6    1332          Oregon
R1Y7    1400           Texas
R1Y8    1395             TCU
R1Z1    1222         Houston
R1Z2    1266       Marquette
R1Z3    1246        Kentucky
R1Z4    1181            Duke
R1Z5    1458       Wisconsin
R1Z6    1403      Texas Tech
R1Z7    1196         Florida
R1Z8    1401       Texas A&M
R2W1    1163  