# March Madness 2025

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import os
from sklearn.model_selection import train_test_split
import random
from data import Data, STATS_COLUMNS
from model import *

torch.manual_seed(20250222)
random.seed(20250222)

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


## Hypothesis
Each team can be modeled by x hidden features. In each game, these hidden features interact in a nonlinear fashion to determine the outcome of the game

## Preparing the data
Load the data

In [2]:
dataset = Data()

dataset.games.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,...,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0
mean,2015.470621,69.843291,2099.847868,74.183169,2097.450588,61.187026,0.061787,26.176339,57.063405,6.912005,...,19.248818,11.436922,16.826656,10.826832,21.949363,11.217125,15.21463,6.453946,2.848942,18.853504
std,6.024751,35.933736,986.382716,11.406085,989.676138,11.373007,0.287403,4.811306,7.828931,3.16658,...,6.325219,5.239163,6.987616,4.418293,4.708807,3.765042,5.028571,2.985335,2.037092,4.587468
min,2003.0,0.0,1101.0,30.0,1101.0,11.0,0.0,9.0,26.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0
25%,2011.0,37.0,1260.0,66.0,1253.0,53.0,0.0,23.0,52.0,5.0,...,15.0,8.0,12.0,8.0,19.0,9.0,12.0,4.0,1.0,16.0
50%,2016.0,73.0,1413.0,74.0,1407.0,61.0,0.0,26.0,57.0,7.0,...,19.0,11.0,16.0,10.0,22.0,11.0,15.0,6.0,3.0,19.0
75%,2020.0,101.0,3244.0,81.0,3245.0,69.0,0.0,29.0,62.0,9.0,...,23.0,15.0,21.0,14.0,25.0,14.0,18.0,8.0,4.0,22.0
max,2025.0,132.0,3480.0,149.0,3480.0,144.0,6.0,58.0,113.0,30.0,...,80.0,48.0,65.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


The x's will be the indexes of two team IDs and program IDs, the y's will include and indicator of who won at the game stats

In [3]:
train_loader, validation_loader = dataset.train_test_data()

Loading cached data


## The Model
Define the model. Combine the embeddings for the two teams, go to a hidden layer, and then output to a prediction if the first team won

In [4]:
model = Model(embedding_sizes=[128, 512], model_sizes=(128,128), dropout=0.1, dataset=dataset).to(device)

## Training the model

In [5]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

Train the model

In [6]:
n_epochs = 15
for i in range(n_epochs):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer, device)
    test(train_loader, model, loss_fn, device, label="Train")
    test(validation_loader, model, loss_fn, device, label="Validation")

Epoch 0
Train: Accuracy: 52.51%, Stats loss: 47.206682 Result loss: 0.246384
Validation: Accuracy: 52.58%, Stats loss: 47.054583 Result loss: 0.246431
Epoch 1
Train: Accuracy: 63.83%, Stats loss: 47.754198 Result loss: 0.218775
Validation: Accuracy: 63.76%, Stats loss: 47.955850 Result loss: 0.218506
Epoch 2
Train: Accuracy: 67.70%, Stats loss: 52.876410 Result loss: 0.201621
Validation: Accuracy: 67.70%, Stats loss: 53.233140 Result loss: 0.202847
Epoch 3
Train: Accuracy: 65.86%, Stats loss: 49.094177 Result loss: 0.205607
Validation: Accuracy: 65.46%, Stats loss: 49.628423 Result loss: 0.208335
Epoch 4
Train: Accuracy: 72.39%, Stats loss: 47.344315 Result loss: 0.180020
Validation: Accuracy: 71.13%, Stats loss: 48.051276 Result loss: 0.186123
Epoch 5
Train: Accuracy: 73.41%, Stats loss: 49.878504 Result loss: 0.173423
Validation: Accuracy: 71.94%, Stats loss: 50.675102 Result loss: 0.181872
Epoch 6
Train: Accuracy: 75.07%, Stats loss: 48.135970 Result loss: 0.166600
Validation: Accur

Fine tune with only the result

In [7]:
for i in range(4):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer, device, full_loss=False)
    test(train_loader, model, loss_fn, device, label="Train")
    test(validation_loader, model, loss_fn, device, label="Validation")

Epoch 0
Train: Accuracy: 76.47%, Stats loss: 28.738524 Result loss: 0.157151
Validation: Accuracy: 74.25%, Stats loss: 30.444574 Result loss: 0.171003
Epoch 1
Train: Accuracy: 76.21%, Stats loss: 31.124827 Result loss: 0.158332
Validation: Accuracy: 73.95%, Stats loss: 32.742888 Result loss: 0.172664
Epoch 2
Train: Accuracy: 76.24%, Stats loss: 34.360288 Result loss: 0.158166
Validation: Accuracy: 73.83%, Stats loss: 35.872392 Result loss: 0.173151
Epoch 3
Train: Accuracy: 76.76%, Stats loss: 38.160312 Result loss: 0.155620
Validation: Accuracy: 74.19%, Stats loss: 39.583336 Result loss: 0.171265


With this model we can predict the output of about three quarters of regular season games.

## Load the tourney data to test with

In [8]:
tourney_dataset = dataset.gen_dataset(dataset.tourney)
tourney_loader = DataLoader(tourney_dataset, batch_size=500, shuffle=True)

In [9]:
test(tourney_loader, model, loss_fn, device, label="Tourney")

Tourney: Accuracy: 73.84%, Stats loss: 39.395060 Result loss: 0.167882


When it comes to tournament results we get a little worse. The lower result is likely due to teams having increased pairity.

Train with early torney data

In [10]:
tourney_df = dataset.tourney[dataset.tourney.Season < 2021]

tourney_train_df, tourney_validation_df = train_test_split(tourney_df, train_size=0.8)
tourney_train_data = dataset.gen_dataset(tourney_train_df)
tourney_validation_data = dataset.gen_dataset(tourney_validation_df)

tourney_train_loader = DataLoader(tourney_train_data, batch_size=500)
tourney_validation_loader = DataLoader(tourney_validation_data, batch_size=500)

In [11]:
for param in model.team_embedding.parameters():
    param.requires_grad=False
for param in model.program_embedding.parameters():
    param.requires_grad=False

In [12]:
for i in range(10):
    print(f"Epoch {i}")
    train(tourney_train_loader, model, loss_fn, optimizer, device, full_loss=False)
    test(tourney_train_loader, model, loss_fn, device, label="Train")
    test(tourney_validation_loader, model, loss_fn, device, label="Validation")

Epoch 0
Train: Accuracy: 73.14%, Stats loss: 40.523599 Result loss: 0.169425
Validation: Accuracy: 74.21%, Stats loss: 40.000788 Result loss: 0.171116
Epoch 1
Train: Accuracy: 67.84%, Stats loss: 41.984469 Result loss: 0.219457
Validation: Accuracy: 67.48%, Stats loss: 41.388264 Result loss: 0.219832
Epoch 2
Train: Accuracy: 73.50%, Stats loss: 42.924543 Result loss: 0.173968
Validation: Accuracy: 75.21%, Stats loss: 42.246563 Result loss: 0.173769
Epoch 3
Train: Accuracy: 74.18%, Stats loss: 43.872863 Result loss: 0.169553
Validation: Accuracy: 76.50%, Stats loss: 43.158166 Result loss: 0.170385
Epoch 4
Train: Accuracy: 72.74%, Stats loss: 44.921731 Result loss: 0.181968
Validation: Accuracy: 72.78%, Stats loss: 44.204015 Result loss: 0.184198
Epoch 5
Train: Accuracy: 73.85%, Stats loss: 45.770155 Result loss: 0.172250
Validation: Accuracy: 74.64%, Stats loss: 45.029332 Result loss: 0.175985
Epoch 6
Train: Accuracy: 74.68%, Stats loss: 46.620849 Result loss: 0.164455
Validation: Accur

### Performance by year


In [13]:
for season in dataset.tourney.Season.unique():
    loader = dataset.tourney_data(year=season)
    test(loader, model, loss_fn, device, label=f"{season} Tournament")

2003 Tournament: Accuracy: 75.78%, Stats loss: 46.292977 Result loss: 0.170337
2004 Tournament: Accuracy: 71.09%, Stats loss: 47.512802 Result loss: 0.178198
2005 Tournament: Accuracy: 75.78%, Stats loss: 50.357204 Result loss: 0.173980
2006 Tournament: Accuracy: 69.53%, Stats loss: 47.931528 Result loss: 0.196173
2007 Tournament: Accuracy: 72.66%, Stats loss: 50.362513 Result loss: 0.169179
2008 Tournament: Accuracy: 75.00%, Stats loss: 50.145625 Result loss: 0.164608
2009 Tournament: Accuracy: 77.34%, Stats loss: 48.571267 Result loss: 0.164459
2010 Tournament: Accuracy: 74.80%, Stats loss: 48.310326 Result loss: 0.162069
2011 Tournament: Accuracy: 72.31%, Stats loss: 45.640664 Result loss: 0.175736
2012 Tournament: Accuracy: 77.31%, Stats loss: 45.940925 Result loss: 0.153567
2013 Tournament: Accuracy: 73.08%, Stats loss: 48.004678 Result loss: 0.175580
2014 Tournament: Accuracy: 74.23%, Stats loss: 45.733544 Result loss: 0.161482
2015 Tournament: Accuracy: 79.62%, Stats loss: 49.53

In [14]:
stage1_loader = dataset.tourney_data(after=2021)
test(stage1_loader, model, loss_fn, device=device, label=f"Stage 1")

Stage 1: Accuracy: 71.37%, Stats loss: 46.900439 Result loss: 0.181693


Breaking out by league

In [15]:
for season in dataset.tourney.Season.unique():
    for league in dataset.tourney[dataset.tourney.Season == season].League.unique():
        loader = dataset.tourney_data(year=season, league=league)
        test(loader, model, loss_fn, device, label=f"{season} {league} Tournament")

2003 M Tournament: Accuracy: 75.78%, Stats loss: 46.292977 Result loss: 0.170337
2004 M Tournament: Accuracy: 71.09%, Stats loss: 47.512802 Result loss: 0.178198
2005 M Tournament: Accuracy: 75.78%, Stats loss: 50.357204 Result loss: 0.173980
2006 M Tournament: Accuracy: 69.53%, Stats loss: 47.931528 Result loss: 0.196173
2007 M Tournament: Accuracy: 72.66%, Stats loss: 50.362513 Result loss: 0.169179
2008 M Tournament: Accuracy: 75.00%, Stats loss: 50.145625 Result loss: 0.164608
2009 M Tournament: Accuracy: 77.34%, Stats loss: 48.571267 Result loss: 0.164459
2010 M Tournament: Accuracy: 71.88%, Stats loss: 46.688357 Result loss: 0.190434
2010 W Tournament: Accuracy: 77.78%, Stats loss: 49.958041 Result loss: 0.133254
2011 M Tournament: Accuracy: 66.42%, Stats loss: 43.013172 Result loss: 0.222603
2011 W Tournament: Accuracy: 78.57%, Stats loss: 48.434981 Result loss: 0.125892
2012 M Tournament: Accuracy: 70.90%, Stats loss: 38.685952 Result loss: 0.189210
2012 W Tournament: Accuracy:

## Inspect the model
First what are the sizes of the smallest input and output weights

In [16]:
print(f"Program embedding min: {model.program_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"Team embedding min: {model.team_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"FC min: {model.result_fc.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")

Program embedding min: 2.707756
Team embedding min: 3.589032
FC min: 0.000109


Calculate the average gradient for each input feature

In [17]:
for param in model.team_embedding.parameters():
    param.requires_grad=True
for param in model.program_embedding.parameters():
    param.requires_grad=True

In [18]:
program_weights, team_weights, stats_weights = feature_eval(model, tourney_loader)

In [19]:
program_weights.abs().sum().item(), team_weights.abs().sum().item()

(0.015611924231052399, 0.019144713878631592)

In [20]:
print(f"Year:\t{stats_weights[0]:>4f}")
print(f"Game:\t{stats_weights[1]:>4f}")
print(f"League:\t{stats_weights[2]:>4f}")

Year:	0.068918
Game:	-0.030033
League:	-0.150893


## Generating the submission file
### Phase 2

Write the results

In [21]:
odds = model_odds(dataset, 2025, 'M', model)

In [22]:
gen_submission(model, dataset)

## Save the model

In [23]:
torch.save(model.state_dict(), 'model.pth')

## Moderated model

Moderate a model by pushing it towards 0.5

In [24]:
moderated = ModeratedModel(model, 0.75)

In [25]:
for season in dataset.tourney.Season.unique():
    loader = dataset.tourney_data(season)
    test(loader, moderated, loss_fn, label=f"{season} Tournament")

2003 Tournament: Accuracy: 75.78%, Stats loss: 135.510502 Result loss: 0.170337
2004 Tournament: Accuracy: 71.09%, Stats loss: 133.569301 Result loss: 0.178198
2005 Tournament: Accuracy: 75.78%, Stats loss: 137.189553 Result loss: 0.173980
2006 Tournament: Accuracy: 69.53%, Stats loss: 129.811370 Result loss: 0.196173
2007 Tournament: Accuracy: 72.66%, Stats loss: 139.023585 Result loss: 0.169179
2008 Tournament: Accuracy: 75.00%, Stats loss: 137.650415 Result loss: 0.164608
2009 Tournament: Accuracy: 77.34%, Stats loss: 139.123234 Result loss: 0.164459
2010 Tournament: Accuracy: 74.80%, Stats loss: 133.522514 Result loss: 0.162069
2011 Tournament: Accuracy: 72.31%, Stats loss: 127.846658 Result loss: 0.175736
2012 Tournament: Accuracy: 77.31%, Stats loss: 127.206225 Result loss: 0.153567
2013 Tournament: Accuracy: 73.08%, Stats loss: 130.906086 Result loss: 0.175580
2014 Tournament: Accuracy: 74.23%, Stats loss: 129.540168 Result loss: 0.161482
2015 Tournament: Accuracy: 79.62%, Stats

## Dig into 2023 results

In [26]:
loader = dataset.tourney_data(2023)

x, y = loader.dataset.tensors

preds = model(x.to(device))

In [27]:
t_2023 = pd.DataFrame({'winner_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                       'loser_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                       'winner': [dataset.programs.loc[i].TeamID for i in x[:,0].tolist()],
                       'loser': [dataset.programs.loc[i].TeamID for i in x[:,2].tolist()],
                       'actual': y[:,0].reshape([-1]),
                       'predicted': np.array(preds[0].tolist()).reshape([-1])}).iloc[:67]

In [28]:
t_2023[t_2023.predicted < 0.5].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted
23,F Dickinson,Purdue,1192,1345,1.0,0.006511
15,Princeton,Arizona,1343,1112,1.0,0.049004
8,Furman,Virginia,1202,1438,1.0,0.118974
39,Princeton,Missouri,1343,1281,1.0,0.138603
53,FL Atlantic,Tennessee,1194,1397,1.0,0.159113
58,San Diego St,Alabama,1361,1104,1.0,0.210439
57,Miami FL,Houston,1274,1222,1.0,0.230937
54,Gonzaga,UCLA,1211,1417,1.0,0.240717
37,Arkansas,Kansas,1116,1242,1.0,0.267951
14,Penn St,Texas A&M,1336,1401,1.0,0.272074


The biggest thing in this season were the huge upsets in the first round. Purdue was a number one seed and lost which I only gave a .4% chance to happen. Arizona and Virginia were number 2 seeds and lost which I gave 7% and 15% chances of happening respectively.

In [29]:
t_2023['Upset'] = [dataset.upset(2023, winner, loser) for (winner, loser) in zip(t_2023['winner'], t_2023['loser'])]

In [30]:
t_2023[t_2023.Upset].predicted.mean()

np.float64(0.26600579323515267)

On average the upsets had a 32% chance of happening

In [31]:
t_2023[t_2023.Upset & (t_2023.predicted >= 0.5)].sort_values('predicted', ascending=False)

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset


I correctly predicted 2 upsets, though all were closely ranked

In [32]:
t_2023[~t_2023.Upset & (t_2023.predicted < 0.5)].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset
12,Missouri,Utah St,1281,1429,1.0,0.337931,False
55,Kansas St,Michigan St,1243,1277,1.0,0.342101,False
48,Kansas St,Kentucky,1243,1246,1.0,0.34277,False
2,Arizona St,Nevada,1113,1305,1.0,0.350637,False
63,San Diego St,Creighton,1361,1166,1.0,0.356783,False
0,Pittsburgh,Mississippi St,1338,1280,1.0,0.361718,False
5,Arkansas,Illinois,1116,1228,1.0,0.393605,False
11,Maryland,West Virginia,1268,1452,1.0,0.406713,False
3,F Dickinson,TX Southern,1192,1411,1.0,0.421772,False
59,Texas,Xavier,1400,1462,1.0,0.444377,False


I also incorrectly predicted 4 upsets

Looking at all the tourneys

In [33]:
x, y = tourney_loader.dataset.tensors
preds = model(x.to(device))
tourney_df = pd.DataFrame({'season': x[:,4].tolist(),
                           'winner_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                           'loser_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                           'winner': [dataset.programs.loc[i].TeamID for i in x[:,0].tolist()],
                           'loser': [dataset.programs.loc[i].TeamID for i in x[:,2].tolist()],
                           'actual': y[:,0].reshape([-1]),
                           'predicted': np.array(preds[0].tolist()).reshape([-1])})
tourney_df = tourney_df[tourney_df.actual == 1.0]
tourney_df['Upset'] = [dataset.upset(season, winner, loser) for (winner, loser, season)
                       in zip(tourney_df['winner'], tourney_df['loser'], tourney_df['season'])]

In [34]:
len(tourney_df[tourney_df.Upset & (tourney_df.predicted >= 0.5) & (tourney_df.season > 2020)].sort_values('predicted', ascending=False))

10

In [35]:
len(tourney_df[~tourney_df.Upset & (tourney_df.predicted < 0.5) & (tourney_df.season > 2020)].sort_values('predicted'))

83

Overall I predicted 19 upsets correctly, and 30 incorrectly

## Predicting by seeds
What if I predict just using the seeds?

In [36]:
odds = dataset.odds_by_seed_diff(before=2021)

In [37]:
dataset.tourney_df(after=2021).SeedDiff.map(lambda x: odds[x]**2).mean()

np.float64(0.18527460145235355)

This results in a test Brier score of about 0.185.

## Hybrid Model
Building a model using the neural net and seeds

In [41]:
seed_model = SeedModel(dataset, after=2021)
test(stage1_loader, seed_model, loss_fn, label=f"Seeds")

Seeds: Accuracy: 50.00%, Stats loss: 821.501009 Result loss: 0.250000


In [42]:
test(stage1_loader, model, loss_fn, label="NN")

NN: Accuracy: 71.37%, Stats loss: 46.900439 Result loss: 0.181693


In [43]:
hybrid = HybridModel([model, seed_model], [0.8, 0.2])

In [44]:
test(stage1_loader, hybrid, loss_fn, label=f"Hybrid")

Hybrid: Accuracy: 71.37%, Stats loss: 821.501009 Result loss: 0.180293


They hybrid model outperforms both individual models

In [45]:
for season in range(2021, 2025):
    for league in ['M', 'W']:
        loader = dataset.tourney_data(season, league)
        test(loader, hybrid, loss_fn, label=f"{season} {league} Tournament")

2021 M Tournament: Accuracy: 69.77%, Stats loss: 799.078627 Result loss: 0.180692
2021 W Tournament: Accuracy: 69.77%, Stats loss: 799.078627 Result loss: 0.180692
2022 M Tournament: Accuracy: 73.13%, Stats loss: 813.262527 Result loss: 0.181059
2022 W Tournament: Accuracy: 73.13%, Stats loss: 813.262527 Result loss: 0.181059
2023 M Tournament: Accuracy: 70.15%, Stats loss: 815.193230 Result loss: 0.190260
2023 W Tournament: Accuracy: 70.15%, Stats loss: 815.193230 Result loss: 0.190260
2024 M Tournament: Accuracy: 72.39%, Stats loss: 857.632996 Result loss: 0.169177
2024 W Tournament: Accuracy: 72.39%, Stats loss: 857.632996 Result loss: 0.169177


## Generate a bracket

In [None]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    print(gen_bracket(dataset, 2024, 'M', hybrid).join(dataset.all_teams, on='Winner')[['Winner', 'TeamName']])