# March Madness 2025

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import os
from sklearn.model_selection import train_test_split
import random
from data import Data, STATS_COLUMNS
from model import *

torch.manual_seed(20250222)
random.seed(20250222)

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


## Hypothesis
Each team can be modeled by x hidden features. In each game, these hidden features interact in a nonlinear fashion to determine the outcome of the game

## Preparing the data
Load the data

In [2]:
dataset = Data()

dataset.games.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,...,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0
mean,2015.470621,69.843291,2099.847868,74.183169,2097.450588,61.187026,0.061787,26.176339,57.063405,6.912005,...,19.248818,11.436922,16.826656,10.826832,21.949363,11.217125,15.21463,6.453946,2.848942,18.853504
std,6.024751,35.933736,986.382716,11.406085,989.676138,11.373007,0.287403,4.811306,7.828931,3.16658,...,6.325219,5.239163,6.987616,4.418293,4.708807,3.765042,5.028571,2.985335,2.037092,4.587468
min,2003.0,0.0,1101.0,30.0,1101.0,11.0,0.0,9.0,26.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0
25%,2011.0,37.0,1260.0,66.0,1253.0,53.0,0.0,23.0,52.0,5.0,...,15.0,8.0,12.0,8.0,19.0,9.0,12.0,4.0,1.0,16.0
50%,2016.0,73.0,1413.0,74.0,1407.0,61.0,0.0,26.0,57.0,7.0,...,19.0,11.0,16.0,10.0,22.0,11.0,15.0,6.0,3.0,19.0
75%,2020.0,101.0,3244.0,81.0,3245.0,69.0,0.0,29.0,62.0,9.0,...,23.0,15.0,21.0,14.0,25.0,14.0,18.0,8.0,4.0,22.0
max,2025.0,132.0,3480.0,149.0,3480.0,144.0,6.0,58.0,113.0,30.0,...,80.0,48.0,65.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


The x's will be the indexes of two team IDs and program IDs, the y's will include and indicator of who won at the game stats

In [3]:
train_loader, validation_loader = dataset.train_test_data()

Loading cached data


## The Model
Define the model. Combine the embeddings for the two teams, go to a hidden layer, and then output to a prediction if the first team won

In [4]:
model = Model(embedding_sizes=[32, 512], model_sizes=(128,128), dropout=0.1, dataset=dataset).to(device)

## Training the model

In [5]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

Train the model

In [6]:
n_epochs = 23
for i in range(n_epochs):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer, device)
    test(train_loader, model, loss_fn, device, label="Train")
    test(validation_loader, model, loss_fn, device, label="Validation")

Epoch 0
Train: Accuracy: 56.71%, Stats loss: 44.309577 Result loss: 0.241747
Validation: Accuracy: 57.00%, Stats loss: 44.083655 Result loss: 0.241565
Epoch 1
Train: Accuracy: 66.38%, Stats loss: 46.733114 Result loss: 0.208390
Validation: Accuracy: 66.52%, Stats loss: 46.625822 Result loss: 0.207741
Epoch 2
Train: Accuracy: 62.21%, Stats loss: 54.610461 Result loss: 0.220213
Validation: Accuracy: 62.42%, Stats loss: 54.693890 Result loss: 0.219926
Epoch 3
Train: Accuracy: 67.57%, Stats loss: 50.207528 Result loss: 0.201991
Validation: Accuracy: 67.63%, Stats loss: 50.434271 Result loss: 0.202223
Epoch 4
Train: Accuracy: 68.60%, Stats loss: 53.027286 Result loss: 0.197672
Validation: Accuracy: 68.00%, Stats loss: 53.388696 Result loss: 0.198950
Epoch 5
Train: Accuracy: 69.34%, Stats loss: 48.351659 Result loss: 0.194405
Validation: Accuracy: 68.50%, Stats loss: 48.833390 Result loss: 0.197436
Epoch 6
Train: Accuracy: 70.73%, Stats loss: 48.931170 Result loss: 0.188002
Validation: Accur

Fine tune with only the result

In [7]:
for i in range(2):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer, device, full_loss=False)
    test(train_loader, model, loss_fn, device, label="Train")
    test(validation_loader, model, loss_fn, device, label="Validation")

Epoch 0
Train: Accuracy: 76.36%, Stats loss: 29.980636 Result loss: 0.157770
Validation: Accuracy: 73.84%, Stats loss: 31.377470 Result loss: 0.171196
Epoch 1
Train: Accuracy: 76.48%, Stats loss: 31.955826 Result loss: 0.157506
Validation: Accuracy: 73.94%, Stats loss: 33.295692 Result loss: 0.171049


With this model we can predict the output of about three quarters of regular season games.

## Load the tourney data to test with

In [8]:
tourney_dataset = dataset.gen_dataset(dataset.tourney)
tourney_loader = DataLoader(tourney_dataset, batch_size=500, shuffle=True)

In [9]:
test(tourney_loader, model, loss_fn, device, label="Tourney")

Tourney: Accuracy: 73.51%, Stats loss: 32.342797 Result loss: 0.169533


When it comes to tournament results we get a little worse. The lower result is likely due to teams having increased pairity.

Train with early torney data

In [10]:
tourney_df = dataset.tourney[dataset.tourney.Season < 2021]

tourney_train_df, tourney_validation_df = train_test_split(tourney_df, train_size=0.8)
tourney_train_data = dataset.gen_dataset(tourney_train_df)
tourney_validation_data = dataset.gen_dataset(tourney_validation_df)

tourney_train_loader = DataLoader(tourney_train_data, batch_size=500)
tourney_validation_loader = DataLoader(tourney_validation_data, batch_size=500)

In [11]:
for param in model.team_embedding.parameters():
    param.requires_grad=False
for param in model.program_embedding.parameters():
    param.requires_grad=False

In [12]:
for i in range(0):
    print(f"Epoch {i}")
    train(tourney_train_loader, model, loss_fn, optimizer, device, full_loss=False)
    test(tourney_train_loader, model, loss_fn, device, label="Train")
    test(tourney_validation_loader, model, loss_fn, device, label="Validation")

### Performance by year


In [13]:
for season in dataset.tourney.Season.unique():
    loader = dataset.tourney_data(year=season)
    test(loader, model, loss_fn, device, label=f"{season} Tournament")

2003 Tournament: Accuracy: 69.53%, Stats loss: 31.472745 Result loss: 0.182067
2004 Tournament: Accuracy: 67.97%, Stats loss: 32.963798 Result loss: 0.183296
2005 Tournament: Accuracy: 75.78%, Stats loss: 35.640857 Result loss: 0.169420
2006 Tournament: Accuracy: 67.19%, Stats loss: 35.163873 Result loss: 0.204526
2007 Tournament: Accuracy: 73.44%, Stats loss: 35.376465 Result loss: 0.163633
2008 Tournament: Accuracy: 77.34%, Stats loss: 35.439781 Result loss: 0.159619
2009 Tournament: Accuracy: 71.09%, Stats loss: 33.374700 Result loss: 0.167339
2010 Tournament: Accuracy: 70.47%, Stats loss: 32.520668 Result loss: 0.172346
2011 Tournament: Accuracy: 73.08%, Stats loss: 31.003700 Result loss: 0.170791
2012 Tournament: Accuracy: 79.23%, Stats loss: 30.436748 Result loss: 0.151482
2013 Tournament: Accuracy: 73.46%, Stats loss: 32.796096 Result loss: 0.177028
2014 Tournament: Accuracy: 70.77%, Stats loss: 30.896619 Result loss: 0.169201
2015 Tournament: Accuracy: 80.00%, Stats loss: 32.31

In [14]:
stage1_loader = dataset.tourney_data(after=2021)
test(stage1_loader, model, loss_fn, device=device, label=f"Stage 1")

Stage 1: Accuracy: 72.60%, Stats loss: 30.729799 Result loss: 0.178758


Breaking out by league

In [15]:
for season in dataset.tourney.Season.unique():
    for league in dataset.tourney[dataset.tourney.Season == season].League.unique():
        loader = dataset.tourney_data(year=season, league=league)
        test(loader, model, loss_fn, device, label=f"{season} {league} Tournament")

2003 M Tournament: Accuracy: 69.53%, Stats loss: 31.472745 Result loss: 0.182067
2004 M Tournament: Accuracy: 67.97%, Stats loss: 32.963798 Result loss: 0.183296
2005 M Tournament: Accuracy: 75.78%, Stats loss: 35.640857 Result loss: 0.169420
2006 M Tournament: Accuracy: 67.19%, Stats loss: 35.163873 Result loss: 0.204526
2007 M Tournament: Accuracy: 73.44%, Stats loss: 35.376465 Result loss: 0.163633
2008 M Tournament: Accuracy: 77.34%, Stats loss: 35.439781 Result loss: 0.159619
2009 M Tournament: Accuracy: 71.09%, Stats loss: 33.374700 Result loss: 0.167339
2010 M Tournament: Accuracy: 64.84%, Stats loss: 32.756283 Result loss: 0.201122
2010 W Tournament: Accuracy: 76.19%, Stats loss: 32.281313 Result loss: 0.143113
2011 M Tournament: Accuracy: 67.16%, Stats loss: 30.326291 Result loss: 0.220250
2011 W Tournament: Accuracy: 79.37%, Stats loss: 31.724120 Result loss: 0.118192
2012 M Tournament: Accuracy: 73.13%, Stats loss: 26.649992 Result loss: 0.185663
2012 W Tournament: Accuracy:

## Inspect the model
First what are the sizes of the smallest input and output weights

In [16]:
print(f"Program embedding min: {model.program_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"Team embedding min: {model.team_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"FC min: {model.result_fc.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")

Program embedding min: 2.638656
Team embedding min: 3.609467
FC min: 0.000004


Calculate the average gradient for each input feature

In [17]:
for param in model.team_embedding.parameters():
    param.requires_grad=True
for param in model.program_embedding.parameters():
    param.requires_grad=True

In [18]:
program_weights, team_weights, stats_weights = feature_eval(model, tourney_loader)

In [19]:
program_weights.abs().sum().item(), team_weights.abs().sum().item()

(0.01589220017194748, 0.011467041447758675)

In [20]:
print(f"Year:\t{stats_weights[0]:>4f}")
print(f"Game:\t{stats_weights[1]:>4f}")
print(f"League:\t{stats_weights[2]:>4f}")

Year:	0.063523
Game:	-0.027184
League:	-0.036515


## Generating the submission file
### Phase 2

Write the results

In [21]:
odds = model_odds(dataset, 2025, 'M', model)

In [22]:
gen_submission(model, dataset)

## Save the model

In [23]:
torch.save(model.state_dict(), 'model.pth')

## Moderated model

Moderate a model by pushing it towards 0.5

In [24]:
moderated = ModeratedModel(model, 0.75)

In [25]:
for season in dataset.tourney.Season.unique():
    loader = dataset.tourney_data(season)
    test(loader, moderated, loss_fn, label=f"{season} Tournament")

2003 Tournament: Accuracy: 69.53%, Stats loss: 104.386254 Result loss: 0.182067
2004 Tournament: Accuracy: 67.97%, Stats loss: 104.071582 Result loss: 0.183296
2005 Tournament: Accuracy: 75.78%, Stats loss: 106.920721 Result loss: 0.169420
2006 Tournament: Accuracy: 67.19%, Stats loss: 101.673995 Result loss: 0.204526
2007 Tournament: Accuracy: 73.44%, Stats loss: 109.998892 Result loss: 0.163633
2008 Tournament: Accuracy: 77.34%, Stats loss: 108.044995 Result loss: 0.159619
2009 Tournament: Accuracy: 71.09%, Stats loss: 107.572739 Result loss: 0.167339
2010 Tournament: Accuracy: 70.47%, Stats loss: 100.777349 Result loss: 0.172346
2011 Tournament: Accuracy: 73.08%, Stats loss: 96.804444 Result loss: 0.170791
2012 Tournament: Accuracy: 79.23%, Stats loss: 95.803116 Result loss: 0.151482
2013 Tournament: Accuracy: 73.46%, Stats loss: 99.457465 Result loss: 0.177028
2014 Tournament: Accuracy: 70.77%, Stats loss: 96.686637 Result loss: 0.169201
2015 Tournament: Accuracy: 80.00%, Stats los

## Dig into 2023 results

In [26]:
loader = dataset.tourney_data(2023)

x, y = loader.dataset.tensors

preds = model(x.to(device))

In [27]:
t_2023 = pd.DataFrame({'winner_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                       'loser_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                       'winner': [dataset.programs.loc[i].TeamID for i in x[:,0].tolist()],
                       'loser': [dataset.programs.loc[i].TeamID for i in x[:,2].tolist()],
                       'actual': y[:,0].reshape([-1]),
                       'predicted': np.array(preds[0].tolist()).reshape([-1])}).iloc[:67]

In [28]:
t_2023[t_2023.predicted < 0.5].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted
23,F Dickinson,Purdue,1192,1345,1.0,0.0083
15,Princeton,Arizona,1343,1112,1.0,0.092517
8,Furman,Virginia,1202,1438,1.0,0.16557
57,Miami FL,Houston,1274,1222,1.0,0.23756
39,Princeton,Missouri,1343,1281,1.0,0.271259
53,FL Atlantic,Tennessee,1194,1397,1.0,0.276748
37,Arkansas,Kansas,1116,1242,1.0,0.278558
50,Michigan St,Marquette,1277,1266,1.0,0.316798
63,San Diego St,Creighton,1361,1166,1.0,0.329716
58,San Diego St,Alabama,1361,1104,1.0,0.342397


The biggest thing in this season were the huge upsets in the first round. Purdue was a number one seed and lost which I only gave a .4% chance to happen. Arizona and Virginia were number 2 seeds and lost which I gave 7% and 15% chances of happening respectively.

In [29]:
t_2023['Upset'] = [dataset.upset(2023, winner, loser) for (winner, loser) in zip(t_2023['winner'], t_2023['loser'])]

In [30]:
t_2023[t_2023.Upset].predicted.mean()

np.float64(0.35891693297722116)

On average the upsets had a 32% chance of happening

In [31]:
t_2023[t_2023.Upset & (t_2023.predicted >= 0.5)].sort_values('predicted', ascending=False)

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset
45,Creighton,Baylor,1166,1124,1.0,0.629418,True
60,Connecticut,Gonzaga,1163,1211,1.0,0.551378,True
24,FL Atlantic,Memphis,1194,1272,1.0,0.526502,True
14,Penn St,Texas A&M,1336,1401,1.0,0.516735,True


I correctly predicted 2 upsets, though all were closely ranked

In [32]:
t_2023[~t_2023.Upset & (t_2023.predicted < 0.5)].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual,predicted,Upset
63,San Diego St,Creighton,1361,1166,1.0,0.329716,False
5,Arkansas,Illinois,1116,1228,1.0,0.385504,False
12,Missouri,Utah St,1281,1429,1.0,0.413072,False
59,Texas,Xavier,1400,1462,1.0,0.455502,False
11,Maryland,West Virginia,1268,1452,1.0,0.465191,False
28,Kentucky,Providence,1246,1344,1.0,0.485546,False


I also incorrectly predicted 4 upsets

Looking at all the tourneys

In [33]:
x, y = tourney_loader.dataset.tensors
preds = model(x.to(device))
tourney_df = pd.DataFrame({'season': x[:,4].tolist(),
                           'winner_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                           'loser_name': [dataset.all_teams.loc[dataset.programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                           'winner': [dataset.programs.loc[i].TeamID for i in x[:,0].tolist()],
                           'loser': [dataset.programs.loc[i].TeamID for i in x[:,2].tolist()],
                           'actual': y[:,0].reshape([-1]),
                           'predicted': np.array(preds[0].tolist()).reshape([-1])})
tourney_df = tourney_df[tourney_df.actual == 1.0]
tourney_df['Upset'] = [dataset.upset(season, winner, loser) for (winner, loser, season)
                       in zip(tourney_df['winner'], tourney_df['loser'], tourney_df['season'])]

In [34]:
len(tourney_df[tourney_df.Upset & (tourney_df.predicted >= 0.5) & (tourney_df.season > 2020)].sort_values('predicted', ascending=False))

27

In [35]:
len(tourney_df[~tourney_df.Upset & (tourney_df.predicted < 0.5) & (tourney_df.season > 2020)].sort_values('predicted'))

31

Overall I predicted 19 upsets correctly, and 30 incorrectly

## Predicting by seeds
What if I predict just using the seeds?

In [36]:
odds = dataset.odds_by_seed_diff(before=2021)

In [37]:
dataset.tourney_df(after=2021).SeedDiff.map(lambda x: odds[x]**2).mean()

np.float64(0.18527460145235355)

This results in a test Brier score of about 0.185.

## Hybrid Model
Building a model using the neural net and seeds

In [38]:
seed_model = SeedModel(dataset, after=2021)
test(stage1_loader, seed_model, loss_fn, label=f"Seeds")

Seeds: Accuracy: 72.22%, Stats loss: 821.501009 Result loss: 0.182851


In [39]:
test(stage1_loader, model, loss_fn, label="NN")

NN: Accuracy: 72.60%, Stats loss: 30.729799 Result loss: 0.178758


In [40]:
hybrid = HybridModel([model, seed_model], [0.8, 0.2])

In [41]:
test(stage1_loader, hybrid, loss_fn, label=f"Hybrid")

Hybrid: Accuracy: 73.16%, Stats loss: 821.501009 Result loss: 0.176619


They hybrid model outperforms both individual models

In [42]:
for season in range(2021, 2025):
    for league in ['M', 'W']:
        loader = dataset.tourney_data(season, league)
        test(loader, hybrid, loss_fn, label=f"{season} {league} Tournament")

2021 M Tournament: Accuracy: 71.71%, Stats loss: 799.078627 Result loss: 0.180512
2021 W Tournament: Accuracy: 71.71%, Stats loss: 799.078627 Result loss: 0.180512
2022 M Tournament: Accuracy: 73.88%, Stats loss: 813.262527 Result loss: 0.176241
2022 W Tournament: Accuracy: 73.88%, Stats loss: 813.262527 Result loss: 0.176241
2023 M Tournament: Accuracy: 71.27%, Stats loss: 815.193230 Result loss: 0.189105
2023 W Tournament: Accuracy: 71.27%, Stats loss: 815.193230 Result loss: 0.189105
2024 M Tournament: Accuracy: 75.75%, Stats loss: 857.632996 Result loss: 0.160764
2024 W Tournament: Accuracy: 75.75%, Stats loss: 857.632996 Result loss: 0.160764


## Generate a bracket

In [43]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    print(gen_bracket(dataset, 2024, 'M', hybrid).join(dataset.all_teams, on='Winner')[['Winner', 'TeamName']])

      Winner        TeamName
Slot                        
R1W1    1163     Connecticut
R1W2    1235         Iowa St
R1W3    1228        Illinois
R1W4    1120          Auburn
R1W5    1361    San Diego St
R1W6    1140             BYU
R1W7    1450   Washington St
R1W8    1194     FL Atlantic
R1X1    1314  North Carolina
R1X2    1112         Arizona
R1X3    1124          Baylor
R1X4    1104         Alabama
R1X5    1388    St Mary's CA
R1X6    1155         Clemson
R1X7    1173          Dayton
R1X8    1277     Michigan St
R1Y1    1345          Purdue
R1Y2    1397       Tennessee
R1Y3    1166       Creighton
R1Y4    1242          Kansas
R1Y5    1211         Gonzaga
R1Y6    1332          Oregon
R1Y7    1400           Texas
R1Y8    1395             TCU
R1Z1    1222         Houston
R1Z2    1266       Marquette
R1Z3    1246        Kentucky
R1Z4    1181            Duke
R1Z5    1458       Wisconsin
R1Z6    1403      Texas Tech
R1Z7    1160        Colorado
R1Z8    1304        Nebraska
R2W1    1163  