# March Madness 2025

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import os

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


## Hypothesis
Each team can be modeled by x hidden features. In each game, these hidden features interact in a nonlinear fashion to determine the outcome of the game

## Preparing the data
Load the data

In [2]:
mens = pd.read_csv('data/MRegularSeasonDetailedResults.csv')
mens['League'] = 'M'
mens.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,...,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0
mean,2014.146355,70.294986,1288.25451,75.878936,1283.13883,63.888287,0.068689,26.401824,55.760242,7.347445,...,20.15979,12.073403,17.732454,10.46174,21.62565,11.409722,13.888907,6.004739,2.868185,19.30578
std,6.515929,35.772556,105.3475,10.998547,104.795432,10.848767,0.305098,4.680314,7.456374,3.11926,...,6.068136,5.344049,7.081056,4.221039,4.518197,3.724567,4.3827,2.745969,2.01905,4.553353
min,2003.0,0.0,1101.0,34.0,1101.0,20.0,0.0,10.0,26.0,0.0,...,1.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0
25%,2009.0,40.0,1199.0,68.0,1192.0,57.0,0.0,23.0,51.0,5.0,...,16.0,8.0,13.0,7.0,19.0,9.0,11.0,4.0,1.0,16.0
50%,2014.0,73.0,1287.0,75.0,1282.0,64.0,0.0,26.0,55.0,7.0,...,20.0,12.0,17.0,10.0,21.0,11.0,14.0,6.0,3.0,19.0
75%,2020.0,101.0,1381.0,83.0,1374.0,71.0,0.0,29.0,60.0,9.0,...,24.0,15.0,22.0,13.0,25.0,14.0,17.0,8.0,4.0,22.0
max,2025.0,132.0,1480.0,149.0,1480.0,144.0,6.0,57.0,103.0,26.0,...,59.0,48.0,65.0,36.0,49.0,31.0,41.0,22.0,18.0,45.0


In [3]:
womens = pd.read_csv('data/WRegularSeasonDetailedResults.csv')
womens['League'] = 'W'
womens.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,...,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0
mean,2017.404609,69.183626,3285.116823,71.706633,3286.689554,57.242044,0.051708,25.847034,58.966574,6.276077,...,17.918413,10.507392,15.503808,11.36002,22.422122,10.935852,17.150745,7.109977,2.820839,18.19299
std,4.582659,36.157922,104.073477,11.536993,105.505327,10.960867,0.259072,4.978157,7.969144,3.127369,...,6.456006,4.936838,6.630184,4.640191,4.936106,3.805935,5.27718,3.1923,2.062848,4.556919
min,2010.0,0.0,3101.0,30.0,3101.0,11.0,0.0,9.0,30.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,3.0
25%,2013.0,36.0,3196.0,64.0,3195.0,50.0,0.0,22.0,53.0,4.0,...,13.0,7.0,11.0,8.0,19.0,8.0,13.0,5.0,1.0,15.0
50%,2017.0,73.0,3283.0,71.0,3287.0,57.0,0.0,25.0,59.0,6.0,...,17.0,10.0,15.0,11.0,22.0,11.0,17.0,7.0,2.0,18.0
75%,2022.0,101.0,3376.0,79.0,3377.0,64.0,0.0,29.0,64.0,8.0,...,22.0,14.0,20.0,14.0,26.0,13.0,20.0,9.0,4.0,21.0
max,2025.0,132.0,3480.0,140.0,3480.0,130.0,5.0,58.0,113.0,30.0,...,80.0,37.0,52.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


The IDs are definitely distinct so we can combine into a single dataframe

In [4]:
data = pd.concat([mens, womens])

Get the distinct team/Years

In [5]:
teams = pd.concat([data[['WTeamID', 'Season', 'League']].rename(columns={'WTeamID': 'TeamID'}),
                   data[['LTeamID', 'Season', 'League']].rename(columns={'LTeamID': 'TeamID'})]).drop_duplicates().reset_index()

teamMapping = {(x.TeamID, x.Season): x.Index for x in teams.itertuples()}

And get the distinct programs

In [6]:
programs = teams.TeamID.drop_duplicates().reset_index()

programMapping = {x.TeamID: x.Index for x in programs.itertuples()}

Define the training data. The x's will be the indexes of two team IDs and their average stats, the y's will be the score difference.

In [7]:
winningStats = data[['WTeamID', 'Season', 'WScore', 'WFGM', 'WFGA', 'WFGM3', 'WFGA3', 
                    'WFTM', 'WFTA', 'WOR', 'WDR', 'WAst', 'WTO', 'WStl', 'WBlk', 'WPF']]
winningStats.columns=['TeamID', 'Season', 'Score', 'FGM', 'FGA', 'FGM3', 'FGA3', 
                      'FTM', 'FTA', 'OR', 'DR', 'Ast', 'TO', 'Stl', 'Blk', 'PF']
losingStats = data[['LTeamID', 'Season', 'LScore', 'LFGM', 'LFGA', 'LFGM3', 'LFGA3', 
                             'LFTM', 'LFTA', 'LOR', 'LDR', 'LAst', 'LTO', 'LStl', 'LBlk', 'LPF']]
losingStats.columns=['TeamID', 'Season', 'Score', 'FGM', 'FGA', 'FGM3', 'FGA3', 
                     'FTM', 'FTA', 'OR', 'DR', 'Ast', 'TO', 'Stl', 'Blk', 'PF']
teamStats = pd.concat([winningStats, losingStats]).groupby(['TeamID', 'Season']).mean()

In [8]:
def gen_dataset(data):
    winning_team = data.apply(lambda x: teamMapping[(x.WTeamID, x.Season)], axis=1)
    losing_team = data.apply(lambda x: teamMapping[(x.LTeamID, x.Season)], axis=1)
    winning_program = data.apply(lambda x: programMapping[x.WTeamID], axis=1)
    losing_program = data.apply(lambda x: programMapping[x.LTeamID], axis=1)
    winner_stats = data.apply(lambda x: teamStats.loc[(x.WTeamID, x.Season)], axis=1)
    loser_stats = data.apply(lambda x: teamStats.loc[(x.LTeamID, x.Season)], axis=1)
    winning_matchups = np.concatenate([np.stack([winning_program, winning_team, losing_program, losing_team], axis=1),
                                       winner_stats, loser_stats], axis=1)
    losing_matchups = np.concatenate([np.stack([losing_program, losing_team, winning_program, winning_team], axis=1),
                                      loser_stats, winner_stats], axis=1)
    x_tensor = torch.from_numpy(np.concatenate([winning_matchups, losing_matchups])).double()
    y_tensor = torch.from_numpy(np.concatenate([(data.WScore-data.LScore), (data.LScore-data.WScore)]).reshape((-1,1))).double()
    return TensorDataset(x_tensor, y_tensor)

In [9]:
fname = 'dataset.pt'
if os.path.isfile(fname):
    dataset=torch.load(fname, weights_only=False)
else:
    dataset = gen_dataset(data)
    torch.save(dataset, fname)

Generate the train/validation split

In [10]:
batch_size=200

generator = torch.Generator().manual_seed(20250217)
train_data, validation_data = torch.utils.data.random_split(dataset, [0.95, 0.05], generator=generator)
train_loader = DataLoader(train_data, batch_size=batch_size)
validation_loader = DataLoader(validation_data, batch_size=batch_size)

## The Model
Define the model. Combine the embeddings for the two teams, go to a hidden layer, and then output to a prediction if the first team won

In [11]:
class Model(nn.Module):
    def __init__(self, embedding_size, model_sizes, dropout, stats_size=14):
        super(Model, self).__init__()
        hid1, hid2 = model_sizes
        self.team_embedding = nn.Embedding(len(teams), embedding_size)
        self.program_embedding = nn.Embedding(len(programs), embedding_size)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)
        self.fc1 = nn.Linear(4*embedding_size+2*stats_size, hid1)
        self.fc2 = nn.Linear(hid1, hid2)
        self.score_fc = nn.Linear(hid2, 1)
        self.result_fc = nn.Linear(hid2, 1)
        self.double()

    def forward(self, x):
        program = self.program_embedding(x[:,0].int())
        team = self.team_embedding(x[:,1].int())
        opponent_program = self.program_embedding(x[:,2].int())
        opponent = self.team_embedding(x[:,3].int())
        matchup = self.dropout1(torch.cat([program, team, opponent_program, opponent, x[:,4:]], axis=1))
        hidden1 = self.dropout2(F.relu(self.fc1(matchup)))
        hidden2 = self.dropout3(F.relu(self.fc2(hidden1)))
        score = self.score_fc(hidden2)
        result = F.sigmoid(self.result_fc(hidden2))
        return score, result
        

In [12]:
model = Model(embedding_size=128, model_sizes=(64,64), dropout=0.25).to(device)

## Training the model

Define the training function

In [13]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)

def train(data, model, loss_fn, optimizer, full_loss=True):
    size = len(data.dataset)
    model.train()
    for batch, (x, y) in enumerate(data):
        x = x.to(device)
        y = y.to(device)
        pred_score, pred_result = model(x)
        actual_result = (y > 0).double()
        score_loss = loss_fn(pred_score, y)
        result_loss = loss_fn(pred_result, actual_result)
        if full_loss:
            (score_loss + 10 * result_loss).backward()
        else:
            result_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            score_loss, result_loss, current = score_loss.item(), result_loss.item(), (batch + 1) * len(x)
            print(f"score loss: {score_loss:>7f}, result loss: {result_loss:>7f} [{current:>6d}/{size:>6d}]", end="\r")

Define the testing function

In [14]:
def test(data, model, loss_fn, label="Test"):
    size = len(data.dataset)
    num_batches = len(data)
    model.eval()
    score_loss, result_loss, correct = 0, 0, 0
    with torch.no_grad():
        for x, y in data:
            x = x.to(device)
            y = y.to(device)
            score_pred, result_pred = model(x)
            actual_result = (y > 0).double()
            score_loss += loss_fn(score_pred, y).item()
            result_loss += loss_fn(result_pred, actual_result).item()
            correct += ((result_pred >= 0.5) == (actual_result == 1)).type(torch.float).sum().item()
    score_loss /= num_batches
    result_loss /= num_batches
    correct /= size
    print(f"{label}: Accuracy: {(100*correct):>0.1f}%, Score loss: {score_loss:>8f}, Result loss: {result_loss:>8f}")

Train the model

In [15]:
n_epochs = 20
for i in range(n_epochs):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer)
    test(train_loader, model, loss_fn, label="Train")
    test(validation_loader, model, loss_fn, label="Validation")

Epoch 0
Train: Accuracy: 68.6%, Score loss: 197.555089, Result loss: 0.205350
Validation: Accuracy: 68.4%, Score loss: 198.029947, Result loss: 0.206468
Epoch 1
Train: Accuracy: 70.8%, Score loss: 179.393789, Result loss: 0.195397
Validation: Accuracy: 70.3%, Score loss: 180.446321, Result loss: 0.196984
Epoch 2
Train: Accuracy: 71.7%, Score loss: 165.851551, Result loss: 0.188384
Validation: Accuracy: 71.1%, Score loss: 167.284413, Result loss: 0.190286
Epoch 3
Train: Accuracy: 73.1%, Score loss: 156.858295, Result loss: 0.183095
Validation: Accuracy: 72.0%, Score loss: 159.470698, Result loss: 0.186138
Epoch 4
Train: Accuracy: 73.8%, Score loss: 149.130023, Result loss: 0.177523
Validation: Accuracy: 72.5%, Score loss: 152.964211, Result loss: 0.181714
Epoch 5
Train: Accuracy: 74.2%, Score loss: 143.905505, Result loss: 0.173508
Validation: Accuracy: 73.1%, Score loss: 148.238865, Result loss: 0.178322
Epoch 6
Train: Accuracy: 74.9%, Score loss: 140.701246, Result loss: 0.170577
Vali

Fine tune with only the result

In [16]:
for i in range(5):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer, full_loss=False)
    test(train_loader, model, loss_fn, label="Train")
    test(validation_loader, model, loss_fn, label="Validation")

Epoch 0
Train: Accuracy: 76.4%, Score loss: 120.068873, Result loss: 0.158468
Validation: Accuracy: 74.9%, Score loss: 128.319781, Result loss: 0.166488
Epoch 1
Train: Accuracy: 76.4%, Score loss: 120.121794, Result loss: 0.158392
Validation: Accuracy: 74.9%, Score loss: 128.339633, Result loss: 0.166466
Epoch 2
Train: Accuracy: 76.4%, Score loss: 120.205301, Result loss: 0.158325
Validation: Accuracy: 74.9%, Score loss: 128.379837, Result loss: 0.166460
Epoch 3
Train: Accuracy: 76.5%, Score loss: 120.545692, Result loss: 0.158032
Validation: Accuracy: 74.9%, Score loss: 128.588162, Result loss: 0.166302
Epoch 4
Train: Accuracy: 76.5%, Score loss: 120.937601, Result loss: 0.157977
Validation: Accuracy: 74.9%, Score loss: 128.823520, Result loss: 0.166271


With this model we can predict the output of about three quarters of regular season games.

## Load the tourney data to test with

In [17]:
mens_tourney = pd.read_csv('data/MNCAATourneyDetailedResults.csv')
womens_tourney = pd.read_csv('data/WNCAATourneyDetailedResults.csv')
tourney = pd.concat([mens_tourney, womens_tourney])

tourney_dataset = gen_dataset(tourney)
tourney_loader = DataLoader(tourney_dataset, batch_size=batch_size)

In [18]:
test(tourney_loader, model, loss_fn, label="Tourney")

Tourney: Accuracy: 74.2%, Score loss: 135.399772, Result loss: 0.168483


When it comes to tournament results we get a little worse. The lower result is likely due to teams having increased pairity.

### Performance by year


In [19]:
for season in tourney.Season.unique():
    loader = DataLoader(gen_dataset(tourney[tourney.Season == season]), batch_size=batch_size)
    test(loader, model, loss_fn, label=f"{season} Tournament")

2003 Tournament: Accuracy: 69.5%, Score loss: 116.318529, Result loss: 0.185653
2004 Tournament: Accuracy: 71.9%, Score loss: 101.598035, Result loss: 0.173708
2005 Tournament: Accuracy: 76.6%, Score loss: 93.097688, Result loss: 0.169688
2006 Tournament: Accuracy: 70.3%, Score loss: 99.891114, Result loss: 0.202148
2007 Tournament: Accuracy: 78.9%, Score loss: 108.093486, Result loss: 0.157088
2008 Tournament: Accuracy: 78.9%, Score loss: 142.428645, Result loss: 0.162599
2009 Tournament: Accuracy: 74.2%, Score loss: 140.234300, Result loss: 0.169499
2010 Tournament: Accuracy: 74.8%, Score loss: 134.872170, Result loss: 0.170486
2011 Tournament: Accuracy: 71.9%, Score loss: 127.257268, Result loss: 0.162249
2012 Tournament: Accuracy: 76.2%, Score loss: 107.690937, Result loss: 0.150190
2013 Tournament: Accuracy: 73.5%, Score loss: 141.469506, Result loss: 0.174289
2014 Tournament: Accuracy: 72.7%, Score loss: 147.288631, Result loss: 0.151803
2015 Tournament: Accuracy: 81.5%, Score lo

## Inspect the model
First what are the sizes of the smallest input and output weights

In [20]:
print(f"Program embedding min: {model.program_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"Team embedding min: {model.team_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"FC min: {model.result_fc.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")

Program embedding min: 2.703743
Team embedding min: 3.599445
FC min: 0.007405


Calculate the average gradient for each input feature

In [21]:
def feature_eval(model, data):
    model.eval()
    team_grads = torch.zeros(model.team_embedding.weight.shape[1]).to(device)
    program_grads = torch.zeros(model.program_embedding.weight.shape[1]).to(device)
    stats_grads = torch.zeros(28).to(device)
    size = len(data.dataset)
    for batch, (x, y) in enumerate(data):
        x = x.to(device)
        y = y.to(device)
        x.requires_grad = True
        _, pred_result = model(x)
        team_grads += torch.autograd.grad(model(x)[1].mean(), model.team_embedding.parameters())[0].sum(axis=0)
        program_grads += torch.autograd.grad(model(x)[1].mean(), model.program_embedding.parameters())[0].sum(axis=0)
        stats_grads += torch.autograd.grad(model(x)[1].mean(), x)[0].sum(axis=0)[4:]
    return program_grads/size, team_grads/size, stats_grads

In [22]:
program_weights, team_weights, stats_weights = feature_eval(model, train_loader)

In [23]:
program_weights.std(), team_weights.std()

(tensor(2.2126e-06, device='cuda:0'), tensor(2.1258e-06, device='cuda:0'))

In [24]:
for stat, weight in zip([f"W{stat}" for stat in teamStats] + [f"L{stat}" for stat in teamStats], stats_weights):
    print(f"{stat}:\t{weight:>4f}")

WScore:	5.361473
WFGM:	1.086517
WFGA:	0.089199
WFGM3:	1.824094
WFGA3:	0.184591
WFTM:	0.767693
WFTA:	0.421476
WOR:	0.422579
WDR:	0.491017
WAst:	2.235169
WTO:	-1.073484
WStl:	1.065839
WBlk:	3.345615
WPF:	-0.147888
LScore:	-5.569123
LFGM:	-1.137421
LFGA:	-0.039924
LFGM3:	-1.289743
LFGA3:	-0.274150
LFTM:	-0.210431
LFTA:	-0.151069
LOR:	-0.358148
LDR:	-0.015384
LAst:	-1.849719
LTO:	0.788442
LStl:	-0.476964
LBlk:	-4.074558
LPF:	0.290857


## Generating the submission file
### Phase 2

Write the results

In [25]:
with open('submission.csv', 'w') as f:
    f.write("ID,Pred\n")
    season=2025
    for league in ('M', 'W'):
        teams_to_test = sorted(teams[(teams.Season==season) & (teams.League==league)].TeamID.values)
        matchups = [(t1, t2) for t1 in teams_to_test for t2 in teams_to_test if t1 < t2]
        matchups_tensor = torch.Tensor(np.array(
            [np.concat([[programMapping[t1], teamMapping[(t1, season)],
                         programMapping[t2], teamMapping[(t2, season)]],
                        teamStats.loc[(t1, season)].values,
                        teamStats.loc[(t2, season)].values])
             for (t1, t2) in matchups])).int().to(device)
        _, predictions = model(matchups_tensor)
        for (t1, t2), pred in zip(matchups, predictions):
            f.write(f"{season}_{t1.item()}_{t2.item()},{pred.item()}\n")

## Save the model

In [26]:
torch.save(model.state_dict(), 'model.pth')