# March Madness 2025

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


## Hypothesis
Each team can be modeled by x hidden features. In each game, these hidden features interact in a nonlinear fashion to determine the outcome of the game

## Preparing the data
Load the data

In [2]:
mens = pd.read_csv('data/MRegularSeasonDetailedResults.csv')
mens['League'] = 'M'
mens.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,...,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0
mean,2014.146355,70.294986,1288.25451,75.878936,1283.13883,63.888287,0.068689,26.401824,55.760242,7.347445,...,20.15979,12.073403,17.732454,10.46174,21.62565,11.409722,13.888907,6.004739,2.868185,19.30578
std,6.515929,35.772556,105.3475,10.998547,104.795432,10.848767,0.305098,4.680314,7.456374,3.11926,...,6.068136,5.344049,7.081056,4.221039,4.518197,3.724567,4.3827,2.745969,2.01905,4.553353
min,2003.0,0.0,1101.0,34.0,1101.0,20.0,0.0,10.0,26.0,0.0,...,1.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0
25%,2009.0,40.0,1199.0,68.0,1192.0,57.0,0.0,23.0,51.0,5.0,...,16.0,8.0,13.0,7.0,19.0,9.0,11.0,4.0,1.0,16.0
50%,2014.0,73.0,1287.0,75.0,1282.0,64.0,0.0,26.0,55.0,7.0,...,20.0,12.0,17.0,10.0,21.0,11.0,14.0,6.0,3.0,19.0
75%,2020.0,101.0,1381.0,83.0,1374.0,71.0,0.0,29.0,60.0,9.0,...,24.0,15.0,22.0,13.0,25.0,14.0,17.0,8.0,4.0,22.0
max,2025.0,132.0,1480.0,149.0,1480.0,144.0,6.0,57.0,103.0,26.0,...,59.0,48.0,65.0,36.0,49.0,31.0,41.0,22.0,18.0,45.0


In [3]:
womens = pd.read_csv('data/WRegularSeasonDetailedResults.csv')
womens['League'] = 'W'
womens.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,...,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0
mean,2017.404609,69.183626,3285.116823,71.706633,3286.689554,57.242044,0.051708,25.847034,58.966574,6.276077,...,17.918413,10.507392,15.503808,11.36002,22.422122,10.935852,17.150745,7.109977,2.820839,18.19299
std,4.582659,36.157922,104.073477,11.536993,105.505327,10.960867,0.259072,4.978157,7.969144,3.127369,...,6.456006,4.936838,6.630184,4.640191,4.936106,3.805935,5.27718,3.1923,2.062848,4.556919
min,2010.0,0.0,3101.0,30.0,3101.0,11.0,0.0,9.0,30.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,3.0
25%,2013.0,36.0,3196.0,64.0,3195.0,50.0,0.0,22.0,53.0,4.0,...,13.0,7.0,11.0,8.0,19.0,8.0,13.0,5.0,1.0,15.0
50%,2017.0,73.0,3283.0,71.0,3287.0,57.0,0.0,25.0,59.0,6.0,...,17.0,10.0,15.0,11.0,22.0,11.0,17.0,7.0,2.0,18.0
75%,2022.0,101.0,3376.0,79.0,3377.0,64.0,0.0,29.0,64.0,8.0,...,22.0,14.0,20.0,14.0,26.0,13.0,20.0,9.0,4.0,21.0
max,2025.0,132.0,3480.0,140.0,3480.0,130.0,5.0,58.0,113.0,30.0,...,80.0,37.0,52.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


The IDs are definitely distinct so we can combine into a single dataframe

In [4]:
data = pd.concat([mens, womens])

Get the distinct team/Years

In [5]:
teams = pd.concat([data[['WTeamID', 'Season', 'League']].rename(columns={'WTeamID': 'TeamID'}),
                   data[['LTeamID', 'Season', 'League']].rename(columns={'LTeamID': 'TeamID'})]).drop_duplicates().reset_index()

Define the training data. The x's will be the indexes of two team IDs, the y's will be 1 if the first team won, 0 otherwise.

In [6]:
teamMapping = {(x.TeamID, x.Season): x.Index for x in teams.itertuples()}

In [7]:
data

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF,League
0,2003,10,1104,68,1328,62,N,0,27,58,...,16,22,10,22,8,18,9,2,20,M
1,2003,10,1272,70,1393,63,N,0,26,62,...,9,20,20,25,7,12,8,6,16,M
2,2003,11,1266,73,1437,61,N,0,24,58,...,14,23,31,22,9,12,2,5,23,M
3,2003,11,1296,56,1457,50,N,0,18,38,...,8,15,17,20,9,19,4,3,23,M
4,2003,11,1400,77,1208,71,N,0,30,61,...,17,27,21,15,12,10,7,1,14,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80621,2025,106,3242,63,3416,58,H,0,17,46,...,5,11,15,21,11,12,4,2,21,W
80622,2025,106,3329,68,3428,64,A,0,23,63,...,9,16,8,18,16,11,8,6,20,W
80623,2025,106,3349,72,3194,39,H,0,30,63,...,16,23,4,25,5,10,4,3,9,W
80624,2025,106,3378,70,3150,52,A,0,25,59,...,11,14,7,24,5,15,8,2,15,W


In [8]:
def gen_dataset(data):
    winners = data.apply(lambda x: teamMapping[(x.WTeamID, x.Season)], axis=1)
    losers = data.apply(lambda x: teamMapping[(x.LTeamID, x.Season)], axis=1)
    x_tensor = torch.from_numpy(np.concatenate([np.stack([winners, losers], axis=1), np.stack([losers, winners], axis=1)]))
    y_tensor = torch.from_numpy(np.concatenate([(data.WScore-data.LScore), (data.LScore-data.WScore)]).reshape((-1,1))).double()
    return TensorDataset(x_tensor, y_tensor)

In [9]:
dataset = gen_dataset(data)

Generate the train/validation split

In [10]:
batch_size=100

generator = torch.Generator().manual_seed(20250217)
train_data, validation_data = torch.utils.data.random_split(dataset, [0.95, 0.05], generator=generator)
train_loader = DataLoader(train_data, batch_size=batch_size)
validation_loader = DataLoader(validation_data, batch_size=batch_size)

## The Model
Define the model. Combine the embeddings for the two teams, go to a hidden layer, and then output to a prediction if the first team won

In [11]:
class Model(nn.Module):
    def __init__(self, embedding_size=64, model_size=16, dropout=0.1):
        super(Model, self).__init__()
        self.embedding = nn.Embedding(len(teams), embedding_size)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.fc1 = nn.Linear(2*embedding_size, model_size)
        self.score_fc = nn.Linear(model_size, 1)
        self.result_fc = nn.Linear(model_size, 1)
        self.double()

    def forward(self, x):
        team = self.embedding(x[:,0])
        opponent = self.embedding(x[:,1])
        matchup = self.dropout1(torch.cat([team, opponent], axis=1))
        hidden = self.dropout2(F.relu(self.fc1(matchup)))
        score = self.score_fc(hidden)
        result = F.sigmoid(self.result_fc(hidden))
        return score, result
        

In [12]:
model = Model(embedding_size=128, model_size=64, dropout=0.25).to(device)

## Training the model

Define the training function

In [13]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)

def train(data, model, loss_fn, optimizer, full_loss=True):
    size = len(data.dataset)
    model.train()
    for batch, (x, y) in enumerate(data):
        x = x.to(device)
        y = y.to(device)
        pred_score, pred_result = model(x)
        actual_result = (y > 0).double()
        score_loss = loss_fn(pred_score, y)
        result_loss = loss_fn(pred_result, actual_result)
        if full_loss:
            (score_loss + 10 * result_loss).backward()
        else:
            result_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            score_loss, result_loss, current = score_loss.item(), result_loss.item(), (batch + 1) * len(x)
            print(f"score loss: {score_loss:>7f}, result loss: {result_loss:>7f} [{current:>6d}/{size:>6d}]", end="\r")

Define the testing function

In [14]:
def test(data, model, loss_fn, label="Test"):
    size = len(data.dataset)
    num_batches = len(data)
    model.eval()
    score_loss, result_loss, correct = 0, 0, 0
    with torch.no_grad():
        for x, y in data:
            x = x.to(device)
            y = y.to(device)
            score_pred, result_pred = model(x)
            actual_result = (y > 0).double()
            score_loss += loss_fn(score_pred, y).item()
            result_loss += loss_fn(result_pred, actual_result)
            correct += ((result_pred >= 0.5) == (actual_result == 1)).type(torch.float).sum().item()
    score_loss /= num_batches
    result_loss /= num_batches
    correct /= size
    print(f"{label}: Accuracy: {(100*correct):>0.1f}%, Score loss: {score_loss:>8f}, Result loss: {result_loss:>8f}")

Train the model

In [15]:
n_epochs = 20
for i in range(n_epochs):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer)
    test(train_loader, model, loss_fn, label="Train")
    test(validation_loader, model, loss_fn, label="Validation")

Epoch 0
Train: Accuracy: 58.1%, Score loss: 259.632630, Result loss: 0.241159
Validation: Accuracy: 57.5%, Score loss: 260.685737, Result loss: 0.242353
Epoch 1
Train: Accuracy: 64.9%, Score loss: 225.729596, Result loss: 0.219980
Validation: Accuracy: 64.0%, Score loss: 230.557499, Result loss: 0.223598
Epoch 2
Train: Accuracy: 69.3%, Score loss: 195.021155, Result loss: 0.200477
Validation: Accuracy: 68.1%, Score loss: 202.399891, Result loss: 0.206178
Epoch 3
Train: Accuracy: 72.1%, Score loss: 171.112397, Result loss: 0.185808
Validation: Accuracy: 70.6%, Score loss: 180.220591, Result loss: 0.193139
Epoch 4
Train: Accuracy: 73.9%, Score loss: 154.755154, Result loss: 0.175642
Validation: Accuracy: 72.0%, Score loss: 164.617941, Result loss: 0.183754
Epoch 5
Train: Accuracy: 75.0%, Score loss: 143.922457, Result loss: 0.169271
Validation: Accuracy: 73.0%, Score loss: 154.132503, Result loss: 0.177815
Epoch 6
Train: Accuracy: 75.6%, Score loss: 136.800581, Result loss: 0.165415
Vali

Fine tune with only the result

In [16]:
for i in range(10):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer, full_loss=False)
    test(train_loader, model, loss_fn, label="Train")
    test(validation_loader, model, loss_fn, label="Validation")

Epoch 0
Train: Accuracy: 76.6%, Score loss: 118.416099, Result loss: 0.157343
Validation: Accuracy: 75.0%, Score loss: 127.888337, Result loss: 0.166071
Epoch 1
Train: Accuracy: 76.6%, Score loss: 118.401010, Result loss: 0.157100
Validation: Accuracy: 75.1%, Score loss: 127.880492, Result loss: 0.165969
Epoch 2
Train: Accuracy: 76.7%, Score loss: 118.373521, Result loss: 0.156441
Validation: Accuracy: 75.0%, Score loss: 127.874743, Result loss: 0.165722
Epoch 3
Train: Accuracy: 77.1%, Score loss: 118.545003, Result loss: 0.154547
Validation: Accuracy: 75.1%, Score loss: 128.064450, Result loss: 0.164978
Epoch 4
Train: Accuracy: 77.5%, Score loss: 118.988274, Result loss: 0.152649
Validation: Accuracy: 75.2%, Score loss: 128.603569, Result loss: 0.164350
Epoch 5
Train: Accuracy: 77.8%, Score loss: 119.890495, Result loss: 0.151092
Validation: Accuracy: 75.2%, Score loss: 129.660673, Result loss: 0.164154
Epoch 6
Train: Accuracy: 78.1%, Score loss: 121.145905, Result loss: 0.150126
Vali

With this model we can predict the output of about three quarters of regular season games.

## Load the tourney data to test with

In [17]:
mens_tourney = pd.read_csv('data/MNCAATourneyDetailedResults.csv')
womens_tourney = pd.read_csv('data/WNCAATourneyDetailedResults.csv')
tourney = pd.concat([mens_tourney, womens_tourney])

tourney_dataset = gen_dataset(tourney)
tourney_loader = DataLoader(tourney_dataset, batch_size=batch_size)

In [18]:
test(tourney_loader, model, loss_fn, label="Tourney")

Tourney: Accuracy: 72.9%, Score loss: 143.922916, Result loss: 0.173403


When it comes to tournament results we get about 7 out of 10 results. The lower result is likely due to teams having increased pairity.

### Performance by year


In [19]:
for season in tourney.Season.unique():
    loader = DataLoader(gen_dataset(tourney[tourney.Season == season]), batch_size=batch_size)
    test(loader, model, loss_fn, label=f"{season} Tournament")

2003 Tournament: Accuracy: 67.2%, Score loss: 148.399814, Result loss: 0.207213
2004 Tournament: Accuracy: 71.9%, Score loss: 120.700810, Result loss: 0.210350
2005 Tournament: Accuracy: 74.2%, Score loss: 93.888995, Result loss: 0.185747
2006 Tournament: Accuracy: 68.0%, Score loss: 112.387519, Result loss: 0.227212
2007 Tournament: Accuracy: 81.2%, Score loss: 92.082273, Result loss: 0.153581
2008 Tournament: Accuracy: 73.4%, Score loss: 153.820765, Result loss: 0.167643
2009 Tournament: Accuracy: 77.3%, Score loss: 131.938593, Result loss: 0.165084
2010 Tournament: Accuracy: 71.7%, Score loss: 124.564780, Result loss: 0.172917
2011 Tournament: Accuracy: 68.5%, Score loss: 145.110581, Result loss: 0.179811
2012 Tournament: Accuracy: 77.7%, Score loss: 113.500898, Result loss: 0.147950
2013 Tournament: Accuracy: 70.8%, Score loss: 167.666320, Result loss: 0.181573
2014 Tournament: Accuracy: 70.0%, Score loss: 149.198077, Result loss: 0.164372
2015 Tournament: Accuracy: 77.3%, Score lo

## Final tuning
Finally we can train with the early tournament data, pre 2021, as the latter will be used in the submission and we don't want to overfit. We should freeze the embedding layer as the actual evaluation will be on teams where we don't have tourney data to train for. We only want to train the inner layers that determine whether or not a particular vector will win

tourney_train_loader = DataLoader(gen_dataset(tourney[tourney.Season < 2021]), shuffle=True, batch_size=8)
tourney_test_loader = DataLoader(gen_dataset(tourney[tourney.Season >= 2021]), batch_size=8)

for param in model.embedding.parameters():
    param.requires_grad = False
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)


for i in range(10):
    print(f"Epoch {i}")
    train(tourney_train_loader, model, loss_fn, optimizer, full_loss=False)
    test(tourney_train_loader, model, loss_fn, label="Train")
    test(tourney_test_loader, model, loss_fn, label="Test")


## Generating the submission file
### Phase 1

Write the results

In [20]:
with open('submission.csv', 'w') as f:
    f.write("ID,Pred\n")
    season=2025
    for league in ('M', 'W'):
        teams_to_test = sorted(teams[(teams.Season==season) & (teams.League==league)].TeamID.values)
        matchups = [(t1, t2) for t1 in teams_to_test for t2 in teams_to_test if t1 < t2]
        matchups_tensor = torch.Tensor([(teamMapping[(t1, season)], teamMapping[(t2, season)])
                                 for (t1, t2) in matchups]).int().to(device)
        _, predictions = model(matchups_tensor)
        for (t1, t2), pred in zip(matchups, predictions):
            f.write(f"{season}_{t1.item()}_{t2.item()},{pred.item()}\n")

Two teams canceled their 2021 season due to covid but are still in the sample submission. Add in their results

## Save the model

In [21]:
torch.save(model.state_dict(), 'model.pth')