# March Madness 2025

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import os


torch.manual_seed(20250222)

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


## Hypothesis
Each team can be modeled by x hidden features. In each game, these hidden features interact in a nonlinear fashion to determine the outcome of the game

## Preparing the data
Load the data

In [2]:
mens = pd.read_csv('data/MRegularSeasonDetailedResults.csv')
mens['League'] = 'M'
mens.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,...,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0
mean,2014.146355,70.294986,1288.25451,75.878936,1283.13883,63.888287,0.068689,26.401824,55.760242,7.347445,...,20.15979,12.073403,17.732454,10.46174,21.62565,11.409722,13.888907,6.004739,2.868185,19.30578
std,6.515929,35.772556,105.3475,10.998547,104.795432,10.848767,0.305098,4.680314,7.456374,3.11926,...,6.068136,5.344049,7.081056,4.221039,4.518197,3.724567,4.3827,2.745969,2.01905,4.553353
min,2003.0,0.0,1101.0,34.0,1101.0,20.0,0.0,10.0,26.0,0.0,...,1.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0
25%,2009.0,40.0,1199.0,68.0,1192.0,57.0,0.0,23.0,51.0,5.0,...,16.0,8.0,13.0,7.0,19.0,9.0,11.0,4.0,1.0,16.0
50%,2014.0,73.0,1287.0,75.0,1282.0,64.0,0.0,26.0,55.0,7.0,...,20.0,12.0,17.0,10.0,21.0,11.0,14.0,6.0,3.0,19.0
75%,2020.0,101.0,1381.0,83.0,1374.0,71.0,0.0,29.0,60.0,9.0,...,24.0,15.0,22.0,13.0,25.0,14.0,17.0,8.0,4.0,22.0
max,2025.0,132.0,1480.0,149.0,1480.0,144.0,6.0,57.0,103.0,26.0,...,59.0,48.0,65.0,36.0,49.0,31.0,41.0,22.0,18.0,45.0


In [3]:
womens = pd.read_csv('data/WRegularSeasonDetailedResults.csv')
womens['League'] = 'W'
womens.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,...,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0
mean,2017.404609,69.183626,3285.116823,71.706633,3286.689554,57.242044,0.051708,25.847034,58.966574,6.276077,...,17.918413,10.507392,15.503808,11.36002,22.422122,10.935852,17.150745,7.109977,2.820839,18.19299
std,4.582659,36.157922,104.073477,11.536993,105.505327,10.960867,0.259072,4.978157,7.969144,3.127369,...,6.456006,4.936838,6.630184,4.640191,4.936106,3.805935,5.27718,3.1923,2.062848,4.556919
min,2010.0,0.0,3101.0,30.0,3101.0,11.0,0.0,9.0,30.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,3.0
25%,2013.0,36.0,3196.0,64.0,3195.0,50.0,0.0,22.0,53.0,4.0,...,13.0,7.0,11.0,8.0,19.0,8.0,13.0,5.0,1.0,15.0
50%,2017.0,73.0,3283.0,71.0,3287.0,57.0,0.0,25.0,59.0,6.0,...,17.0,10.0,15.0,11.0,22.0,11.0,17.0,7.0,2.0,18.0
75%,2022.0,101.0,3376.0,79.0,3377.0,64.0,0.0,29.0,64.0,8.0,...,22.0,14.0,20.0,14.0,26.0,13.0,20.0,9.0,4.0,21.0
max,2025.0,132.0,3480.0,140.0,3480.0,130.0,5.0,58.0,113.0,30.0,...,80.0,37.0,52.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


The IDs are definitely distinct so we can combine into a single dataframe

In [4]:
data = pd.concat([mens, womens])

Get the distinct team/Years

In [5]:
teams = pd.concat([data[['WTeamID', 'Season', 'League']].rename(columns={'WTeamID': 'TeamID'}),
                   data[['LTeamID', 'Season', 'League']].rename(columns={'LTeamID': 'TeamID'})]).drop_duplicates().reset_index()

teamMapping = {(x.TeamID, x.Season): x.Index for x in teams.itertuples()}

And get the distinct programs

In [6]:
programs = teams.TeamID.drop_duplicates().reset_index()

programMapping = {x.TeamID: x.Index for x in programs.itertuples()}

Define the training data. The x's will be the indexes of two team IDs and program IDs, the y's will be the score difference.

In [7]:
data

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF,League
0,2003,10,1104,68,1328,62,N,0,27,58,...,16,22,10,22,8,18,9,2,20,M
1,2003,10,1272,70,1393,63,N,0,26,62,...,9,20,20,25,7,12,8,6,16,M
2,2003,11,1266,73,1437,61,N,0,24,58,...,14,23,31,22,9,12,2,5,23,M
3,2003,11,1296,56,1457,50,N,0,18,38,...,8,15,17,20,9,19,4,3,23,M
4,2003,11,1400,77,1208,71,N,0,30,61,...,17,27,21,15,12,10,7,1,14,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80621,2025,106,3242,63,3416,58,H,0,17,46,...,5,11,15,21,11,12,4,2,21,W
80622,2025,106,3329,68,3428,64,A,0,23,63,...,9,16,8,18,16,11,8,6,20,W
80623,2025,106,3349,72,3194,39,H,0,30,63,...,16,23,4,25,5,10,4,3,9,W
80624,2025,106,3378,70,3150,52,A,0,25,59,...,11,14,7,24,5,15,8,2,15,W


In [8]:
def gen_dataset(data):
    winning_team = data.apply(lambda x: teamMapping[(x.WTeamID, x.Season)], axis=1)
    losing_team = data.apply(lambda x: teamMapping[(x.LTeamID, x.Season)], axis=1)
    winning_program = data.apply(lambda x: programMapping[x.WTeamID], axis=1)
    losing_program = data.apply(lambda x: programMapping[x.LTeamID], axis=1)
    winning_matchups = np.stack([winning_program, winning_team,
                                 losing_program, losing_team,
                                 data.Season, data.DayNum, data.League == 'M'], axis=1)
    losing_matchups = np.stack([losing_program, losing_team,
                                winning_program, winning_team, 
                                data.Season, data.DayNum, data.League == 'M'], axis=1)
    x_tensor = torch.from_numpy(np.concatenate([winning_matchups, losing_matchups])).double()
    y_tensor = torch.from_numpy(np.concatenate([(data.WScore-data.LScore), (data.LScore-data.WScore)]).reshape((-1,1))).double()
    return TensorDataset(x_tensor, y_tensor)

In [9]:
fname = 'dataset.pt'
if os.path.isfile(fname):
    dataset=torch.load(fname, weights_only=False)
else:
    dataset = gen_dataset(data)
    torch.save(dataset, fname)

Generate the train/validation split

In [10]:
batch_size=500

generator = torch.Generator().manual_seed(20250217)
train_data, validation_data = torch.utils.data.random_split(dataset, [0.95, 0.05], generator=generator)
train_loader = DataLoader(train_data, batch_size=batch_size)
validation_loader = DataLoader(validation_data, batch_size=batch_size)

## The Model
Define the model. Combine the embeddings for the two teams, go to a hidden layer, and then output to a prediction if the first team won

In [11]:
class Model(nn.Module):
    def __init__(self, embedding_sizes, model_sizes, dropout):
        super(Model, self).__init__()
        p_embedding_size, t_embedding_size = embedding_sizes
        hid1, hid2 = model_sizes
        self.team_embedding = nn.Embedding(len(teams), p_embedding_size)
        self.program_embedding = nn.Embedding(len(programs), t_embedding_size)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)
        self.fc1 = nn.Linear(2*p_embedding_size+2*t_embedding_size+3, hid1)
        self.fc2 = nn.Linear(hid1, hid2)
        self.score_fc = nn.Linear(hid2, 1)
        self.result_fc = nn.Linear(hid2, 1)
        self.double()

    def forward(self, x):
        program = self.program_embedding(x[:,0].int())
        team = self.team_embedding(x[:,1].int())
        opponent_program = self.program_embedding(x[:,2].int())
        opponent = self.team_embedding(x[:,3].int())
        matchup = self.dropout1(torch.cat([program, team, opponent_program, opponent, x[:,4:]], axis=1))
        hidden1 = self.dropout2(F.relu(self.fc1(matchup)))
        hidden2 = self.dropout3(F.relu(self.fc2(hidden1)))
        score = self.score_fc(hidden2)
        result = F.sigmoid(self.result_fc(hidden2))
        return score, result
        

In [12]:
model = Model(embedding_sizes=[64, 512], model_sizes=(32,32), dropout=0.1).to(device)

## Training the model

Define the training function

In [13]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)

def train(data, model, loss_fn, optimizer, full_loss=True):
    size = len(data.dataset)
    model.train()
    for batch, (x, y) in enumerate(data):
        x = x.to(device)
        y = y.to(device)
        pred_score, pred_result = model(x)
        actual_result = (y > 0).double()
        score_loss = loss_fn(pred_score, y)
        result_loss = loss_fn(pred_result, actual_result)
        if full_loss:
            (score_loss + 10 * result_loss).backward()
        else:
            result_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            score_loss, result_loss, current = score_loss.item(), result_loss.item(), (batch + 1) * len(x)
            print(f"result loss: {result_loss:>7f} [{current:>6d}/{size:>6d}]", end="\r")

Define the testing function

In [14]:
def test(data, model, loss_fn, label="Test"):
    size = len(data.dataset)
    num_batches = len(data)
    model.eval()
    score_loss, result_loss, correct = 0, 0, 0
    with torch.no_grad():
        for x, y in data:
            x = x.to(device)
            y = y.to(device)
            score_pred, result_pred = model(x)
            actual_result = (y > 0).double()
            score_loss += loss_fn(score_pred, y).item()
            result_loss += loss_fn(result_pred, actual_result).item()
            correct += ((result_pred >= 0.5) == (actual_result == 1)).type(torch.float).sum().item()
    score_loss /= num_batches
    result_loss /= num_batches
    correct /= size
    print(f"{label}: Accuracy: {(100*correct):>0.2f}%, Result loss: {result_loss:>8f}")

Train the model

In [15]:
n_epochs = 25
for i in range(n_epochs):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer)
    test(train_loader, model, loss_fn, label="Train")
    test(validation_loader, model, loss_fn, label="Validation")

Epoch 0
Train: Accuracy: 64.56%, Result loss: 0.218214
Validation: Accuracy: 64.56%, Result loss: 0.218866
Epoch 1
Train: Accuracy: 67.74%, Result loss: 0.202124
Validation: Accuracy: 67.41%, Result loss: 0.202969
Epoch 2
Train: Accuracy: 68.83%, Result loss: 0.196994
Validation: Accuracy: 68.32%, Result loss: 0.198381
Epoch 3
Train: Accuracy: 70.11%, Result loss: 0.190910
Validation: Accuracy: 69.35%, Result loss: 0.193578
Epoch 4
Train: Accuracy: 71.17%, Result loss: 0.185372
Validation: Accuracy: 70.25%, Result loss: 0.189112
Epoch 5
Train: Accuracy: 72.26%, Result loss: 0.180383
Validation: Accuracy: 71.19%, Result loss: 0.185210
Epoch 6
Train: Accuracy: 73.02%, Result loss: 0.176026
Validation: Accuracy: 72.01%, Result loss: 0.181760
Epoch 7
Train: Accuracy: 73.63%, Result loss: 0.172762
Validation: Accuracy: 72.60%, Result loss: 0.178976
Epoch 8
Train: Accuracy: 74.30%, Result loss: 0.169587
Validation: Accuracy: 73.05%, Result loss: 0.176604
Epoch 9
Train: Accuracy: 74.70%, Resu

Fine tune with only the result

In [16]:
for i in range(25):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer, full_loss=False)
    test(train_loader, model, loss_fn, label="Train")
    test(validation_loader, model, loss_fn, label="Validation")

Epoch 0
Train: Accuracy: 76.53%, Result loss: 0.156887
Validation: Accuracy: 75.21%, Result loss: 0.165727
Epoch 1
Train: Accuracy: 76.57%, Result loss: 0.156772
Validation: Accuracy: 75.27%, Result loss: 0.165683
Epoch 2
Train: Accuracy: 76.59%, Result loss: 0.156666
Validation: Accuracy: 75.30%, Result loss: 0.165652
Epoch 3
Train: Accuracy: 76.62%, Result loss: 0.156521
Validation: Accuracy: 75.27%, Result loss: 0.165645
Epoch 4
Train: Accuracy: 76.65%, Result loss: 0.156434
Validation: Accuracy: 75.22%, Result loss: 0.165610
Epoch 5
Train: Accuracy: 76.69%, Result loss: 0.156285
Validation: Accuracy: 75.20%, Result loss: 0.165622
Epoch 6
Train: Accuracy: 76.71%, Result loss: 0.156135
Validation: Accuracy: 75.18%, Result loss: 0.165621
Epoch 7
Train: Accuracy: 76.74%, Result loss: 0.155954
Validation: Accuracy: 75.19%, Result loss: 0.165641
Epoch 8
Train: Accuracy: 76.76%, Result loss: 0.155879
Validation: Accuracy: 75.13%, Result loss: 0.165709
Epoch 9
Train: Accuracy: 76.76%, Resu

With this model we can predict the output of about three quarters of regular season games.

## Load the tourney data to test with

In [17]:
mens_tourney = pd.read_csv('data/MNCAATourneyDetailedResults.csv')
mens_tourney['League'] = 'M'
womens_tourney = pd.read_csv('data/WNCAATourneyDetailedResults.csv')
womens_tourney['League'] = 'W'
tourney = pd.concat([mens_tourney, womens_tourney])

tourney_dataset = gen_dataset(tourney)
tourney_loader = DataLoader(tourney_dataset, batch_size=batch_size, shuffle=True)

In [18]:
test(tourney_loader, model, loss_fn, label="Tourney")

Tourney: Accuracy: 73.33%, Result loss: 0.173327


When it comes to tournament results we get a little worse. The lower result is likely due to teams having increased pairity.

Train with early torney data

In [19]:
tourney_pre_2021 = gen_dataset(tourney[tourney.Season < 2021])
tourney_train_data, tourney_validation_data = torch.utils.data.random_split(tourney_pre_2021, [0.5, 0.5], generator=generator)
tourney_train_loader = DataLoader(tourney_train_data, batch_size=batch_size)
tourney_validation_loader = DataLoader(tourney_validation_data, batch_size=batch_size)

In [20]:
for param in model.team_embedding.parameters():
    param.requires_grad=False
for param in model.program_embedding.parameters():
    param.requires_grad=False

In [21]:
for i in range(8):
    print(f"Epoch {i}")
    train(tourney_train_loader, model, loss_fn, optimizer, full_loss=False)
    test(tourney_train_loader, model, loss_fn, label="Train")
    test(tourney_validation_loader, model, loss_fn, label="Validation")

Epoch 0
Train: Accuracy: 74.04%, Result loss: 0.162338
Validation: Accuracy: 72.72%, Result loss: 0.173234
Epoch 1
Train: Accuracy: 74.38%, Result loss: 0.160778
Validation: Accuracy: 73.01%, Result loss: 0.173577
Epoch 2
Train: Accuracy: 74.90%, Result loss: 0.160015
Validation: Accuracy: 72.44%, Result loss: 0.174314
Epoch 3
Train: Accuracy: 75.36%, Result loss: 0.158639
Validation: Accuracy: 72.55%, Result loss: 0.173054
Epoch 4
Train: Accuracy: 75.64%, Result loss: 0.157272
Validation: Accuracy: 72.44%, Result loss: 0.172674
Epoch 5
Train: Accuracy: 75.76%, Result loss: 0.155690
Validation: Accuracy: 72.09%, Result loss: 0.173612
Epoch 6
Train: Accuracy: 76.10%, Result loss: 0.154674
Validation: Accuracy: 72.32%, Result loss: 0.173951
Epoch 7
Train: Accuracy: 76.16%, Result loss: 0.153974
Validation: Accuracy: 72.78%, Result loss: 0.172752


### Performance by year


In [22]:
for season in tourney.Season.unique():
    loader = DataLoader(gen_dataset(tourney[tourney.Season == season]), batch_size=batch_size)
    test(loader, model, loss_fn, label=f"{season} Tournament")

2003 Tournament: Accuracy: 70.31%, Result loss: 0.178766
2004 Tournament: Accuracy: 65.62%, Result loss: 0.178215
2005 Tournament: Accuracy: 76.56%, Result loss: 0.163526
2006 Tournament: Accuracy: 67.97%, Result loss: 0.203621
2007 Tournament: Accuracy: 80.47%, Result loss: 0.147106
2008 Tournament: Accuracy: 79.69%, Result loss: 0.160090
2009 Tournament: Accuracy: 75.78%, Result loss: 0.155783
2010 Tournament: Accuracy: 72.83%, Result loss: 0.167023
2011 Tournament: Accuracy: 71.54%, Result loss: 0.171952
2012 Tournament: Accuracy: 78.85%, Result loss: 0.147145
2013 Tournament: Accuracy: 72.31%, Result loss: 0.174178
2014 Tournament: Accuracy: 71.92%, Result loss: 0.162739
2015 Tournament: Accuracy: 81.15%, Result loss: 0.142953
2016 Tournament: Accuracy: 71.15%, Result loss: 0.180340
2017 Tournament: Accuracy: 78.08%, Result loss: 0.156623
2018 Tournament: Accuracy: 73.85%, Result loss: 0.167520
2019 Tournament: Accuracy: 75.38%, Result loss: 0.152299
2021 Tournament: Accuracy: 69.7

In [23]:
stage1_loader = DataLoader(gen_dataset(tourney[tourney.Season >= 2021]), batch_size=batch_size)
test(stage1_loader, model, loss_fn, label=f"Stage 1")

Stage 1: Accuracy: 72.79%, Result loss: 0.157121


Breaking out by league

In [24]:
for season in tourney.Season.unique():
    for league in tourney[tourney.Season == season].League.unique():
        loader = DataLoader(gen_dataset(tourney[(tourney.Season == season) & (tourney.League == league)]),
                            batch_size=batch_size)
        test(loader, model, loss_fn, label=f"{season} {league} Tournament")

2003 M Tournament: Accuracy: 70.31%, Result loss: 0.178766
2004 M Tournament: Accuracy: 65.62%, Result loss: 0.178215
2005 M Tournament: Accuracy: 76.56%, Result loss: 0.163526
2006 M Tournament: Accuracy: 67.97%, Result loss: 0.203621
2007 M Tournament: Accuracy: 80.47%, Result loss: 0.147106
2008 M Tournament: Accuracy: 79.69%, Result loss: 0.160090
2009 M Tournament: Accuracy: 75.78%, Result loss: 0.155783
2010 M Tournament: Accuracy: 69.53%, Result loss: 0.192051
2010 W Tournament: Accuracy: 76.19%, Result loss: 0.141597
2011 M Tournament: Accuracy: 62.69%, Result loss: 0.211997
2011 W Tournament: Accuracy: 80.95%, Result loss: 0.129365
2012 M Tournament: Accuracy: 71.64%, Result loss: 0.180990
2012 W Tournament: Accuracy: 86.51%, Result loss: 0.111151
2013 M Tournament: Accuracy: 69.40%, Result loss: 0.190619
2013 W Tournament: Accuracy: 75.40%, Result loss: 0.156693
2014 M Tournament: Accuracy: 63.43%, Result loss: 0.195958
2014 W Tournament: Accuracy: 80.95%, Result loss: 0.1274

## Inspect the model
First what are the sizes of the smallest input and output weights

In [25]:
print(f"Program embedding min: {model.program_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"Team embedding min: {model.team_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"FC min: {model.result_fc.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")

Program embedding min: 2.697557
Team embedding min: 3.537949
FC min: 0.021958


Calculate the average gradient for each input feature

In [26]:
def feature_eval(model, data):
    model.eval()
    team_grads = torch.zeros(model.team_embedding.embedding_dim).to(device)
    program_grads = torch.zeros(model.program_embedding.embedding_dim).to(device)
    stats_grads = torch.zeros(3).to(device)
    size = len(data.dataset)
    for batch, (x, y) in enumerate(data):
        x = x.to(device)
        y = y.to(device)
        x.requires_grad = True
        _, pred_result = model(x)
        team_grads += torch.autograd.grad(model(x)[1].mean(), model.team_embedding.parameters())[0].sum(axis=0)
        program_grads += torch.autograd.grad(model(x)[1].mean(), model.program_embedding.parameters())[0].sum(axis=0)
        stats_grads += torch.autograd.grad(model(x)[1].mean(), x)[0].sum(axis=0)[4:]
    return program_grads/size, team_grads/size, stats_grads

In [27]:
for param in model.team_embedding.parameters():
    param.requires_grad=True
for param in model.program_embedding.parameters():
    param.requires_grad=True

In [28]:
program_weights, team_weights, stats_weights = feature_eval(model, tourney_loader)

In [29]:
program_weights.abs().sum(), team_weights.abs().sum()

(tensor(0.0005, device='cuda:0'), tensor(6.8373e-05, device='cuda:0'))

In [30]:
print(f"Year:\t{stats_weights[0]:>4f}")
print(f"Game:\t{stats_weights[1]:>4f}")
print(f"League:\t{stats_weights[2]:>4f}")

Year:	-0.000047
Game:	0.000179
League:	-0.015162


## Generating the submission file
### Phase 2

Write the results

In [31]:
with open('submission.csv', 'w') as f:
    f.write("ID,Pred\n")
    season=2025
    for league in ('M', 'W'):
        teams_to_test = sorted(teams[(teams.Season==season) & (teams.League==league)].TeamID.values)
        matchups = [(t1, t2) for t1 in teams_to_test for t2 in teams_to_test if t1 < t2]
        matchups_tensor = torch.Tensor(np.array(
            [[programMapping[t1], teamMapping[(t1, season)],
              programMapping[t2], teamMapping[(t2, season)],
              season, 140, league == 'M']
             for (t1, t2) in matchups])).int().to(device)
        _, predictions = model(matchups_tensor)
        for (t1, t2), pred in zip(matchups, predictions):
            f.write(f"{season}_{t1.item()}_{t2.item()},{pred.item()}\n")

## Save the model

In [32]:
torch.save(model.state_dict(), 'model.pth')

## Moderated model

Moderate a model by pushing it towards 0.5

In [33]:
class ModeratedModel:
    def __init__(self, model, weight):
        self.model = model
        self.weight = weight

    def eval(self):
        pass

    def __call__(self, x):
        scores, model_score = self.model(x)
        neutral = torch.Tensor(np.array([0.5]*len(model_score)).reshape((-1,1))).to(device)
        return scores, model_score * self.weight + neutral * (1-self.weight)


In [34]:
moderated = ModeratedModel(model, 0.75)

In [35]:
for season in tourney.Season.unique():
    loader = DataLoader(gen_dataset(tourney[tourney.Season == season]), batch_size=batch_size)
    test(loader, moderated, loss_fn, label=f"{season} Tournament")

2003 Tournament: Accuracy: 70.31%, Result loss: 0.184599
2004 Tournament: Accuracy: 65.62%, Result loss: 0.184035
2005 Tournament: Accuracy: 76.56%, Result loss: 0.171600
2006 Tournament: Accuracy: 67.97%, Result loss: 0.203480
2007 Tournament: Accuracy: 80.47%, Result loss: 0.161862
2008 Tournament: Accuracy: 79.69%, Result loss: 0.169501
2009 Tournament: Accuracy: 75.78%, Result loss: 0.166972
2010 Tournament: Accuracy: 72.83%, Result loss: 0.172833
2011 Tournament: Accuracy: 71.54%, Result loss: 0.175553
2012 Tournament: Accuracy: 78.85%, Result loss: 0.158428
2013 Tournament: Accuracy: 72.31%, Result loss: 0.178191
2014 Tournament: Accuracy: 71.92%, Result loss: 0.169267
2015 Tournament: Accuracy: 81.15%, Result loss: 0.153645
2016 Tournament: Accuracy: 71.15%, Result loss: 0.181665
2017 Tournament: Accuracy: 78.08%, Result loss: 0.162827
2018 Tournament: Accuracy: 73.85%, Result loss: 0.172216
2019 Tournament: Accuracy: 75.38%, Result loss: 0.160176
2021 Tournament: Accuracy: 69.7

## Dig into 2023 results

In [36]:
loader = DataLoader(gen_dataset(mens_tourney[mens_tourney.Season == 2023]))

x, y = loader.dataset.tensors

preds = model(x.to(device))

In [37]:
mteams = pd.read_csv('data/MTeams.csv').set_index('TeamID')
wteams = pd.read_csv('data/WTeams.csv').set_index('TeamID')
allteams = pd.concat([mteams, wteams])

In [38]:
mens_seeds = pd.read_csv('data/MNCAATourneySeeds.csv')
womens_seeds = pd.read_csv('data/WNCAATourneySeeds.csv')
seeds = pd.concat([mens_seeds, womens_seeds]).set_index(['Season', 'TeamID'])

In [39]:
def upset(season, winner, loser):
    winner_seed = seeds.loc[season, winner].Seed
    loser_seed = seeds.loc[season, loser].Seed
    return winner_seed[1:3] > loser_seed[1:3]

In [40]:
t_2023 = pd.DataFrame({'winner_name': [mteams.loc[programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                       'loser_name': [mteams.loc[programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                       'winner': [programs.loc[i].TeamID for i in x[:,0].tolist()],
                       'loser': [programs.loc[i].TeamID for i in x[:,2].tolist()],
                       'actual_score': np.array(y.tolist()).reshape([-1]),
                       'actual': np.array((y>0).tolist()).reshape([-1]),
                       'predicted': np.array(preds[1].tolist()).reshape([-1])}).iloc[:67]

In [41]:
t_2023[t_2023.predicted < 0.5].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual_score,actual,predicted
23,F Dickinson,Purdue,1192,1345,5.0,True,0.007502
15,Princeton,Arizona,1343,1112,4.0,True,0.136717
8,Furman,Virginia,1202,1438,1.0,True,0.165308
37,Arkansas,Kansas,1116,1242,1.0,True,0.24324
58,San Diego St,Alabama,1361,1104,7.0,True,0.258688
50,Michigan St,Marquette,1277,1266,9.0,True,0.291212
39,Princeton,Missouri,1343,1281,15.0,True,0.306715
57,Miami FL,Houston,1274,1222,14.0,True,0.310092
53,FL Atlantic,Tennessee,1194,1397,7.0,True,0.374292
62,Miami FL,Texas,1274,1400,7.0,True,0.399653


The biggest thing in this season were the huge upsets in the first round. Purdue was a number one seed and lost which I only gave a .75% chance to happen. Arizona and Virginia were number 2 seeds and lost which I gave 14% and 17% chances of happening respectively.

In [42]:
t_2023['Upset'] = [upset(2023, winner, loser) for (winner, loser) in zip(t_2023['winner'], t_2023['loser'])]

In [43]:
t_2023[t_2023.Upset].predicted.mean()

np.float64(0.3523974131813644)

On average the upsets had a 35% chance of happening

In [44]:
t_2023[t_2023.Upset & (t_2023.predicted >= 0.5)].sort_values('predicted', ascending=False)

Unnamed: 0,winner_name,loser_name,winner,loser,actual_score,actual,predicted,Upset
6,Auburn,Iowa,1120,1234,8.0,True,0.547021,True
49,Miami FL,Indiana,1274,1231,16.0,True,0.537359,True


I correctly predicted 2 upsets, though all were closely ranked

In [45]:
t_2023[~t_2023.Upset & (t_2023.predicted < 0.5)].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual_score,actual,predicted,Upset
12,Missouri,Utah St,1281,1429,11.0,True,0.416491,False
5,Arkansas,Illinois,1116,1228,10.0,True,0.456539,False
31,Michigan St,USC,1277,1425,10.0,True,0.470366,False
0,Pittsburgh,Mississippi St,1338,1280,1.0,True,0.492005,False


I also incorrectly predicted 4 upsets

Looking at all the tourneys

In [46]:
x, y = tourney_loader.dataset.tensors
preds = model(x.to(device))
tourney_df = pd.DataFrame({'season': x[:,4].tolist(),
                           'winner_name': [allteams.loc[programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                           'loser_name': [allteams.loc[programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                           'winner': [programs.loc[i].TeamID for i in x[:,0].tolist()],
                           'loser': [programs.loc[i].TeamID for i in x[:,2].tolist()],
                           'actual_score': np.array(y.tolist()).reshape([-1]),
                           'actual': np.array((y>0).tolist()).reshape([-1]),
                           'predicted': np.array(preds[1].tolist()).reshape([-1])})
tourney_df = tourney_df[tourney_df.actual]
tourney_df['Upset'] = [upset(season, winner, loser) for (winner, loser, season)
                       in zip(tourney_df['winner'], tourney_df['loser'], tourney_df['season'])]

In [47]:
len(tourney_df[tourney_df.Upset & (tourney_df.predicted >= 0.5) & (tourney_df.season > 2020)].sort_values('predicted', ascending=False))

19

In [48]:
len(tourney_df[~tourney_df.Upset & (tourney_df.predicted < 0.5) & (tourney_df.season > 2020)].sort_values('predicted'))

30

Overall I predicted 19 upsets correctly, and 30 incorrectly

## Predicting by seeds
What if I predict just using the seeds?

In [49]:
seeded_tourney = tourney.join(seeds, on=['Season', 'WTeamID']).join(seeds, on=['Season', 'LTeamID'], rsuffix='L')

In [50]:
tourney['WSeed'] = seeded_tourney.Seed.map(lambda x: int(x[1:3]))
tourney['LSeed'] = seeded_tourney.SeedL.map(lambda x: int(x[1:3]))
tourney['SeedDiff'] = tourney.WSeed - tourney.LSeed

In [51]:
seed_diff_counts = tourney[tourney.Season < 2021].SeedDiff.value_counts()

In [52]:
odds = {0: 0.5}
for diff in range(1, 16):
    if diff in seed_diff_counts:
        lower_wins = seed_diff_counts[diff]
        higher_wins = seed_diff_counts[-diff]
        odds[diff] = higher_wins/(higher_wins + lower_wins)
        odds[-diff] = lower_wins/(higher_wins + lower_wins)
    else:
        odds[diff] = 1
        odds[-diff] = 0
        

In [53]:
tourney[tourney.Season < 2021].SeedDiff.map(lambda x: odds[x]**2).mean()

np.float64(0.17484696016358534)

This results in a test Brier score of about 0.17.

## Hybrid Model
Building a model using the neural net and seeds

In [54]:
class HybridModel(object):
    def __init__(self, models, weights):
        self.models = models
        self.weights = weights

    def eval(self):
        pass

    def __call__(self, x):
        results = torch.zeros(len(x)).reshape([-1,1]).to(device)
        scores = torch.zeros(len(x)).reshape([-1,1]).to(device)
        for model, weight in zip(self.models, self.weights):
            score, result = model(x)
            results += weight * result
            scores += weight * score
        return scores, results

In [55]:
class SeedModel(object):
    def __init__(self, odds, seeds):
        self.odds = odds
        self.seeds = seeds

    def eval(self):
        pass

    def seed(self, season, team):
        if (season, team) in self.seeds.index:
            return int(self.seeds.loc[season, team].Seed[1:3])
        else:
            return -1

    def win_odds(self, team1, team2):
        if team1 == -1:
            if team2 == -1:
                return 0.5
            return 0
        if team2 == -1:
            return 1
        return self.odds[team1-team2]
        
    
    def __call__(self, x):
        team_1 = programs.loc[x[:,0].int().cpu()].TeamID
        team_2 = programs.loc[x[:,2].int().cpu()].TeamID
        season = x[:,4].int().cpu()
        team_1_seed = [self.seed(s, t) for s,t in np.stack([season, team_1], axis=1)]
        team_2_seed = [self.seed(s, t) for s,t in np.stack([season, team_2], axis=1)]
        scores = torch.zeros(len(x)).to(device).reshape([-1,1]).to(device)
        results = torch.Tensor([self.win_odds(t1, t2) for t2, t1 in
                                zip(team_1_seed, team_2_seed)]).to(device).reshape([-1,1])
        return scores, results
        

In [56]:
seed_model = SeedModel(odds, seeds)
test(stage1_loader, seed_model, loss_fn, label=f"Seeds")

Seeds: Accuracy: 72.03%, Result loss: 0.167004


In [57]:
test(stage1_loader, model, loss_fn, label="NN")

NN: Accuracy: 72.79%, Result loss: 0.157121


In [77]:
hybrid = HybridModel([model, seed_model], [0.9, 0.1])

In [78]:
test(stage1_loader, hybrid, loss_fn, label=f"Hybrid")

Hybrid: Accuracy: 72.88%, Result loss: 0.157071


They hybrid model outperforms both individual models

In [60]:
for season in range(2021, 2025):
    for league in ['M', 'W']:
        loader = DataLoader(gen_dataset(tourney[(tourney.Season == season) & (tourney.League == league)]),
                            batch_size=batch_size)
        test(loader, hybrid, loss_fn, label=f"{season} {league} Tournament")

2021 M Tournament: Accuracy: 64.39%, Result loss: 0.209146
2021 W Tournament: Accuracy: 76.19%, Result loss: 0.154484
2022 M Tournament: Accuracy: 66.42%, Result loss: 0.210576
2022 W Tournament: Accuracy: 79.10%, Result loss: 0.143539
2023 M Tournament: Accuracy: 69.40%, Result loss: 0.210446
2023 W Tournament: Accuracy: 73.88%, Result loss: 0.166499
2024 M Tournament: Accuracy: 69.40%, Result loss: 0.194093
2024 W Tournament: Accuracy: 85.82%, Result loss: 0.118828


## Generate a bracket

In [61]:
mens_slots = pd.read_csv('data/MNCAATourneySlots.csv').set_index(['Season', 'Slot'])
womens_slots = pd.read_csv('data/WNCAATourneySlots.csv').set_index(['Season', 'Slot'])
mens_tourney_seeds = mens_seeds.set_index(['Season', 'Seed'])
womens_tourney_seeds = womens_seeds.set_index(['Season', 'Seed'])

In [62]:
mens_schedule = mens_slots.\
    join(mens_tourney_seeds, on=['Season', 'StrongSeed']).\
    join(mens_tourney_seeds, on=['Season', 'WeakSeed'], rsuffix='2')
womens_schedule = mens_slots.\
    join(womens_tourney_seeds, on=['Season', 'StrongSeed']).\
    join(womens_tourney_seeds, on=['Season', 'WeakSeed'], rsuffix='2')

In [63]:
def model_odds(season, league, model):
    teams_to_test = sorted(teams[(teams.Season==season) & (teams.League==league)].TeamID.values)
    matchups = [(t1, t2) for t1 in teams_to_test for t2 in teams_to_test if t1 != t2]
    matchups_tensor = torch.Tensor(np.array(
        [[programMapping[t1], teamMapping[(t1, season)],
          programMapping[t2], teamMapping[(t2, season)],
          season, 140, league == 'M']
         for (t1, t2) in matchups])).int().to(device)
    _, predictions = model(matchups_tensor)
    return {(int(t1), int(t2)): pred.item() for  (t1, t2), pred in zip(matchups, predictions)}

In [64]:
def gen_bracket(schedule, odds):
    schedule = schedule.copy()
    schedule.insert(len(schedule.columns), 'Winner', -1)
    schedule.insert(len(schedule.columns), 'P', -1.0)
    i=0
    while sum(schedule.Winner < 0) and i <= 10:
        i+=1
        games = schedule[(schedule.Winner < 0) & schedule.TeamID.notna() & schedule.TeamID2.notna()][['TeamID', 'TeamID2']]
        for slot, t1, t2 in games.itertuples():
            p = odds[(t1, t2)]
            schedule.loc[slot, 'P'] = p
            winner = t1 if p > 0.5 else t2
            schedule.loc[slot, 'Winner'] = winner
            schedule.loc[schedule.StrongSeed == slot, 'TeamID'] = winner
            schedule.loc[schedule.WeakSeed == slot, 'TeamID2'] = winner
    return schedule

In [65]:
m_odds = model_odds(2024, 'M', hybrid)

In [66]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    print(gen_bracket(mens_schedule.loc[2024,:], m_odds).join(allteams, on='Winner')[['Winner', 'TeamName']])

      Winner        TeamName
Slot                        
R1W1    1163     Connecticut
R1W2    1235         Iowa St
R1W3    1228        Illinois
R1W4    1120          Auburn
R1W5    1361    San Diego St
R1W6    1140             BYU
R1W7    1450   Washington St
R1W8    1194     FL Atlantic
R1X1    1314  North Carolina
R1X2    1112         Arizona
R1X3    1124          Baylor
R1X4    1104         Alabama
R1X5    1388    St Mary's CA
R1X6    1307      New Mexico
R1X7    1173          Dayton
R1X8    1277     Michigan St
R1Y1    1345          Purdue
R1Y2    1397       Tennessee
R1Y3    1166       Creighton
R1Y4    1242          Kansas
R1Y5    1211         Gonzaga
R1Y6    1332          Oregon
R1Y7    1400           Texas
R1Y8    1429         Utah St
R1Z1    1222         Houston
R1Z2    1266       Marquette
R1Z3    1246        Kentucky
R1Z4    1181            Duke
R1Z5    1458       Wisconsin
R1Z6    1403      Texas Tech
R1Z7    1196         Florida
R1Z8    1304        Nebraska
R2W1    1163  