# March Madness 2025

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import os

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")
torch.manual_seed(20250222)

Using cuda device


<torch._C.Generator at 0x71df71d3d530>

## Hypothesis
Each team can be modeled by x hidden features. In each game, these hidden features interact in a nonlinear fashion to determine the outcome of the game

## Preparing the data
Load the data

In [2]:
mens = pd.read_csv('data/MRegularSeasonDetailedResults.csv')
mens['League'] = 'M'
mens.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,...,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0
mean,2014.146355,70.294986,1288.25451,75.878936,1283.13883,63.888287,0.068689,26.401824,55.760242,7.347445,...,20.15979,12.073403,17.732454,10.46174,21.62565,11.409722,13.888907,6.004739,2.868185,19.30578
std,6.515929,35.772556,105.3475,10.998547,104.795432,10.848767,0.305098,4.680314,7.456374,3.11926,...,6.068136,5.344049,7.081056,4.221039,4.518197,3.724567,4.3827,2.745969,2.01905,4.553353
min,2003.0,0.0,1101.0,34.0,1101.0,20.0,0.0,10.0,26.0,0.0,...,1.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0
25%,2009.0,40.0,1199.0,68.0,1192.0,57.0,0.0,23.0,51.0,5.0,...,16.0,8.0,13.0,7.0,19.0,9.0,11.0,4.0,1.0,16.0
50%,2014.0,73.0,1287.0,75.0,1282.0,64.0,0.0,26.0,55.0,7.0,...,20.0,12.0,17.0,10.0,21.0,11.0,14.0,6.0,3.0,19.0
75%,2020.0,101.0,1381.0,83.0,1374.0,71.0,0.0,29.0,60.0,9.0,...,24.0,15.0,22.0,13.0,25.0,14.0,17.0,8.0,4.0,22.0
max,2025.0,132.0,1480.0,149.0,1480.0,144.0,6.0,57.0,103.0,26.0,...,59.0,48.0,65.0,36.0,49.0,31.0,41.0,22.0,18.0,45.0


In [3]:
womens = pd.read_csv('data/WRegularSeasonDetailedResults.csv')
womens['League'] = 'W'
womens.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,...,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0
mean,2017.404609,69.183626,3285.116823,71.706633,3286.689554,57.242044,0.051708,25.847034,58.966574,6.276077,...,17.918413,10.507392,15.503808,11.36002,22.422122,10.935852,17.150745,7.109977,2.820839,18.19299
std,4.582659,36.157922,104.073477,11.536993,105.505327,10.960867,0.259072,4.978157,7.969144,3.127369,...,6.456006,4.936838,6.630184,4.640191,4.936106,3.805935,5.27718,3.1923,2.062848,4.556919
min,2010.0,0.0,3101.0,30.0,3101.0,11.0,0.0,9.0,30.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,3.0
25%,2013.0,36.0,3196.0,64.0,3195.0,50.0,0.0,22.0,53.0,4.0,...,13.0,7.0,11.0,8.0,19.0,8.0,13.0,5.0,1.0,15.0
50%,2017.0,73.0,3283.0,71.0,3287.0,57.0,0.0,25.0,59.0,6.0,...,17.0,10.0,15.0,11.0,22.0,11.0,17.0,7.0,2.0,18.0
75%,2022.0,101.0,3376.0,79.0,3377.0,64.0,0.0,29.0,64.0,8.0,...,22.0,14.0,20.0,14.0,26.0,13.0,20.0,9.0,4.0,21.0
max,2025.0,132.0,3480.0,140.0,3480.0,130.0,5.0,58.0,113.0,30.0,...,80.0,37.0,52.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


The IDs are definitely distinct so we can combine into a single dataframe

In [4]:
data = pd.concat([mens, womens])

Get the distinct team/Years

In [5]:
teams = pd.concat([data[['WTeamID', 'Season', 'League']].rename(columns={'WTeamID': 'TeamID'}),
                   data[['LTeamID', 'Season', 'League']].rename(columns={'LTeamID': 'TeamID'})]).drop_duplicates().reset_index()

teamMapping = {(x.TeamID, x.Season): x.Index for x in teams.itertuples()}

And get the distinct programs

In [6]:
programs = teams.TeamID.drop_duplicates().reset_index()

programMapping = {x.TeamID: x.Index for x in programs.itertuples()}

Define the training data. The x's will be the indexes of two team IDs and their average stats, the y's will be the score difference.

In [7]:
winningStats = data[['WTeamID', 'Season', 'WScore', 'WFGM', 'WFGA', 'WFGM3', 'WFGA3', 
                    'WFTM', 'WFTA', 'WOR', 'WDR', 'WAst', 'WTO', 'WStl', 'WBlk', 'WPF']]
winningStats.columns=['TeamID', 'Season', 'Score', 'FGM', 'FGA', 'FGM3', 'FGA3', 
                      'FTM', 'FTA', 'OR', 'DR', 'Ast', 'TO', 'Stl', 'Blk', 'PF']
losingStats = data[['LTeamID', 'Season', 'LScore', 'LFGM', 'LFGA', 'LFGM3', 'LFGA3', 
                             'LFTM', 'LFTA', 'LOR', 'LDR', 'LAst', 'LTO', 'LStl', 'LBlk', 'LPF']]
losingStats.columns=['TeamID', 'Season', 'Score', 'FGM', 'FGA', 'FGM3', 'FGA3', 
                     'FTM', 'FTA', 'OR', 'DR', 'Ast', 'TO', 'Stl', 'Blk', 'PF']
teamStats = pd.concat([winningStats, losingStats]).groupby(['TeamID', 'Season']).mean()

In [8]:
data

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF,League
0,2003,10,1104,68,1328,62,N,0,27,58,...,16,22,10,22,8,18,9,2,20,M
1,2003,10,1272,70,1393,63,N,0,26,62,...,9,20,20,25,7,12,8,6,16,M
2,2003,11,1266,73,1437,61,N,0,24,58,...,14,23,31,22,9,12,2,5,23,M
3,2003,11,1296,56,1457,50,N,0,18,38,...,8,15,17,20,9,19,4,3,23,M
4,2003,11,1400,77,1208,71,N,0,30,61,...,17,27,21,15,12,10,7,1,14,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80621,2025,106,3242,63,3416,58,H,0,17,46,...,5,11,15,21,11,12,4,2,21,W
80622,2025,106,3329,68,3428,64,A,0,23,63,...,9,16,8,18,16,11,8,6,20,W
80623,2025,106,3349,72,3194,39,H,0,30,63,...,16,23,4,25,5,10,4,3,9,W
80624,2025,106,3378,70,3150,52,A,0,25,59,...,11,14,7,24,5,15,8,2,15,W


In [9]:
def gen_dataset(data):
    winning_team = data.apply(lambda x: teamMapping[(x.WTeamID, x.Season)], axis=1)
    losing_team = data.apply(lambda x: teamMapping[(x.LTeamID, x.Season)], axis=1)
    winning_program = data.apply(lambda x: programMapping[x.WTeamID], axis=1)
    losing_program = data.apply(lambda x: programMapping[x.LTeamID], axis=1)
    winner_stats = data.apply(lambda x: teamStats.loc[(x.WTeamID, x.Season)], axis=1)
    loser_stats = data.apply(lambda x: teamStats.loc[(x.LTeamID, x.Season)], axis=1)
    winning_matchups = np.concatenate([np.stack([winning_program, winning_team,
                                                 losing_program, losing_team,
                                                 data.Season, data.DayNum, data.League == 'M'], axis=1),
                                       winner_stats, loser_stats], axis=1)
    losing_matchups = np.concatenate([np.stack([losing_program, losing_team,
                                                winning_program, winning_team,
                                                data.Season, data.DayNum, data.League == 'M'], axis=1),
                                      loser_stats, winner_stats], axis=1)
    x_tensor = torch.from_numpy(np.concatenate([winning_matchups, losing_matchups])).double()
    y_tensor = torch.from_numpy(np.concatenate([(data.WScore-data.LScore), (data.LScore-data.WScore)]).reshape((-1,1))).double()
    return TensorDataset(x_tensor, y_tensor)

In [10]:
fname = 'dataset.pt'
if os.path.isfile(fname):
    dataset=torch.load(fname, weights_only=False)
else:
    dataset = gen_dataset(data)
    torch.save(dataset, fname)

Generate the train/validation split

In [11]:
batch_size=200

generator = torch.Generator().manual_seed(20250217)
train_data, validation_data = torch.utils.data.random_split(dataset, [0.95, 0.05], generator=generator)
train_loader = DataLoader(train_data, batch_size=batch_size)
validation_loader = DataLoader(validation_data, batch_size=batch_size)

## The Model
Define the model. Combine the embeddings for the two teams, go to a hidden layer, and then output to a prediction if the first team won

In [12]:
class Model(nn.Module):
    def __init__(self, embedding_sizes, model_sizes, dropout, stats_size=14):
        super(Model, self).__init__()
        p_embedding_size, t_embedding_size = embedding_sizes
        hid1, hid2 = model_sizes
        self.team_embedding = nn.Embedding(len(teams), p_embedding_size)
        self.program_embedding = nn.Embedding(len(programs), t_embedding_size)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)
        self.fc1 = nn.Linear(2*p_embedding_size+2*t_embedding_size+2*stats_size+3, hid1)
        self.fc2 = nn.Linear(hid1, hid2)
        self.score_fc = nn.Linear(hid2, 1)
        self.result_fc = nn.Linear(hid2, 1)
        self.double()

    def forward(self, x):
        program = self.program_embedding(x[:,0].int())
        team = self.team_embedding(x[:,1].int())
        opponent_program = self.program_embedding(x[:,2].int())
        opponent = self.team_embedding(x[:,3].int())
        matchup = self.dropout1(torch.cat([program, team, opponent_program, opponent, x[:,4:]], axis=1))
        hidden1 = self.dropout2(F.relu(self.fc1(matchup)))
        hidden2 = self.dropout3(F.relu(self.fc2(hidden1)))
        score = self.score_fc(hidden2)
        result = F.sigmoid(self.result_fc(hidden2))
        return score, result
        

In [13]:
model = Model(embedding_sizes=[96, 512], model_sizes=(256,128), dropout=0.25).to(device)

## Training the model

Define the training function

In [14]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)

def train(data, model, loss_fn, optimizer, full_loss=True):
    size = len(data.dataset)
    model.train()
    for batch, (x, y) in enumerate(data):
        x = x.to(device)
        y = y.to(device)
        pred_score, pred_result = model(x)
        actual_result = (y > 0).double()
        score_loss = loss_fn(pred_score, y)
        result_loss = loss_fn(pred_result, actual_result)
        if full_loss:
            (score_loss + 10 * result_loss).backward()
        else:
            result_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            score_loss, result_loss, current = score_loss.item(), result_loss.item(), (batch + 1) * len(x)
            print(f"result loss: {result_loss:>7f} [{current:>6d}/{size:>6d}]", end="\r")

Define the testing function

In [15]:
def test(data, model, loss_fn, label="Test"):
    size = len(data.dataset)
    num_batches = len(data)
    model.eval()
    score_loss, result_loss, correct = 0, 0, 0
    with torch.no_grad():
        for x, y in data:
            x = x.to(device)
            y = y.to(device)
            score_pred, result_pred = model(x)
            actual_result = (y > 0).double()
            score_loss += loss_fn(score_pred, y).item()
            result_loss += loss_fn(result_pred, actual_result).item()
            correct += ((result_pred >= 0.5) == (actual_result == 1)).type(torch.float).sum().item()
    score_loss /= num_batches
    result_loss /= num_batches
    correct /= size
    print(f"{label}: Accuracy: {(100*correct):>0.2f}%, Result loss: {result_loss:>8f}")

Train the model

In [16]:
n_epochs = 24
for i in range(n_epochs):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer)
    test(train_loader, model, loss_fn, label="Train")
    test(validation_loader, model, loss_fn, label="Validation")

Epoch 0
Train: Accuracy: 68.20%, Result loss: 0.201606
Validation: Accuracy: 68.37%, Result loss: 0.202036
Epoch 1
Train: Accuracy: 70.01%, Result loss: 0.193072
Validation: Accuracy: 70.07%, Result loss: 0.193934
Epoch 2
Train: Accuracy: 70.60%, Result loss: 0.189818
Validation: Accuracy: 70.35%, Result loss: 0.191350
Epoch 3
Train: Accuracy: 72.33%, Result loss: 0.181714
Validation: Accuracy: 71.55%, Result loss: 0.184688
Epoch 4
Train: Accuracy: 73.51%, Result loss: 0.175389
Validation: Accuracy: 72.74%, Result loss: 0.179680
Epoch 5
Train: Accuracy: 74.17%, Result loss: 0.171133
Validation: Accuracy: 73.30%, Result loss: 0.176475
Epoch 6
Train: Accuracy: 74.71%, Result loss: 0.168548
Validation: Accuracy: 73.70%, Result loss: 0.174207
Epoch 7
Train: Accuracy: 75.13%, Result loss: 0.166341
Validation: Accuracy: 73.95%, Result loss: 0.172615
Epoch 8
Train: Accuracy: 75.44%, Result loss: 0.164327
Validation: Accuracy: 74.07%, Result loss: 0.170951
Epoch 9
Train: Accuracy: 75.63%, Resu

Fine tune with only the result

In [17]:
for i in range(12):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer, full_loss=False)
    test(train_loader, model, loss_fn, label="Train")
    test(validation_loader, model, loss_fn, label="Validation")

Epoch 0
Train: Accuracy: 76.56%, Result loss: 0.157379
Validation: Accuracy: 75.14%, Result loss: 0.165382
Epoch 1
Train: Accuracy: 76.53%, Result loss: 0.157060
Validation: Accuracy: 75.06%, Result loss: 0.165345
Epoch 2
Train: Accuracy: 76.55%, Result loss: 0.156856
Validation: Accuracy: 75.11%, Result loss: 0.165278
Epoch 3
Train: Accuracy: 76.61%, Result loss: 0.156858
Validation: Accuracy: 75.01%, Result loss: 0.165393
Epoch 4
Train: Accuracy: 76.64%, Result loss: 0.156417
Validation: Accuracy: 74.98%, Result loss: 0.165491
Epoch 5
Train: Accuracy: 76.69%, Result loss: 0.156204
Validation: Accuracy: 75.12%, Result loss: 0.165115
Epoch 6
Train: Accuracy: 76.90%, Result loss: 0.155244
Validation: Accuracy: 75.18%, Result loss: 0.164754
Epoch 7
Train: Accuracy: 77.08%, Result loss: 0.154366
Validation: Accuracy: 75.34%, Result loss: 0.164566
Epoch 8
Train: Accuracy: 77.22%, Result loss: 0.153076
Validation: Accuracy: 75.17%, Result loss: 0.164490
Epoch 9
Train: Accuracy: 77.47%, Resu

With this model we can predict the output of about three quarters of regular season games.

## Load the tourney data to test with

In [18]:
mens_tourney = pd.read_csv('data/MNCAATourneyDetailedResults.csv')
mens_tourney['League'] = 'M'
womens_tourney = pd.read_csv('data/WNCAATourneyDetailedResults.csv')
womens_tourney['League'] = 'W'
tourney = pd.concat([mens_tourney, womens_tourney])

tourney_dataset = gen_dataset(tourney)
tourney_loader = DataLoader(tourney_dataset, batch_size=batch_size)

In [19]:
test(tourney_loader, model, loss_fn, label="Tourney")

Tourney: Accuracy: 73.48%, Result loss: 0.169338


When it comes to tournament results we get a little worse. The lower result is likely due to teams having increased pairity.

### Performance by year


In [20]:
for season in tourney.Season.unique():
    loader = DataLoader(gen_dataset(tourney[tourney.Season == season]), batch_size=batch_size)
    test(loader, model, loss_fn, label=f"{season} Tournament")

2003 Tournament: Accuracy: 66.41%, Result loss: 0.182130
2004 Tournament: Accuracy: 67.97%, Result loss: 0.188122
2005 Tournament: Accuracy: 74.22%, Result loss: 0.172247
2006 Tournament: Accuracy: 67.97%, Result loss: 0.212989
2007 Tournament: Accuracy: 78.12%, Result loss: 0.149186
2008 Tournament: Accuracy: 77.34%, Result loss: 0.165814
2009 Tournament: Accuracy: 73.44%, Result loss: 0.162760
2010 Tournament: Accuracy: 72.44%, Result loss: 0.165702
2011 Tournament: Accuracy: 73.46%, Result loss: 0.161497
2012 Tournament: Accuracy: 78.85%, Result loss: 0.140557
2013 Tournament: Accuracy: 72.69%, Result loss: 0.171662
2014 Tournament: Accuracy: 70.38%, Result loss: 0.152615
2015 Tournament: Accuracy: 80.38%, Result loss: 0.138881
2016 Tournament: Accuracy: 74.23%, Result loss: 0.167111
2017 Tournament: Accuracy: 75.77%, Result loss: 0.155323
2018 Tournament: Accuracy: 71.15%, Result loss: 0.166526
2019 Tournament: Accuracy: 74.23%, Result loss: 0.136441
2021 Tournament: Accuracy: 69.3

In [21]:
stage1_loader = DataLoader(gen_dataset(tourney[tourney.Season >= 2021]), batch_size=batch_size)
test(stage1_loader, model, loss_fn, label=f"Stage 1")

Stage 1: Accuracy: 72.41%, Result loss: 0.170773


## Inspect the model
First what are the sizes of the smallest input and output weights

In [22]:
print(f"Program embedding min: {model.program_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"Team embedding min: {model.team_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"FC min: {model.result_fc.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")

Program embedding min: 2.548204
Team embedding min: 3.588614
FC min: 0.000567


Calculate the average gradient for each input feature

In [23]:
def feature_eval(model, data):
    model.eval()
    team_grads = torch.zeros(model.team_embedding.embedding_dim).to(device)
    program_grads = torch.zeros(model.program_embedding.embedding_dim).to(device)
    stats_grads = torch.zeros(31).to(device)
    size = len(data.dataset)
    for batch, (x, y) in enumerate(data):
        x = x.to(device)
        y = y.to(device)
        x.requires_grad = True
        _, pred_result = model(x)
        team_grads += torch.autograd.grad(model(x)[1].mean(), model.team_embedding.parameters())[0].sum(axis=0)
        program_grads += torch.autograd.grad(model(x)[1].mean(), model.program_embedding.parameters())[0].sum(axis=0)
        stats_grads += torch.autograd.grad(model(x)[1].mean(), x)[0].sum(axis=0)[4:]
    return program_grads/size, team_grads/size, stats_grads

In [24]:
program_weights, team_weights, stats_weights = feature_eval(model, tourney_loader)

In [25]:
program_weights.abs().sum(), team_weights.abs().sum()

(tensor(0.0008, device='cuda:0'), tensor(0.0002, device='cuda:0'))

In [26]:
print(f"Year:\t{stats_weights[0]:>4f}")
print(f"Game:\t{stats_weights[1]:>4f}")
print(f"League:\t{stats_weights[2]:>4f}")
for stat, weight in zip([f"W{stat}" for stat in teamStats] + [f"L{stat}" for stat in teamStats], stats_weights[3:]):
    print(f"{stat}:\t{weight:>4f}")

Year:	0.000363
Game:	-0.002085
League:	-0.010175
WScore:	0.022754
WFGM:	0.002927
WFGA:	0.000008
WFGM3:	0.007265
WFGA3:	-0.000031
WFTM:	0.009684
WFTA:	0.002843
WOR:	0.007027
WDR:	0.003464
WAst:	0.003714
WTO:	0.001160
WStl:	0.010560
WBlk:	0.028754
WPF:	-0.001406
LScore:	-0.024697
LFGM:	-0.004217
LFGA:	0.000239
LFGM3:	-0.005283
LFGA3:	-0.003114
LFTM:	-0.011699
LFTA:	-0.007395
LOR:	-0.002290
LDR:	-0.002766
LAst:	-0.009205
LTO:	0.003929
LStl:	-0.002678
LBlk:	-0.024199
LPF:	0.003347


## Generating the submission file
### Phase 2

Write the results

In [27]:
with open('submission.csv', 'w') as f:
    f.write("ID,Pred\n")
    season=2025
    for league in ('M', 'W'):
        teams_to_test = sorted(teams[(teams.Season==season) & (teams.League==league)].TeamID.values)
        matchups = [(t1, t2) for t1 in teams_to_test for t2 in teams_to_test if t1 < t2]
        matchups_tensor = torch.Tensor(np.array(
            [np.concat([[programMapping[t1], teamMapping[(t1, season)],
                         programMapping[t2], teamMapping[(t2, season)],
                        season, 140, league == 'M'],
                        teamStats.loc[(t1, season)].values,
                        teamStats.loc[(t2, season)].values])
             for (t1, t2) in matchups])).int().to(device)
        _, predictions = model(matchups_tensor)
        for (t1, t2), pred in zip(matchups, predictions):
            f.write(f"{season}_{t1.item()}_{t2.item()},{pred.item()}\n")

## Save the model

In [28]:
torch.save(model.state_dict(), 'model.pth')

## Moderated model

Moderate a model by pushing it towards 0.5

In [29]:
class ModeratedModel:
    def __init__(self, model, weight):
        self.model = model
        self.weight = weight

    def eval(self):
        pass

    def __call__(self, x):
        scores, model_score = self.model(x)
        neutral = torch.Tensor(np.array([0.5]*len(model_score)).reshape((-1,1))).to(device)
        return scores, model_score * self.weight + neutral * (1-self.weight)


In [30]:
moderated = ModeratedModel(model, 0.75)

In [31]:
for season in tourney.Season.unique():
    loader = DataLoader(gen_dataset(tourney[tourney.Season == season]), batch_size=batch_size)
    test(loader, moderated, loss_fn, label=f"{season} Tournament")

2003 Tournament: Accuracy: 66.41%, Result loss: 0.185746
2004 Tournament: Accuracy: 67.97%, Result loss: 0.190960
2005 Tournament: Accuracy: 74.22%, Result loss: 0.176864
2006 Tournament: Accuracy: 67.97%, Result loss: 0.209425
2007 Tournament: Accuracy: 78.12%, Result loss: 0.161510
2008 Tournament: Accuracy: 77.34%, Result loss: 0.172107
2009 Tournament: Accuracy: 73.44%, Result loss: 0.170900
2010 Tournament: Accuracy: 72.44%, Result loss: 0.167928
2011 Tournament: Accuracy: 73.46%, Result loss: 0.164532
2012 Tournament: Accuracy: 78.85%, Result loss: 0.149979
2013 Tournament: Accuracy: 72.69%, Result loss: 0.172665
2014 Tournament: Accuracy: 70.38%, Result loss: 0.158252
2015 Tournament: Accuracy: 80.38%, Result loss: 0.147871
2016 Tournament: Accuracy: 74.23%, Result loss: 0.168854
2017 Tournament: Accuracy: 75.77%, Result loss: 0.158651
2018 Tournament: Accuracy: 71.15%, Result loss: 0.167886
2019 Tournament: Accuracy: 74.23%, Result loss: 0.144743
2021 Tournament: Accuracy: 69.3

## Dig into 2023 results

In [32]:
loader = DataLoader(gen_dataset(mens_tourney[mens_tourney.Season == 2023]))

x, y = loader.dataset.tensors

preds = model(x.to(device))

In [33]:
mteams = pd.read_csv('data/MTeams.csv').set_index('TeamID')
wteams = pd.read_csv('data/WTeams.csv').set_index('TeamID')
teams = pd.concat([mteams, wteams])

In [34]:
mens_seeds = pd.read_csv('data/MNCAATourneySeeds.csv')
womens_seeds = pd.read_csv('data/WNCAATourneySeeds.csv')
seeds = pd.concat([mens_seeds, womens_seeds]).set_index(['Season', 'TeamID'])


In [35]:
def upset(season, winner, loser):
    winner_seed = seeds.loc[season, winner].Seed
    loser_seed = seeds.loc[season, loser].Seed
    return winner_seed[1:] > loser_seed[1:]

In [36]:
t_2023 = pd.DataFrame({'winner_name': [mteams.loc[programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                       'loser_name': [mteams.loc[programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                       'winner': [programs.loc[i].TeamID for i in x[:,0].tolist()],
                       'loser': [programs.loc[i].TeamID for i in x[:,2].tolist()],
                       'actual_score': np.array(y.tolist()).reshape([-1]),
                       'actual': np.array((y>0).tolist()).reshape([-1]),
                       'predicted': np.array(preds[1].tolist()).reshape([-1])}).iloc[:67]

In [37]:
t_2023[t_2023.predicted < 0.5].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual_score,actual,predicted
23,F Dickinson,Purdue,1192,1345,5.0,True,0.003308
15,Princeton,Arizona,1343,1112,4.0,True,0.062202
8,Furman,Virginia,1202,1438,1.0,True,0.103393
37,Arkansas,Kansas,1116,1242,1.0,True,0.232148
57,Miami FL,Houston,1274,1222,14.0,True,0.247742
39,Princeton,Missouri,1343,1281,15.0,True,0.266488
58,San Diego St,Alabama,1361,1104,7.0,True,0.28623
50,Michigan St,Marquette,1277,1266,9.0,True,0.311096
53,FL Atlantic,Tennessee,1194,1397,7.0,True,0.385828
62,Miami FL,Texas,1274,1400,7.0,True,0.388916


The biggest thing in this season were the huge upsets in the first round. Purdue was a number one seed and lost which I only gave a 0.3% chance to happen. Arizona and Virginia were number 2 seeds and lost which I gave 6% and 10% chances of happening respectively.

In [38]:
t_2023['Upset'] = [upset(2023, winner, loser) for (winner, loser) in zip(t_2023['winner'], t_2023['loser'])]

In [39]:
t_2023[t_2023.Upset].predicted.mean()

np.float64(0.36675658735909156)

On average the upsets had a 37% chance of happening

In [40]:
t_2023[t_2023.Upset & (t_2023.predicted >= 0.5)].sort_values('predicted', ascending=False)

Unnamed: 0,winner_name,loser_name,winner,loser,actual_score,actual,predicted,Upset
1,TAM C. Christi,SE Missouri St,1394,1369,4.0,True,0.607126,True
49,Miami FL,Indiana,1274,1231,16.0,True,0.550779,True
6,Auburn,Iowa,1120,1234,8.0,True,0.529655,True
0,Pittsburgh,Mississippi St,1338,1280,1.0,True,0.526975,True
60,Connecticut,Gonzaga,1163,1211,28.0,True,0.516828,True


I correctly predicted 5 upsets, though all were closely ranked

In [41]:
t_2023[~t_2023.Upset & (t_2023.predicted < 0.5)].sort_values('predicted')

Unnamed: 0,winner_name,loser_name,winner,loser,actual_score,actual,predicted,Upset
12,Missouri,Utah St,1281,1429,11.0,True,0.445188,False
28,Kentucky,Providence,1246,1344,8.0,True,0.483224,False
31,Michigan St,USC,1277,1425,10.0,True,0.496195,False


I also incorrectly predicted 3 upsets

Looking at all the tourneys

In [43]:
x, y = tourney_loader.dataset.tensors
preds = model(x.to(device))
tourney_df = pd.DataFrame({'season': x[:,4].tolist(),
                           'winner_name': [teams.loc[programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                           'loser_name': [teams.loc[programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                           'winner': [programs.loc[i].TeamID for i in x[:,0].tolist()],
                           'loser': [programs.loc[i].TeamID for i in x[:,2].tolist()],
                           'actual_score': np.array(y.tolist()).reshape([-1]),
                           'actual': np.array((y>0).tolist()).reshape([-1]),
                           'predicted': np.array(preds[1].tolist()).reshape([-1])})
tourney_df = tourney_df[tourney_df.actual]
tourney_df['Upset'] = [upset(season, winner, loser) for (winner, loser, season)
                       in zip(tourney_df['winner'], tourney_df['loser'], tourney_df['season'])]

In [44]:
tourney_df[tourney_df.Upset & (tourney_df.predicted >= 0.5)].sort_values('predicted', ascending=False)

Unnamed: 0,season,winner_name,loser_name,winner,loser,actual_score,actual,predicted,Upset
1474,2011.0,West Virginia,Houston,3452,3222,6.0,True,0.803739,True
320,2008.0,Mt St Mary's,Coppin St,1291,1164,9.0,True,0.798633,True
128,2005.0,Oakland,Alabama A&M,1324,1105,10.0,True,0.756990,True
514,2011.0,UT San Antonio,Alabama St,1427,1106,9.0,True,0.724515,True
1701,2015.0,DePaul,Minnesota,3177,3278,7.0,True,0.712287,True
...,...,...,...,...,...,...,...,...,...
354,2008.0,Michigan St,Pittsburgh,1277,1338,11.0,True,0.505595,True
1487,2011.0,Georgetown,Maryland,3207,3268,22.0,True,0.505494,True
1612,2013.0,Delaware,North Carolina,3174,3314,9.0,True,0.505023,True
1659,2014.0,Oregon St,MTSU,3333,3292,19.0,True,0.502832,True


In [45]:
tourney_df[~tourney_df.Upset & (tourney_df.predicted < 0.5)].sort_values('predicted')

Unnamed: 0,season,winner_name,loser_name,winner,loser,actual_score,actual,predicted,Upset
1947,2018.0,Notre Dame,Connecticut,3323,3163,2.0,True,0.110210,False
1988,2019.0,Texas A&M,Marquette,3401,3266,2.0,True,0.178482,False
780,2015.0,Hampton,Manhattan,1214,1264,10.0,True,0.217230,False
1595,2013.0,LSU,WI Green Bay,3261,3453,4.0,True,0.233478,False
2209,2024.0,Presbyterian,Sacred Heart,3342,3357,7.0,True,0.242401,False
...,...,...,...,...,...,...,...,...,...
1279,2023.0,Michigan St,USC,1277,1425,10.0,True,0.496195,False
509,2010.0,Butler,Michigan St,1139,1277,2.0,True,0.496229,False
1631,2013.0,Connecticut,Notre Dame,3163,3323,18.0,True,0.497682,False
4,2003.0,California,NC State,1143,1301,2.0,True,0.497938,False


Overall I predicted 119 upsets correctly, and 79 incorrectly