# March Madness 2025

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import os
from sklearn.model_selection import train_test_split
import random

torch.manual_seed(20250222)
random.seed(20250222)

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


## Hypothesis
Each team can be modeled by x hidden features. In each game, these hidden features interact in a nonlinear fashion to determine the outcome of the game

## Preparing the data
Load the data

In [2]:
# Men's regular-season box scores, tagged with a League column so they can be pooled with the women's games.
mens = pd.read_csv('data/MRegularSeasonDetailedResults.csv').assign(League='M')
mens.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,...,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0,117748.0
mean,2014.146355,70.294986,1288.25451,75.878936,1283.13883,63.888287,0.068689,26.401824,55.760242,7.347445,...,20.15979,12.073403,17.732454,10.46174,21.62565,11.409722,13.888907,6.004739,2.868185,19.30578
std,6.515929,35.772556,105.3475,10.998547,104.795432,10.848767,0.305098,4.680314,7.456374,3.11926,...,6.068136,5.344049,7.081056,4.221039,4.518197,3.724567,4.3827,2.745969,2.01905,4.553353
min,2003.0,0.0,1101.0,34.0,1101.0,20.0,0.0,10.0,26.0,0.0,...,1.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0
25%,2009.0,40.0,1199.0,68.0,1192.0,57.0,0.0,23.0,51.0,5.0,...,16.0,8.0,13.0,7.0,19.0,9.0,11.0,4.0,1.0,16.0
50%,2014.0,73.0,1287.0,75.0,1282.0,64.0,0.0,26.0,55.0,7.0,...,20.0,12.0,17.0,10.0,21.0,11.0,14.0,6.0,3.0,19.0
75%,2020.0,101.0,1381.0,83.0,1374.0,71.0,0.0,29.0,60.0,9.0,...,24.0,15.0,22.0,13.0,25.0,14.0,17.0,8.0,4.0,22.0
max,2025.0,132.0,1480.0,149.0,1480.0,144.0,6.0,57.0,103.0,26.0,...,59.0,48.0,65.0,36.0,49.0,31.0,41.0,22.0,18.0,45.0


In [3]:
# Women's regular-season box scores, tagged with a League column so they can be pooled with the men's games.
womens = pd.read_csv('data/WRegularSeasonDetailedResults.csv').assign(League='W')
womens.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,...,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0,80626.0
mean,2017.404609,69.183626,3285.116823,71.706633,3286.689554,57.242044,0.051708,25.847034,58.966574,6.276077,...,17.918413,10.507392,15.503808,11.36002,22.422122,10.935852,17.150745,7.109977,2.820839,18.19299
std,4.582659,36.157922,104.073477,11.536993,105.505327,10.960867,0.259072,4.978157,7.969144,3.127369,...,6.456006,4.936838,6.630184,4.640191,4.936106,3.805935,5.27718,3.1923,2.062848,4.556919
min,2010.0,0.0,3101.0,30.0,3101.0,11.0,0.0,9.0,30.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,3.0
25%,2013.0,36.0,3196.0,64.0,3195.0,50.0,0.0,22.0,53.0,4.0,...,13.0,7.0,11.0,8.0,19.0,8.0,13.0,5.0,1.0,15.0
50%,2017.0,73.0,3283.0,71.0,3287.0,57.0,0.0,25.0,59.0,6.0,...,17.0,10.0,15.0,11.0,22.0,11.0,17.0,7.0,2.0,18.0
75%,2022.0,101.0,3376.0,79.0,3377.0,64.0,0.0,29.0,64.0,8.0,...,22.0,14.0,20.0,14.0,26.0,13.0,20.0,9.0,4.0,21.0
max,2025.0,132.0,3480.0,140.0,3480.0,130.0,5.0,58.0,113.0,30.0,...,80.0,37.0,52.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


The IDs are definitely distinct so we can combine into a single dataframe

In [4]:
# ignore_index gives the pooled frame unique row labels; without it every label
# from the men's file is repeated by the women's file, which breaks label-based selection.
data = pd.concat([mens, womens], ignore_index=True)

Get the distinct team/Years

In [5]:
# One row per distinct (TeamID, Season, League), collected from both the winner and the loser columns.
teams = (
    pd.concat([
        data[[id_column, 'Season', 'League']].rename(columns={id_column: 'TeamID'})
        for id_column in ('WTeamID', 'LTeamID')
    ])
    .drop_duplicates()
    .reset_index()
)

# (TeamID, Season) -> row position in `teams`; used as the index into the team embedding.
teamMapping = dict(zip(zip(teams.TeamID, teams.Season), teams.index))

And get the distinct programs

In [6]:
# One row per distinct TeamID, regardless of season.
programs = teams.TeamID.drop_duplicates().reset_index()

# TeamID -> row position in `programs`; used as the index into the program embedding.
programMapping = dict(zip(programs.TeamID, programs.index))

Define the training data. The x's will be the indexes of the two teams and of their programs, plus the season, the day number and the league. The y's will be a win/loss flag followed by the box-score stats of both teams.

In [7]:
# Summary statistics of the pooled men's and women's games.
data.describe()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WFGM,WFGA,WFGM3,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
count,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,...,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0,198374.0
mean,2015.470621,69.843291,2099.847868,74.183169,2097.450588,61.187026,0.061787,26.176339,57.063405,6.912005,...,19.248818,11.436922,16.826656,10.826832,21.949363,11.217125,15.21463,6.453946,2.848942,18.853504
std,6.024751,35.933736,986.382716,11.406085,989.676138,11.373007,0.287403,4.811306,7.828931,3.16658,...,6.325219,5.239163,6.987616,4.418293,4.708807,3.765042,5.028571,2.985335,2.037092,4.587468
min,2003.0,0.0,1101.0,30.0,1101.0,11.0,0.0,9.0,26.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0
25%,2011.0,37.0,1260.0,66.0,1253.0,53.0,0.0,23.0,52.0,5.0,...,15.0,8.0,12.0,8.0,19.0,9.0,12.0,4.0,1.0,16.0
50%,2016.0,73.0,1413.0,74.0,1407.0,61.0,0.0,26.0,57.0,7.0,...,19.0,11.0,16.0,10.0,22.0,11.0,15.0,6.0,3.0,19.0
75%,2020.0,101.0,3244.0,81.0,3245.0,69.0,0.0,29.0,62.0,9.0,...,23.0,15.0,21.0,14.0,25.0,14.0,18.0,8.0,4.0,22.0
max,2025.0,132.0,3480.0,149.0,3480.0,144.0,6.0,58.0,113.0,30.0,...,80.0,48.0,65.0,38.0,53.0,34.0,49.0,26.0,21.0,47.0


In [8]:
# 90/10 split of the games; random_state pins the shuffle so the split is the same on every run.
train_df, test_df = train_test_split(data, train_size=0.9, random_state=20250222)

In [9]:
# List the available columns: W*/L* box-score stats for the winner and the loser, plus game metadata.
data.columns

Index(['Season', 'DayNum', 'WTeamID', 'WScore', 'LTeamID', 'LScore', 'WLoc',
       'NumOT', 'WFGM', 'WFGA', 'WFGM3', 'WFGA3', 'WFTM', 'WFTA', 'WOR', 'WDR',
       'WAst', 'WTO', 'WStl', 'WBlk', 'WPF', 'LFGM', 'LFGA', 'LFGM3', 'LFGA3',
       'LFTM', 'LFTA', 'LOR', 'LDR', 'LAst', 'LTO', 'LStl', 'LBlk', 'LPF',
       'League'],
      dtype='object')

In [10]:
# Scratch check: prepend a column of ones to a few stat columns, the same layout
# gen_dataset uses for the winner's target rows.
np.concatenate([np.ones((len(data),1)), data[['WScore', 'LScore', 'WFGM', 'WFGA']]], axis=1)

array([[ 1., 68., 62., 27., 58.],
       [ 1., 70., 63., 26., 62.],
       [ 1., 73., 61., 24., 58.],
       ...,
       [ 1., 72., 39., 30., 63.],
       [ 1., 70., 52., 25., 59.],
       [ 1., 73., 52., 24., 57.]])

In [11]:
# Box-score stat suffixes; gen_dataset prefixes each with 'W' or 'L' to pick the
# winner's or the loser's column.
stats_columns = ['Score', 'FGM', 'FGA', 'FGM3', 'FGA3', 'FTM', 'FTA',
                 'OR', 'DR', 'Ast', 'TO', 'Stl', 'Blk', 'PF']

In [12]:
def gen_dataset(data):
    """Build a TensorDataset holding two rows per game.

    The first ``len(data)`` rows describe each game from the winner's point
    of view, the last ``len(data)`` rows describe the same games from the
    loser's point of view.

    Feature columns: program index, team-season index, opponent program
    index, opponent team-season index, Season, DayNum, and a flag that is 1
    for the men's league.

    Target columns: 1 for a win / 0 for a loss, then the row's own stats,
    then the opponent's stats, each ordered as in ``stats_columns``.

    Both tensors are stored as float64. Raises KeyError if a team or
    (team, season) pair is missing from ``programMapping`` / ``teamMapping``.
    """
    w_stats_columns = [f"W{stat}" for stat in stats_columns]
    l_stats_columns = [f"L{stat}" for stat in stats_columns]
    n = len(data)

    def lookup(mapping, keys):
        # Plain dict lookups over the column values: much cheaper than a
        # row-wise DataFrame.apply, and well-defined for an empty frame.
        return np.fromiter((mapping[key] for key in keys), dtype=np.int64, count=n)

    winning_team = lookup(teamMapping, zip(data.WTeamID, data.Season))
    losing_team = lookup(teamMapping, zip(data.LTeamID, data.Season))
    winning_program = lookup(programMapping, data.WTeamID)
    losing_program = lookup(programMapping, data.LTeamID)

    is_mens = data.League == 'M'
    winning_matchups = np.stack([winning_program, winning_team,
                                 losing_program, losing_team,
                                 data.Season, data.DayNum, is_mens], axis=1)
    losing_matchups = np.stack([losing_program, losing_team,
                                winning_program, winning_team,
                                data.Season, data.DayNum, is_mens], axis=1)

    winner_y = np.concatenate([np.ones((n, 1)), data[w_stats_columns], data[l_stats_columns]], axis=1)
    loser_y = np.concatenate([np.zeros((n, 1)), data[l_stats_columns], data[w_stats_columns]], axis=1)

    x_tensor = torch.from_numpy(np.concatenate([winning_matchups, losing_matchups])).double()
    y_tensor = torch.from_numpy(np.concatenate([winner_y, loser_y])).double()
    return TensorDataset(x_tensor, y_tensor)

In [13]:
def _load_or_build_dataset(path, frame):
    """Return the dataset cached at ``path``; on a miss build it from ``frame`` and cache it.

    The cache is never invalidated: delete the file to rebuild after the
    input data or the train/validation split changes.
    """
    if os.path.isfile(path):
        # NOTE: weights_only=False unpickles arbitrary objects. Only load
        # cache files that this notebook wrote itself.
        return torch.load(path, weights_only=False)
    dataset = gen_dataset(frame)
    torch.save(dataset, path)
    return dataset


train_data = _load_or_build_dataset('train_dataset.pt', train_df)
validation_data = _load_or_build_dataset('validation_dataset.pt', test_df)

In [14]:
# Mini-batch size shared by the DataLoaders built in this notebook.
batch_size=500

# Both loaders reshuffle their rows on every pass.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_data, batch_size=batch_size, shuffle=True)

## The Model
Define the model. Concatenate the program and team embeddings of both sides, pass them through two hidden layers, and then output a probability that the first team won together with predicted box-score stats for both teams.

In [15]:
class Model(nn.Module):
    """Two-headed matchup network over learned team and program embeddings.

    ``embedding_sizes`` is ``(team_dim, program_dim)``: the first entry sizes
    the per-(team, season) embedding, the second the per-program embedding.
    ``model_sizes`` holds the widths of the two hidden layers and ``dropout``
    is the drop probability used by all three dropout layers.
    """

    def __init__(self, embedding_sizes, model_sizes, dropout):
        super().__init__()
        team_dim, program_dim = embedding_sizes
        first_width, second_width = model_sizes
        self.team_embedding = nn.Embedding(len(teams), team_dim)
        self.program_embedding = nn.Embedding(len(programs), program_dim)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)
        # Each side contributes one team and one program vector; the extra 3
        # inputs are the raw columns x[:, 4:] appended in forward().
        self.fc1 = nn.Linear(2 * team_dim + 2 * program_dim + 3, first_width)
        self.fc2 = nn.Linear(first_width, second_width)
        self.stats_fc = nn.Linear(second_width, 2 * len(stats_columns))
        self.result_fc = nn.Linear(second_width, 1)
        # Run the whole network in float64.
        self.double()

    def forward(self, x):
        """Return ``(result, stats)`` for a batch of matchup rows.

        Columns 0-3 of ``x`` are embedding indices (program, team, opponent
        program, opponent team); the remaining columns are fed to the first
        layer as they are. ``result`` is the sigmoid output of the result
        head, one column per row; ``stats`` is the raw output of the stats
        head.
        """
        own_program = self.program_embedding(x[:, 0].int())
        own_team = self.team_embedding(x[:, 1].int())
        other_program = self.program_embedding(x[:, 2].int())
        other_team = self.team_embedding(x[:, 3].int())
        features = torch.cat([own_program, own_team, other_program, other_team, x[:, 4:]], dim=1)
        matchup = self.dropout1(features)
        hidden1 = self.dropout2(F.relu(self.fc1(matchup)))
        hidden2 = self.dropout3(F.relu(self.fc2(hidden1)))
        stats = self.stats_fc(hidden2)
        result = torch.sigmoid(self.result_fc(hidden2))
        return result, stats
        

In [16]:
# 128-dimensional team-season embeddings, 512-dimensional program embeddings,
# two 128-unit hidden layers and 10% dropout.
model = Model(embedding_sizes=[128, 512], model_sizes=(128,128), dropout=0.1).to(device)

## Training the model

Define the training function

In [17]:
# One MSE criterion is shared by both heads: train() and test() apply it to the
# win probability and to the predicted stats.
loss_fn = nn.MSELoss()
# Adam over every model parameter with a fixed learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train(data, model, loss_fn, optimizer, full_loss=True):
    """Run one optimisation pass over ``data``.

    With ``full_loss`` the objective is the stats loss plus the result loss;
    otherwise only the result loss is back-propagated. The result loss of
    every 100th batch is printed on a single, overwritten line.
    """
    size = len(data.dataset)
    model.train()
    for step, (features, targets) in enumerate(data):
        features, targets = features.to(device), targets.to(device)
        pred_result, pred_stats = model(features)
        # Target column 0 is the win flag; the remaining columns are the stats.
        actual_result = targets[:, 0].double().reshape((-1, 1))
        actual_stats = targets[:, 1:].double()
        result_loss = loss_fn(pred_result, actual_result)
        stats_loss = loss_fn(pred_stats, actual_stats)
        objective = stats_loss + result_loss if full_loss else result_loss
        objective.backward()
        optimizer.step()
        optimizer.zero_grad()

        if step % 100 != 0:
            continue
        result_loss = result_loss.item()
        current = (step + 1) * len(features)
        print(f"result loss: {result_loss:>7f} [{current:>6d}/{size:>6d}]", end="\r")

Define the testing function

In [18]:
def test(data, model, loss_fn, label="Test"):
    """Evaluate ``model`` on ``data`` and print accuracy plus both losses.

    Losses are weighted by batch length and divided by the dataset size, so
    they are per-row averages. A prediction counts as correct when
    ``pred >= 0.5`` agrees with the win flag.
    """
    size = len(data.dataset)
    model.eval()
    stats_total = result_total = hits = 0
    with torch.no_grad():
        for features, targets in data:
            features = features.to(device)
            targets = targets.to(device)
            pred_result, pred_stats = model(features)
            actual_result = targets[:, 0].double().reshape((-1, 1))
            actual_stats = targets[:, 1:].double()
            rows = len(features)
            result_total += loss_fn(pred_result, actual_result).item() * rows
            stats_total += loss_fn(pred_stats, actual_stats).item() * rows
            agreement = (pred_result >= 0.5) == (actual_result == 1)
            hits += agreement.type(torch.float).sum().item()
    stats_loss = stats_total / size
    result_loss = result_total / size
    correct = hits / size
    print(f"{label}: Accuracy: {(100*correct):>0.2f}%, Stats loss: {stats_loss:>8f} Result loss: {result_loss:>8f}")

Train the model

In [None]:
# Joint training on both heads (train() defaults to full_loss=True), reporting
# metrics on the training and validation loaders after every epoch.
n_epochs = 15
for i in range(n_epochs):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer)
    test(train_loader, model, loss_fn, label="Train")
    test(validation_loader, model, loss_fn, label="Validation")

Epoch 0
Train: Accuracy: 57.28%, Stats loss: 50.817376 Result loss: 0.245155
Validation: Accuracy: 56.83%, Stats loss: 51.028760 Result loss: 0.245493
Epoch 1
Train: Accuracy: 64.71%, Stats loss: 51.085264 Result loss: 0.214809
Validation: Accuracy: 64.13%, Stats loss: 51.782098 Result loss: 0.217136
Epoch 2
Train: Accuracy: 67.76%, Stats loss: 56.132609 Result loss: 0.201794
Validation: Accuracy: 67.09%, Stats loss: 57.045771 Result loss: 0.204975
Epoch 3
Train: Accuracy: 70.01%, Stats loss: 47.534107 Result loss: 0.191630
Validation: Accuracy: 68.51%, Stats loss: 48.556479 Result loss: 0.197535
Epoch 4
Train: Accuracy: 72.16%, Stats loss: 50.967282 Result loss: 0.182302
Validation: Accuracy: 70.05%, Stats loss: 52.137161 Result loss: 0.190671
Epoch 5
Train: Accuracy: 73.52%, Stats loss: 52.363307 Result loss: 0.173118
Validation: Accuracy: 71.23%, Stats loss: 53.605065 Result loss: 0.184822
Epoch 6
Train: Accuracy: 74.76%, Stats loss: 45.812208 Result loss: 0.167530
Validation: Accur

Fine tune with only the result

In [None]:
# Four more epochs in which only the result loss is back-propagated (full_loss=False).
for i in range(4):
    print(f"Epoch {i}")
    train(train_loader, model, loss_fn, optimizer, full_loss=False)
    test(train_loader, model, loss_fn, label="Train")
    test(validation_loader, model, loss_fn, label="Validation")

With this model we can predict the outcome of about three quarters of regular season games.

## Load the tourney data to test with

In [None]:
mens_tourney = pd.read_csv('data/MNCAATourneyDetailedResults.csv')
mens_tourney['League'] = 'M'
womens_tourney = pd.read_csv('data/WNCAATourneyDetailedResults.csv')
womens_tourney['League'] = 'W'
# ignore_index keeps the row labels unique. Columns are assigned to this frame
# later from a Series aligned on the index, which is only safe with unique labels.
tourney = pd.concat([mens_tourney, womens_tourney], ignore_index=True)

# Tournament games in the same (features, targets) layout as the regular-season data.
tourney_dataset = gen_dataset(tourney)
tourney_loader = DataLoader(tourney_dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Evaluate the regular-season model on every tournament game.
test(tourney_loader, model, loss_fn, label="Tourney")

When it comes to tournament results we get a little worse. The lower result is likely due to teams having increased parity.

Train with early tournament data

In [None]:
# Only tournaments before 2021 are used for fine-tuning; later seasons stay held out.
tourney_df = tourney[tourney.Season < 2021]

# random_state pins the 80/20 split so the fine-tuning run is repeatable.
tourney_train_df, tourney_validation_df = train_test_split(tourney_df, train_size=0.8, random_state=20250222)
tourney_train_data = gen_dataset(tourney_train_df)
tourney_validation_data = gen_dataset(tourney_validation_df)

tourney_train_loader = DataLoader(tourney_train_data, batch_size=batch_size)
tourney_validation_loader = DataLoader(tourney_validation_data, batch_size=batch_size)

In [None]:
# Freeze both embedding tables so that further training leaves them unchanged.
for embedding_table in (model.team_embedding, model.program_embedding):
    embedding_table.requires_grad_(False)

In [None]:
# Ten epochs on the pre-2021 tournament split, back-propagating only the result loss.
for i in range(10):
    print(f"Epoch {i}")
    train(tourney_train_loader, model, loss_fn, optimizer, full_loss=False)
    test(tourney_train_loader, model, loss_fn, label="Train")
    test(tourney_validation_loader, model, loss_fn, label="Validation")

### Performance by year


In [None]:
# One evaluation line per tournament season (both leagues together).
for season in tourney.Season.unique():
    loader = DataLoader(gen_dataset(tourney[tourney.Season == season]), batch_size=batch_size)
    test(loader, model, loss_fn, label=f"{season} Tournament")

In [None]:
# Hold-out set: tournaments from 2021 onward, which the fine-tuning split (Season < 2021) excluded.
stage1_loader = DataLoader(gen_dataset(tourney[tourney.Season >= 2021]), batch_size=batch_size, shuffle=True)
test(stage1_loader, model, loss_fn, label=f"Stage 1")

Breaking out by league

In [None]:
# One evaluation line per (season, league); iterating over the leagues present
# in each season avoids building a dataset from an empty frame.
for season in tourney.Season.unique():
    for league in tourney[tourney.Season == season].League.unique():
        loader = DataLoader(gen_dataset(tourney[(tourney.Season == season) & (tourney.League == league)]),
                            batch_size=batch_size)
        test(loader, model, loss_fn, label=f"{season} {league} Tournament")

## Inspect the model
First what are the sizes of the smallest input and output weights

In [None]:
# For each weight matrix: take the largest absolute value in every column
# (max over axis 0), then report the smallest of those column maxima.
print(f"Program embedding min: {model.program_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"Team embedding min: {model.team_embedding.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")
print(f"FC min: {model.result_fc.state_dict()['weight'].abs().max(axis=0).values.min().item():>8f}")

Calculate the average gradient for each input feature

In [None]:
def feature_eval(model, data):
    """Average gradient of the predicted win probability w.r.t. the inputs.

    Returns ``(program_grads, team_grads, stats_grads)``:

    * ``program_grads`` / ``team_grads`` - one value per embedding dimension:
      the gradient with respect to the embedding table, summed over its rows.
    * ``stats_grads`` - one value per input column from index 4 onward.

    Every value is the gradient of the summed win probabilities divided by the
    number of rows in ``data.dataset``, so all three share the same scale.

    Both embedding tables must have ``requires_grad=True``; otherwise
    ``torch.autograd.grad`` raises a RuntimeError.
    """
    model.eval()
    team_weight = model.team_embedding.weight
    program_weight = model.program_embedding.weight
    target_device = team_weight.device

    team_grads = torch.zeros(model.team_embedding.embedding_dim,
                             dtype=team_weight.dtype, device=target_device)
    program_grads = torch.zeros(model.program_embedding.embedding_dim,
                                dtype=program_weight.dtype, device=target_device)
    stats_grads = torch.zeros(3, dtype=team_weight.dtype, device=target_device)
    size = len(data.dataset)

    for x, _ in data:
        # A fresh leaf tensor so gradients with respect to the raw inputs can be requested.
        x = x.to(target_device).detach().requires_grad_(True)
        # The model returns (result, stats); the win probability is the first element.
        pred_result, _ = model(x)
        # One forward pass and one backward pass yield all three gradients.
        team_g, program_g, input_g = torch.autograd.grad(
            pred_result.sum(), [team_weight, program_weight, x])
        team_grads += team_g.sum(dim=0)
        program_grads += program_g.sum(dim=0)
        # Columns 0-3 are embedding indices; only the remaining columns feed the network directly.
        stats_grads += input_g.sum(dim=0)[4:]

    return program_grads / size, team_grads / size, stats_grads / size

In [None]:
# Re-enable gradients on both embedding tables; feature_eval differentiates with respect to them.
for embedding_table in (model.team_embedding, model.program_embedding):
    embedding_table.requires_grad_(True)

In [None]:
# Gradient summaries computed over all tournament games.
program_weights, team_weights, stats_weights = feature_eval(model, tourney_loader)

In [None]:
# Total absolute gradient over the program embedding dimensions vs. the team embedding dimensions.
program_weights.abs().sum().item(), team_weights.abs().sum().item()

In [None]:
# stats_weights covers input columns 4 onward, which gen_dataset fills with
# Season, DayNum and the men's-league flag, in that order.
print(f"Year:\t{stats_weights[0]:>4f}")
print(f"Game:\t{stats_weights[1]:>4f}")
print(f"League:\t{stats_weights[2]:>4f}")

## Generating the submission file
### Phase 2

Write the results

In [None]:
season = 2025
# Inference only: make sure dropout is off regardless of which cell ran last,
# and skip autograd bookkeeping for the large prediction batch.
model.eval()
with open('submission.csv', 'w') as f, torch.no_grad():
    f.write("ID,Pred\n")
    for league in ('M', 'W'):
        teams_to_test = sorted(teams[(teams.Season==season) & (teams.League==league)].TeamID.values)
        # Every unordered pair once, lower TeamID first; the prediction is the
        # probability that the lower-ID team wins.
        matchups = [(t1, t2) for t1 in teams_to_test for t2 in teams_to_test if t1 < t2]
        if not matchups:
            # Nothing to predict for this league (fewer than two teams in the season).
            continue
        # Same column layout as gen_dataset, with the DayNum column fixed at 140.
        matchups_tensor = torch.Tensor(np.array(
            [[programMapping[t1], teamMapping[(t1, season)],
              programMapping[t2], teamMapping[(t2, season)],
              season, 140, league == 'M']
             for (t1, t2) in matchups])).int().to(device)
        predictions, _ = model(matchups_tensor)
        for (t1, t2), pred in zip(matchups, predictions):
            f.write(f"{season}_{t1.item()}_{t2.item()},{pred.item()}\n")

## Save the model

In [None]:
# Persist only the learned parameters (state dict), not the Model object itself.
torch.save(model.state_dict(), 'model.pth')

## Moderated model

Moderate a model by pushing it towards 0.5

In [None]:
class ModeratedModel:
    """Wrap a model and shrink its win probabilities towards 0.5.

    ``weight`` is the share given to the wrapped model's probability; the
    remaining ``1 - weight`` goes to the neutral value 0.5. ``weight=1``
    leaves predictions unchanged, ``weight=0`` always predicts 0.5.
    """

    def __init__(self, model, weight):
        self.model = model
        self.weight = weight

    def eval(self):
        """Put the wrapped model into evaluation mode, if it supports it."""
        switch_to_eval = getattr(self.model, "eval", None)
        if callable(switch_to_eval):
            switch_to_eval()

    def __call__(self, x):
        """Return ``(moderated_result, stats)`` in the wrapped model's output order."""
        # The wrapped model returns (result, stats): only the win probability
        # is moderated, the predicted stats are passed through unchanged.
        result, stats = self.model(x)
        moderated = result * self.weight + 0.5 * (1 - self.weight)
        return moderated, stats


In [None]:
# 0.75 is passed as ModeratedModel's `weight`.
moderated = ModeratedModel(model, 0.75)

In [None]:
# Evaluate the moderated wrapper season by season.
for season in tourney.Season.unique():
    loader = DataLoader(gen_dataset(tourney[tourney.Season == season]), batch_size=batch_size)
    test(loader, moderated, loss_fn, label=f"{season} Tournament")

## Dig into 2023 results

In [None]:
loader = DataLoader(gen_dataset(mens_tourney[mens_tourney.Season == 2023]))

# Work on the full tensors directly rather than iterating over the loader.
x, y = loader.dataset.tensors

# Inference only: dropout must be off whichever cell ran last, and no autograd graph is needed.
model.eval()
with torch.no_grad():
    preds = model(x.to(device))

In [None]:
# Team metadata indexed by TeamID; TeamName is read from it further down.
mteams = pd.read_csv('data/MTeams.csv').set_index('TeamID')
wteams = pd.read_csv('data/WTeams.csv').set_index('TeamID')
allteams = pd.concat([mteams, wteams])

In [None]:
# Tournament seeds for both leagues, indexed by (Season, TeamID) for direct lookup.
mens_seeds = pd.read_csv('data/MNCAATourneySeeds.csv')
womens_seeds = pd.read_csv('data/WNCAATourneySeeds.csv')
seeds = pd.concat([mens_seeds, womens_seeds]).set_index(['Season', 'TeamID'])

In [None]:
def upset(season, winner, loser):
    """Return True when the winner's seed digits compare greater than the loser's.

    Characters 1-2 of each ``Seed`` string are compared as text; presumably
    they hold a zero-padded two-digit seed number, in which case the text
    comparison matches the numeric one (TODO confirm against the seeds file).
    """
    winner_digits, loser_digits = (
        seeds.loc[season, team].Seed[1:3] for team in (winner, loser)
    )
    return winner_digits > loser_digits

In [None]:
# gen_dataset stacks the winners' rows first and the losers' rows second, so the
# first half of x/y holds exactly one row per game, seen from the winner's side.
t_2023 = pd.DataFrame({'winner_name': [mteams.loc[programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                       'loser_name': [mteams.loc[programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                       'winner': [programs.loc[i].TeamID for i in x[:,0].tolist()],
                       'loser': [programs.loc[i].TeamID for i in x[:,2].tolist()],
                       'actual': y[:,0].reshape([-1]),
                       'predicted': np.array(preds[0].tolist()).reshape([-1])}).iloc[:len(x) // 2]

In [None]:
# Games whose actual winner was given less than a 50% chance, least likely first.
t_2023[t_2023.predicted < 0.5].sort_values('predicted')

The biggest thing in this season were the huge upsets in the first round. Purdue was a number one seed and lost which I only gave a .4% chance to happen. Arizona and Virginia were number 2 seeds and lost which I gave 7% and 15% chances of happening respectively.

In [None]:
# Flag each 2023 game with upset(): True when the winner's seed digits compare greater than the loser's.
t_2023['Upset'] = [upset(2023, winner, loser) for (winner, loser) in zip(t_2023['winner'], t_2023['loser'])]

In [None]:
# Mean predicted win probability of the winners of the flagged upsets.
t_2023[t_2023.Upset].predicted.mean()

On average the upsets had a 32% chance of happening

In [None]:
# Upsets whose winner the model also favoured (predicted >= 0.5).
t_2023[t_2023.Upset & (t_2023.predicted >= 0.5)].sort_values('predicted', ascending=False)

I correctly predicted 2 upsets, though in both cases the teams were closely seeded

In [None]:
# Non-upset games whose winner the model gave less than 50%, i.e. upsets it predicted that did not happen.
t_2023[~t_2023.Upset & (t_2023.predicted < 0.5)].sort_values('predicted')

I also incorrectly predicted 4 upsets

Looking at all the tourneys

In [None]:
# Same breakdown as for 2023, now over every tournament game in tourney_loader.
# NOTE: this rebinds x, y, preds and tourney_df; tourney_df previously held the
# pre-2021 tournament games used for the fine-tuning split.
x, y = tourney_loader.dataset.tensors
preds = model(x.to(device))
tourney_df = pd.DataFrame({'season': x[:,4].tolist(),
                           'winner_name': [allteams.loc[programs.loc[i].TeamID].TeamName for i in x[:,0].tolist()],
                           'loser_name': [allteams.loc[programs.loc[i].TeamID].TeamName for i in x[:,2].tolist()],
                           'winner': [programs.loc[i].TeamID for i in x[:,0].tolist()],
                           'loser': [programs.loc[i].TeamID for i in x[:,2].tolist()],
                           'actual': y[:,0].reshape([-1]),
                           'predicted': np.array(preds[0].tolist()).reshape([-1])})
# Keep only the winner's-view rows (target flag 1), one per game.
tourney_df = tourney_df[tourney_df.actual == 1.0]
tourney_df['Upset'] = [upset(season, winner, loser) for (winner, loser, season)
                       in zip(tourney_df['winner'], tourney_df['loser'], tourney_df['season'])]

In [None]:
# Number of upsets after 2020 whose winner the model favoured (row order is irrelevant for a count).
len(tourney_df[tourney_df.Upset & (tourney_df.predicted >= 0.5) & (tourney_df.season > 2020)])

In [None]:
# Number of non-upset games after 2020 whose winner the model gave less than 50%.
len(tourney_df[~tourney_df.Upset & (tourney_df.predicted < 0.5) & (tourney_df.season > 2020)])

Overall I predicted 19 upsets correctly, and 30 incorrectly

## Predicting by seeds
What if I predict just using the seeds?

In [None]:
# Attach the winner's seed (column Seed) and the loser's seed (column SeedL) to every tournament game.
seeded_tourney = tourney.join(seeds, on=['Season', 'WTeamID']).join(seeds, on=['Season', 'LTeamID'], rsuffix='L')

In [None]:
# Characters 1-2 of the seed string are parsed as the seed number.
# NOTE: these assignments add columns to `tourney` in place.
tourney['WSeed'] = seeded_tourney.Seed.map(lambda x: int(x[1:3]))
tourney['LSeed'] = seeded_tourney.SeedL.map(lambda x: int(x[1:3]))
# Positive when the winner carried the larger seed number.
tourney['SeedDiff'] = tourney.WSeed - tourney.LSeed

In [None]:
# How often each (winner seed - loser seed) gap occurred in tournaments before 2021.
seed_diff_counts = tourney[tourney.Season < 2021].SeedDiff.value_counts()

In [None]:
# odds[d], for d = (winner seed number - loser seed number), is the historical
# share of games at that seed gap that went the other way. Equivalently, odds[k]
# is the win probability of a team whose opponent's seed number is k larger.
odds = {0: 0.5}
for diff in range(1, 16):
    # .get(..., 0) covers gaps that were only ever observed in one direction,
    # which would otherwise raise a KeyError.
    lower_wins = seed_diff_counts.get(diff, 0)
    higher_wins = seed_diff_counts.get(-diff, 0)
    if lower_wins == 0:
        # The larger seed number never won at this gap (or the gap never occurred).
        odds[diff] = 1
        odds[-diff] = 0
    else:
        odds[diff] = higher_wins/(higher_wins + lower_wins)
        odds[-diff] = lower_wins/(higher_wins + lower_wins)
        

In [None]:
# odds[SeedDiff] is the table's probability that the game went the other way,
# so its square is the squared error of the seed-only prediction for that game.
tourney[tourney.Season >= 2021].SeedDiff.map(lambda x: odds[x]**2).mean()

This results in a test Brier score of about 0.185.

## Hybrid Model
Building a model using the neural net and seeds

In [None]:
class HybridModel(object):
    """Weighted blend of several models that each return ``(result, stats)``.

    ``models`` and ``weights`` are parallel sequences; the blended outputs are
    the weighted sums of the individual results and of the individual stats.
    Raises ValueError when the two sequences differ in length.
    """

    def __init__(self, models, weights):
        if len(models) != len(weights):
            raise ValueError(
                f"expected one weight per model, got {len(models)} models and {len(weights)} weights")
        self.models = models
        self.weights = weights

    def eval(self):
        """Put every sub-model that supports it into evaluation mode."""
        for sub_model in self.models:
            switch_to_eval = getattr(sub_model, "eval", None)
            if callable(switch_to_eval):
                switch_to_eval()

    def __call__(self, x):
        """Return the weighted sums ``(results, stats)`` over all sub-models."""
        results, stats = None, None
        for sub_model, weight in zip(self.models, self.weights):
            # Separate names for the sub-model's outputs, so that the running
            # totals are neither overwritten nor aliased to a model's own tensor.
            sub_result, sub_stats = sub_model(x)
            if results is None:
                results, stats = weight * sub_result, weight * sub_stats
            else:
                results = results + weight * sub_result
                stats = stats + weight * sub_stats
        if results is None:
            # No sub-models at all: fall back to all-zero outputs.
            results = torch.zeros(len(x)).reshape([-1, 1]).to(x.device)
            stats = torch.zeros((len(x), len(stats_columns*2))).to(x.device)
        return results, stats

In [None]:
class SeedModel(object):
    """Predict win probabilities from tournament seeds alone.

    ``odds`` maps a seed gap ``k`` to the win probability of a team whose
    opponent's seed number is ``k`` larger than its own. ``seeds`` is a frame
    indexed by (Season, TeamID) with a ``Seed`` column. The stats output is
    all zeros; only the result is meaningful.
    """

    def __init__(self, odds, seeds):
        self.odds = odds
        self.seeds = seeds

    def eval(self):
        """No-op: this model has no training-time behaviour to switch off."""
        pass

    def seed(self, season, team):
        """Return the team's seed number for that season, or -1 if it has no seed."""
        if (season, team) in self.seeds.index:
            return int(self.seeds.loc[season, team].Seed[1:3])
        return -1

    def win_odds(self, team1, team2):
        """Probability that the team seeded ``team1`` beats the team seeded ``team2``.

        A seed of -1 means "unseeded": an unseeded team gets 0 against a
        seeded one, and two unseeded teams split 50/50.
        """
        if team1 == -1:
            return 0.5 if team2 == -1 else 0
        if team2 == -1:
            return 1
        # The table is keyed by (opponent seed number - own seed number).
        return self.odds[team2 - team1]

    def __call__(self, x):
        """Return ``(results, stats)`` for rows laid out as by gen_dataset.

        Column 0 is the first team's program index, column 2 the opponent's
        and column 4 the season.
        """
        team_1 = programs.loc[x[:,0].int().cpu()].TeamID
        team_2 = programs.loc[x[:,2].int().cpu()].TeamID
        season = x[:,4].int().cpu()
        team_1_seed = [self.seed(s, t) for s, t in np.stack([season, team_1], axis=1)]
        team_2_seed = [self.seed(s, t) for s, t in np.stack([season, team_2], axis=1)]
        stats = torch.zeros((len(x), len(stats_columns*2))).to(x.device)
        results = torch.Tensor([self.win_odds(own, other) for own, other in
                                zip(team_1_seed, team_2_seed)]).to(x.device).reshape([-1,1])
        return results, stats
        

In [None]:
# Seed-only baseline, evaluated on the tournaments from 2021 onward.
seed_model = SeedModel(odds, seeds)
test(stage1_loader, seed_model, loss_fn, label=f"Seeds")

In [None]:
# The neural network on the same games, for comparison.
test(stage1_loader, model, loss_fn, label="NN")

In [None]:
# Blend: weight 0.8 on the neural network, 0.2 on the seed-only model.
hybrid = HybridModel([model, seed_model], [0.8, 0.2])

In [None]:
# The blend on the same games.
test(stage1_loader, hybrid, loss_fn, label=f"Hybrid")

The hybrid model outperforms both individual models

In [None]:
# Hybrid model per season and league for the 2021-2024 tournaments.
for season in range(2021, 2025):
    for league in ['M', 'W']:
        loader = DataLoader(gen_dataset(tourney[(tourney.Season == season) & (tourney.League == league)]),
                            batch_size=batch_size)
        test(loader, hybrid, loss_fn, label=f"{season} {league} Tournament")

## Generate a bracket

In [None]:
# Bracket slots indexed by (Season, Slot), and the seed tables re-indexed by
# (Season, Seed) so a slot's StrongSeed / WeakSeed can be joined to a TeamID.
mens_slots = pd.read_csv('data/MNCAATourneySlots.csv').set_index(['Season', 'Slot'])
womens_slots = pd.read_csv('data/WNCAATourneySlots.csv').set_index(['Season', 'Slot'])
mens_tourney_seeds = mens_seeds.set_index(['Season', 'Seed'])
womens_tourney_seeds = womens_seeds.set_index(['Season', 'Seed'])

In [None]:
# Resolve each slot's StrongSeed / WeakSeed to TeamID / TeamID2 where the seed
# names a team directly; slots fed by earlier games stay NaN until they are played.
mens_schedule = (
    mens_slots
    .join(mens_tourney_seeds, on=['Season', 'StrongSeed'])
    .join(mens_tourney_seeds, on=['Season', 'WeakSeed'], rsuffix='2')
)
# The women's schedule has to start from the women's slots, not the men's.
womens_schedule = (
    womens_slots
    .join(womens_tourney_seeds, on=['Season', 'StrongSeed'])
    .join(womens_tourney_seeds, on=['Season', 'WeakSeed'], rsuffix='2')
)

In [None]:
def model_odds(season, league, model):
    """Predict every ordered pairing of the league's teams for one season.

    Returns ``{(team1_id, team2_id): probability that team1 wins}`` with plain
    ``int`` keys. Every game is evaluated with the DayNum column fixed at 140.
    Returns an empty dict when there are fewer than two teams.

    Side effect: ``model.eval()`` is called, so the model is left in
    evaluation mode.
    """
    teams_to_test = sorted(teams[(teams.Season==season) & (teams.League==league)].TeamID.values)
    matchups = [(t1, t2) for t1 in teams_to_test for t2 in teams_to_test if t1 != t2]
    if not matchups:
        return {}
    # Same column layout as gen_dataset.
    matchups_tensor = torch.Tensor(np.array(
        [[programMapping[t1], teamMapping[(t1, season)],
          programMapping[t2], teamMapping[(t2, season)],
          season, 140, league == 'M']
         for (t1, t2) in matchups])).int().to(device)
    # Inference only: dropout off, no autograd graph for this large batch.
    model.eval()
    with torch.no_grad():
        predictions, _ = model(matchups_tensor)
    return {(int(t1), int(t2)): pred.item() for (t1, t2), pred in zip(matchups, predictions)}

In [None]:
def gen_bracket(schedule, odds):
    """Play out a bracket round by round and return it with the results filled in.

    ``schedule`` is indexed by slot and has StrongSeed, WeakSeed, TeamID and
    TeamID2 columns; ``odds[(t1, t2)]`` is the probability that ``t1`` beats
    ``t2``. A copy of the schedule is returned with two extra columns:
    ``Winner`` (the advancing team) and ``P`` (the probability looked up for
    the slot's TeamID over its TeamID2). The input frame is not modified.
    """
    bracket = schedule.copy()
    bracket.insert(len(bracket.columns), 'Winner', -1)
    bracket.insert(len(bracket.columns), 'P', -1.0)
    # At most 11 passes; each pass plays every undecided game whose two
    # participants are already known.
    for _ in range(11):
        undecided = bracket.Winner < 0
        if not undecided.any():
            break
        ready = undecided & bracket.TeamID.notna() & bracket.TeamID2.notna()
        playable = bracket.loc[ready, ['TeamID', 'TeamID2']]
        for slot, first, second in zip(playable.index, playable.TeamID, playable.TeamID2):
            p = odds[(first, second)]
            bracket.loc[slot, 'P'] = p
            # The first team advances only when it is strictly favoured.
            if p > 0.5:
                winner = first
            else:
                winner = second
            bracket.loc[slot, 'Winner'] = winner
            # Feed the winner into whichever later slot references this one.
            bracket.loc[bracket.StrongSeed == slot, 'TeamID'] = winner
            bracket.loc[bracket.WeakSeed == slot, 'TeamID2'] = winner
    return bracket

In [None]:
# Hybrid-model win probabilities for every ordered pair of 2024 men's teams.
m_odds = model_odds(2024, 'M', hybrid)

In [None]:
# Print the predicted winner of every 2024 men's slot, with team names, without row/column truncation.
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    print(gen_bracket(mens_schedule.loc[2024,:], m_odds).join(allteams, on='Winner')[['Winner', 'TeamName']])