In [None]:
!pip install binarytree
!pip install bracketeer==0.2.0

In [None]:
# https://www.kaggle.com/joseleiva/massey-s-ordinal-s-ordinals

import numpy as np
import pandas as pd

# Directory holding the competition CSV files.
inp = '../input/ncaam-march-mania-2021/MDataFiles_Stage2/'
season_df = pd.read_csv(inp+'MRegularSeasonCompactResults.csv')
tourney_df = pd.read_csv(inp+'MNCAATourneyCompactResults.csv')
# RankingDayNum is renamed to DayNum on load.
ordinals_df = pd.read_csv(inp+'MMasseyOrdinals.csv').rename(columns={'RankingDayNum':'DayNum'})

# Keep, for every (system, season, team), the most recent ranking on file.
# The rows are sorted by DayNum first (stable sort) so that "last" really means
# "chronologically latest" instead of depending on the row order of the CSV.
# NOTE(review): no DayNum cutoff is applied, so this is only "previous to the
# tournament" if the file contains no later rankings — confirm.
ordinals_df = (
    ordinals_df
    .sort_values('DayNum', kind='mergesort')
    .groupby(['SystemName','Season','TeamID'])
    .last()
    .reset_index()
)

def _rank_to_rating(rank):
    """Convert an ordinal rank into a rating: 100 - 4*ln(rank + 1) - rank/22.

    Works element-wise on a pandas Series (or any numpy-compatible input);
    a smaller rank yields a larger rating.
    """
    return 100-4*np.log(rank+1)-rank/22


# Attach the winner's ordinal rank; every game is repeated once per ranking
# system that has a rank for the winning team in that season.
games_df = tourney_df.merge(ordinals_df,left_on=['Season','WTeamID'],
                            right_on=['Season','TeamID'])
# Then attach the loser's rank from the same system. Columns present on both
# sides get the suffixes 'W' (winner) and 'L' (loser).
games_df = games_df.merge(ordinals_df,left_on=['Season','LTeamID','SystemName'],
                          right_on=['Season','TeamID','SystemName'],
                          suffixes=('W','L'))

# The merged-in team id columns duplicate WTeamID / LTeamID.
games_df = games_df.drop(columns=['TeamIDW','TeamIDL'])

# 1 if the system ranked the actual winner ahead of the loser, else 0;
# then the hit rate and number of games per system.
games_df['prediction'] = (games_df.OrdinalRankW<games_df.OrdinalRankL).astype(int)
results_by_system = games_df.groupby('SystemName').agg({'prediction':['mean','count']})

# Ratings for both teams, and the probability the model assigns to the team
# that actually won (base-10 logistic of the rating gap, scale 15).
games_df['Wrating'] = _rank_to_rating(games_df['OrdinalRankW'])
games_df['Lrating'] = _rank_to_rating(games_df['OrdinalRankL'])
games_df['prob'] = 1/(1+10**((games_df['Lrating']-games_df['Wrating'])/15))

# Per-system log loss (mean of -log(prob of the actual winner)) and game
# count, restricted to seasons from 2015 onward.
loss_results = (
    games_df[games_df.Season>=2015]
    .groupby('SystemName')['prob']
    .agg(loss=lambda p: -np.mean(np.log(p)), count='count')
)

# Ranking system whose ordinals drive the submitted predictions.
ref_system = 'POM'
# Rating derived from the ordinal rank (smaller rank -> larger rating);
# afterwards only the reference system's rows are kept.
ordinals_df['Rating']= 100-4*np.log(ordinals_df['OrdinalRank']+1)-ordinals_df['OrdinalRank']/22
ordinals_df = ordinals_df[ordinals_df.SystemName==ref_system]

# Load the sample submission and split each ID of the form
# "<season>_<team1>_<team2>" into integer columns.
sub_df = pd.read_csv(inp+'MSampleSubmissionStage2.csv')
id_parts = sub_df['ID'].str.split('_', expand=True).astype(int)
sub_df['Season'] = id_parts[0]
sub_df['Team1'] = id_parts[1]
sub_df['Team2'] = id_parts[2]

# Look up both teams' ratings. Left joins keep every submission row, and
# validate='many_to_one' raises if a (Season, TeamID) pair has more than one
# rating, which would otherwise silently duplicate submission rows.
# The second merge's suffixes rename the colliding columns to
# TeamIDW/RatingW (Team1) and TeamIDL/RatingL (Team2).
ref_ratings = ordinals_df[['Season','TeamID','Rating']]
sub_df = sub_df.merge(ref_ratings, how='left', left_on=['Season','Team1'],
                      right_on=['Season','TeamID'], validate='many_to_one')
sub_df = sub_df.merge(ref_ratings, how='left', left_on=['Season','Team2'],
                      right_on=['Season','TeamID'], suffixes=('W','L'),
                      validate='many_to_one')

# Probability that Team1 beats Team2: base-10 logistic of the rating gap, scale 15.
sub_df['Pred'] = 1/(1+10**((sub_df['RatingL']-sub_df['RatingW'])/15))
# A team absent from the reference system has no rating, which makes Pred NaN
# and would be written as an empty field; fall back to a neutral 0.5 instead.
sub_df['Pred'] = sub_df['Pred'].fillna(0.5)
sub_df[['ID', 'Pred']].to_csv('submission.csv', index=False, float_format='%.5g')


In [None]:
from bracketeer import build_bracket

# Gather the bracketeer inputs in one place, then build the 2021 bracket from
# the predictions in submission.csv.
# NOTE(review): presumably the bracket image is written to outputPath — confirm
# against the bracketeer documentation.
bracket_kwargs = {
    'outputPath': 'MNCAA2021.png',
    'teamsPath': inp+'MTeams.csv',
    'seedsPath': inp+'MNCAATourneySeeds.csv',
    'submissionPath': 'submission.csv',
    'slotsPath': inp+'MNCAATourneySlots.csv',
    'year': 2021,
}
b = build_bracket(**bracket_kwargs)

![MNCAA2021](MNCAA2021.png "MNCAA2021")