In [1]:
import pandas as pd

In [2]:
# Matchup win probabilities written out by M4_ProbabilityAlterations.ipynb.
# Each row is keyed by an ID string that ends with the two team IDs
# (see the preview below).
preds_path = 'mydata/mens/altered_probabilities.csv'
preds = pd.read_csv(preds_path)
preds.head()

Unnamed: 0,ID,Pred,TeamID_x,TeamID_y
0,2021_1101_1104,0.194731,1101,1104
1,2021_1101_1116,0.22,1101,1116
2,2021_1101_1124,0.08,1101,1124
3,2021_1101_1140,0.329809,1101,1140
4,2021_1101_1155,0.496278,1101,1155


In [3]:
# Team ID -> team name lookup; only these two columns of the teams file are kept
team_names = (
    pd.read_csv('ncaam-march-mania-2021/MTeams.csv')
    .loc[:, ['TeamID', 'TeamName']]
)
team_names.head()

Unnamed: 0,TeamID,TeamName
0,1101,Abilene Chr
1,1102,Air Force
2,1103,Akron
3,1104,Alabama
4,1105,Alabama A&M


In [4]:
# Tournament seeds, restricted to the 2021 season and to the TeamID / Seed columns
seeds = (
    pd.read_csv('ncaam-march-mania-2021/MNCAATourneySeeds.csv')
    .query('Season == 2021')
    .loc[:, ['TeamID', 'Seed']]
)
seeds.head()

Unnamed: 0,TeamID,Seed
2286,1276,W01
2287,1104,W02
2288,1400,W03
2289,1199,W04
2290,1160,W05


In [5]:
# Attach each team's seed to its name. The join is an inner join on TeamID,
# so only teams that have a seed row are kept.
teams = team_names.merge(seeds, how='inner', on='TeamID')
teams.head()

Unnamed: 0,TeamID,TeamName,Seed
0,1101,Abilene Chr,W14
1,1104,Alabama,W02
2,1111,Appalachian St,X16a
3,1116,Arkansas,Z03
4,1124,Baylor,Z01


In [6]:
# For every seed and game round, the bracket slot (GameSlot) that seed would play in
slots_path = 'ncaam-march-mania-2021/MNCAATourneySeedRoundSlots.csv'
slots = pd.read_csv(slots_path)
slots.head()

Unnamed: 0,Seed,GameRound,GameSlot,EarlyDayNum,LateDayNum
0,W01,1,R1W1,136,137
1,W01,2,R2W1,138,139
2,W01,3,R3W1,143,144
3,W01,4,R4W1,145,146
4,W01,5,R5WX,152,152


In [7]:
# Column names for the per-team probability of making each round, in bracket order
rounds = ['Round64', 'Round32', 'Sweet16', 'Elite8', 'Final4', 'Final', 'Champs']

probs = teams.copy()

# Create the round columns as floats. The calculation cell writes fractional
# probabilities into them one element at a time with .loc; if the columns start
# out as int64, pandas has to silently upcast them, which emits a FutureWarning
# on pandas >= 2.1 and is slated to become an error.
probs['Round64'] = 1.0  # default to 1 for opening round, rest default to 0
for later_round in rounds[1:]:
    probs[later_round] = 0.0

In [8]:
# Calculate the probability of making each round of the tournament.
#
# Round index 0 is the play-in round: only teams whose seed ends in 'a' or 'b'
# take part, and their rounds[0] value becomes the play-in win probability.
# For every later round index k the value accumulated into rounds[k] is
#     P(team made rounds[k-1]) * sum over possible opponents of
#         P(opponent made rounds[k-1]) * P(team beats opponent)

# Make the cell safe to re-run on its own: the accumulation below must start
# from zero, and the columns must be float so fractional values can be stored
# without pandas having to upcast an integer column on assignment.
probs[rounds[0]] = probs[rounds[0]].astype(float)
for later_round in rounds[1:]:
    probs[later_round] = 0.0

# Lookup tables built once instead of re-filtering the DataFrames inside the loops.
# setdefault keeps the first occurrence of a key, matching a "first match" lookup.
seed_to_team_id = {}  # seed -> TeamID, for seeds present in this year's tournament
for seed, seeded_team_id in zip(teams['Seed'], teams['TeamID']):
    seed_to_team_id.setdefault(seed, seeded_team_id)

team_id_to_row = {}  # TeamID -> row label in probs
for row_label, row_team_id in probs['TeamID'].items():
    team_id_to_row.setdefault(row_team_id, row_label)

game_id_to_pred = {}  # game ID -> Pred (treated below as the lower-ID team's win probability)
for pred_game_id, pred_value in zip(preds['ID'], preds['Pred']):
    game_id_to_pred.setdefault(pred_game_id, pred_value)

slot_for_seed_round = {}  # (seed, game round) -> game slot
seeds_in_slot = {}        # game slot -> every seed that can appear in it, in file order
for slot_seed, slot_round, game_slot in zip(slots['Seed'], slots['GameRound'], slots['GameSlot']):
    slot_for_seed_round.setdefault((slot_seed, slot_round), game_slot)
    seeds_in_slot.setdefault(game_slot, []).append(slot_seed)

# dictionary of past opponents for each seed (each team has a unique seed)
past_opponent_seeds = {}

# for each round
for round_idx, round_name in enumerate(rounds):

    # for each team
    for row in probs.index:
        team_seed = probs.loc[row, 'Seed']

        # in the opening round only the play-in teams have a game to play
        if round_idx == 0 and not team_seed.endswith(('a', 'b')):
            continue

        # opponents this team has already been matched against in earlier rounds
        faced = past_opponent_seeds.setdefault(team_seed, [])

        team_id = seed_to_team_id[team_seed]

        # game slot for this team in this round
        slot = slot_for_seed_round[(team_seed, round_idx)]

        # for each other seed that can end up in the same slot
        for opponent_seed in seeds_in_slot[slot]:
            if opponent_seed == team_seed:
                continue

            # skip opponents already handled in an earlier round, and seeds
            # that do not exist in this year's tournament
            if opponent_seed in faced or opponent_seed not in seed_to_team_id:
                continue
            faced.append(opponent_seed)

            opponent_id = seed_to_team_id[opponent_seed]

            # game IDs always list the lower team ID first
            id_1 = min(team_id, opponent_id)
            id_2 = max(team_id, opponent_id)
            game_id = '2021_' + str(id_1) + '_' + str(id_2)

            # win probability for that game (a missing game ID raises KeyError)
            win_prob = game_id_to_pred[game_id]

            # reverse the win probability if the team is the higher ID team
            if team_id == id_2:
                win_prob = 1 - win_prob

            if round_idx == 0:
                # opening round: the probability is just the play-in win probability
                probs.loc[row, round_name] = win_prob
            else:
                # add this opponent's term: both teams made the previous round
                # and this team wins the game between them
                previous_round = rounds[round_idx - 1]
                opponent_row = team_id_to_row[opponent_id]
                probs.loc[row, round_name] = probs.loc[row, round_name] + (
                    probs.loc[row, previous_round]
                    * probs.loc[opponent_row, previous_round]
                    * win_prob
                )

In [9]:
# Sanity check: each column's total should equal the number of teams in that round
for round_name in rounds:
    round_total = sum(probs[round_name])
    print(round_name + ' sum of probabilties: ' + str(round_total))

Round64 sum of probabilties: 64.0
Round32 sum of probabilties: 32.0
Sweet16 sum of probabilties: 16.0
Elite8 sum of probabilties: 7.999999999999998
Final4 sum of probabilties: 4.0
Final sum of probabilties: 2.0
Champs sum of probabilties: 0.9999999999999999


In [10]:
# Show every team's round probabilities, highest 'Final' probability first,
# rounded to three decimals (the row limit is raised so no team is truncated)
pd.set_option('display.max_rows', 500)
(
    probs
    .drop(columns=['TeamID'])
    .sort_values(by='Final', ascending=False)
    .round(3)
)

Unnamed: 0,TeamName,Seed,Round64,Round32,Sweet16,Elite8,Final4,Final,Champs
19,Gonzaga,X01,1.0,0.98,0.928,0.805,0.613,0.497,0.301
22,Houston,Y02,1.0,0.951,0.693,0.474,0.365,0.249,0.148
4,Baylor,Z01,1.0,0.98,0.797,0.649,0.475,0.242,0.144
23,Illinois,Y01,1.0,0.98,0.801,0.572,0.238,0.146,0.076
10,Connecticut,W07,1.0,0.739,0.479,0.341,0.224,0.094,0.045
40,Ohio St,Z02,1.0,0.955,0.668,0.392,0.175,0.079,0.03
1,Alabama,W02,1.0,0.922,0.437,0.307,0.18,0.063,0.026
48,San Diego St,Y06,1.0,0.45,0.372,0.176,0.102,0.059,0.03
25,Iowa,X02,1.0,0.92,0.475,0.25,0.099,0.053,0.026
26,Kansas,X03,1.0,0.825,0.446,0.232,0.09,0.048,0.023
