In [None]:
import pandas as pd

In [None]:
# Win probabilities written out by W4_ProbabilityAlterations.ipynb
preds = pd.read_csv(filepath_or_buffer='mydata/womens/altered_probabilities.csv')
preds.head(5)

In [None]:
# Lookup table of team IDs and team names
team_names = pd.read_csv('WDataFiles_Stage1/WTeams.csv').loc[:, ['TeamID', 'TeamName']]
team_names.head(5)

In [None]:
# Tournament seeds, restricted to the 2021 season
seeds = (
    pd.read_csv('WDataFiles_Stage2/NCAATourneySeeds.csv')
    .loc[lambda all_seeds: all_seeds['Season'] == 2021, ['TeamID', 'Seed']]
)
seeds.head(5)

In [None]:
# Join team names onto the seeded teams (inner join on TeamID)
teams = team_names.merge(seeds, on='TeamID')
teams.head(5)

In [None]:
# Names of the tournament rounds, in the order they are played.
rounds = ['Round64', 'Round32', 'Sweet16', 'Elite8', 'Final4', 'Final', 'Champs']

# Probability of each team making each round: one float column per round.
# The columns are created as floats (0.0 / 1.0) rather than ints because
# fractional probabilities are written into them later with .loc; assigning a
# float into an int64 column is a deprecated silent upcast in recent pandas.
probs = teams.assign(**{round_name: 0.0 for round_name in rounds})

# Every team defaults to probability 1 for the opening round; the remaining
# rounds stay at 0 until they are computed.
probs['Round64'] = 1.0

In [None]:
# Calculate the probability of each team making each round of the tournament.
#
# NOTE(review): `slots` is read below but is not created in any cell visible in
# this notebook. It must be loaded before this cell runs; the code reads its
# 'Seed', 'GameRound' and 'GameSlot' columns. TODO confirm where it comes from.

# Make the cell safe to re-run. The later-round columns are accumulated with
# "+=" style updates below, so they are reset to 0.0 here; otherwise running
# the cell twice would double-count. The cast to float also guarantees that
# fractional probabilities can be stored even if the columns started as ints.
for round_name in rounds:
    probs[round_name] = probs[round_name].astype(float)
for round_name in rounds[1:]:
    probs[round_name] = 0.0

# dictionary of past opponents for each seed (each team has a unique seed)
past_opponent_seeds = {}

# Loop-invariant lookups, built once instead of re-scanning the frames for
# every candidate opponent. drop_duplicates keeps the first occurrence, which
# matches taking element [0] of a filtered frame.
seeds_in_field = set(teams['Seed'])
seed_to_team_id = teams.drop_duplicates(subset='Seed').set_index('Seed')['TeamID'].to_dict()
pred_by_game_id = preds.drop_duplicates(subset='ID').set_index('ID')['Pred'].to_dict()
first_team_rows = probs.drop_duplicates(subset='TeamID')
row_of_team_id = dict(zip(first_team_rows['TeamID'], first_team_rows.index))

# for each round
for i in range(len(rounds)):

    # slot assignments for the games of this round only
    round_slots = slots[slots['GameRound'] == i]

    # for each team
    for j in probs.index:
        team_seed = probs.loc[j, 'Seed']

        # In the opening round only play-in teams (seed ending in 'a' or 'b')
        # have a game to process; every other team keeps its default of 1.
        if i == 0 and not team_seed.endswith(('a', 'b')):
            continue

        # list of opponents this team has already been matched against
        faced = past_opponent_seeds.setdefault(team_seed, [])

        # get team id
        team_id = seed_to_team_id[team_seed]

        # get game slot for team and round
        slot = round_slots.loc[round_slots['Seed'] == team_seed, 'GameSlot'].tolist()[0]

        # get the possible opponents for the team in the slot
        other_teams_seeds = slots.loc[
            (slots['Seed'] != team_seed) & (slots['GameSlot'] == slot), 'Seed'
        ].tolist()

        # for each possible opponent
        for s in other_teams_seeds:

            # skip opponents already handled in an earlier round and seeds
            # that do not exist in this year's tournament
            if s in faced or s not in seeds_in_field:
                continue
            faced.append(s)
            opponent_id = seed_to_team_id[s]

            id_1 = min(team_id, opponent_id)  # lower team ID of the two teams
            id_2 = max(team_id, opponent_id)  # higher team ID

            # create game ID
            game_id = '2021_' + str(id_1) + '_' + str(id_2)

            # get win probability for that game
            if game_id not in pred_by_game_id:
                raise KeyError('no prediction found for game ' + game_id)
            win_prob = pred_by_game_id[game_id]

            # the stored prediction is from the lower-ID team's point of view,
            # so reverse it when this team is the higher-ID team
            if team_id == id_2:
                win_prob = 1 - win_prob

            if i == 0:
                # opening round: the probability is just the win probability
                # of the play-in game
                probs.loc[j, rounds[i]] = win_prob
            else:
                # row of the opponent in the round-probabilities dataframe
                opponent_index = row_of_team_id[opponent_id]

                # probability is the sum over each possible opponent of:
                # P(team made previous round) * P(opponent made previous round)
                # * P(team beats opponent)
                probs.loc[j, rounds[i]] = (
                    probs.loc[j, rounds[i]]
                    + probs.loc[j, rounds[i - 1]]
                    * probs.loc[opponent_index, rounds[i - 1]]
                    * win_prob
                )

In [None]:
# Sanity check: the probabilities in each round's column should add up to the
# number of teams that play in that round.
for r in rounds:
    print(r, ' sum of probabilties: ', sum(probs[r]), sep='')

In [None]:
# Show the full table of probabilities, most likely champions first
pd.options.display.max_rows = 500
(
    probs
    .drop('TeamID', axis=1)
    .sort_values('Champs', ascending=False)
    .round(3)
)