In [98]:
import numpy as np
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.models.tools import HoverTool
output_notebook()

import os
os.chdir('C:\\Users\\kyles\\Documents\\GitHub\\bracketology')
from bracketology import Bracket

Probability of a seed s making it to round r

In [9]:
b2019 = Bracket(2019)

In [10]:
brackets = []
for year in range(1985,2019+1):
    brackets.append(Bracket(year))

In [11]:
n_years = len(brackets)
n_subbrackets = n_years * 4 # there are four regions per bracket
seeds = [s+1 for s in range(16)] # potential seeds, 1-16

In [12]:
bracket = brackets[0]

In [13]:
second_round_seeds = []
third_round_seeds = []
fourth_round_seeds = []
fifth_round_seeds = []
sixth_round_seeds = []
seventh_round_seeds = []

for bracket in brackets:

    second_round_seeds += [team['Seed'] for team in bracket.result['second']]
    third_round_seeds += [team['Seed'] for team in bracket.result['sweet16']]
    fourth_round_seeds += [team['Seed'] for team in bracket.result['elite8']]
    fifth_round_seeds += [team['Seed'] for team in bracket.result['final4']]
    sixth_round_seeds += [team['Seed'] for team in bracket.result['championship']]
    seventh_round_seeds += [bracket.result['winner']['Seed']]

In [14]:
seed_round_probability = {}
for seed in seeds:
    prob_first = 1.0
    prob_second = sum([s==seed for s in second_round_seeds]) / n_subbrackets
    prob_third = sum([s==seed for s in third_round_seeds]) / n_subbrackets
    prob_fourth = sum([s==seed for s in fourth_round_seeds]) / n_subbrackets
    prob_fifth = sum([s==seed for s in fifth_round_seeds]) / n_subbrackets
    prob_sixth = sum([s==seed for s in sixth_round_seeds]) / (n_subbrackets/2)
    prob_seventh = sum([s==seed for s in seventh_round_seeds]) / (n_subbrackets/4)
    
    seed_round_probability[(seed,1)] = round(prob_first,3)
    seed_round_probability[(seed,2)] = round(prob_second,3)
    seed_round_probability[(seed,3)] = round(prob_third,3)
    seed_round_probability[(seed,4)] = round(prob_fourth,3)
    seed_round_probability[(seed,5)] = round(prob_fifth,3)
    seed_round_probability[(seed,6)] = round(prob_sixth,3)
    seed_round_probability[(seed,7)] = round(prob_seventh,3)

In [15]:
probs_df = pd.Series(seed_round_probability).reset_index()
probs_df.columns = ['seed','round','prob']

In [16]:
print(probs_df[(probs_df.seed.isin([1,16])) & (probs_df['round'] == 2)].prob.sum())
print(probs_df[(probs_df.seed.isin([2,15])) & (probs_df['round'] == 2)].prob.sum())
print(probs_df[(probs_df.seed.isin([3,14])) & (probs_df['round'] == 2)].prob.sum())
print(probs_df[(probs_df.seed.isin([4,13])) & (probs_df['round'] == 2)].prob.sum())
print(probs_df[(probs_df.seed.isin([5,12])) & (probs_df['round'] == 2)].prob.sum())
print(probs_df[(probs_df.seed.isin([6,11])) & (probs_df['round'] == 2)].prob.sum())
print(probs_df[(probs_df.seed.isin([7,10])) & (probs_df['round'] == 2)].prob.sum())
print(probs_df[(probs_df.seed.isin([8,9])) & (probs_df['round'] == 2)].prob.sum())

print(probs_df[(probs_df.seed.isin([1,16,8,9])) & (probs_df['round'] == 3)].prob.sum())
print(probs_df[(probs_df.seed.isin([4,13,5,12])) & (probs_df['round'] == 3)].prob.sum())
print(probs_df[(probs_df.seed.isin([3,14,6,11])) & (probs_df['round'] == 3)].prob.sum())
print(probs_df[(probs_df.seed.isin([2,15,7,10])) & (probs_df['round'] == 3)].prob.sum())

print(probs_df[(probs_df.seed.isin([1,16,8,9,4,13,5,12])) & (probs_df['round'] == 4)].prob.sum())
print(probs_df[(probs_df.seed.isin([2,15,7,10,3,14,6,11])) & (probs_df['round'] == 4)].prob.sum())

print(probs_df[(probs_df.seed.isin(seeds)) & (probs_df['round'] == 5)].prob.sum())

1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.999
0.999


In [138]:
probs_pivot = probs_df.pivot(index='round',columns='seed',values='prob')
probs_pivot['round'] = probs_pivot.index
probs_dict = probs_pivot.to_dict('list')
probs_dict = {str(k):probs_dict[k] for k in probs_dict}
probs_dict['round_names'] = ['First','Second','Sweet 16','Elite 8','Final Four','Championship','Winner']
probs_dict['half'] = [0.5] * 7

In [126]:
for seed in probs_dict:
    if seed != 'round':
        x = probs_dict[seed]
    print(seed, probs_dict[seed])

1 [1.0, 0.993, 0.857, 0.693, 0.407, 0.486, 0.629]
2 [1.0, 0.943, 0.636, 0.457, 0.207, 0.186, 0.143]
3 [1.0, 0.85, 0.529, 0.257, 0.121, 0.157, 0.114]
4 [1.0, 0.793, 0.471, 0.15, 0.093, 0.043, 0.029]
5 [1.0, 0.643, 0.336, 0.064, 0.05, 0.043, 0.0]
6 [1.0, 0.629, 0.3, 0.1, 0.021, 0.029, 0.029]
7 [1.0, 0.607, 0.193, 0.071, 0.021, 0.014, 0.029]
8 [1.0, 0.486, 0.093, 0.057, 0.036, 0.043, 0.029]
9 [1.0, 0.514, 0.05, 0.029, 0.007, 0.0, 0.0]
10 [1.0, 0.393, 0.164, 0.057, 0.007, 0.0, 0.0]
11 [1.0, 0.371, 0.157, 0.057, 0.029, 0.0, 0.0]
12 [1.0, 0.357, 0.15, 0.007, 0.0, 0.0, 0.0]
13 [1.0, 0.207, 0.043, 0.0, 0.0, 0.0, 0.0]
14 [1.0, 0.15, 0.014, 0.0, 0.0, 0.0, 0.0]
15 [1.0, 0.057, 0.007, 0.0, 0.0, 0.0, 0.0]
16 [1.0, 0.007, 0.0, 0.0, 0.0, 0.0, 0.0]
round [1, 2, 3, 4, 5, 6, 7]
round_names ['First', 'Second', 'Sweet 16', 'Elite 8', 'Final Four', 'Championship', 'Winner']


In [133]:
TOOLTIPS = []
TOOLTIPS += [('Round', '@round_names')]
TOOLTIPS += [(f'Seed: {i}', f'@{i}') for i in seeds]

In [142]:
#source = ColumnDataSource(pb)
p = figure(plot_width=900, plot_height=700, title="NCAA Probability")
renderers = [p.line(x='round', y='half', color='white', line_dash='dashed', source=probs_dict)]
for seed in seeds:
    p.line(x='round', y=str(seed), source=probs_dict)

hover = \
HoverTool(
    tooltips=TOOLTIPS,
    show_arrow=False, 
    line_policy='nearest',
    renderers=renderers,

    # display a tooltip whenever the cursor is vertically in line with a glyph
    mode='vline'
)
p.add_tools(hover)
show(p)

In [113]:
2/35

0.05714285714285714