# Overview
* We seek to find the most likely winner of a selected playoff bracket given historical data.
* If we assume that teams are the primary driver, and that games between teams are Bernoulli trials, then we can simulate the entire bracket with a Monte Carlo simulation.
* This entails defining the individual probabilities of a win given the two teams involved
    * Practically this means creating a matrix s.t.:
    
    $P_{ij} = P(i,j) =$ probability of team i beating team j
    
    Note this naturally leads to $P_{ij} = 1 - P_{ji}$
    
    
* So, given the data we gathered previously, how can we calculate such a matrix, and how do we set up such a Monte Carlo simulation?

In [None]:
import numpy as np
import pandas as pd
import json
from scipy import stats

from bracket_utils import simulate,trial
from generate_probabilities import gen_prob

In [None]:
def define_seasons(years, playoff_years):
    """Load the requested season tables into one list of DataFrames.

    Regular-season files (``data/<year>/regular.csv``) are read first, in the
    order given by ``years``; playoff files (``data/<year>/playoffs.csv``)
    follow, in the order given by ``playoff_years``.

    Returns a list holding one DataFrame per file read.
    """
    regular = [pd.read_csv(f'data/{season}/regular.csv') for season in years]
    playoffs = [pd.read_csv(f'data/{season}/playoffs.csv') for season in playoff_years]
    return regular + playoffs

In [None]:
# Season whose bracket we want to predict.
prediction_year = 2018

# Regular-season window: the prediction year plus the three seasons before it,
# i.e. four years of data in total.
reg_included = [prediction_year - offset for offset in range(3, -1, -1)]

# Playoff seasons to fold in; currently none.
# (Earlier candidate list, kept for reference: [2014, 2015, 2016, 2017, 2018].)
poffs_included = []

# Guard against feeding the simulation the very playoffs it is meant to predict.
assert prediction_year not in poffs_included, "Included playoff data from prediction year!"

seasons = define_seasons(reg_included, poffs_included)

# Bracket description for the prediction year.
with open(f'data/{prediction_year}/bracket.json', 'r') as f:
    bracket = json.loads(f.read())

In [None]:
# Number of simulated brackets to run.
num_trials = 100_000

# Presumably the pairwise win-probability matrix P_ij described in the
# overview -- confirm against generate_probabilities.gen_prob.
pmat = gen_prob(bracket, seasons)

# Seed NumPy's global RNG right before simulating (kept after gen_prob, as in
# the original ordering); assumes simulate draws from np.random -- TODO confirm.
np.random.seed(0)

# simulate returns three values; only the middle one (the winners) is used here.
_, winners, _ = simulate(num_trials, pmat)

In [None]:
# Tally how often each distinct value appears in the simulated winners.
indicies, counts = np.unique(winners, return_counts=True)

# Rank by win count, most frequent first (ties fall back to the higher index),
# then translate each index into its entry of the bracket's first-round list.
ranked = sorted(zip(counts, indicies), reverse=True)
results = [(wins, bracket['first_round'][slot]) for wins, slot in ranked]

print(f"{'WINNER PREDICTION':-^60}")
# The header uses a 32-wide team column while rows use 30: the numeric columns
# are right-aligned, so this lines the header text up with the digits.
print(f"{'team':32s}{'prob':8s}{'cumsum':8s}")

cumulative = 0
for count, team in results:
    # Share of trials won by this team, and the running total so far.
    prob = count / num_trials
    cumulative += prob

    # Flag the row when the bracket file records a winner and it is this team.
    is_actual = bracket['winner'] and team == bracket['winner'][0]
    actual = 'ACTUAL WINNER' if is_actual else ''

    print(f'{team:30s}{prob:8.4f}{cumulative:8.4f}  {actual:15s}')