# Tournament Simulation Analysis

## Libraries

In [1]:
import math
import pandas as pd

In [7]:
# Library settings: raise the pandas display limits so wide/long frames
# are shown without truncation.
pd.options.display.max_columns = 200
pd.options.display.max_rows = 2000

## Data

### Retrieval

In [8]:
# Load the tournament teams.
teams = pd.read_csv('../../../data/tournament_brackets/teams.csv')
# Attach the aggregated simulation outcomes to each team (joined on 'team').
outcomes_tournament = pd.merge(
    teams,
    pd.read_csv('../../../data/simulations/tournament/agg_outcomes.csv'),
    on='team'
)
# Load the aggregated matchups.
matchups_tournament = pd.read_csv('../../../data/simulations/tournament/agg_matchups.csv')

### Cleanup

In [4]:
# Static list of team counts, one per round column (64 down to 2).
round_teams = [64, 32, 16, 8, 4, 2]

In [5]:
# Sort by round reached, then by team name.
# DataFrame.sort() was removed from pandas; sort_values() is its replacement.
# The 'round_reached' column is kept on purpose: the HTML report cell reads it
# by position to pick the cell color.
outcomes_tournament.sort_values(['round_reached', 'team'], ascending=True, inplace=True)
outcomes_tournament.reset_index(drop=True, inplace=True)
# Transform wins to percentages.
# NOTE(review): 20000 is presumably the number of simulated tournaments --
# confirm against the simulation run that produced agg_outcomes.csv.
# The trailing '.' keeps this a float division on Python 2.
win_cols = [c for c in outcomes_tournament.columns if c.startswith('wins_')]
for c in win_cols:
    outcomes_tournament[c] = 100 * outcomes_tournament[c] / 20000.
# Rename the 'wins_*' columns to 'round_<team count>' in one pass; this relies
# on the 'wins_*' columns appearing in the same order as round_teams.
outcomes_tournament.rename(
    columns={c: 'round_' + str(team_count)
             for c, team_count in zip(win_cols, round_teams)},
    inplace=True
)

---

## Analysis

In [6]:
# Display the cleaned outcomes table (win columns are now percentages).
outcomes_tournament

Unnamed: 0,team,seed,round_reached,round_64,round_32,round_16,round_8,round_4,round_2
0,Duke,1,1,97.9,83.35,59.795,35.725,18.375,6.51
1,Wisconsin,1,2,98.04,89.435,75.295,48.53,24.74,16.095
2,Kentucky,1,4,99.605,94.005,87.73,73.665,47.845,34.7
3,Michigan St.,7,4,66.735,18.965,10.325,3.11,0.93,0.22
4,Arizona,2,8,98.775,84.38,68.725,36.685,16.805,10.045
5,Gonzaga,2,8,96.99,76.69,53.815,31.15,15.595,5.4
6,Louisville,4,8,79.85,32.2,7.145,2.27,0.66,0.165
7,Notre Dame,3,8,93.22,67.095,43.015,11.88,3.995,1.77
8,North Carolina,4,16,86.245,60.22,15.095,5.215,1.35,0.44
9,North Carolina St.,8,16,55.96,7.09,2.44,0.525,0.12,0.01


Also print an HTML version for our report:

In [None]:
def _format_pct(pct):
    """Return the HTML text for a percentage value.

    Values of 9.5 and above are shown as whole numbers, values from 0.05
    up to 9.5 with one decimal, smaller positive values as '<0.1%'
    (HTML-escaped), and everything else as '0%'.
    """
    if pct >= 9.5:
        return '%d%%' % round(pct)
    if pct >= 0.05:
        return '%.1f%%' % pct
    if pct > 0:
        return '&lt;0.1%'
    return '0%'


# Emit one HTML table row per team.
# Each row is assembled as a list of fragments and printed with a single
# print(...) call, which is valid on both Python 2 and Python 3 and yields the
# same space-separated output as the former Python 2 `print x,` statements.
# Row layout is positional: team[0] = name, team[1] = seed,
# team[2] = round_reached, team[3:] = per-round percentages.
# NOTE(review): the team name is inserted without HTML escaping; a name
# containing '&' or '<' would need escaping -- confirm against teams.csv.
for team in outcomes_tournament.values:
    cells = [
        '<tr><td style="text-align: right">'
        '<small style="color:gray;font-size:.625em">(%s)</small> '
        '<strong>%s</strong></td>' % (team[1], team[0])
    ]
    # Traverse rounds.
    for result, team_count in zip(team[3:], round_teams):
        # Green when the round's team count exceeds round_reached, else red.
        color = 'green' if team_count > team[2] else 'red'
        cells.append('<td style="text-align: center; color: %s">' % color)
        cells.append(_format_pct(result))
        cells.append('</td>')
    cells.append('</tr>')
    print(' '.join(cells))