# Tournament Simulator
#This script simulates how my models perform over multiple simulations of a tournament bracket

In [1]:
# Import packages
import pandas as pd
import numpy as np
import os, sys
import pickle

#Import other files
raw_path = os.path.join('..', 'data', 'raw')
proc_path = os.path.join('..', 'data', 'processed')
sys.path.append(os.path.join('..', 'src'))

In [2]:
# Makes dataset of actual outcomes from tournament

In [2]:
# Load the three raw tournament tables: game results, bracket slots and seeds
results = pd.read_csv(os.path.join(raw_path, 'TourneyCompactResults.csv'))
slots = pd.read_csv(os.path.join(raw_path, 'TourneySlots.csv'))
seeds = pd.read_csv(os.path.join(raw_path, 'TourneySeeds.csv'))

# Tag every slot with its round: a slot whose name starts with W, X, Y or Z is
# labelled 'R0'; any other slot takes the first two characters of its name.
slots['Round'] = [x[0:2] if x[0:1] not in ['W', 'X', 'Y', 'Z'] else 'R0' for x in slots['Slot']]

# Keep only the seasons after 2003
slots = slots.loc[slots['Season'] > 2003]

In [3]:
#Method for creating round 0 games
def create_round_0(slots, seeds, results_data=None):
    """Build the round-0 games and attach their actual results.

    Parameters
    ----------
    slots : DataFrame
        Bracket slots with 'Season', 'Strongseed', 'Weakseed' and 'Round'
        columns; only rows whose 'Round' is 'R0' are used.
    seeds : DataFrame
        Seed assignments with 'Season', 'Seed' and 'Team' columns.
    results_data : DataFrame, optional
        Game results with 'Season', 'Wteam' and 'Lteam' columns.  When
        omitted, the module-level ``results`` frame is used, which is what
        this function always did before the parameter existed.

    Returns
    -------
    DataFrame
        One row per round-0 slot that has a matching game in the results
        (inner merge on 'Season' and 'matchup'), carrying the slot columns,
        'StrongTeam', 'WeakTeam', 'matchup' and the result columns.

    Side effects
    ------------
    A 'matchup' column is added IN PLACE to the results frame.  This is kept
    on purpose: ``score_round`` merges on ``results['matchup']``.
    """
    game_results = results if results_data is None else results_data

    r0_slots = slots[slots['Round'] == 'R0']

    # Resolve the strong seed, then the weak seed, to a team id.
    # ``axis=1`` is passed by keyword: the positional form was removed in pandas 2.0.
    r0_games = r0_slots.merge(seeds, left_on=['Strongseed', 'Season'],
                              right_on=['Seed', 'Season'], how='left')
    r0_games = r0_games.rename(columns={'Team': 'StrongTeam'}).drop('Seed', axis=1)

    r0_games = r0_games.merge(seeds, left_on=['Weakseed', 'Season'],
                              right_on=['Seed', 'Season'], how='left')
    r0_games = r0_games.rename(columns={'Team': 'WeakTeam'}).drop('Seed', axis=1)

    # 'matchup' is an order-independent key: "<smaller id>_<larger id>"
    r0_games['matchup'] = [str(a) + '_' + str(b) if a < b else str(b) + '_' + str(a)
                           for a, b in zip(r0_games['StrongTeam'], r0_games['WeakTeam'])]
    game_results['matchup'] = [str(a) + '_' + str(b) if a < b else str(b) + '_' + str(a)
                               for a, b in zip(game_results['Wteam'], game_results['Lteam'])]

    # Attach the real result of each round-0 game
    return r0_games.merge(game_results, on=['Season', 'matchup'])

In [4]:
#Method for creating SS for a round
def create_round_ss(rnd, slots_data, prev_results, seeds_data=None):
    """Build the scoring set (one row per game) for round ``rnd``.

    Each side of a slot is resolved to a team id either directly from the
    seed table (when the slot side is a seed) or from the winner of the
    previous-round slot it refers to.

    Parameters
    ----------
    rnd : int
        Round number; rows whose 'Round' equals ``'R' + str(rnd)`` are used.
    slots_data : DataFrame
        Bracket slots with 'Season', 'Slot', 'Strongseed', 'Weakseed', 'Round'.
    prev_results : DataFrame
        Scored previous round with 'Season', 'Slot' and 'Wteam' columns.
    seeds_data : DataFrame, optional
        Seed assignments with 'Season', 'Seed' and 'Team' columns.  When
        omitted, the module-level ``seeds`` frame is used, which is what this
        function always did before the parameter existed.

    Returns
    -------
    DataFrame
        The slot columns plus integer 'StrongTeam' / 'WeakTeam' and the
        order-independent 'matchup' key ("<smaller id>_<larger id>").

    Raises
    ------
    ValueError
        From ``astype('int')`` when a side cannot be resolved to a team.
    """
    seed_table = seeds if seeds_data is None else seeds_data
    winners = prev_results[['Season', 'Slot', 'Wteam']]

    round_slots = slots_data[slots_data['Round'] == ('R' + str(rnd))]

    # Winners of the previous round, looked up by the slot each side points at.
    # The first merge suffixes the clashing 'Slot' columns as Slot_x / Slot_y.
    round_slots = round_slots.merge(winners, left_on=['Strongseed', 'Season'],
                                    right_on=['Slot', 'Season'], how='left')
    round_slots = round_slots.rename(columns={'Wteam': 'StrongTeam_x'})
    round_slots = round_slots.merge(winners, left_on=['Weakseed', 'Season'],
                                    right_on=['Slot', 'Season'], how='left')
    # ``axis=1`` by keyword: the positional form was removed in pandas 2.0.
    round_slots = round_slots.drop(['Slot', 'Slot_y'], axis=1)
    round_slots = round_slots.rename(columns={'Wteam': 'WeakTeam_x', 'Slot_x': 'Slot'})

    # Teams that enter this round directly by seed
    games = round_slots.merge(seed_table, left_on=['Strongseed', 'Season'],
                              right_on=['Seed', 'Season'], how='left')
    games = games.rename(columns={'Team': 'StrongTeam'}).drop('Seed', axis=1)
    games = games.merge(seed_table, left_on=['Weakseed', 'Season'],
                        right_on=['Seed', 'Season'], how='left')
    # index=str keeps the string row labels the original implementation returned
    games = games.rename(index=str, columns={'Team': 'WeakTeam'}).drop('Seed', axis=1)

    # Prefer the seeded team; fall back to the previous-round winner
    for side in ('StrongTeam', 'WeakTeam'):
        games[side] = games[side].fillna(games[side + '_x']).astype('int')
    games = games.drop(['StrongTeam_x', 'WeakTeam_x'], axis=1)

    games['matchup'] = [str(a) + '_' + str(b) if a < b else str(b) + '_' + str(a)
                        for a, b in zip(games['StrongTeam'], games['WeakTeam'])]

    return games

In [5]:
# slots_teams_1 = create_round_ss(slots, slots_teams_0)

In [6]:
#Score round
#Function takes a given SS (scoring set) and scores it using real data (may need round info, but matchup should suffice)
#Also, allow a custom scorer to look at SS to determine winner

def score_round(round_data, results, pred=False, model=None, features=None):
    """Attach a winner ('Wteam') to every game of a round.

    Parameters
    ----------
    round_data : DataFrame
        Games of the round with 'Season', 'StrongTeam', 'WeakTeam' and
        'matchup' columns.
    results : DataFrame
        Real results with 'Season', 'matchup' and 'Wteam' columns; only used
        when ``pred`` is false.
    pred : bool
        False: take winners from ``results`` (games without a matching result
        are dropped by the inner merge).  True: predict winners with a model.
    model : object, optional
        Object exposing ``input_cols`` and ``get_pred(frame)``.  When omitted,
        the pickled model at ``<proc_path>/Models/model_v1.p`` is loaded.
    features : DataFrame, optional
        Per-team features with 'Season' and 'Team' columns.  When omitted they
        are read from ``<proc_path>/team_features.csv``.

    Returns
    -------
    DataFrame
        The round's games with a 'Wteam' column.  In prediction mode the
        strong team wins when the model output is > 0, otherwise the weak team.

    Raises
    ------
    ValueError
        In prediction mode, when the feature merges do not yield exactly one
        row per game (a team has no features, or has duplicate feature rows);
        predictions could then not be lined up with the games.
    """
    if not pred:
        return round_data.merge(results[['Season', 'matchup', 'Wteam']], on=['Season', 'matchup'])

    if features is None:
        features = pd.read_csv(os.path.join(proc_path, 'team_features.csv'))

    # Strong-team features get the '_W' suffix, weak-team features '_L'
    ss = round_data.merge(features, left_on=['Season', 'StrongTeam'],
                          right_on=['Season', 'Team'], how='inner')
    ss = ss.merge(features, left_on=['Season', 'WeakTeam'],
                  right_on=['Season', 'Team'], suffixes=('_W', '_L'), how='inner')
    if len(ss) != len(round_data):
        raise ValueError('feature merge produced %d rows for %d games; '
                         'team features are missing or duplicated'
                         % (len(ss), len(round_data)))

    game_model = model
    if game_model is None:
        # NOTE: unpickling executes arbitrary code; only load trusted model files.
        fn = os.path.join(proc_path, 'Models', 'model_v1.p')
        with open(fn, 'rb') as model_file:
            game_model = pickle.load(model_file)

    # Model inputs, columns in alphabetical order
    ss = ss[list(game_model.input_cols)].sort_index(axis=1)

    scored_round = round_data.copy()
    scored_round['Wteam'] = [s if o > 0 else w
                             for o, s, w in zip(game_model.get_pred(ss),
                                                scored_round['StrongTeam'],
                                                scored_round['WeakTeam'])]
    return scored_round

In [7]:
# Simulator method

def simulate_tournament(slots, seeds, results, pred=False, model=None):
    """Play rounds 1 through 6 of the bracket and return the scored rounds.

    Round 0 is built with ``create_round_0(slots, seeds)``.  Each later round
    is then assembled from the previous round's output by ``create_round_ss``
    and scored by ``score_round`` (``results``, ``pred`` and ``model`` are
    passed straight through).

    Returns a list of six DataFrames, the scored rounds 1..6 in order.
    """
    scored_rounds = []
    previous = create_round_0(slots, seeds)
    for rnd in range(1, 7):
        round_ss = create_round_ss(rnd, slots, previous)
        previous = score_round(round_ss, results, pred, model)
        scored_rounds.append(previous)
    return scored_rounds

In [8]:
# Bracket scored with the real results (pred=False): list of six scored-round frames
actuals = simulate_tournament(slots, seeds, results, False)

In [14]:
# Bracket scored with model predictions (pred=True): list of six scored-round frames
pred = simulate_tournament(slots, seeds, results, True)

In [19]:
# Last element of the list: the predicted round-6 games
pred[5]

Unnamed: 0,Season,Slot,Strongseed,Weakseed,Round,StrongTeam,WeakTeam,matchup,Wteam
0,2004,R6CH,R5WX,R5YZ,R6,1181,1386,1181_1386,1181
1,2005,R6CH,R5WX,R5YZ,R6,1228,1242,1228_1242,1228
2,2006,R6CH,R5WX,R5YZ,R6,1181,1437,1181_1437,1181
3,2007,R6CH,R5WX,R5YZ,R6,1326,1417,1326_1417,1326
4,2008,R6CH,R5WX,R5YZ,R6,1242,1181,1181_1242,1181
5,2009,R6CH,R5WX,R5YZ,R6,1338,1272,1272_1338,1338
6,2010,R6CH,R5WX,R5YZ,R6,1452,1242,1242_1452,1242
7,2011,R6CH,R5WX,R5YZ,R6,1326,1242,1242_1326,1242
8,2012,R6CH,R5WX,R5YZ,R6,1314,1277,1277_1314,1277
9,2013,R6CH,R5WX,R5YZ,R6,1196,1307,1196_1307,1307


In [23]:
# Tournament-style score: a correctly predicted winner in list position idx
# (round idx+1) is worth 2**idx points.
score = 0
for idx in range(6):
    act = actuals[idx][['Season', 'Slot', 'Wteam']]
    pre = pred[idx][['Season', 'Slot', 'Wteam']]
    # Line up actual and predicted winners per season and slot
    comb = act.merge(pre, on=['Season', 'Slot'])
    comb['Correct'] = [1 if a==b else 0 for a,b in zip(comb['Wteam_x'], comb['Wteam_y'])]
    pts = np.sum(comb['Correct'])
    score += pts * (2**idx)
# Parenthesised form runs on both Python 2 and Python 3
print(score)

941


In [None]:
#Scores predictions using tournament style scoring system
def score_model(slots, seeds, results, model):
    """Return the tournament-style score of the predicted bracket.

    The bracket is simulated twice with ``simulate_tournament``: once with
    the real results and once in prediction mode with ``model``.  For each of
    the six rounds, the winners are matched on 'Season' and 'Slot'; every
    agreement in list position ``idx`` is worth ``2**idx`` points.
    """
    actual_rounds = simulate_tournament(slots, seeds, results, False)
    predicted_rounds = simulate_tournament(slots, seeds, results, True, model)

    wanted = ['Season', 'Slot', 'Wteam']
    total = 0
    for idx in range(6):
        paired = actual_rounds[idx][wanted].merge(predicted_rounds[idx][wanted],
                                                  on=['Season', 'Slot'])
        # Wteam_x is the actual winner, Wteam_y the predicted one
        hits = (paired['Wteam_x'] == paired['Wteam_y']).sum()
        total += hits * (2 ** idx)
    return total

In [54]:
# Produce predictions using the scoring set.  Uses 2003+ to make the scoring set more similar to today's environment

# Creates the scoring set (SS) from the first scored round of the actual bracket
# (`False`, not `false`: the lowercase name is undefined in Python)
ss = simulate_tournament(slots, seeds, results, False)[0]
ss = ss[ss['Season']>2002]
features = pd.read_csv(os.path.join(proc_path, 'team_features.csv'))
#teams = pd.read_csv(os.path.join(raw_path, 'teams.csv'))
#features = features.merge(teams, left_on='Team', right_on='Team_Name').drop(['Team', 'Team_Name'], axis=1)
# Parenthesised form runs on both Python 2 and Python 3
print(len(ss))


448


In [55]:
# Attach team features to each game: strong-team columns get '_W', weak-team columns '_L'
ss = ss.merge(features, left_on=['Season', 'StrongTeam'], right_on=['Season', 'Team'], how='inner')
ss = ss.merge(features, left_on=['Season', 'WeakTeam'], right_on=['Season', 'Team'], suffixes=('_W', '_L'), how='inner')
# Row count after the inner merges (parenthesised print runs on Python 2 and 3)
print(len(ss))

# Label: 1 when the strong team actually won, else 0.
# The loop variables no longer reuse the name `y` being assigned.
y = [1 if strong == winner else 0 for strong, winner in zip(ss['StrongTeam'], ss['Wteam'])]

#Import a model
import pickle
fn = os.path.join(proc_path, 'Models', 'model_v1.p')
# NOTE: unpickling executes arbitrary code; only load trusted model files.
with open(fn, 'rb') as model_file:
    game_model = pickle.load(model_file)


448


In [70]:
# Step through the first round by hand: round-0 games, round-1 scoring set,
# then round 1 scored with the real results (pred defaults to False)
r0_data = create_round_0(slots, seeds)
r1_ss = create_round_ss(1, slots, r0_data)
r1_scored = score_round(r1_ss, results)

In [71]:
# Show the round-1 scoring set (no 'Wteam' column yet)
r1_ss

Unnamed: 0,Season,Slot,Strongseed,Weakseed,Round,StrongTeam,WeakTeam,matchup
0,1985,R1W1,W01,W16,R1,1207,1250,1207_1250
1,1985,R1W2,W02,W15,R1,1210,1273,1210_1273
2,1985,R1W3,W03,W14,R1,1228,1318,1228_1318
3,1985,R1W4,W04,W13,R1,1260,1233,1233_1260
4,1985,R1W5,W05,W12,R1,1374,1330,1330_1374
5,1985,R1W6,W06,W11,R1,1208,1455,1208_1455
6,1985,R1W7,W07,W10,R1,1393,1177,1177_1393
7,1985,R1W8,W08,W09,R1,1396,1439,1396_1439
8,1985,R1X1,X01,X16,R1,1385,1380,1380_1385
9,1985,R1X2,X02,X15,R1,1433,1267,1267_1433


In [72]:
# Show round 1 after scoring ('Wteam' added)
r1_scored

Unnamed: 0,Season,Slot,Strongseed,Weakseed,Round,StrongTeam,WeakTeam,matchup,Wteam
0,1985,R1W1,W01,W16,R1,1207,1250,1207_1250,1207
1,1985,R1W2,W02,W15,R1,1210,1273,1210_1273,1210
2,1985,R1W3,W03,W14,R1,1228,1318,1228_1318,1228
3,1985,R1W4,W04,W13,R1,1260,1233,1233_1260,1260
4,1985,R1W5,W05,W12,R1,1374,1330,1330_1374,1374
5,1985,R1W6,W06,W11,R1,1208,1455,1208_1455,1208
6,1985,R1W7,W07,W10,R1,1393,1177,1177_1393,1393
7,1985,R1W8,W08,W09,R1,1396,1439,1396_1439,1396
8,1985,R1X1,X01,X16,R1,1385,1380,1380_1385,1385
9,1985,R1X2,X02,X15,R1,1433,1267,1267_1433,1433


In [57]:
# Keep only the model's input columns, ordered alphabetically by column name
ss = ss[list(game_model.input_cols)].sort_index(axis=1)
ss.columns

Index([u'Clutch_L', u'Clutch_W', u'SOS_L', u'SOS_W', u'WinPct_L', u'WinPct_W',
       u'd3P_L', u'd3P_W', u'deff_L', u'deff_W', u'home_adv_L', u'home_adv_W',
       u'o3P_L', u'o3P_W', u'oeff_L', u'oeff_W', u'poss_stdev_L',
       u'poss_stdev_W', u'total_poss_L', u'total_poss_W'],
      dtype='object')

In [58]:
# Input columns in the order stored on the model (not sorted)
list(game_model.input_cols)

['d3P_L',
 'poss_stdev_W',
 'total_poss_L',
 'WinPct_L',
 'Clutch_W',
 'o3P_L',
 'oeff_L',
 'home_adv_L',
 'SOS_L',
 'deff_L',
 'oeff_W',
 'o3P_W',
 'Clutch_L',
 'deff_W',
 'SOS_W',
 'home_adv_W',
 'd3P_W',
 'WinPct_W',
 'total_poss_W',
 'poss_stdev_L']

In [59]:
# NOTE(review): the slice below is evaluated but its result is not used
ss[0:5]
# Save the model-input frame to ss_temp.csv in the processed-data folder, without the index
ss.to_csv(os.path.join(proc_path,'ss_temp.csv'), index=False)

In [60]:
# Sanity check: model output for the first five games
game_model.get_pred(ss[0:5])

array([ 19.16861892,  12.8513426 ,   4.19942841,   9.91328612,   3.92425115])

In [61]:
#predicts values
pred_y = game_model.get_pred(ss)
pred_y

array([ 19.16861892,  12.8513426 ,   4.19942841,   9.91328612,
         3.92425115,   3.30408125,   2.65986859,   2.81804935,
        22.08942349,  12.00858506,   6.02216432,   8.5735744 ,
        -3.38651446,  -0.97833614,   2.03509379,   1.03694783,
        21.5636696 ,  14.66510877,   7.78702637,   6.30614138,
         3.54061863,   3.98376937,  -2.51786947,  -6.34515671,
        16.16681728,  10.57608504,   9.18980416,   6.72297459,
         3.64870217,   3.28129538,   2.18754203,   1.78987161,
        27.74837468,  18.75538406,  12.53419732,   6.10882441,
         5.56286596,   8.91992349,  -5.68865052,  -1.14136443,
        21.17135542,  14.45843843,   7.33348484,   1.31071134,
         0.03749471,   1.48551966,  -0.15469824,  -1.43206241,
        21.7802957 ,  14.78744285,  12.25442229,   6.64503422,
         6.87251538,   6.61788566,   3.60124036,  -0.37479266,
        28.84578521,  15.74162306,   8.47797766,   7.13763073,
         8.77100048,   3.07579235,  -1.76071494,  -6.89

In [67]:
# Actual labels: 1 where the strong team won, 0 otherwise
y

[1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
