# Tournament Simulator
#This script simulates how my models perform over multiple simulations of a tournament bracket

In [1]:
# Import packages
import pandas as pd
import numpy as np
import os, sys

# Data directories (relative to this notebook) and the project's src folder,
# which is appended to the import path so local modules can be imported.
raw_path, proc_path = (os.path.join('..', 'data', sub) for sub in ('raw', 'processed'))
sys.path.append(os.path.join('..', 'src'))




In [2]:
# Makes dataset of actual outcomes from tournament

In [3]:
# Loads the three raw tournament tables (seeds, slots, compact results)
seeds, slots, results = (
    pd.read_csv(os.path.join(raw_path, csv_name))
    for csv_name in ('TourneySeeds.csv', 'TourneySlots.csv', 'TourneyCompactResults.csv')
)

# Labels every slot with a round: slots whose name starts with W, X, Y or Z
# are tagged 'R0'; every other slot is tagged with its first two characters.
slots['Round'] = [
    slot[0:2] if slot[0:1] not in ('W', 'X', 'Y', 'Z') else 'R0'
    for slot in slots['Slot']
]

In [4]:
#Method for creating round 0 games
def create_round_0(slots, seeds, results_data=None):
    """Build the round-0 games and attach their actual results.

    Parameters
    ----------
    slots : DataFrame
        Slot table with 'Season', 'Strongseed', 'Weakseed' and 'Round'
        columns; only rows whose 'Round' is 'R0' are used.
    seeds : DataFrame
        Seed table with 'Season', 'Seed' and 'Team' columns.
    results_data : DataFrame, optional
        Game results with 'Season', 'Wteam' and 'Lteam' columns. When
        omitted, the module-level ``results`` frame is used, which is what
        this function always did before the parameter existed.

    Returns
    -------
    DataFrame
        One row per round-0 slot that has a matching game in the results,
        carrying 'StrongTeam', 'WeakTeam', 'matchup' and the result columns.

    Side effects
    ------------
    A 'matchup' column is written into the results frame in place. This is
    kept on purpose: ``score_round`` merges on that column later.
    """
    if results_data is None:
        # Backward-compatible default: fall back to the notebook-level frame.
        results_data = results

    def matchup_keys(first, second):
        # Order-independent game key: "<smaller id>_<larger id>".
        return [str(a) + '_' + str(b) if a < b else str(b) + '_' + str(a)
                for a, b in zip(first, second)]

    games = slots[slots['Round'] == 'R0']

    # Resolve both seeds of every slot to a team id.
    for seed_col, team_col in (('Strongseed', 'StrongTeam'), ('Weakseed', 'WeakTeam')):
        games = games.merge(seeds, left_on=[seed_col, 'Season'],
                            right_on=['Seed', 'Season'], how='left')
        # axis is passed by keyword: the positional form was removed in pandas 2.0.
        games = games.rename(columns={'Team': team_col}).drop('Seed', axis=1)

    # Adds the matchup key to both the slots and the results
    games['matchup'] = matchup_keys(games['StrongTeam'], games['WeakTeam'])
    results_data['matchup'] = matchup_keys(results_data['Wteam'], results_data['Lteam'])

    # Adds results to round 0 games (inner join: unplayed slots are dropped)
    return games.merge(results_data, on=['Season', 'matchup'])

In [5]:
#Method for creating SS for a round
def create_round_ss(rnd, slots_data, prev_results, seeds_data=None):
    """Build the scoring set (one row per game) for tournament round ``rnd``.

    Each participant of a slot is resolved in one of two ways: directly from
    the seed table, or, when the seed label is not in the seed table, from
    the winner ('Wteam') of the previous-round slot carrying that label.

    Parameters
    ----------
    rnd : int or str
        Round number; rows whose 'Round' equals ``'R' + str(rnd)`` are used.
    slots_data : DataFrame
        Slot table with 'Season', 'Slot', 'Strongseed', 'Weakseed', 'Round'.
    prev_results : DataFrame
        Scored games of the previous round; needs 'Season', 'Slot', 'Wteam'.
    seeds_data : DataFrame, optional
        Seed table with 'Season', 'Seed' and 'Team' columns. When omitted,
        the module-level ``seeds`` frame is used, which is what this function
        always did before the parameter existed.

    Returns
    -------
    DataFrame
        The round's slots with integer 'StrongTeam' / 'WeakTeam' ids and an
        order-independent 'matchup' key.

    Raises
    ------
    ValueError
        Presumably raised by ``astype('int')`` when a participant can be
        resolved neither from the seeds nor from ``prev_results`` (the value
        is still NaN at that point).
    """
    if seeds_data is None:
        # Backward-compatible default: fall back to the notebook-level frame.
        seeds_data = seeds

    games = slots_data[slots_data['Round'] == ('R' + str(rnd))]
    winners = prev_results[['Season', 'Slot', 'Wteam']]

    # Adds winners from previous round to this round. The first merge makes
    # 'Slot' collide, producing Slot_x (this round) and Slot_y (previous).
    games = games.merge(winners, left_on=['Strongseed', 'Season'],
                        right_on=['Slot', 'Season'], how='left')
    games = games.rename(index=str, columns={'Wteam': 'StrongTeam_x'})
    games = games.merge(winners, left_on=['Weakseed', 'Season'],
                        right_on=['Slot', 'Season'], how='left')
    # axis is passed by keyword: the positional form was removed in pandas 2.0.
    games = games.drop(['Slot', 'Slot_y'], axis=1)
    games = games.rename(index=str, columns={'Wteam': 'WeakTeam_x', 'Slot_x': 'Slot'})

    # Joins slots with the seeded teams (NaN where the label is not a seed).
    for seed_col, team_col in (('Strongseed', 'StrongTeam'), ('Weakseed', 'WeakTeam')):
        games = games.merge(seeds_data, left_on=[seed_col, 'Season'],
                            right_on=['Seed', 'Season'], how='left')
        # index=str is retained so the returned frame keeps string row labels.
        games = games.rename(index=str, columns={'Team': team_col}).drop('Seed', axis=1)

    # Combines the seeded team with the previous-round winner, seed first.
    for team_col in ('StrongTeam', 'WeakTeam'):
        games[team_col] = games[team_col].fillna(games[team_col + '_x'])
    for team_col in ('StrongTeam', 'WeakTeam'):
        games[team_col] = games[team_col].astype('int')
    games = games.drop(['StrongTeam_x', 'WeakTeam_x'], axis=1)

    # Adds matchup column: "<smaller id>_<larger id>"
    games['matchup'] = [str(a) + '_' + str(b) if a < b else str(b) + '_' + str(a)
                        for a, b in zip(games['StrongTeam'], games['WeakTeam'])]

    return games

In [None]:
# slots_teams_1 = create_round_ss(slots, slots_teams_0)

In [6]:
#Score round
#Takes a given scoring set (SS) and scores it using real data (may need round info, but matchup should suffice)
#Should also allow a custom scorer to look at the SS to determine the winner (not implemented yet)

def score_round(round_data, results):
    """Attach the actual winner to every game of a round.

    ``round_data`` is joined with the 'Season', 'matchup' and 'Wteam' columns
    of ``results`` on ('Season', 'matchup'). The join is an inner join, so
    games without a matching result row are dropped.
    """
    join_keys = ['Season', 'matchup']
    actual_winners = results[join_keys + ['Wteam']]
    return round_data.merge(actual_winners, on=join_keys, how='inner')

In [7]:
# Simulator method

def simulate_tournament(slots, seeds, results):
    """Walk the bracket from round 1 to round 6 using the real outcomes.

    Round 0 is built first with ``create_round_0(slots, seeds)``. Every later
    round builds its scoring set from the previously scored round and is then
    scored against ``results``.

    Note that ``seeds`` is only forwarded to ``create_round_0``; the
    ``create_round_ss`` calls below do not receive it.

    Returns a list of the six scored rounds, in order R1..R6.
    """
    previous_round = create_round_0(slots, seeds)
    scored_rounds = []
    for rnd in range(1, 7):
        round_games = create_round_ss(rnd, slots, previous_round)
        previous_round = score_round(round_games, results)
        scored_rounds.append(previous_round)

    return scored_rounds

In [54]:
# Produce predictions using the scoring set

# Creates the scoring set (SS): the scored round-1 games from 2002 onwards
ss = simulate_tournament(slots, seeds, results)[0]
ss = ss[ss['Season'] >= 2002]
features = pd.read_csv(os.path.join(proc_path, 'team_features.csv'))
teams = pd.read_csv(os.path.join(raw_path, 'teams.csv'))
# axis is passed by keyword: the positional form was removed in pandas 2.0.
features = features.merge(teams, left_on='Team', right_on='Team_Name').drop(['Team', 'Team_Name'], axis=1)
# print(...) with a single argument behaves the same on Python 2 and 3.
print(len(ss))
# Inner joins: games where either team has no feature row are dropped, which
# is why the two printed lengths can differ. The StrongTeam features end up
# with the '_W' suffix and the WeakTeam features with '_L'.
ss = ss.merge(features, left_on=['Season', 'StrongTeam'], right_on=['Season', 'Team_Id'], how='inner')
ss = ss.merge(features, left_on=['Season', 'WeakTeam'], right_on=['Season', 'Team_Id'], suffixes=('_W', '_L'), how='inner')
print(len(ss))

# Label: 1 when the StrongTeam actually won the game, 0 otherwise
y = [1 if strong == winner else 0 for strong, winner in zip(ss['StrongTeam'], ss['Wteam'])]

#Import a model
import pickle
fn = os.path.join(proc_path, 'Models', 'model_v1.p')
# NOTE: pickle executes arbitrary code on load; only load model files you trust.
with open(fn, 'rb') as model_file:
    game_model = pickle.load(model_file)

#prepares scoring sample
ss = ss[list(game_model.input_cols)]

pred_y = game_model.get_pred(ss)

480
448


In [55]:
# Thresholds the raw model output at 0: strictly positive -> 1, otherwise 0
pred_outcome = [int(score > 0) for score in pred_y]

In [56]:
# Per-game hit indicator: 1 when the predicted outcome equals the actual label
accuracy = [int(predicted == actual) for predicted, actual in zip(pred_outcome, y)]

In [59]:
# Share of games where the thresholded prediction equals the actual label
np.mean(accuracy)

0.25669642857142855

In [60]:
# Share of games in which StrongTeam was the actual winner (base rate of y == 1)
np.mean(y)

0.7433035714285714

In [62]:
np.mean(pred_outcome)  # problem identified: a mean of 0.0 means no raw prediction was above 0

0.0

In [71]:
# Sanity check: score the saved scoring set with the same model
ss = pd.read_csv(os.path.join(proc_path, 'scoring_set_v1.csv'))
# Label: 1 when Wscore is strictly greater than Lscore, 0 otherwise
y = [int(w_score > l_score) for w_score, l_score in zip(ss['Wscore'], ss['Lscore'])]
# Keep only the columns the model expects as input
ss = ss.loc[:, list(game_model.input_cols)]
pred = game_model.get_pred(ss)

In [73]:
np.mean(pred)  # this is not right either

-294.86303568071025