In [2]:
import cPickle as pkl
from cspython.scraper import modifiedSoup
from cspython.data_processing import process_scrapped
from cspython.analysis import roster_match
import cPickle as pkl
import sys
import os
sys.setrecursionlimit(15000)
import pandas as pd
import itertools

In [3]:
def filter_map_pool(maps_to_filter, overview, big_data):
    """Return the series ids whose map pool contains none of ``maps_to_filter``.

    Parameters
    ----------
    maps_to_filter : iterable
        Map names that disqualify a series.
    overview : pandas.DataFrame
        Must provide an ``'id'`` column holding the series ids.
    big_data : mapping
        Looked up by series id; each entry exposes a ``'map_pool'`` container.

    Returns
    -------
    pandas.Series
        The entries of ``overview['id']`` (original index labels kept) whose
        series does not list any of the filtered maps.
    """
    series_ids = overview.loc[:, 'id']
    rejected = []
    for series_id in series_ids:
        entry = big_data[series_id]
        if any(name in entry['map_pool'] for name in maps_to_filter):
            rejected.append(series_id)
    keep_mask = ~series_ids.isin(rejected)
    return series_ids.loc[keep_mask]


def roster_match_threshold(cur_roster, thresh, team_name, big_data):
    """Keep only the roster-match values that reach ``thresh``.

    Scoring is delegated to ``roster_match(big_data, team_name, cur_roster)``;
    the entries of its result that are ``>= thresh`` are returned with their
    index labels untouched.
    """
    match_scores = roster_match(big_data, team_name, cur_roster)
    meets_threshold = match_scores >= thresh
    return match_scores.loc[meets_threshold]


def create_veto_matrix(match_idxs, big_data, map_pool, team_name):
    """Tally which maps were still available each time ``team_name`` acted on a map.

    Each series starts with the full ``map_pool``. Veto lines are processed in
    order:

    * lines containing ``'left over'`` or ``'remaining'`` are ignored;
    * lines that do not contain ``team_name`` only shrink the remaining pool
      (the map is taken to be the last space-separated token);
    * lines containing ``team_name`` and ``'picked'`` remove the picked map and
      subtract 1 from ``matrix[picked, m]`` for every map ``m`` still remaining;
    * lines containing ``team_name`` and ``'removed'`` remove that map and add 1
      to ``matrix[removed, m]`` for every map ``m`` still remaining;
    * any other line mentioning ``team_name`` is skipped.

    Parameters
    ----------
    match_idxs : iterable
        Keys into ``big_data``.
    big_data : mapping
        ``big_data[idx]['vetos']`` is ``None`` (series skipped) or an iterable
        of veto strings.
    map_pool : list
        Map names; used as both the index and the columns of the result.
    team_name : str
        Substring used to attribute a veto line to the team.

    Returns
    -------
    pandas.DataFrame
        ``len(map_pool)`` x ``len(map_pool)`` frame of integer tallies.

    Raises
    ------
    ValueError
        If a veto line names a map that is not in the remaining pool.
    """
    # Build the zero matrix directly instead of filling an empty (object) frame.
    veto_matrix = pd.DataFrame(0, index=map_pool, columns=map_pool)
    for idx in match_idxs:
        vetos = big_data[idx]['vetos']
        if vetos is None:
            continue
        remaining = list(map_pool)
        for v in vetos:
            if 'left over' in v or 'remaining' in v:
                continue
            if team_name not in v:
                remaining.remove(v.split(' ')[-1])
                continue
            # The action is decided per line, so nothing carries over from a
            # previous veto; the old code could reuse a stale removed map (or
            # hit an unbound local) on lines that were neither a pick nor a
            # removal.
            if 'picked' in v:
                acted_map = v.split('picked')[-1].strip()
                delta = -1
            elif 'removed' in v:
                acted_map = v.split('removed')[-1].strip()
                delta = 1
            else:
                continue
            remaining.remove(acted_map)
            for m in remaining:
                veto_matrix.loc[acted_map, m] += delta

    return veto_matrix

def combine_vm_data(vm):
    """Return the antisymmetric combination of a veto matrix.

    For every pair of index labels the result holds
    ``result[row, col] = vm[col, row] - vm[row, col]``, i.e. the transpose
    minus the matrix itself, restricted to the labels in ``vm.index``.

    The result is a new float frame whose index and columns both follow
    ``vm.index``; ``vm`` is not modified. Computed in one vectorised step
    rather than by enlarging an empty frame one cell at a time.
    """
    labels = vm.index
    square = vm.loc[labels, labels]
    return (square.T - square).astype(float)

def num_times_vetoted(map_pool, big_data, match_idxs, team_name):
    """Average number of times per series that ``team_name`` removed each map.

    A veto line counts when it contains both ``team_name`` and ``'removed'``;
    the map is taken to be the last space-separated token of the line. Lines
    containing ``'left over'`` or ``'remaining'`` are ignored.

    Parameters
    ----------
    map_pool : list
        Map names; index of the returned series.
    big_data : mapping
        ``big_data[idx]['vetos']`` is an iterable of veto strings; series whose
        value is empty or ``None`` are skipped and not counted.
    match_idxs : iterable
        Keys into ``big_data``.
    team_name : str
        Substring used to attribute a veto line to the team.

    Returns
    -------
    pandas.Series
        Per-map removal count divided by the number of series that had veto
        data. The division is not guarded against a zero count.

    Raises
    ------
    KeyError
        If the team removed a map that is not in ``map_pool``.
    """
    r = pd.Series(0.0, index=map_pool)
    count = 0
    for idx in match_idxs:
        vetos = big_data[idx]['vetos']
        # Treat None like an empty list, as create_veto_matrix does; the old
        # `== []` test let None through and then failed when iterating it.
        if not vetos:
            continue
        for v in vetos:
            if 'left over' in v or 'remaining' in v:
                continue
            # The opponent's lines need no bookkeeping here. The old code
            # removed their maps from a list that was never read, which could
            # only raise ValueError.
            if team_name in v and 'removed' in v:
                r.loc[v.split(' ')[-1]] += 1
        count += 1
    return r / count

def num_times_selected(map_pool, big_data, match_idxs, team_name):
    """Share of all recorded picks that went to each map.

    Every veto line containing ``'picked'`` counts as one pick of the map named
    by the line's last space-separated token. Lines containing ``'left over'``
    or ``'remaining'`` are ignored.

    NOTE(review): ``team_name`` is not used, so picks by either team are
    counted, and the denominator is the total number of picks rather than the
    number of series (unlike ``num_times_vetoted``). Confirm whether that is
    intended before changing it; the numbers are left as they were.

    Parameters
    ----------
    map_pool : list
        Map names; index of the returned series.
    big_data : mapping
        ``big_data[idx]['vetos']`` is an iterable of veto strings; series whose
        value is empty or ``None`` are skipped.
    match_idxs : iterable
        Keys into ``big_data``.
    team_name : str
        Accepted for signature parity; currently unused.

    Returns
    -------
    pandas.Series
        Per-map pick count divided by the total pick count. The division is
        not guarded against a zero count.

    Raises
    ------
    KeyError
        If a picked map is not in ``map_pool``.
    """
    r = pd.Series(0.0, index=map_pool)
    count = 0
    for idx in match_idxs:
        vetos = big_data[idx]['vetos']
        # Treat None like an empty list, as create_veto_matrix does; the old
        # `== []` test let None through and then failed when iterating it.
        if not vetos:
            continue
        for v in vetos:
            if 'left over' in v or 'remaining' in v:
                continue
            if 'picked' in v:
                r.loc[v.split(' ')[-1]] += 1
                count += 1
    return r / count

def veto_analysis_spreadsheet(match_idxs, big_data, map_pool, team_name, map_advantage_df):
    """Assemble the veto spreadsheet for ``team_name``.

    The combined veto matrix (``combine_vm_data`` applied to
    ``create_veto_matrix``) is extended with three columns:

    * ``'sum'`` - row totals of the combined matrix,
    * ``'times vetoed'`` - ``num_times_vetoted`` rounded to 3 decimals,
    * ``'times picked'`` - ``num_times_selected`` rounded to 3 decimals.

    ``map_advantage_df`` is accepted but not used by this function.
    """
    raw_matrix = create_veto_matrix(match_idxs, big_data, map_pool, team_name)
    sheet = combine_vm_data(raw_matrix)
    picked_share = num_times_selected(map_pool, big_data, match_idxs, team_name)
    vetoed_rate = num_times_vetoted(map_pool, big_data, match_idxs, team_name)

    # Row totals are taken first so they cover only the map-vs-map columns.
    sheet.loc[:, 'sum'] = sheet.sum(1)
    sheet.loc[:, 'times vetoed'] = vetoed_rate.round(3)
    sheet.loc[:, 'times picked'] = picked_share.round(3)
    return sheet


def get_good_series_idx(bad_maps, roster_thresh, cur_roster, team_name, big_data, overview):
    """Select the series usable for the veto analysis.

    A series qualifies when its roster-match value (from
    ``roster_match_threshold``) is at least ``roster_thresh`` and its id is
    among those returned by ``filter_map_pool(bad_maps, overview, big_data)``.

    Prints the number of qualifying series and returns their labels, taken
    from the index of the roster-match result.
    """
    good_map_pool_idxs = filter_map_pool(bad_maps, overview, big_data)
    roster_matches = roster_match_threshold(cur_roster, roster_thresh, team_name, big_data)
    candidate_idxs = roster_matches.index
    # Computed once and reused for both the message and the return value.
    good_idxs = candidate_idxs[candidate_idxs.isin(good_map_pool_idxs)]
    # Same text as the former `print n, ' good series found'` statement, but
    # valid on both Python 2 and Python 3.
    print('{0}  good series found'.format(len(good_idxs)))
    return good_idxs

In [4]:
def create_ss(cur_roster, team_name, big_data, map_advantage_df, overview, home_team,
              pool=None, bad_maps=('Dust2',), roster_thresh=3):
    """Build the veto spreadsheet for ``team_name`` plus a map-advantage column.

    Parameters
    ----------
    cur_roster, team_name, big_data, overview
        Forwarded to ``get_good_series_idx`` / ``veto_analysis_spreadsheet``.
    map_advantage_df : pandas.DataFrame
        Indexed so that ``.loc[team_name, ...]`` yields one row per map; must
        contain ``'roster_weighted_t_win_ratio'`` and
        ``'roster_weighted_ct_win_ratio'``.
    home_team : str
        Only used in the label of the added column.
    pool : list, optional
        Map pool for the veto matrix. Defaults to the module-level
        ``map_pool``, which is what this function always used before.
    bad_maps : iterable, optional
        Maps whose series are excluded and whose rows are dropped from the
        advantage column. Defaults to ``('Dust2',)``.
    roster_thresh : int, optional
        Minimum roster-match value for a series to be used. Defaults to 3.

    Returns
    -------
    pandas.DataFrame
        The veto spreadsheet with one extra column holding the sum of the two
        roster-weighted win-ratio columns for ``team_name``.
    """
    if pool is None:
        pool = map_pool  # notebook-level default, kept for existing callers
    bad_maps = list(bad_maps)

    good_idx = get_good_series_idx(bad_maps, roster_thresh, cur_roster, team_name, big_data, overview)
    ss = veto_analysis_spreadsheet(good_idx, big_data, pool, team_name, map_advantage_df)

    weighted_cols = ['roster_weighted_t_win_ratio', 'roster_weighted_ct_win_ratio']
    ma_col = map_advantage_df.loc[team_name, weighted_cols].sum(1)
    ma_col = ma_col.loc[~ma_col.index.isin(bad_maps)]
    # NOTE(review): 'comaprative' is misspelled; the label is left unchanged
    # because it is the column name that consumers of the sheet see.
    ss.loc[:, 'comaprative '+home_team+' map advantage (weighted)'] = ma_col
    return ss

In [5]:

def filter_map_df(df):
    """Return columns 1-4 of ``df`` re-indexed by the values of its first column.

    The input frame is left untouched; a new frame is returned.
    """
    row_labels = df.iloc[:, 0]
    trimmed = df.iloc[:, 1:5].copy()
    trimmed.index = row_labels
    return trimmed

def make_map_advantage_df(home_team, spreadsheet_dir='spreadsheets'):
    """Compare ``home_team``'s per-map figures against every team spreadsheet.

    For each ``*.csv`` file in ``spreadsheet_dir`` the frame is reduced with
    ``filter_map_df`` and subtracted from the home team's reduced frame
    (``home - other``). The last two columns of the other team's raw frame are
    appended unchanged. The home team's own file is processed like any other.

    Parameters
    ----------
    home_team : str
        Name of the baseline team; ``<spreadsheet_dir>/<home_team>.csv`` must
        exist.
    spreadsheet_dir : str, optional
        Directory holding one CSV per team. Defaults to ``'spreadsheets'``.

    Returns
    -------
    pandas.DataFrame
        All per-team frames stacked, indexed by a two-level MultiIndex of
        (file name without extension, row label).
    """
    # Only CSV files are read (a notebook directory often also contains
    # entries such as `.ipynb_checkpoints`), in sorted order so the row order
    # does not depend on the file system.
    csv_names = sorted(
        name for name in os.listdir(spreadsheet_dir) if name.endswith('.csv')
    )

    baseline = pd.read_csv(os.path.join(spreadsheet_dir, '{0}.csv'.format(home_team)))
    baseline_ratios = filter_map_df(baseline)

    multi_idx_tuples = []
    frames = []
    for csv_name in csv_names:
        cur_frame = pd.read_csv(os.path.join(spreadsheet_dir, csv_name))
        # Index the raw frame by its first column so the trailing columns
        # align with the difference frame when concatenated.
        cur_frame.index = cur_frame.iloc[:, 0]
        diff_ratios = baseline_ratios - filter_map_df(cur_frame)
        diff_ratios = pd.concat([diff_ratios, cur_frame.iloc[:, -2:]], axis=1)
        frames.append(diff_ratios)

        team_label = os.path.splitext(csv_name)[0]
        multi_idx_tuples.extend((team_label, row) for row in diff_ratios.index)

    final = pd.concat(frames)
    final.index = pd.MultiIndex.from_tuples(multi_idx_tuples)
    return final

In [13]:
# Preview the map-advantage table computed with NRG as the baseline (home) team.
make_map_advantage_df('NRG')

Unnamed: 0,Unnamed: 1,Unnamed: 4,ct_rounds_played,ct_win_ratio,roster_weighted_ct_win_ratio,roster_weighted_t_win_ratio,sum_advantage,t_rounds_played,t_win_ratio
BIG,Cache,,360.0,-0.049638,-0.034054,-0.039697,,385.0,-0.057003
BIG,Cobblestone,,377.0,-0.042157,-0.040812,0.033789,,378.0,0.015721
BIG,Dust2,,119.0,-0.047673,-0.047673,-0.089506,,109.0,-0.089506
BIG,Inferno,,245.0,0.020327,0.018216,0.010788,,267.0,0.034291
BIG,Mirage,,320.0,-0.034077,-0.013017,0.081485,,338.0,0.062762
BIG,Nuke,,88.0,0.066220,0.087134,-0.095928,,96.0,-0.172482
BIG,Overpass,,379.0,-0.021640,0.001989,-0.148196,,322.0,-0.137785
BIG,Train,,413.0,0.057944,0.072786,0.060292,,399.0,0.055164
CLG,Cache,,385.0,-0.082772,-0.064944,-0.068210,,415.0,-0.080012
CLG,Cobblestone,,408.0,0.043712,0.049088,-0.025220,,383.0,-0.020291


In [6]:
def make_ss(home_team, opp_team, opp_roster, pkl_paths, map_pool):
    """Load scraped data for an opponent and build its veto spreadsheet.

    The pickled lists at ``pkl_paths`` are concatenated, passed through
    ``process_scrapped`` (which yields ``(overview, big_data)``), and handed to
    ``create_ss`` together with the map-advantage table for ``home_team``.

    NOTE(review): ``map_pool`` is accepted but never forwarded; ``create_ss``
    reads the module-level ``map_pool`` instead.
    """
    scraped = []
    for pkl_path in pkl_paths:
        # Unpickling can execute arbitrary code: only load trusted files.
        with open(pkl_path, 'rb') as handle:
            scraped.extend(pkl.load(handle))

    overview, series_data = process_scrapped(scraped)
    advantage_table = make_map_advantage_df(home_team)
    return create_ss(opp_roster, opp_team, series_data, advantage_table, overview, home_team)


In [7]:
# Map pool used for the veto analysis (Dust2 is not part of it).
map_pool = ['Cache', 'Mirage', 'Inferno', 'Nuke', 'Train', 'Cobblestone', 'Overpass']

# One (current roster, team name, pickle paths) triple per team.
# NOTE(review): the second entry is named 'Big' while its pickle file is
# 'BIG_...' - confirm which spelling the scraped data and spreadsheets use.
teams = [
    (['RIKO', 'FugLy', 'Brehze', 'AnJ', 'CeRq', 'daps'],
     'NRG',
     ['../cspython/NRG_2016-10-01_to_2017-10-13.pkl']),
    (['gob b', 'LEGIJA', 'tabseN', 'nex', 'keev', 'kakafu'],
     'Big',
     ['../cspython/BIG_2016-10-01_to_2017-10-13.pkl']),
    (['Skadoodle', 'Stewie2k', 'autimatic', 'RUSH', 'tarik', 'valens'],
     'Cloud9',
     ['../cspython/Cloud9_2016-10-01_to_2017-10-13.pkl']),
    (['reltuC', 'koosta', 'nahtE', 'FNS', 'Rickeh', 'Ryu'],
     'CLG',
     ['../cspython/CLG_2016-10-01_to_2017-10-13.pkl']),
    (['chrisj', 'oskar', 'ropz', 'suNny', 'STYKO', 'lmbt'],
     'mousesports',
     ['../cspython/mousesports_2016-10-01_to_2017-10-13.pkl']),
    (['AZR', 'jks', 'USTILO', 'Nifty', 'NAF', 'kassad'],
     'Renegades',
     ['../cspython/Renegades_2016-10-01_to_2017-07-22.pkl',
      '../cspython/Renegades_2017-07-22_to_2017-10-13.pkl']),
    (['dzt', 'land1n', 'tatazin', 'shz', 's1', 'bLecker'],
     'Tempo Storm',
     ['Tempo_combined.pkl']),
    (['Maniac', 'Ex6TenZ', 'to1nou', 'ALEX', 'DEVIL'],
     'LDLC',
     ['../cspython/LDLC_2016-10-01_to_2017-10-13.pkl']),
]

# Look-up by team name: {'cur_roster': [...], 'pkl_paths': [...]}.
team_data = dict(
    (name, {'cur_roster': roster, 'pkl_paths': paths})
    for roster, name, paths in teams
)


In [28]:
# home_team='Cloud9', opp_team='Tempo Storm': despite its name, c9_veto holds
# the analysis of Tempo Storm's vetoes, with Cloud9 as the baseline team.
c9_veto = make_ss('Cloud9', 'Tempo Storm', team_data['Tempo Storm']['cur_roster'], team_data['Tempo Storm']['pkl_paths'], map_pool)

49  good series found


In [29]:
# home_team='Tempo Storm', opp_team='Cloud9': despite its name, ts_veto holds
# the analysis of Cloud9's vetoes, with Tempo Storm as the baseline team.
ts_veto = make_ss('Tempo Storm', 'Cloud9', team_data['Cloud9']['cur_roster'], team_data['Cloud9']['pkl_paths'], map_pool)

42  good series found


In [8]:
# home_team='NRG', opp_team='LDLC': ldlc_veto holds the analysis of LDLC's
# vetoes, with NRG as the baseline team for the map-advantage column.
ldlc_veto = make_ss('NRG', 'LDLC', team_data['LDLC']['cur_roster'], team_data['LDLC']['pkl_paths'], map_pool)

93  good series found


In [9]:
# Display the LDLC veto spreadsheet built by the make_ss('NRG', 'LDLC', ...) call.
ldlc_veto

Unnamed: 0,Cache,Mirage,Inferno,Nuke,Train,Cobblestone,Overpass,sum,times vetoed,times picked,comaprative NRG map advantage (weighted)
Cache,0.0,45.0,-6.0,-13.0,39.0,-9.0,-11.0,45.0,0.328,0.138,0.0
Mirage,-45.0,0.0,-34.0,-39.0,-1.0,-42.0,-43.0,-204.0,0.776,0.126,0.0
Inferno,6.0,34.0,0.0,-8.0,44.0,8.0,-4.0,80.0,0.149,0.161,0.0
Nuke,13.0,39.0,8.0,0.0,41.0,3.0,4.0,108.0,0.045,0.184,0.0
Train,-39.0,1.0,-44.0,-41.0,0.0,-42.0,-46.0,-211.0,0.791,0.103,0.0
Cobblestone,9.0,42.0,-8.0,-3.0,42.0,0.0,-15.0,67.0,0.194,0.126,0.0
Overpass,11.0,43.0,4.0,-4.0,46.0,15.0,0.0,115.0,0.075,0.161,0.0
