In [380]:
import pandas as pd
import plotly.graph_objects as go

co_df = pd.read_csv("data/co_data.csv")
map_df = pd.read_csv("data/map_data.csv")
user_df = pd.read_csv("data/user_data.csv", index_col= 'userName')
replay_df = pd.read_csv("data/replay_data.csv", index_col= 'replayId')
pick_df = pd.read_csv("data/pick_data.csv")
ban_df = pd.read_csv("data/ban_data.csv")

In [381]:
# Filter high ranked players, >= 1400 represents 25%, top quartile
highranked_users = user_df[(user_df['stdScore'] >= 1400) | (user_df['fogScore'] >= 1400)]

# Replays played by high ranked players, exclude time out and draws
highranked_replays = replay_df.loc[pick_df[pick_df['userName'].isin(highranked_users.index)]['replayId'].unique()]
highranked_replays = highranked_replays[(highranked_replays['isBoot'] != True) & 
                                        (highranked_replays['winnerPlayerIndex'] != 0)]

In [382]:
CO_NAME = 'Gage'

# Pick rate calculation, replays where CO is unbanned, replays where CO is picked
available_matches = ban_df[ban_df['replayId'].isin(highranked_replays.index) & 
                           (ban_df['coName'] != CO_NAME)]['replayId'].unique()

picked_matches = pick_df[pick_df['replayId'].isin(highranked_replays.index) & 
                         (pick_df['coName'] == CO_NAME)]['replayId'].unique()

pickrate = len(picked_matches)/len(available_matches)

In [383]:
# need to filter total
co_matchup = pick_df[pick_df['replayId'].isin(picked_matches)]
co_matchup_total = co_matchup[co_matchup['coName'] != CO_NAME]['coName'].value_counts()
co_matchup_total = co_matchup_total.rename("Matches")

# High ranked matches where CO of interest is picked but is not the winner
# Excludes mirror matchups, counts losses
co_matchup_winner = highranked_replays[highranked_replays.index.isin(picked_matches)]
co_matchup_loss = co_matchup_winner[co_matchup_winner['winnerCoName'] != CO_NAME]['winnerCoName'].value_counts()

In [384]:
co_matchup_wins = co_matchup_total - co_matchup_loss
co_matchup_wins = co_matchup_wins.fillna(1)
co_matchup_wins = co_matchup_wins.rename("Wins")

co_matchup_winrate = co_matchup_wins / co_matchup_total
co_matchup_winrate = co_matchup_winrate.rename("Win Rate")

co_matchup_table = pd.concat([co_matchup_wins, co_matchup_total, co_matchup_winrate], axis=1)
co_matchup_table = co_matchup_table.sort_values(['Matches'], ascending= False)
co_matchup_table.index = co_matchup_table.index.rename(CO_NAME + " vs")
co_matchup_table

Unnamed: 0_level_0,Wins,Matches,Win Rate
Gage vs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lin,1.0,9,0.111111
Tabitha,2.0,9,0.222222
Will,3.0,9,0.333333
Hawk,1.0,8,0.125
Caulder,2.0,7,0.285714
Penny,0.0,6,0.0
Grat,2.0,5,0.4
Greyfield,3.0,5,0.6
Forsythe,1.0,4,0.25
Brenner,1.0,3,0.333333


In [385]:
# Why can't gage win vs penny?
# co_matchup[co_matchup['coName'] != CO_NAME]['coName']
co_matchup[co_matchup['replayId'].isin(co_matchup[co_matchup['coName'] == 'Penny']['replayId'])]

Unnamed: 0,playerIndex,replayId,coName,userName
444,1,6017,Penny,Plague
445,2,6017,Gage,Mini Nini
770,1,5719,Gage,PhantomFullForce
771,2,5719,Penny,Sliicer
1414,1,5078,Gage,BWAVE
1415,2,5078,Penny,BusterBeachside
1708,1,4789,Gage,Chan ' tcho
1709,2,4789,Penny,DC290
2000,1,4558,Gage,Tee Lord
2001,2,4558,Penny,Zerobillion


In [386]:
replay_df.loc[4169]

winnerPlayerIndex             1
turns                        18
actions                     835
mapId                       110
isStd                      True
isBoot                    False
mapName              Swamp City
winnerCoName               Gage
winnerUserName           Mauwus
Name: 4169, dtype: object