In [11]:
import pandas as pd
import warnings
import os
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns

In [12]:
# The data sets live in <parent of the working directory>/data/datasets.
parent_dir = os.path.split(os.getcwd())[0]
dataset_subdirs = ('data', 'datasets')
data_path = os.path.join(parent_dir, *dataset_subdirs)

In [13]:
# Load the full data set and report its (rows, columns) shape.
dataset_file = os.path.join(data_path, 'dataset.csv')
dataset = pd.read_csv(dataset_file)
print(dataset.shape)

(1312248, 15)


In [14]:
# Distinct champions and roles that occur in the data set.
champs = dataset['CHAMPION'].unique().tolist()
roles = dataset['ROLE'].unique().tolist()
for label, values in (("Total champs: ", champs), ("Total roles: ", roles)):
    print(label, len(values))

Total champs:  156
Total roles:  5


In [15]:
# Number of champion/role combinations that actually occur in the data.
# The theoretical maximum is 780 = 5 roles x 156 champions.
# `assign` returns a new frame, so `dataset` itself is left untouched.
champ_role_df = dataset.assign(
    champ_role=dataset['CHAMPION'].str.cat(dataset['ROLE'], sep='_')
)
champ_role_df['champ_role'].nunique()

772

In [16]:
# Distribution of record counts per champion/role combination.
# Only combinations with MORE than 15 records (i.e. at least 16) are kept.
#
# rename_axis('index') pins the name of the label column to 'index'.
# Without it the name depends on the pandas version: pandas < 2.0 produces
# 'index', while pandas >= 2.0 names the column after the source Series
# ('champ_role'), which would make a later counts['index'] lookup raise KeyError.
counts = (
    champ_role_df['champ_role']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='count')
)
counts = counts.query('count > 15')
counts

Unnamed: 0,index,count
0,Ezreal_bottom,35172
1,Kaisa_bottom,28125
2,Thresh_utility,26293
3,LeeSin_jungle,23512
4,Lulu_utility,20272
...,...,...
560,Leblanc_bottom,16
561,TwistedFate_jungle,16
562,Camille_bottom,16
563,Volibear_utility,16


In [17]:
# Keep only rows whose champion/role combination is listed in `counts`,
# restrict the frame to the columns used for the metrics, and expand WIN
# into the indicator columns WIN_False / WIN_True.
has_enough_records = champ_role_df['champ_role'].isin(counts['index'])
metric_columns = ['CHAMPION', 'ROLE', 'champ_role', 'KILLS', 'ASSISTS', 'DEATHS', 'WIN']
champ_role_df = pd.get_dummies(
    champ_role_df.loc[has_enough_records, metric_columns],
    columns=['WIN'],
)
champ_role_df

Unnamed: 0,CHAMPION,ROLE,champ_role,KILLS,ASSISTS,DEATHS,WIN_False,WIN_True
0,Kassadin,top,Kassadin_top,14,5,7,1,0
1,Fiora,top,Fiora_top,3,13,6,0,1
2,Yone,middle,Yone_middle,7,3,6,1,0
3,Corki,middle,Corki_middle,4,15,7,0,1
4,Kaisa,bottom,Kaisa_bottom,5,10,12,1,0
...,...,...,...,...,...,...,...,...
1312243,Alistar,utility,Alistar_utility,5,16,6,0,1
1312244,LeeSin,jungle,LeeSin_jungle,2,5,7,1,0
1312245,FiddleSticks,jungle,FiddleSticks_jungle,5,16,5,0,1
1312246,Shen,top,Shen_top,1,1,3,1,0


In [18]:
# Per champion/role metrics:
#   kda = (total kills + total assists) / total deaths
#   wr  = wins / (wins + losses) * 100
#
# All totals come from a single vectorised groupby-sum, so both metrics are
# derived from the same aggregated frame and share its (CHAMPION, ROLE) index;
# no row-wise `apply` and no implicit alignment between two separate results.
stat_columns = ['KILLS', 'ASSISTS', 'DEATHS', 'WIN_True', 'WIN_False']
totals = champ_role_df.groupby(['CHAMPION', 'ROLE'])[stat_columns].sum()

# A combination with zero recorded deaths would otherwise yield inf/NaN in the
# exported metrics; treat it as one death, the usual KDA convention.
deaths = totals['DEATHS'].clip(lower=1)
games = totals['WIN_True'] + totals['WIN_False']

champ_role = pd.DataFrame({
    'kda': (totals['KILLS'] + totals['ASSISTS']) / deaths,
    'wr': totals['WIN_True'] / games * 100,
}).reset_index()

In [19]:
# Summary statistics of the per champion/role metrics (kda and wr).
champ_role.describe()

Unnamed: 0,kda,wr
count,565.0,565.0
mean,2.295393,47.824775
std,0.459054,6.930245
min,1.235294,15.625
25%,1.967713,45.454545
50%,2.243193,49.375975
75%,2.548215,51.309397
max,4.290919,72.0


In [20]:
# Write the champion/role metrics next to the input data, without the row index.
metrics_file = os.path.join(data_path, 'champ_role_metrics.csv')
champ_role.to_csv(metrics_file, index=False)