# Work on Sample data from League of Legends

In [74]:
import numpy as np
import pandas as pd
import json

file = '../raw_data/full_dump.json'
with open(file) as data_file:    
    data = json.load(data_file)
len(data)

35320

In [75]:
data_keys = list(data.keys())
data_keys[:5]

['EU Challenger Series/2017 Season/Spring Qualifiers/Scoreboards_1_1',
 'EU Challenger Series/2017 Season/Spring Qualifiers/Scoreboards_2_1',
 'EU Challenger Series/2017 Season/Spring Qualifiers/Scoreboards_3_1',
 'EU Challenger Series/2017 Season/Spring Qualifiers/Scoreboards_4_1',
 'EU Challenger Series/2017 Season/Spring Qualifiers/Scoreboards_5_1']

In [76]:
import re

pattern = r'(\d{4})'
re.search(pattern, data_keys[2]).group(1)

year = []
no_year = []
for i in range(len(data_keys)):
    try:
        year.append(re.search(pattern, data_keys[i]).group(1))
    except:
        no_year.append(data_keys[i])

np.unique(year), len(no_year)


(array(['2016', '2017', '2018', '2019', '2020', '2021'], dtype='<U4'), 1703)

## Normalize the JSON file into a Pandas DataFrame

Data is structured in a JSON structure that is inconvinient for data analysis. Converting nested JSON structures to Pandas DataFrames using https://medium.com/swlh/converting-nested-json-structures-to-pandas-dataframes-e8106c59976e

In [77]:
#Normalize the complete dataframe and make a copy
df_normalized_teams = pd.json_normalize(data.values())
df_teams = df_normalized_teams.copy()
df_teams.sample(10)

Unnamed: 0,id,start,patch,winner,duration,picks_bans,teams.BLUE.name,teams.BLUE.total_turret_kills,teams.BLUE.total_inhibitor_kills,teams.BLUE.total_rift_herald_kills,...,teams.BLUE.side,teams.BLUE.players,teams.RED.name,teams.RED.total_turret_kills,teams.RED.total_inhibitor_kills,teams.RED.total_rift_herald_kills,teams.RED.total_dragon_kills,teams.RED.total_baron_kills,teams.RED.side,teams.RED.players
15063,NA Academy League/2019 Season/Summer Season/Sc...,,9.13,BLUE,1735,"[{'champion_name': 'Irelia', 'is_ban': True}, ...",Team Liquid Academy,9,1,1,...,BLUE,"[{'name': 'Jenkins', 'id': 220331, 'role': 'TO...",CLG Academy,1,0,0,0,0,RED,"[{'name': 'FallenBandit', 'id': 159119, 'role'..."
30465,LCO/2021 Season/Split 1/Scoreboards/Week 5_4_1,,11.5,BLUE,2065,"[{'champion_name': 'Thresh', 'is_ban': True}, ...",Chiefs Esports Club,9,1,2,...,BLUE,"[{'name': 'Lived', 'id': 303244, 'role': 'TOP'...",Gravitas,1,0,0,2,0,RED,"[{'name': 'Safa', 'id': 457420, 'role': 'TOP',..."
31616,NA LCS/2016 Season/Spring Season/Scoreboards_8_1,,6.1,BLUE,2609,[],TSM,7,1,2,...,BLUE,"[{'name': 'Hauntzer', 'id': 164344, 'role': 'T...",Team Liquid,7,1,0,2,1,RED,"[{'name': 'Lourlo', 'id': 180523, 'role': 'TOP..."
28081,CBLOL Academy/2021 Season/Split 1/Scoreboards/...,,11.2,BLUE,1586,"[{'champion_name': 'Sett', 'is_ban': True}, {'...",Cruzeiro Academy,7,1,1,...,BLUE,"[{'name': 'Reversed', 'id': 400492, 'role': 'T...",Vorax Academy,2,0,1,2,0,RED,"[{'name': 'Yupps', 'id': 349556, 'role': 'TOP'..."
25965,Greek Legends League/2020 Season/Winter Season...,,10.2,BLUE,1976,"[{'champion_name': 'Graves', 'is_ban': True}, ...",Anorthosis Famagusta Esports,9,1,0,...,BLUE,"[{'name': 'WouLou', 'id': 204021, 'role': 'TOP...",Team Phantasma,3,0,2,3,0,RED,"[{'name': 'Bako', 'id': 252099, 'role': 'TOP',..."
30096,LCK CL/2021 Season/Spring Season/Scoreboards/W...,,11.5,BLUE,1909,"[{'champion_name': 'Thresh', 'is_ban': True}, ...",Gen.G Challengers,6,1,0,...,BLUE,"[{'name': 'Lonely', 'id': 272225, 'role': 'TOP...",Nongshim RedForce Challengers,3,0,2,1,0,RED,"[{'name': 'DnDn', 'id': 423672, 'role': 'TOP',..."
7271,LCK/2018 Season/Summer Season/Scoreboards/Week...,,8.11,BLUE,1393,"[{'champion_name': 'Shen', 'is_ban': True}, {'...",KT Rolster,9,2,0,...,BLUE,"[{'name': 'Smeb', 'id': 197924, 'role': 'TOP',...",Afreeca Freecs,1,0,1,0,0,RED,"[{'name': 'Kiin', 'id': 176422, 'role': 'TOP',..."
19075,TCL/2020 Season/Winter Season/Scoreboards/Week...,,10.3,BLUE,2201,"[{'champion_name': 'Gragas', 'is_ban': True}, ...",Dark Passage,9,1,2,...,BLUE,"[{'name': 'Juuzou', 'id': 280652, 'role': 'TOP...",Beşiktaş Esports,1,0,0,3,0,RED,"[{'name': 'Rare', 'id': 194867, 'role': 'TOP',..."
31152,LLA/2021 Season/Opening Playoffs/Scoreboards_3_2,,11.6,BLUE,2192,"[{'champion_name': 'Udyr', 'is_ban': True}, {'...",Furious Gaming,9,1,2,...,BLUE,"[{'name': 'Jauny', 'id': 279392, 'role': 'TOP'...",Infinity Esports (Latin American Team),4,0,0,1,1,RED,"[{'name': 'Buggax', 'id': 193123, 'role': 'TOP..."
23691,Turkey Academy League/2020 Season/Summer Seaso...,,10.14,RED,1831,[],Galatasaray Academy,3,0,2,...,BLUE,"[{'name': 'NuQ', 'id': 399288, 'role': 'TOP', ...",Team AURORA Academy,11,3,0,2,1,RED,"[{'name': 'Asteroid', 'id': 304606, 'role': 'T..."


In [80]:
#Make a dataframe dedicated to the teams 
df_teams.drop(['teams.BLUE.players', 'teams.RED.players', 'picks_bans'], axis=1, inplace=True)
df_teams

KeyError: "['teams.BLUE.players' 'teams.RED.players' 'picks_bans'] not found in axis"

In [81]:
#Make a dataframe dedicated to the team BLUE
df_normalized_teams['teams.BLUE.players'].explode()
df_normalized_BLUE = pd.json_normalize(df_normalized_teams['teams.BLUE.players'].explode())
df_BLUE = df_normalized_BLUE.copy()
df_BLUE.head(5)

Unnamed: 0,name,id,role,champion_name,champion_id,gold_15,kills_assists_15,deaths_15,total_gold,total_cs,total_kills,total_monster_kills,total_assists,total_deaths,total_damage_taken,total_damage_dealt,win,side
0,Kikis,172122.0,TOP,Nautilus,111,5326.0,8.0,1.0,12504,233,2,0,6,1,18220,156270,True,BLUE
1,Broxah,193072.0,JGL,Lee Sin,64,5261.0,11.0,1.0,12352,153,4,132,7,1,22212,152183,True,BLUE
2,Nisqy,185791.0,MID,Syndra,134,6009.0,13.0,1.0,13393,229,5,6,8,1,10647,177681,True,BLUE
3,MrRallez,183407.0,BOT,Jhin,202,5304.0,11.0,0.0,13969,315,2,5,9,0,9758,228328,True,BLUE
4,Klaj,171882.0,SUP,Karma,43,2767.0,12.0,0.0,9740,38,1,0,11,0,11917,34299,True,BLUE


In [82]:
#Make a dataframe dedicated to the team RED
df_normalized_teams['teams.RED.players'].explode()
df_normalized_RED = pd.json_normalize(df_normalized_teams['teams.RED.players'].explode())
df_RED = df_normalized_RED.copy()
df_RED.head(5)

Unnamed: 0,name,id,role,champion_name,champion_id,gold_15,kills_assists_15,deaths_15,total_gold,total_cs,total_kills,total_monster_kills,total_assists,total_deaths,total_damage_taken,total_damage_dealt,win,side
0,Phones,193289.0,TOP,Maokai,57,4528.0,2.0,7.0,9611,190,1,13,1,7,41065,111536,False,RED
1,Obvious,187241.0,JGL,Rengar,107,4728.0,2.0,1.0,9640,174,0,126,2,1,27879,147035,False,RED
2,MagiFelix,181359.0,MID,Ryze,13,4893.0,2.0,3.0,11840,301,0,2,2,3,16013,205899,False,RED
3,Sedrion,197437.0,BOT,Varus,110,5133.0,2.0,1.0,12010,283,2,1,0,1,10370,184927,False,RED
4,Noxiak,185879.0,SUP,Nami,267,2521.0,2.0,2.0,7348,16,0,0,2,2,13815,15418,False,RED


In [83]:
#Make a dataframe dedicated to the Bans
df_normalized_teams['picks_bans'].explode()
df_normalized_BANS = pd.json_normalize(df_normalized_teams['picks_bans'].explode())
df_BANS = df_normalized_BANS.copy()
df_BANS.isnull().sum()/len(df_BANS)

champion_name    0.019285
is_ban           0.019285
dtype: float64

In [84]:
get_index = df_normalized_teams['id'].tolist()
index_preproc = np.asarray([[index] * 5 for index in get_index])
index_teams = index_preproc.reshape(len(df_normalized_teams) * 5).tolist()
df_RED['game_id'] = index_teams
df_BLUE['game_id'] = index_teams
len(index_teams)/5

35320.0

In [98]:
df_BLUE['win'].value_counts()

True     94620
False    81980
Name: win, dtype: int64

In [99]:
df_RED['win'].value_counts()

False    94620
True     81980
Name: win, dtype: int64

In [108]:
df_BLUE['win']

0         True
1         True
2         True
3         True
4         True
          ... 
176595    True
176596    True
176597    True
176598    True
176599    True
Name: win, Length: 176600, dtype: bool

In [113]:
df_BLUE_WIN = df_BLUE[df_BLUE['win'] == True]
df_BLUE_WIN
#Use dictionary Champion ID 

Unnamed: 0,name,id,role,champion_name,champion_id,gold_15,kills_assists_15,deaths_15,total_gold,total_cs,total_kills,total_monster_kills,total_assists,total_deaths,total_damage_taken,total_damage_dealt,win,side,game_id
0,Kikis,172122.0,TOP,Nautilus,111,5326.0,8.0,1.0,12504,233,2,0,6,1,18220,156270,True,BLUE,EU Challenger Series/2017 Season/Spring Qualif...
1,Broxah,193072.0,JGL,Lee Sin,64,5261.0,11.0,1.0,12352,153,4,132,7,1,22212,152183,True,BLUE,EU Challenger Series/2017 Season/Spring Qualif...
2,Nisqy,185791.0,MID,Syndra,134,6009.0,13.0,1.0,13393,229,5,6,8,1,10647,177681,True,BLUE,EU Challenger Series/2017 Season/Spring Qualif...
3,MrRallez,183407.0,BOT,Jhin,202,5304.0,11.0,0.0,13969,315,2,5,9,0,9758,228328,True,BLUE,EU Challenger Series/2017 Season/Spring Qualif...
4,Klaj,171882.0,SUP,Karma,43,2767.0,12.0,0.0,9740,38,1,0,11,0,11917,34299,True,BLUE,EU Challenger Series/2017 Season/Spring Qualif...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176595,CuVee,145319.0,TOP,Maokai,57,5940.0,9.0,1.0,11857,260,3,5,6,1,16691,144141,True,BLUE,IEM Season 11/Gyeonggi/Scoreboards/Playoffs_3_4
176596,Ambition,189721.0,JGL,Lee Sin,64,6063.0,13.0,2.0,11399,146,6,124,7,2,27569,159701,True,BLUE,IEM Season 11/Gyeonggi/Scoreboards/Playoffs_3_4
176597,Crown,142619.0,MID,Viktor,112,5926.0,13.0,1.0,13043,245,7,18,6,1,8244,164088,True,BLUE,IEM Season 11/Gyeonggi/Scoreboards/Playoffs_3_4
176598,Ruler,196493.0,BOT,Ezreal,81,5784.0,8.0,2.0,11857,239,3,9,5,2,10718,146160,True,BLUE,IEM Season 11/Gyeonggi/Scoreboards/Playoffs_3_4


In [115]:
df_RED_WIN = df_RED[df_RED['win'] == True]
df_RED_WIN

Unnamed: 0,name,id,role,champion_name,champion_id,gold_15,kills_assists_15,deaths_15,total_gold,total_cs,total_kills,total_monster_kills,total_assists,total_deaths,total_damage_taken,total_damage_dealt,win,side,game_id
5,Arin,190279.0,TOP,Shen,98,4707.0,19.0,0.0,12968,210,6,1,13,0,21463,104438,True,RED,EU Challenger Series/2017 Season/Spring Qualif...
6,Pridestalkr,194373.0,JGL,Nocturne,56,5826.0,15.0,1.0,11986,159,7,135,8,1,26557,175986,True,RED,EU Challenger Series/2017 Season/Spring Qualif...
7,Larssen,177025.0,MID,Syndra,134,5335.0,15.0,1.0,13118,278,5,7,10,1,13295,184311,True,RED,EU Challenger Series/2017 Season/Spring Qualif...
8,SMILEY,205007.0,BOT,Ashe,22,6467.0,17.0,2.0,14181,246,9,10,8,2,16716,169901,True,RED,EU Challenger Series/2017 Season/Spring Qualif...
9,Hadow,163748.0,SUP,Malzahar,90,3507.0,15.0,2.0,8631,16,0,0,15,2,13399,27741,True,RED,EU Challenger Series/2017 Season/Spring Qualif...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176580,CuVee,145319.0,TOP,Maokai,57,5410.0,2.0,0.0,13602,321,0,0,2,0,25464,192722,True,RED,IEM Season 11/Gyeonggi/Scoreboards/Playoffs_3_1
176581,Ambition,189721.0,JGL,Rek'Sai,421,5038.0,4.0,2.0,13766,245,1,206,3,2,43240,229578,True,RED,IEM Season 11/Gyeonggi/Scoreboards/Playoffs_3_1
176582,Crown,142619.0,MID,Orianna,61,5302.0,10.0,1.0,17675,354,9,22,1,1,10901,257123,True,RED,IEM Season 11/Gyeonggi/Scoreboards/Playoffs_3_1
176583,Ruler,196493.0,BOT,Sivir,15,5458.0,8.0,1.0,17111,409,2,14,6,1,12571,318955,True,RED,IEM Season 11/Gyeonggi/Scoreboards/Playoffs_3_1


In [119]:
df_BLUE_CHAMPWINS = df_BLUE_WIN[["champion_name","champion_id","win"]]
df_BLUE_CHAMPWINS.head(10)

Unnamed: 0,champion_name,champion_id,win
0,Nautilus,111,True
1,Lee Sin,64,True
2,Syndra,134,True
3,Jhin,202,True
4,Karma,43,True
15,Poppy,78,True
16,Camille,164,True
17,Orianna,61,True
18,Jhin,202,True
19,Malzahar,90,True


In [123]:
df_RED_CHAMPWINS = df_RED_WIN[["champion_name","champion_id","win"]]
df_RED_CHAMPWINS.head(10)

Unnamed: 0,champion_name,champion_id,win
5,Shen,98,True
6,Nocturne,56,True
7,Syndra,134,True
8,Ashe,22,True
9,Malzahar,90,True
10,Poppy,78,True
11,Kha'Zix,121,True
12,Syndra,134,True
13,Ezreal,81,True
14,Karma,43,True


In [125]:
df_BLUE_CHAMPWINS["champion_name"].value_counts(normalize=True)

Braum        0.024678
Ezreal       0.024657
Gragas       0.022183
Thresh       0.021253
Kai'Sa       0.018558
               ...   
Annie        0.000085
Master Yi    0.000074
Shaco        0.000074
Teemo        0.000074
Viego        0.000042
Name: champion_name, Length: 154, dtype: float64

In [128]:
df_BLUE_CHAMPWINS["champion_id"].value_counts(normalize=True)

201    0.024678
81     0.024657
79     0.022183
412    0.021253
145    0.018558
         ...   
1      0.000085
11     0.000074
35     0.000074
17     0.000074
234    0.000042
Name: champion_id, Length: 154, dtype: float64

In [126]:
df_RED_CHAMPWINS["champion_name"].value_counts(normalize=True)

Braum        0.025714
Ezreal       0.025287
Gragas       0.023201
Thresh       0.022896
Kai'Sa       0.019554
               ...   
Master Yi    0.000098
Teemo        0.000085
Amumu        0.000061
Viego        0.000024
Gwen         0.000012
Name: champion_name, Length: 155, dtype: float64

## Patch/Date Splitting

In [136]:
df_teams['patch'].value_counts()

11.4     910
11.2     879
9.13     868
10.14    791
11.3     726
        ... 
7.23      19
7.21      19
9.18      12
8.18       9
8.7        9
Name: patch, Length: 120, dtype: int64

In [None]:
## Assigning each patch that starts with a number to a year
#6 - 2016, 7 -2017, 8 -2018, 9-2019, 10-2020, 11-2022



df_teams['patch_year'] = df.apply('patch', axis=1)