# League of Legends Win Factors

## Imports & Loading Data

The data is in a folder with one file per match. The files are JSON encoded and named `{matchID}.json`.

In [46]:
from os import listdir
from os.path import isfile, join, expanduser
import pandas as pd
from json import load
from os import remove
from IPython.display import display, clear_output

## Define Features

In [83]:
def extract_red_win(match_details: dict):
    team = match_details['teams'][0]
    
    if team['teamId'] == 100:  # blue
        if team['win'] == 'Win':
            return False
        else:
            return True
    else:  # red
        if team['win'] == 'Win':
            return True
        else:
            return False

def _extract_bool_from_teams(match_details: dict, key: str) -> int:
    indicator = 0
    for team in match_details['teams']:
        if team['teamId'] == 100:
            indicator -= team[key]
        else:
            indicator += team[key]
    return indicator    



def extract_firstDragon(match_details: dict) -> bool:
    return _extract_bool_from_teams(match_details, 'firstDragon')

def extract_firstRiftHerald(match_details: dict) -> bool:
    return _extract_bool_from_teams(match_details, 'firstRiftHerald')

def extract_firstBlood(match_details: dict) -> bool:
    return _extract_bool_from_teams(match_details, 'firstBlood')

def extract_firstTower(match_details: dict) -> bool:
    return match_details['teams'][win_team]['firstTower']

def extract_firstBloodAssist(match_details: dict) -> bool:
    win_team_id = match_details['teams'][win_team]['teamId']
    
    return any(
        participant['stats']['firstBloodAssist']
        if 'firstBloodAssist' in participant['stats'] else False
        for participant
        in match_details['participants']
        if participant['teamId'] == win_team_id)

def extract_anyFirstTowerAssist(match_details: dict) -> bool:
    win_team_id = match_details['teams'][win_team]['teamId']
    
    return any(
        participant['stats']['firstTowerAssist']
        if 'firstTowerAssist' in participant['stats'] else False
        for participant
        in match_details['participants']
        if participant['teamId'] == win_team_id)

def extract_numFirstBloodAssist(match_details: dict) -> int:
    win_team_id = match_details['teams'][win_team]['teamId']
    
    return sum(
        participant['stats']['firstBloodAssist']
        if 'firstBloodAssist' in participant['stats'] else False
        for participant
        in match_details['participants']
        if participant['teamId'] == win_team_id)

def extract_numFirstTowerAssist(match_details: dict) -> int:
    win_team_id = match_details['teams'][win_team]['teamId']
    
    return sum(
        participant['stats']['firstTowerAssist']
        if 'firstTowerAssist' in participant['stats'] else False
        for participant
        in match_details['participants']
        if participant['teamId'] == win_team_id)

features = (
    ('redWin', extract_red_win),
    ('firstDragon', extract_firstDragon),
    ('firstRiftHerald', extract_firstRiftHerald),
    ('firstBlood', extract_firstBlood),
    ('firstTower', extract_firstTower),
    ('anyFirstBloodAssist', extract_anyFirstBloodAssist),
    ('anyFirstTowerAssist', extract_anyFirstTowerAssist),
    ('numFirstBloodAssist', extract_numFirstBloodAssist),
    ('numFirstTowerAssist', extract_numFirstTowerAssist)
)

SyntaxError: invalid syntax (<ipython-input-83-ddb00e0652f2>, line 1)

In [81]:
base_path = join(expanduser('~'), 'Downloads', 'lol_matches')

filelist = listdir(base_path)

data = pd.DataFrame(
    columns=tuple(f for f, _ in features),
    index=(i for i in range(len(filelist))),
    dtype='float64')

print(f"loading max. {len(filelist)} files")

files_kept = 0
files_deleted = 0

for file_i, file_name in enumerate(filelist):
    if len(file_name) != 15 or not file_name.endswith('.json'):
        continue
    
    file_path = join(base_path, file_name)
    
    if isfile(file_path):
        with open(file_path, 'r') as match_file:
            match_details = load(match_file)
            
        if not match_details['queueId'] in (420, 440):
            files_deleted += 1
            remove(file_path)
        else:
            files_kept += 1
                
            for feature, extractor in features:
                data.loc[file_i, feature] = extractor(match_details, win_team)
                
                    
        
        if file_i%250 == 0 or file_i+1==len(filelist):
            clear_output(wait=True)
            display(f"{100.0*float(file_i+1)/float(len(filelist)):>5.1f}% - {100.0*float(files_kept)/(float(files_kept)+float(files_deleted)):>5.1f}% kept")

'100.0% - 100.0% kept'

In [82]:
data

Unnamed: 0,firstDragon,firstRiftHerald,firstBlood,firstTower,anyFirstBloodAssist,anyFirstTowerAssist,numFirstBloodAssist,numFirstTowerAssist
0,True,False,True,True,False,True,0.0,2.0
1,False,False,False,False,False,False,0.0,0.0
2,True,False,False,True,False,True,0.0,1.0
3,False,False,False,False,False,False,0.0,0.0
4,True,False,True,True,False,False,0.0,0.0
5,True,False,True,True,False,True,0.0,1.0
6,True,False,True,True,False,True,0.0,1.0
7,False,False,False,False,False,False,0.0,0.0
8,False,False,True,True,False,False,0.0,0.0
9,False,False,False,False,False,False,0.0,0.0
