# League of Legends Early Objectives

## Imports

In [None]:
%matplotlib inline

from functools import partial
from json import load
from os import listdir, remove
from os.path import expanduser, isfile, join
from concurrent.futures import ThreadPoolExecutor, as_completed

import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt
import seaborn as sb
from IPython.display import clear_output, display

## Define Features

In [None]:
def _extract_indicator_from_teams(key: str, match_details: dict) -> int:
    indicator = 0
    for team in match_details['teams']:
        if team['teamId'] == 100:
            indicator -= team[key]
        else:
            indicator += team[key]
    return indicator

def _extract_participant_stat(aggregator: callable, key: str, match_details: dict) -> int:
    blue = aggregator(
        participant['stats'][key] if key in participant['stats'] else 0
        for participant in match_details['participants']
        if participant['teamId'] == 100)

    red = aggregator(
        participant['stats'][key] if key in participant['stats'] else 0
        for participant in match_details['participants']
        if participant['teamId'] == 200)
    
    return red-blue

def _extract_participant_timeline_key(team_id: int, lane: str, key: str, frame: str, match_details: dict):
    '''
    - team_id (int): Possible values are 100 (blue)
        and 200 (red).

    - lane (str): Possible values are 'MIDDLE', 'TOP',
        'JUNGLE' and 'BOTTOM'.

    - key (str): Possbile values are 'goldPerMinDeltas',
        'creepsPerMinDeltas', 'xpPerMinDeltas' and
        'damageTakenPerMinDeltas'

    - frame (str): Possible values are '0-10', '10-20',
        '20-30' and '30-end'.

    - match_details (dict): whole dictionary of parse
        match json.
    '''
    metric = 0.0  # sum because two players are bottom

    for participant in match_details['participants']:
        is_lane = participant['timeline'][0]['lane'] == lane
        is_team = participant['teamId'] == team_id
        
        if is_lane and is_team:
            metric += participant['timeline'][key][frame]
            
    return metric

def extract_red_win(match_details: dict) -> bool:
    '''
    Tell whether the red team won, judged from the first entry of
    `match_details['teams']` only.

    Red won when that entry is blue (team id 100) and did not win,
    or when it is not blue and did win.
    '''
    first_team = match_details['teams'][0]

    first_is_blue = first_team['teamId'] == 100
    first_won = first_team['win'] == 'Win'

    # Exactly one of "is blue" / "won" must hold for a red victory.
    return first_is_blue != first_won

team_to_text = {100: 'blue', 200: 'red'}

def generate_participant_timeline_extractors(team_ids: (int,), lanes: (str,), keys: (str,), frames: (str,)):
    '''
    Yield one feature definition per combination of the given
    team ids, lanes, timeline keys and frames.

    - team_ids: team ids to cover; each must be a key of
        `team_to_text`.

    - lanes: lane names, e.g. 'BOTTOM' or 'TOP'.

    - keys: timeline keys, e.g. 'goldPerMinDeltas'.

    - frames: frame names, e.g. '0-10'.

    Each yielded item is a `(name, 'f4', extractor)` tuple. The name
    is built from key, team colour, lane and the end of the frame
    (e.g. 'goldPerMinDeltasBlueBottom10'); the extractor is
    `_extract_participant_timeline_key` with everything but the
    match details already bound.
    '''
    # Iterate over the arguments rather than fixed tuples, so callers
    # actually control which features get generated.
    for team_id in team_ids:
        for lane in lanes:
            for key in keys:
                for frame in frames:
                    yield (
                        f"{key}{team_to_text[team_id].title()}{lane.title()}{frame.split('-')[-1]}",
                        'f4',
                        partial(_extract_participant_timeline_key, team_id, lane, key, frame)
                    )
        
# Every feature is a (column name, numpy type code, extractor) triple.
# The extractor takes the parsed match json and returns the value.
features = (
    ('redWin', 'i1', extract_red_win),

    # team-level objectives
    ('firstDragon', 'i1', partial(_extract_indicator_from_teams, 'firstDragon')),
    ('firstRiftHerald', 'i1', partial(_extract_indicator_from_teams, 'firstRiftHerald')),

    # first blood
    ('firstBlood', 'i1', partial(_extract_indicator_from_teams, 'firstBlood')),
    ('anyFirstBloodAssist', 'i1', partial(_extract_participant_stat, any, 'firstBloodAssist')),
    ('sumFirstBloodAssist', 'i1', partial(_extract_participant_stat, sum, 'firstBloodAssist')),

    # first tower
    ('firstTower', 'i1', partial(_extract_indicator_from_teams, 'firstTower')),
    ('anyFirstTowerAssist', 'i1', partial(_extract_participant_stat, any, 'firstTowerAssist')),
    ('sumFirstTowerAssist', 'i1', partial(_extract_participant_stat, sum, 'firstTowerAssist')),
) + tuple(
    # per-team, per-lane timeline deltas
    generate_participant_timeline_extractors(
        team_ids=(100, 200),
        lanes=('BOTTOM', 'MIDDLE', 'TOP', 'JUNGLE'),
        keys=('goldPerMinDeltas', 'creepsPerMinDeltas', 'xpPerMinDeltas', 'damageTakenPerMinDeltas'),
        frames=('0-10',),
    )
)

## Loading Data

The data is in a folder with one file per match. The files are JSON encoded and named `{matchID}.json`.

In [None]:
# Folder that holds the match files.
base_path = join(expanduser('~'), 'Downloads', 'lol_matches')

# Only the first 1000 directory entries are considered.
filelist = listdir(base_path)[:1000]

# Value every cell holds until a match file overwrites it.
type_to_default = {'i1': -128, 'f4': np.nan}

# One record per directory entry, one field per feature.
data = np.zeros(
    len(filelist),
    dtype=[(name, type_code) for name, type_code, _ in features])

for name, type_code, _ in features:
    data[name].fill(type_to_default[type_code])

In [None]:
# Upper bound only: of these entries, just the ones ending in '.json'
# are submitted for loading.
print(f"loading max. {len(filelist)} files")

def load_and_parse_match_details(data: np.ndarray, file_i: int, base_path: str, file_name: str):
    '''
    Load one match file and store its features in row `file_i` of `data`.

    - data (np.ndarray): structured array with one field per entry
        of `features`; it is modified in place.

    - file_i (int): row of `data` that belongs to this file.

    - base_path (str): folder containing the match files.

    - file_name (str): name of the match file inside `base_path`.

    Nothing is stored when `file_name` is not a regular file. A match
    whose 'queueId' is neither 420 nor 440 is not stored either, and
    its file is DELETED from disk.

    Returns `file_i`, so the caller can tell which file finished.
    '''
    file_path = join(base_path, file_name)

    if not isfile(file_path):
        return file_i

    # Decode explicitly as UTF-8 instead of relying on the platform's
    # default encoding.
    with open(file_path, 'r', encoding='utf-8') as match_file:
        match_details = load(match_file)

    if match_details['queueId'] not in (420, 440):
        remove(file_path)
        return file_i

    for feature, _, extractor in features:
        data[feature][file_i] = extractor(match_details)

    return file_i

max_file_done = 0  # highest file index that finished without an error
files_done = 0     # number of finished files, failed ones included
errors = []        # exceptions raised while loading, reported at the end

with ThreadPoolExecutor() as executor:

    # NOTE(review): `_max_workers` is a private attribute of the executor.
    print(f"using {executor._max_workers} threads")

    futures = tuple(
        executor.submit(load_and_parse_match_details, data, file_i, base_path, file_name)
        for file_i, file_name in enumerate(filelist)
        if file_name.endswith('.json')
    )

    print(f"submitted all {len(futures)} to the executor")

    for future in as_completed(futures):
        files_done += 1

        try:
            file_i = future.result()
        except Exception as exception:
            # Reported after the loop; printing here would be wiped by
            # the next `clear_output`.
            errors.append(exception)
        else:
            max_file_done = max(max_file_done, file_i)

        # Futures complete in arbitrary order, so progress is the count
        # of finished futures out of the submitted ones, not the highest
        # index seen.
        if files_done % 1000 == 0 or files_done == len(futures):
            clear_output(wait=True)
            display(f"{100.0*files_done/len(futures):>5.1f}%")

for exception in errors:
    print(f"error parsing files - {exception}")

## Cleanup

Remove matches that have neither a first blood nor a first tower. These presumably were not played to the end.

In [None]:
# Keep a row only if
#  - it was actually filled in: rows of skipped, removed or failed files
#    still hold the 'i1' default in 'redWin', and that default is != 0,
#    so the objective test alone would let them through;
#  - the match has a first blood or a first tower.
data = data[
    (data['redWin'] != type_to_default['i1'])
    & ((data['firstBlood'] != 0) | (data['firstTower'] != 0))
].copy()

Drop any columns that only have one value. They provide no information at all.

In [None]:
# A column is dropped when every row equals the first row. With no rows
# left there is nothing to compare, so no column is dropped.
drop_columns = [
    name for name, _, _ in features
    if len(data) > 0 and (data[name] == data[name][0]).all()
]

# Selecting several fields of a structured array takes a *list* of field
# names; a tuple is interpreted as a multi-dimensional index instead.
data = data[[name for name, _, _ in features if name not in drop_columns]].copy()
features = tuple(feature for feature in features if feature[0] not in drop_columns)

## Exploring the Data

In [None]:
# Compute the correlation matrix: one row/column per remaining feature
corr = np.zeros((len(features), len(features)), dtype='float32')

for f1_i, (f1, _, _) in enumerate(features):
    for f2_i, (f2, _, _) in enumerate(features):
        corr[f1_i, f2_i] = np.corrcoef(data[f1], data[f2])[0, 1]

# Generate a mask for the upper triangle (diagonal included).
# The builtin `bool` is used because the `np.bool` alias is deprecated
# and no longer exists in newer numpy releases.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sb.diverging_palette(220, 10, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
_ = sb.heatmap(
    corr, mask=mask, cmap=cmap, #vmax=.3,
    linewidths=.5, cbar_kws={"shrink": .5},
    ax=ax, annot=True,
    xticklabels=tuple(f for f, _, _ in features),
    yticklabels=tuple(f for f, _, _ in features))

In [None]:
# data['sumFirstTowerAssist'] = (((data['sumFirstTowerAssist']*data['anyFirstTowerAssist'])+1)*data['anyFirstTowerAssist'])
# data['sumFirstTowerAssist'] = (1.5 / (1 + np.exp(-data['sumFirstTowerAssist'])))-0.5

In [None]:
# Logistic regression of the match outcome on the early objectives.
logistic_model = pm.Model()

with logistic_model:
    # Registers the GLM with the model opened by the `with` block.
    pm.glm.GLM.from_formula(
        'redWin ~ firstDragon + firstRiftHerald + firstBlood + firstTower + anyFirstTowerAssist + sumFirstTowerAssist',
        data,
        family=pm.glm.families.Binomial(),
    )

    # 2000 draws after 1000 tuning steps.
    trace_logistic_model = pm.sample(draws=2000, tune=1000, chains=None)

In [None]:
def plot_traces(traces, retain=1000):
    '''
    Convenience function:
    Plot traces with overlaid means and values

    - traces: sampled trace to plot.

    - retain (int): only the last `retain` samples of the trace
        are plotted and summarised.
    '''
    # Slice and summarise once; both the overlay lines and the
    # annotations use the same means.
    tail = traces[-retain:]
    summary = pm.summary(tail)

    ax = pm.traceplot(tail, figsize=(12,len(traces.varnames)*1.5),
        lines={k: v['mean'] for k, v in summary.iterrows()})

    # Write each mean next to its position in the left-hand plot of
    # the corresponding row.
    for i, mn in enumerate(summary['mean']):
        ax[i,0].annotate('{:.2f}'.format(mn), xy=(mn,0), xycoords='data'
                    ,xytext=(5,10), textcoords='offset points', rotation=90
                    ,va='bottom', fontsize='large', color='#AA0022')

plot_traces(trace_logistic_model, retain=1000)