- Will be able to visualize how champion mastery influences the probability of winning

- https://stats.stackexchange.com/questions/323290/kernel-functions-for-vectors-in-discrete-spaces

- https://www.ml.cmu.edu/research/dap-papers/kondor-diffusion-kernels.pdf

In [1]:
import os
import json 
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import networkx as nx
from numpy.linalg import norm
from dataclasses import dataclass
from dotenv import load_dotenv
from riotwatcher import LolWatcher, ApiError

# Load the champion metadata file and build a (name, id) lookup table.
# The context manager closes the file handle as soon as the JSON is parsed.
with open('champions.json', encoding='utf8') as f:
    json_champ = json.load(f)

# One row per entry of json_champ['data']: the dictionary key becomes the
# champion name and the entry's 'key' field is converted to an integer id.
champions = pd.DataFrame([
    {'name': name, 'id': int(info['key'])}
    for name, info in json_champ['data'].items()
])
champions.head()

Unnamed: 0,id,name
0,266,Aatrox
1,103,Ahri
2,84,Akali
3,12,Alistar
4,32,Amumu


# Summoners

In [6]:
class Summoners:
    """Collect ranked summoners and their recent matches through an API client.

    Args:
        lol: API client exposing ``league.entries``, ``summoner.by_id`` and
            ``match.matchlist_by_account`` (a ``LolWatcher`` in this notebook).
        region: Region identifier forwarded to every API call, e.g. ``'na1'``.
        patch_date: Tuple unpacked into ``datetime.datetime(*patch_date)``;
            matches older than this moment are discarded.
        N: Controls how many league pages are requested (``N // 200`` pages).
    """

    def __init__(self, lol, region, patch_date, N):
        self.N = N
        self.region = region
        self.lol = lol
        # Epoch time in *seconds* (what ``datetime.timestamp`` returns).
        self.patch_date = datetime.datetime(*patch_date).timestamp()

        self.match_params = {'queue': 420}
        # Use the configured region rather than a hard-coded 'na1' so the
        # league query agrees with the other calls made by this class.
        self.summoner_params = lambda x: {
            'region': self.region,
            'queue': 'RANKED_SOLO_5x5',
            'tier': 'DIAMOND',
            'division': 'I',
            'page': x}

    def get_summoner(self, page_number):
        """Return the raw league entries for one result page."""
        return self.lol.league.entries(**self.summoner_params(page_number))

    def get_account_id(self, summonerId):
        """Return the ``accountId`` belonging to ``summonerId``."""
        return self.lol.summoner.by_id(self.region, summonerId)['accountId']

    def get_match_list(self, accountId):
        """Return matches of ``accountId`` played on or after the patch date.

        Returns:
            DataFrame with columns ``gameId``, ``season`` and ``timestamp``
            (empty, with those columns, when the account has no matches).
        """
        response = self.lol.match.matchlist_by_account(
            self.region, accountId, **self.match_params)
        columns = ['gameId', 'season', 'timestamp']
        # Passing ``columns`` keeps the frame well-formed for an empty list.
        match_frame = pd.DataFrame(response['matches'], columns=columns)
        # Match timestamps are epoch milliseconds while ``patch_date`` is in
        # seconds; convert before comparing, otherwise nothing is filtered.
        cutoff_ms = self.patch_date * 1000
        return match_frame[match_frame['timestamp'] >= cutoff_ms]

    def collect_summoners(self, accountId=False):
        """Return established, active summoners from the requested pages.

        Entries flagged ``freshBlood`` or ``inactive`` are dropped.

        Args:
            accountId: When truthy, add an ``accountId`` column (one extra
                API call per summoner).

        Returns:
            DataFrame with ``summonerId``, ``tier``, ``rank``, ``wins``,
            ``losses`` (and optionally ``accountId``); empty when no page
            is requested or every page is empty.
        """
        pages = (self.N // 200) + 1
        search_fields = ['summonerId', 'tier', 'rank', 'wins', 'losses']
        frames = []
        for page in range(1, pages):
            entries = self.get_summoner(page)
            if not entries:
                # An empty page has no columns to query against.
                continue
            frames.append(
                pd.DataFrame(entries)
                .query("freshBlood == False and inactive == False")
                .loc[:, search_fields])

        if frames:
            summoners = pd.concat(frames)
        else:
            # pd.concat raises on an empty list; return an empty, well-formed frame.
            summoners = pd.DataFrame(columns=search_fields)

        if accountId:
            summoners['accountId'] = [
                self.get_account_id(ID) for ID in summoners['summonerId']]
        return summoners

    def collect_matches(self):
        """Return the concatenated recent match lists of all collected summoners.

        Rows keep the index they had in each summoner's match list, and a game
        shared by several summoners appears once per summoner.
        """
        summoners = self.collect_summoners(accountId=True)
        print("Checkpoint")
        # DataFrame.append no longer exists in pandas 2.x; gather the
        # per-account frames and concatenate them once.
        frames = [self.get_match_list(ID) for ID in summoners['accountId']]
        if not frames:
            return pd.DataFrame()
        # TODO(review): consider .drop_duplicates() so shared games are fetched once.
        return pd.concat(frames)

# Game

In [3]:
@dataclass
class Game:
    """One match: raw API payloads plus helpers that turn them into player rows.

    Attributes:
        lol: API client used for every request made by this object.
        region: Region identifier forwarded to the API.
        ID: Identifier of the match to download.

    On construction the match record and its timeline are fetched and the
    summoner ids of all participants are extracted.
    """
    lol: LolWatcher
    region: str
    ID: str
    # Unannotated on purpose: these are shared class-level column lists, not
    # dataclass fields. They must never be mutated in place.
    player_info = ['championId', 'participantId', 'teamId']
    final_info = ['win','true_lane','gameId','name']

    def __post_init__(self):
        # Use the client passed to this instance, not a module-level `lol`.
        self.raw_data = self.lol.match.by_id(self.region, self.ID)
        self.raw_timeline = self.lol.match.timeline_by_match(self.region, self.ID)

        self.summoner_ids = [
            identity['player']['summonerId']
            for identity in self.raw_data['participantIdentities']]

    def get_mastery(self, championId, summonerId):
        """Return ``summonerId``'s mastery points on ``championId``.

        Raises:
            IndexError: If the summoner has no mastery entry for the champion.
        """
        query = self.lol.champion_mastery.by_summoner(self.region, summonerId)
        mastery = pd.DataFrame(query, columns=['championId', 'championPoints'])
        return mastery.query('championId == @championId')['championPoints'].iloc[0]

    def players(self):
        """Return one row per participant with champion, team and team outcome."""
        players = pd.DataFrame(self.raw_data['participants'], columns=self.player_info)
        outcome = pd.DataFrame(self.raw_data['teams'], columns=['teamId', 'win'])
        return players[self.player_info].merge(outcome)

    def stats(self):
        """Return the per-participant ``stats`` dictionaries as a DataFrame."""
        players = pd.DataFrame(
            self.raw_data['participants'], columns=self.player_info + ['stats'])
        return pd.DataFrame(list(players['stats']))

    def lane_role(self):
        """Return the API-reported ``lane`` and ``role`` of every participant."""
        df = pd.DataFrame(
            [participant['timeline'] for participant in self.raw_data['participants']])
        return df[['participantId', 'lane', 'role']]

    def avg_position(self):
        """Return ten rows of averaged map positions taken from the timeline.

        Positions of all participant frames (every timeline frame except the
        last) are flattened into rows; the first 19 rows are kept and row
        ``i`` of the result is the mean of rows ``i, i + 9, i + 18`` of that
        slice.
        """
        # NOTE(review): a 19-row window with a stride of 9 does not line up
        # with the ten participants per frame assumed elsewhere in this class.
        # Confirm whether a stride of 10 over more frames was intended.
        frames = pd.DataFrame([
                     val.get('position') for f in self.raw_timeline['frames'][:-1]
                         for val in f['participantFrames'].values()]).iloc[:19]
        return pd.DataFrame([frames.iloc[i::9].mean() for i in range(10)])

    def data(self, mastery=False):
        """Return the modelling rows for this match.

        Args:
            mastery: When truthy, add a ``mastery`` column (one extra API
                call per participant).

        Returns:
            DataFrame with the ``final_info`` columns (plus ``mastery`` when
            requested), or ``None`` when the participants' lane labels take
            two or fewer distinct values (treated as a remake).

        Champion names come from the module-level ``champions`` table.
        """
        players = self.players()
        lane_role = self.lane_role()
        avg_pos = self.avg_position()

        if lane_role['lane'].unique().size <= 2:  # checking for remake
            return None

        unlabeled = lane_role.merge(avg_pos, left_index=True, right_index=True)
        team1 = self.estimate_true_lane(unlabeled, 100)
        team2 = self.estimate_true_lane(unlabeled, 200)
        players['true_lane'] = team1 + team2

        # Copy the column list: appending to the shared class attribute would
        # leak 'mastery' into every later call, including mastery=False ones.
        cols = list(self.final_info)
        if mastery:
            players['mastery'] = [
                self.get_mastery(champion_id, summoner_id)
                for champion_id, summoner_id in zip(players['championId'], self.summoner_ids)]
            cols.append('mastery')
        players['gameId'] = self.raw_data['gameId']
        return players.merge(champions, left_on='championId', right_on='id')[cols]

    def closest_position(self, avg_cord, remaining, coords):
        """Return the key in ``remaining`` whose reference point is nearest.

        Args:
            avg_cord: Array-like ``(x, y)`` position.
            remaining: Candidate keys of ``coords`` (non-empty list).
            coords: Mapping from key to its reference ``[x, y]`` point.

        Ties are resolved in favour of the earliest key in ``remaining``.

        Raises:
            ValueError: If ``remaining`` is empty.
        """
        point = np.asarray(avg_cord, dtype=float)
        return min(
            remaining,
            key=lambda key: norm(point - np.asarray(coords[key], dtype=float)))

    def estimate_true_lane(self, df, teamId):
        """Assign one of ADC/TOP/SUPPORT/MIDDLE/JUNGLE to each player of a team.

        Args:
            df: Frame with ``lane``, ``role``, ``x`` and ``y`` columns; rows
                0-4 are used for team 100 and rows 5-9 for team 200.
            teamId: ``100`` or ``200``; any other value uses ``df`` unchanged.

        Returns:
            List of lane labels ordered by the row index of ``df``.

        Players whose (lane, role) pair is unique and recognisable keep the
        reported lane. Everyone else receives the closest still-unassigned
        lane, judged by the distance between their average position and a
        fixed reference point per lane.
        """
        coords = {'ADC': [1e4, 0], 'TOP': [0, 1e4],
                  'SUPPORT': [1e4, 0], 'MIDDLE': [5e3, 5e3],
                  'JUNGLE': [5e3, 2500]}

        if teamId == 100:
            df = df.iloc[0:5]
        elif teamId == 200:
            df = df.iloc[5:10]

        ambiguous = df[['lane', 'role']].duplicated(keep=False).to_numpy()
        assigned = {}
        # Players sharing a (lane, role) pair are always resolved by position.
        unresolved = list(df.index[ambiguous])
        for idx in df.index[~ambiguous]:
            role, lane = df.at[idx, 'role'], df.at[idx, 'lane']
            if lane == 'JUNGLE' or (role == 'SOLO' and lane in ('TOP', 'MIDDLE')):
                assigned[idx] = lane
            elif lane == 'BOTTOM' and role == 'DUO_CARRY':
                assigned[idx] = 'ADC'
            elif lane == 'BOTTOM' and role == 'DUO_SUPPORT':
                assigned[idx] = 'SUPPORT'
            else:
                unresolved.append(idx)

        # Keep the declaration order of `coords` instead of a set's arbitrary
        # order: ADC and SUPPORT share a reference point, so the tie-break
        # would otherwise change between interpreter runs.
        taken = set(assigned.values())
        remaining = [lane for lane in coords if lane not in taken]
        for idx in unresolved:
            guess = self.closest_position(df[['x', 'y']].loc[idx], remaining, coords)
            assigned[idx] = guess
            remaining.remove(guess)
        return [assigned[idx] for idx in sorted(assigned)]

# Testing and Aggregating Data

In [4]:
# Query settings: region, patch date as (year, month, day), and the summoner budget.
region, patch_date, N = 'na1', (2020, 10, 1), 200

# Pull API_KEY from the environment (after loading a .env file) and build the client.
load_dotenv()
API_KEY = os.environ.get('API_KEY')
lol = LolWatcher(API_KEY)
S = Summoners(lol=lol, region=region, patch_date=patch_date, N=N)

In [7]:
# Gather the match lists of every collected summoner and preview the first rows.
matches = S.collect_matches()
matches.head(5)

Checkpoint


Unnamed: 0,gameId,season,timestamp
0,3658625362,13,1605133811717
1,3658569120,13,1605131270472
2,3658595071,13,1605129583573
3,3658570983,13,1605127760658
4,3658487910,13,1605126367185


In [8]:
# example
G = Game(lol, region, matches['gameId'].iloc[2])
G.data(mastery = True)

Unnamed: 0,win,true_lane,gameId,name,mastery
0,Win,MIDDLE,3658595071,Ahri,83095
1,Win,JUNGLE,3658595071,Khazix,5530
2,Win,TOP,3658595071,Renekton,465820
3,Win,SUPPORT,3658595071,Annie,99136
4,Win,ADC,3658595071,Twitch,388107
5,Fail,TOP,3658595071,Akali,4961
6,Fail,ADC,3658595071,Jinx,5272
7,Fail,JUNGLE,3658595071,Amumu,58495
8,Fail,MIDDLE,3658595071,Kled,854064
9,Fail,SUPPORT,3658595071,Lulu,9392


In [9]:
# Preview the per-participant statistics of the example match.
G.stats().head(5)

Unnamed: 0,assists,champLevel,combatPlayerScore,damageDealtToObjectives,damageDealtToTurrets,damageSelfMitigated,deaths,doubleKills,firstBloodAssist,firstBloodKill,...,trueDamageDealt,trueDamageDealtToChampions,trueDamageTaken,turretKills,unrealKills,visionScore,visionWardsBoughtInGame,wardsKilled,wardsPlaced,win
0,7,13,0,2288,2288,5109,2,0,False,False,...,26375,5313,614,0,0,14,2,0,11,True
1,5,13,0,15335,139,15556,5,1,False,False,...,9069,812,1349,1,0,15,0,0,7,True
2,2,14,0,20605,5608,14976,1,0,False,True,...,7065,795,906,3,0,21,1,1,8,True
3,15,11,0,1883,1875,4595,6,0,False,False,...,906,906,702,0,0,36,0,2,18,True
4,4,13,0,4884,4884,8087,4,1,False,False,...,8197,1922,646,2,0,7,0,0,4,True


In [None]:
# Build the modelling table from (at most) the first 2000 collected matches.
# mastery=True because the modelling cells below read the 'mastery' column;
# this costs one extra API call per participant.
frames = []
for i, game_id in enumerate(matches['gameId'].iloc[:2000]):
    print(i)
    try:
        G = Game(lol, region, game_id)
        rows = G.data(mastery=True)
    except ApiError as err:
        # Request-level failure: report it and move on to the next match.
        print("API Error", err)
        continue
    except Exception as err:
        # Keep the run best-effort, but do not mislabel parsing problems as API errors.
        print(f"Skipping game {game_id}: {err!r}")
        continue
    if rows is None:
        # data() returned nothing for this match (e.g. a remake).
        continue
    # Drop repeated column labels so every frame concatenates cleanly.
    frames.append(rows.loc[:, ~rows.columns.duplicated()])

# DataFrame.append no longer exists in pandas 2.x; concatenate once at the end.
game_data = pd.concat(frames) if frames else pd.DataFrame()

# Modelling

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [None]:
# Combine champion and lane into one categorical label, e.g. '<name>_<true_lane>',
# then one-hot encode the categorical columns next to the numeric mastery.
game_data['champ_lane'] = game_data['name'].add('_').add(game_data['true_lane'])
clean = pd.get_dummies(game_data.loc[:, ['mastery', 'win', 'champ_lane']])

In [None]:
# Hold out 10% of the rows with a fixed seed.
train, test = train_test_split(clean, random_state=7, test_size=0.1)

# Features are everything except the two one-hot outcome columns; the target is 'win_Win'.
X_train = train.drop(columns=['win_Win', 'win_Fail'])
y_train = train.loc[:, 'win_Win']

In [None]:
# Grid-search C for an L1-penalised logistic regression fitted without an intercept.
parameters = {'C': np.linspace(0.01, 3, 10)}
logistic = LogisticRegression(fit_intercept=False, penalty='l1', solver='liblinear')
search = GridSearchCV(estimator=logistic, param_grid=parameters, return_train_score=True)
search.fit(X_train, y_train)

In [None]:
# Plot the mean cross-validated score for every C value tried by the grid search.
plt.figure(figsize=(7, 5))
plt.plot(search.param_grid['C'], search.cv_results_['mean_test_score'], color='k')
plt.ylim([0, 1])
plt.title('Cross Validation')
# The searched parameter is C, the inverse of the regularization strength,
# so labelling the axis as lambda would read the curve backwards.
plt.xlabel('C (inverse regularization strength)')
plt.ylabel('score')
plt.show()

Amazing. I guess League of Legends really is a coin flip game. I am only using (champion, champ_mastery, lane) at this point to predict a binary outcome. Here's what I have in mind for future improvements:

1. Simplify the model to focus on lane matchups instead of entire team comps. This would allow me to calculate features using in-game statistics from previous games for a given pair of summoners and champions, for example average creep score. While I could include this information in a more general model, I believe it would be more informative to analyze each lane individually.

2. Get summoner win/loss information from previous 5 games to see if they are on a hot streak. 