In [1]:
# This notebook is a test of classification algorithms with scikit-learn.
# The basic application is to predict whether or not a team will win, based on the ADC character matchup and the year.
import pandas as pd

# Relative path of the League of Legends matches CSV used by default.
LEAGUE_PATH = "datasets/lol/LeagueofLegends.csv"


def load_lol_data(file_path=LEAGUE_PATH):
    """Read the CSV at `file_path` (default: LEAGUE_PATH) into a DataFrame."""
    raw_matches = pd.read_csv(file_path)
    return raw_matches

In [4]:
# Load the raw match table from the default path and preview its first five rows.
matches = load_lol_data()
matches.head(n=5)

Unnamed: 0,MatchHistory,League,Season,Year,blueTeamTag,bResult,rResult,redTeamTag,gamelength,blueTop,...,redTop,redTopChamp,redJungle,redJungleChamp,redMiddle,redMiddleChamp,redADC,redADCChamp,redSupportChamp,redSupport
0,http://matchhistory.na.leagueoflegends.com/en/...,North_America,Spring_Season,2015,TSM,1,0,C9,40,Dyrus,...,Balls,Gnar,Meteos,Elise,Hai,Fizz,Sneaky,Sivir,Thresh,LemonNation
1,http://matchhistory.na.leagueoflegends.com/en/...,North_America,Spring_Season,2015,CST,0,1,DIG,38,Cris,...,Gamsu,Irelia,Crumbzz,JarvanIV,Shiphtur,Azir,CoreJJ,Corki,Annie,KiWiKiD
2,http://matchhistory.na.leagueoflegends.com/en/...,North_America,Spring_Season,2015,WFX,1,0,GV,40,Flaresz,...,Hauntzer,Sion,Saintvicious,LeeSin,Keane,Azir,Cop,Corki,Janna,BunnyFuFuu
3,http://matchhistory.na.leagueoflegends.com/en/...,North_America,Spring_Season,2015,TIP,0,1,TL,41,Rhux,...,Quas,Gnar,IWDominate,Nunu,Fenix,Lulu,KEITH,KogMaw,Janna,Xpecial
4,http://matchhistory.na.leagueoflegends.com/en/...,North_America,Spring_Season,2015,CLG,1,0,T8,35,Benny,...,CaliTrlolz8,Sion,Porpoise8,RekSai,Slooshi8,Lulu,Maplestreet8,Corki,Annie,Dodo8


In [5]:
from typing import Any, List

def process_line(line: List[Any]) -> List[List[Any]]:
    """
    Split one raw match row into two per-team records.

    Fields are read by position, so `line` may be a plain list/tuple or a
    pandas row (Series). The row is first copied into a list so every lookup
    is positional and never goes through label-based indexing.

    Positions read from the raw row:
        3  -> Year
        5  -> bResult (blue side result)
        6  -> rResult (red side result)
        16 -> blue side ADC champion
        26 -> red side ADC champion

    Returns a two-element list: the red side record first, then the blue side
    record. Each record has the format:
    ["Win", "Year", "OwnADC", "OppADC"]

    Raises IndexError if the row has fewer than 27 fields.
    """
    values = list(line)

    year = values[3]
    blue_win, red_win = values[5], values[6]
    blue_adc, red_adc = values[16], values[26]

    # Each side sees its own ADC pick first and the opposing pick second.
    red_row = [red_win, year, red_adc, blue_adc]
    blue_row = [blue_win, year, blue_adc, red_adc]

    return [red_row, blue_row]
    
    

In [9]:
# Expand every raw match into two per-team records (one per side).

processed_games = []
# itertuples(index=False, name=None) yields each row as a plain tuple of column
# values, so process_line's positional lookups do not rely on label-based
# Series indexing (and it is cheaper than building a Series per row).
for row in matches.itertuples(index=False, name=None):
    processed_games.extend(process_line(row))

# Count rows directly rather than deriving the count from the largest index
# label: that approach reports 1 for an empty frame and is wrong for any
# index that is not the default 0..n-1 range.
max_len = len(matches)

print("Length of unprocessed games: %d" % max_len)
print("Length of processed games: %d" % len(processed_games))

Length of unprocessed games: 3645
Length of processed games: 7290


In [11]:
# Spot-check one record: index 1 is the second row emitted for the first match
# (process_line returns the red-side row first, then the blue-side row).
processed_games[1]

[1, 2015, 'Jinx', 'Sivir']

In [19]:
# Rebuild a DataFrame from the per-team records so pandas can be used for the analysis.
record_columns = ['Win', 'Year', 'OwnADC', 'OppADC']
matches_panda = pd.DataFrame(data=processed_games, columns=record_columns)

In [20]:
# Preview the first five per-team records.
matches_panda.head(n=5)

Unnamed: 0,Win,Year,OwnADC,OppADC
0,0,2015,Sivir,Jinx
1,1,2015,Jinx,Sivir
2,1,2015,Corki,Caitlyn
3,0,2015,Caitlyn,Corki
4,0,2015,Corki,Sivir


In [21]:
# Count how many records there are for each value of OwnADC.
matches_panda.loc[:, "OwnADC"].value_counts()

Sivir          1171
Lucian          940
Ashe            761
Jhin            726
Ezreal          709
Kalista         582
Corki           567
Caitlyn         373
Varus           309
Tristana        219
KogMaw          216
Jinx            175
Graves          141
Vayne           137
Twitch           69
Urgot            61
Ziggs            39
MissFortune      39
Draven           30
Kennen           12
Mordekaiser       7
Kindred           2
Jayce             2
Lulu              1
Quinn             1
Janna             1
Name: OwnADC, dtype: int64