# Your Title Here

**Name(s)**: (your name(s) here)

**Website Link**: (your website link)

## Code

In [173]:
import pandas as pd
import numpy as np
import os

import plotly.express as px
pd.options.plotting.backend = 'plotly'
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer


### Framing the Problem

In [174]:
fp = r"data\2014_LoL_esports_match_data_from_OraclesElixir.csv"

with open(fp, 'r') as f:
    columns = (f.readline())
columns

'gameid,datacompleteness,url,league,year,split,playoffs,date,game,patch,participantid,side,position,playername,playerid,teamname,teamid,champion,ban1,ban2,ban3,ban4,ban5,gamelength,result,kills,deaths,assists,teamkills,teamdeaths,doublekills,triplekills,quadrakills,pentakills,firstblood,firstbloodkill,firstbloodassist,firstbloodvictim,team kpm,ckpm,firstdragon,dragons,opp_dragons,elementaldrakes,opp_elementaldrakes,infernals,mountains,clouds,oceans,chemtechs,hextechs,dragons (type unknown),elders,opp_elders,firstherald,heralds,opp_heralds,firstbaron,barons,opp_barons,firsttower,towers,opp_towers,firstmidtower,firsttothreetowers,turretplates,opp_turretplates,inhibitors,opp_inhibitors,damagetochampions,dpm,damageshare,damagetakenperminute,damagemitigatedperminute,wardsplaced,wpm,wardskilled,wcpm,controlwardsbought,visionscore,vspm,totalgold,earnedgold,earned gpm,earnedgoldshare,goldspent,gspd,total cs,minionkills,monsterkills,monsterkillsownjungle,monsterkillsenemyjungle,cspm,goldat10,xp

In [175]:
# Reading the data
from tqdm.notebook import tqdm

pd.set_option('display.max_columns', None)
usecols = ['gameid','datacompleteness', 'league', 'year', 'date', 'game', 'patch', 
           'side', 'position', 'playername', 'teamname', 'champion', 'gamelength', 'result', 
           'firstblood', 'firsttower', 'turretplates', 'opp_turretplates',
           'killsat15', 'deathsat15', 'assistsat15', 'opp_killsat15', 'opp_deathsat15', 'opp_assistsat15',
           'firstdragon', 'elders', 'opp_elders', 'firstherald', 'barons', 'opp_barons', 'firstbaron',
           'golddiffat10', 'xpdiffat10', 'csdiffat10', 
           'golddiffat15', 'xpdiffat15', 'csdiffat15',]
                 

directory = r'data'
df = pd.DataFrame()
for filename in tqdm(os.listdir(directory)):
    new_df = pd.read_csv(os.path.join(directory, filename), usecols=usecols)
    df = pd.concat([df, new_df])
df['gameid'] = df['gameid'].astype(str)

  0%|          | 0/10 [00:00<?, ?it/s]

In [176]:
count = df.pivot_table(
    index="champion",
    columns="position",
    values="gameid",
    aggfunc="count",
    fill_value=0
)

normalized_count = count.div(count.sum(axis=1), axis=0)*100
normalized_count = normalized_count.round(1)
tvd = normalized_count.apply(lambda row: sum(abs(row - row.mean())) / 2, axis=1)
normalized_count['tvd'] = tvd
normalized_count.sort_values('tvd', ascending=False)

position,bot,jng,mid,sup,top,tvd
champion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Naafiri,0.0,0.0,100.0,0.0,0.0,80.0
Jinx,100.0,0.0,0.0,0.0,0.0,80.0
Briar,0.0,100.0,0.0,0.0,0.0,80.0
Renata Glasc,0.1,0.0,0.0,99.9,0.0,79.9
Gnar,0.0,0.0,0.1,0.0,99.9,79.9
...,...,...,...,...,...,...
Seraphine,32.1,0.0,39.9,27.9,0.1,39.9
Ekko,0.2,22.9,48.7,0.1,28.1,39.7
Swain,23.0,0.0,54.8,1.1,21.1,38.9
Sett,0.3,9.7,15.3,26.9,47.8,34.7


In [177]:
df[['champion', 'golddiffat15']].groupby('champion')['golddiffat15'].mean().abs().diff

<bound method Series.diff of champion
Aatrox      73.443936
Ahri        21.500473
Akali       47.291788
Akshan     343.759657
Alistar     66.884666
              ...    
Zeri         6.172757
Ziggs       32.814883
Zilean     197.765354
Zoe         94.862220
Zyra        96.717972
Name: golddiffat15, Length: 165, dtype: float64>

In [178]:
# Data Cleaning
# Convert date to datetime
df['date'] = pd.to_datetime(df['date'])

# Remove games after October 2023 (the current league is still ongoing)
time_cutoff = pd.to_datetime('2023-10-01')
df = df[df['date'] < time_cutoff]

# Remove games before patch 6.9 (the first patch with elemental drakes)
df = df[df['patch'] >= 6.9]

# Filter out incomplete data, and fill in NA player names with 'unknown player'
df = df[df['datacompleteness'] == 'complete']
df['playername'] = df['playername'].fillna('unknown player')

# Remove team summary rows
df_teams = df.query('position == "team"')

# Fill in NA values with 0
df_teams = df_teams.fillna(0)

df_teams['platediff'] = df_teams['turretplates'] - df_teams['opp_turretplates']

### Baseline Model

In [179]:
df_teams.head()

Unnamed: 0,gameid,datacompleteness,league,year,date,game,patch,side,position,playername,teamname,champion,gamelength,result,firstblood,firstdragon,elders,opp_elders,firstherald,firstbaron,barons,opp_barons,firsttower,turretplates,opp_turretplates,golddiffat10,xpdiffat10,csdiffat10,golddiffat15,xpdiffat15,csdiffat15,killsat15,assistsat15,deathsat15,opp_killsat15,opp_assistsat15,opp_deathsat15,platediff
406,ESPORTSTMNT06/20195,complete,CK,2017,2017-01-16 07:36:08,1.0,7.01,Blue,team,unknown player,Ever8 Winners,0,3014,1,1.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,-419.0,-575.0,-18.0,701.0,858.0,7.0,2.0,3.0,0.0,0.0,0.0,2.0,0.0
407,ESPORTSTMNT06/20195,complete,CK,2017,2017-01-16 07:36:08,1.0,7.01,Red,team,unknown player,CJ Entus,0,3014,0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,419.0,575.0,18.0,-701.0,-858.0,-7.0,0.0,0.0,2.0,2.0,3.0,0.0,0.0
418,ESPORTSTMNT06/20207,complete,CK,2017,2017-01-16 09:30:58,2.0,7.01,Blue,team,unknown player,CJ Entus,0,2211,1,1.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,-133.0,-79.0,-4.0,-1653.0,-1264.0,8.0,3.0,2.0,6.0,6.0,12.0,3.0,0.0
419,ESPORTSTMNT06/20207,complete,CK,2017,2017-01-16 09:30:58,2.0,7.01,Red,team,unknown player,Ever8 Winners,0,2211,0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,133.0,79.0,4.0,1653.0,1264.0,-8.0,6.0,12.0,3.0,3.0,2.0,6.0,0.0
430,ESPORTSTMNT06/20215,complete,CK,2017,2017-01-16 10:27:14,3.0,7.01,Blue,team,unknown player,Ever8 Winners,0,2728,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,-3135.0,-1044.0,-3.0,-4307.0,-1843.0,-49.0,2.0,7.0,2.0,2.0,2.0,2.0,0.0


In [180]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(df_teams.drop(columns=['result']), df_teams['result'], test_size=0.25)


In [201]:
def assess_model(predicted_y, y_test, name='test', do_print=False):
    # Assessing the model
    TP = ((predicted_y == 1) & (y_test == 1)).sum()
    TN = ((predicted_y == 0) & (y_test == 0)).sum()

    FP = ((predicted_y == 1) & (y_test == 0)).sum()
    FN = ((predicted_y == 0) & (y_test == 1)).sum()

    accuracy = (TP + TN) / (TP + TN + FP + FN)
    recall = TP / (TP + FN)
    precision = TP / (TP + FP)
    f1 = 2 * precision * recall / (precision + recall)
    stats = {'acc': accuracy, 'recall': recall, 'precision': precision, 'f1': f1}
    if do_print: print(stats)
    return stats

def compare_train_test(model, X_train, X_test, y_train, y_test, do_print=False):
    model.fit(X_train, y_train)
    result_train = assess_model(model.predict(X_train), y_train, 'train', do_print=do_print)
    result_test = assess_model(model.predict(X_test), y_test, 'test', do_print=do_print)

    output = {'train': result_train, 'test': result_test}
    diff = result_test['acc'] - result_train['acc']
    pcnt_change = diff / result_train['acc']
    for key in result_test.keys():
        diff = result_test[key] - result_train[key]
        pcnt_change = abs(diff) / result_train[key]
        if do_print: print(f"{key.title()} change: {' ' * (10-len(key))}{diff:.4f} ({pcnt_change:.2%})")
    return {'test_acc': result_test['acc'], 'train_acc': result_train['acc'], 'diff': diff, 'pcnt_diff': pcnt_change}

In [209]:
all_models = []

# Random Forest Classifier with league, side, gdiff@15, xpdiff@15, firstherald

preproc = ColumnTransformer(transformers=[
    ('onehot', OneHotEncoder(), ['league', 'side']),
    ('passthrough', 'passthrough', ['golddiffat15', 'xpdiffat15', 'firstherald']),
], remainder='drop')

model = Pipeline([
    ('preprocessing', preproc),
    ('classification', RandomForestClassifier(max_depth=5))
])
all_models.append(model)


# Random Forest Classifier with side, gdiff@15, xpdiff@15, firstherald

preproc = ColumnTransformer(transformers=[
    ('onehot', OneHotEncoder(), ['side']),
    ('passthrough', 'passthrough', ['golddiffat15', 'xpdiffat15', 'firstherald']),
], remainder='drop')

model = Pipeline([
    ('preprocessing', preproc),
    ('classification', RandomForestClassifier(max_depth=5))
])
all_models.append(model)


# Random Forest Classifier with side, gdiff@15, xpdiff@15, firstblood

use_cols = ['golddiffat15', 'xpdiffat15', 'firstblood']

preproc = ColumnTransformer(transformers=[
    ('onehot', OneHotEncoder(), ['side']),
    ('passthrough', 'passthrough', use_cols),
], remainder='drop')

model = Pipeline([
    ('preprocessing', preproc),
    ('classification', RandomForestClassifier(max_depth=5))
])
all_models.append(model)


# Random Forest Classifier with firstherald, firistdragon, firsttower, firstblood, side

use_cols = ['firstherald', 'firstdragon', 'firsttower', 'firstblood']

preproc = ColumnTransformer(transformers=[
    ('onehot', OneHotEncoder(), ['side']),
    ('passthrough', 'passthrough', use_cols),
], remainder='drop')

model = Pipeline([
    ('preprocessing', preproc),
    ('classification', RandomForestClassifier(max_depth=5))
])
all_models.append(model)


# Random Forest Classifier with gdif@15, xpdiff@15, firstherald, firistdragon, firsttower, firstblood, side

use_cols = ['golddiffat15', 'xpdiffat15', 'firstherald', 'firstdragon', 'firsttower', 'firstblood']

preproc = ColumnTransformer(transformers=[
    ('onehot', OneHotEncoder(), ['side']),
    ('passthrough', 'passthrough', use_cols),
], remainder='drop')

model = Pipeline([
    ('preprocessing', preproc),
    ('classification', RandomForestClassifier(max_depth=5))
])
all_models.append(model)


# Random Forest Classifier with firstherald, firstdragon, firsttower, firstblood, platediff, side

use_cols = ['firstherald', 'firstdragon', 'firsttower', 'firstblood', 'platediff']

preproc = ColumnTransformer(transformers=[
    ('onehot', OneHotEncoder(), ['side']),
    ('std', StandardScaler(), ['golddiffat15', 'xpdiffat15']),
    ('passthrough', 'passthrough', use_cols),
], remainder='drop')

model = Pipeline([
    ('preprocessing', preproc),
    ('classification', RandomForestClassifier(max_depth=5))
])

all_models.append(model)

# Random Forest Classifier with firstherald, firstdragon, firsttower, firstblood, platediff, gdiff@15, xpdiff@15, side

use_cols = ['firstherald', 'firstdragon', 'firsttower', 'firstblood', 'platediff', 'golddiffat15', 'xpdiffat15']

preproc = ColumnTransformer(transformers=[
    ('onehot', OneHotEncoder(), ['side']),
    ('passthrough', 'passthrough', use_cols),
], remainder='drop')

model = Pipeline([
    ('preprocessing', preproc),
    ('classification', RandomForestClassifier(max_depth=5))
])

all_models.append(model)

# Logistic Regression with firstherald, firstdragon, firsttower, firstblood, platediff, side
from sklearn.linear_model import LogisticRegression

use_cols = ['firstherald', 'firstdragon', 'firsttower', 'firstblood', 'platediff', 'golddiffat15', 'xpdiffat15']

preproc = ColumnTransformer(transformers=[
    ('onehot', OneHotEncoder(), ['side']),
    ('passthrough', 'passthrough', use_cols),
], remainder='drop')

model = Pipeline([
    ('preprocessing', preproc),
    ('classification', LogisticRegression())
])
all_models.append(model)


In [211]:
mdl_df = pd.DataFrame()
for model in tqdm(all_models):
    comparison = compare_train_test(model, X_train, X_test, y_train, y_test)
    
    new_df = pd.DataFrame({'model': model}, index=[0])
    new_df = pd.concat([new_df, pd.DataFrame(comparison, index=[0])], axis=1)
    
    mdl_df = pd.concat([mdl_df, new_df])
mdl_df = mdl_df.reset_index(drop=True)

  0%|          | 0/8 [00:00<?, ?it/s]

Unnamed: 0,model,test_acc,train_acc,diff,pcnt_diff
0,"(ColumnTransformer(transformers=[('onehot', On...",0.747953,0.746719,0.000693,0.00093
0,"(ColumnTransformer(transformers=[('onehot', On...",0.749509,0.750485,-0.001815,0.002416
0,"(ColumnTransformer(transformers=[('onehot', On...",0.75062,0.750596,-0.000734,0.000978
0,"(ColumnTransformer(transformers=[('onehot', On...",0.695166,0.694734,-0.000504,0.000726
0,"(ColumnTransformer(transformers=[('onehot', On...",0.756399,0.756214,-9e-05,0.000119
0,"(ColumnTransformer(transformers=[('onehot', On...",0.757177,0.756054,0.000301,0.000398
0,"(ColumnTransformer(transformers=[('onehot', On...",0.757066,0.756375,0.000187,0.000247
0,"(ColumnTransformer(transformers=[('onehot', On...",0.758363,0.756078,0.00183,0.002419


In [218]:
best_baseline_model = (
    mdl_df
    .sort_values('test_acc', ascending=False)
    .iloc[0]['model']
)
best_baseline_model

Pipeline(steps=[('preprocessing',
                 ColumnTransformer(transformers=[('onehot', OneHotEncoder(),
                                                  ['side']),
                                                 ('passthrough', 'passthrough',
                                                  ['firstherald', 'firstdragon',
                                                   'firsttower', 'firstblood',
                                                   'platediff', 'golddiffat15',
                                                   'xpdiffat15'])])),
                ('classification', LogisticRegression())])

### Data Scraping 

In [185]:
import requests
import os
"""
For our final model, we decided to also consider the champions played in terms of whether or not they have a mobility skill.
To do this, we scraped the official Riot API, and the well-established CommunityDragon API for champion skill descriptions.
We compiled a list of mobility keywords, and then checked if any of the keywords were in the skill descriptions.
"""
print()




In [186]:
# Scrape the official Riot API for a list of all champion names
link = 'https://ddragon.leagueoflegends.com/cdn/13.24.1/data/en_US/champion.json'
r = requests.get(link)
all_champions = r.json()['data'].keys()
all_champions = np.array(list(all_champions))

# Check if the file already exists (If it does, read it in)
if os.path.exists('scraped/champ_skill_df.csv'):
    champ_skill_df = pd.read_csv('scraped/champ_skill_df.csv', index_col=0)
# If not, create a new dataframe
else:
    champ_skill_df = pd.DataFrame()

# Iterate through all champion names scraped from the official Riot API
for cur_champ in tqdm(all_champions):
    # If the champion has already been read, skip it
    if cur_champ in champ_skill_df.index:
        continue

    # Send a request to the Community Dragon API for the current champion's skills
    cur_link = f"https://cdn.communitydragon.org/13.24.1/champion/{cur_champ}/data"
    r2 = requests.get(cur_link)

    # Create a dictionary of the champion's skills
    champ_skills = {}
    for spell in r2.json()['spells']:
        spell_key = spell['spellKey']
        spell_desc = spell['description']
        champ_skills[spell_key.upper()] = spell_desc

    # Convert the dictionary to a dataframe and add it to the champ_skill_df dataframe
    cur_champ_df = pd.DataFrame(champ_skills, index=[cur_champ])
    champ_skill_df = pd.concat([champ_skill_df,cur_champ_df])

# Save the dataframe to a csv file
champ_skill_df.to_csv('scraped/champ_skill_df.csv')

  0%|          | 0/166 [00:00<?, ?it/s]

In [187]:
# Create a copy of champ_skill_df and change the index to lowercase for easier merging
champ_mobility_df = champ_skill_df.copy()
champ_mobility_df.index = champ_mobility_df.index.str.lower()

mobility_keywords = ['dash', 'teleport', 'blink', 'leap',' ram', 'beelines']

# Create a new column for each skill that indicates whether it has a mobility keyword
for col in champ_mobility_df.columns:
    champ_mobility_df[col] = champ_mobility_df[col].apply(lambda x: any(keyword in x for keyword in mobility_keywords))
has_mobility = champ_mobility_df.any(axis=1)
num_mobility_skills = champ_mobility_df.sum(axis=1)
champ_mobility_df['Mobility?'] = has_mobility
champ_mobility_df['# Mobility Skills'] = num_mobility_skills
champ_mobility_df.sort_values('# Mobility Skills', ascending=False)

Unnamed: 0,Q,W,E,R,Mobility?,# Mobility Skills
naafiri,True,True,True,False,True,3
briar,True,True,False,True,True,3
yone,True,False,False,True,True,2
zed,False,True,False,True,True,2
shen,False,False,True,True,True,2
...,...,...,...,...,...,...
kennen,False,False,False,False,False,0
kayle,False,False,False,False,False,0
karthus,False,False,False,False,False,0
karma,False,False,False,False,False,0


In [188]:
# Check that all the champions in our main dataframe are in the index of champ_mobility_df
import re

# Create a mapping of champion names that are fundamentally different between the two dataframes
champion_name_map = {
    "Wukong": "Monkey King",
    "Nunu & Willump": "Nunu",
    "Renata Glasc": "Renata"
}

df['champion'] = df['champion'].replace(champion_name_map)

def format_champ(name):
    return re.sub(r'[^a-zA-Z]', '', name).lower()


In [189]:
df_players = df.query('position != "team"').copy()
df_players['champion'] = df_players['champion'].apply(format_champ)

# Find champions in the dataset that are not in the champion list
df_champs_list = np.sort(df_players['champion'].unique())
missing_champs = np.setdiff1d(df_champs_list, [c.lower() for c in all_champions])
if len(missing_champs) > 0:
    print(f"Missing {len(missing_champs)} champions: \n{missing_champs}")
else:
    print("All champions found")

All champions found


In [190]:
# Function to help data exploration, prints out all champions with a given keyword in their skill descriptions
def search_term(search_terms):
    out = {}
    if isinstance(search_terms, str):
        search_terms = [search_terms]
    for search_term in search_terms:
        search_out = {}
        for col in champ_skill_df.columns:
            print(f"Champs with '{search_term}' in their {col} description:")
            ans = champ_skill_df[[col]].query(f'{col}.str.contains("{search_term}")').index
            print(f"{', '.join(ans)}\n" if len(ans) > 0 else "None\n")
            search_out[col] = np.array(ans)
        out[search_term] = search_out
    return out
search = ['bind', 'root']

search_term(search)

Champs with 'bind' in their Q description:
Lux, Morgana, Thresh

Champs with 'bind' in their W description:
None

Champs with 'bind' in their E description:
None

Champs with 'bind' in their R description:
Morgana

Champs with 'root' in their Q description:
Ivern, Renata

Champs with 'root' in their W description:
Jhin, Karma, Lissandra, Maokai, Ryze, Senna

Champs with 'root' in their E description:
Belveth, Jinx, Leblanc, Neeko, Nunu, Rengar, Singed, Soraka, Swain, Xayah, Zyra

Champs with 'root' in their R description:
Maokai



{'bind': {'Q': array(['Lux', 'Morgana', 'Thresh'], dtype=object),
  'W': array([], dtype=object),
  'E': array([], dtype=object),
  'R': array(['Morgana'], dtype=object)},
 'root': {'Q': array(['Ivern', 'Renata'], dtype=object),
  'W': array(['Jhin', 'Karma', 'Lissandra', 'Maokai', 'Ryze', 'Senna'],
        dtype=object),
  'E': array(['Belveth', 'Jinx', 'Leblanc', 'Neeko', 'Nunu', 'Rengar', 'Singed',
         'Soraka', 'Swain', 'Xayah', 'Zyra'], dtype=object),
  'R': array(['Maokai'], dtype=object)}}

### Final Model

In [191]:
# Make a list of all champions with mobility skills
mobile_champs = champ_mobility_df[champ_mobility_df['Mobility?']]
mobile_champs = np.array(mobile_champs["Mobility?"].index)

incorrect_mobility = ['neeko', 'yuumi', 'illaoi']
for name in incorrect_mobility:
    if name in mobile_champs:
        mobile_champs = np.delete(mobile_champs, np.where(mobile_champs == name))

df_players['has_mobility'] = df_players['champion'].isin(mobile_champs)
df_players

Unnamed: 0,gameid,datacompleteness,league,year,date,game,patch,side,position,playername,teamname,champion,gamelength,result,firstblood,firstdragon,elders,opp_elders,firstherald,firstbaron,barons,opp_barons,firsttower,turretplates,opp_turretplates,golddiffat10,xpdiffat10,csdiffat10,golddiffat15,xpdiffat15,csdiffat15,killsat15,assistsat15,deathsat15,opp_killsat15,opp_assistsat15,opp_deathsat15,has_mobility
396,ESPORTSTMNT06/20195,complete,CK,2017,2017-01-16 07:36:08,1.0,7.01,Blue,top,Helper,Ever8 Winners,nautilus,3014,1,1.0,,,,,,,,,,,-200.0,-364.0,-11.0,173.0,15.0,-3.0,0.0,1.0,0.0,0.0,0.0,1.0,False
397,ESPORTSTMNT06/20195,complete,CK,2017,2017-01-16 07:36:08,1.0,7.01,Blue,jng,OldB,Ever8 Winners,olaf,3014,1,1.0,,,,,,,,,,,226.0,252.0,4.0,642.0,833.0,20.0,1.0,0.0,0.0,0.0,0.0,0.0,False
398,ESPORTSTMNT06/20195,complete,CK,2017,2017-01-16 07:36:08,1.0,7.01,Blue,mid,Cepted,Ever8 Winners,aurelionsol,3014,1,0.0,,,,,,,,,,,-234.0,-27.0,-12.0,-231.0,-412.0,-23.0,1.0,0.0,0.0,0.0,0.0,0.0,False
399,ESPORTSTMNT06/20195,complete,CK,2017,2017-01-16 07:36:08,1.0,7.01,Blue,bot,DeuL,Ever8 Winners,varus,3014,1,0.0,,,,,,,,,,,-181.0,-128.0,-10.0,29.0,205.0,-5.0,0.0,1.0,0.0,0.0,0.0,0.0,False
400,ESPORTSTMNT06/20195,complete,CK,2017,2017-01-16 07:36:08,1.0,7.01,Blue,sup,Ella,Ever8 Winners,missfortune,3014,1,0.0,,,,,,,,,,,-30.0,-308.0,11.0,88.0,217.0,18.0,0.0,1.0,0.0,0.0,0.0,1.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122633,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Red,top,Melonik,Solary,ksante,1550,0,0.0,,,,,,0.0,0.0,,,,-1148.0,-634.0,-21.0,-1660.0,-1028.0,-35.0,0.0,0.0,0.0,0.0,1.0,0.0,True
122634,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Red,jng,Shlatan,Solary,taliyah,1550,0,0.0,,,,,,0.0,1.0,,,,-963.0,-929.0,-2.0,-413.0,-623.0,3.0,1.0,2.0,3.0,2.0,3.0,0.0,True
122635,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Red,mid,Peng,Solary,lucian,1550,0,0.0,,,,,,0.0,0.0,,,,-264.0,103.0,16.0,-243.0,159.0,14.0,0.0,1.0,1.0,2.0,0.0,0.0,True
122636,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Red,bot,TakeSet,Solary,kaisa,1550,0,0.0,,,,,,0.0,0.0,,,,245.0,-154.0,12.0,434.0,173.0,18.0,2.0,1.0,1.0,2.0,3.0,1.0,True


In [196]:
# Another feature we decided to add was the difference in gold, xp, and cs at 15 minutes for each role in the team.

# Calculate the difference in gold, xp, and cs at 15 minutes for each role in the team
diff_cols = ['golddiffat15', 'xpdiffat15']

# Groupby gameid, then find the difference in each role
grouped = df_players.groupby(['gameid','position', 'side'], as_index=False)[diff_cols].first()
grouped

# Pivot the table so that each role is a column
grouped = grouped.pivot(index=['gameid', 'side'], columns='position', values=diff_cols)

# Remove the multi-index such that the columns are ['bot_golddiffat15', 'bot_xpdiffat15', 'bot_csdiffat15'] etc
grouped.columns = [f"{role}_{stat}" for stat, role in grouped.columns]
grouped = grouped.reset_index()
grouped.head()

Unnamed: 0,gameid,side,bot_golddiffat15,jng_golddiffat15,mid_golddiffat15,sup_golddiffat15,top_golddiffat15,bot_xpdiffat15,jng_xpdiffat15,mid_xpdiffat15,sup_xpdiffat15,top_xpdiffat15
0,2899-3157,Blue,1662.0,370.0,178.0,229.0,220.0,306.0,631.0,-538.0,472.0,298.0
1,2899-3157,Red,-1662.0,-370.0,-178.0,-229.0,-220.0,-306.0,-631.0,538.0,-472.0,-298.0
2,2899-3158,Blue,648.0,901.0,695.0,744.0,1212.0,741.0,-261.0,487.0,80.0,156.0
3,2899-3158,Red,-648.0,-901.0,-695.0,-744.0,-1212.0,-741.0,261.0,-487.0,-80.0,-156.0
4,2899-3159,Blue,-368.0,-750.0,-319.0,227.0,-525.0,-366.0,-897.0,-315.0,-209.0,-86.0


In [197]:
df_teams_merged = df_teams.merge(grouped, on=['gameid', 'side'])
df_teams_merged.head()

Unnamed: 0,gameid,datacompleteness,league,year,date,game,patch,side,position,playername,teamname,champion,gamelength,result,firstblood,firstdragon,elders,opp_elders,firstherald,firstbaron,barons,opp_barons,firsttower,turretplates,opp_turretplates,golddiffat10,xpdiffat10,csdiffat10,golddiffat15,xpdiffat15,csdiffat15,killsat15,assistsat15,deathsat15,opp_killsat15,opp_assistsat15,opp_deathsat15,platediff,bot_golddiffat15,jng_golddiffat15,mid_golddiffat15,sup_golddiffat15,top_golddiffat15,bot_xpdiffat15,jng_xpdiffat15,mid_xpdiffat15,sup_xpdiffat15,top_xpdiffat15
0,ESPORTSTMNT06/20195,complete,CK,2017,2017-01-16 07:36:08,1.0,7.01,Blue,team,unknown player,Ever8 Winners,0,3014,1,1.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,-419.0,-575.0,-18.0,701.0,858.0,7.0,2.0,3.0,0.0,0.0,0.0,2.0,0.0,29.0,642.0,-231.0,88.0,173.0,205.0,833.0,-412.0,217.0,15.0
1,ESPORTSTMNT06/20195,complete,CK,2017,2017-01-16 07:36:08,1.0,7.01,Red,team,unknown player,CJ Entus,0,3014,0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,419.0,575.0,18.0,-701.0,-858.0,-7.0,0.0,0.0,2.0,2.0,3.0,0.0,0.0,-29.0,-642.0,231.0,-88.0,-173.0,-205.0,-833.0,412.0,-217.0,-15.0
2,ESPORTSTMNT06/20207,complete,CK,2017,2017-01-16 09:30:58,2.0,7.01,Blue,team,unknown player,CJ Entus,0,2211,1,1.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,-133.0,-79.0,-4.0,-1653.0,-1264.0,8.0,3.0,2.0,6.0,6.0,12.0,3.0,0.0,-656.0,-373.0,-580.0,-826.0,782.0,-1148.0,-674.0,-626.0,-402.0,1586.0
3,ESPORTSTMNT06/20207,complete,CK,2017,2017-01-16 09:30:58,2.0,7.01,Red,team,unknown player,Ever8 Winners,0,2211,0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,133.0,79.0,4.0,1653.0,1264.0,-8.0,6.0,12.0,3.0,3.0,2.0,6.0,0.0,656.0,373.0,580.0,826.0,-782.0,1148.0,674.0,626.0,402.0,-1586.0
4,ESPORTSTMNT06/20215,complete,CK,2017,2017-01-16 10:27:14,3.0,7.01,Blue,team,unknown player,Ever8 Winners,0,2728,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,-3135.0,-1044.0,-3.0,-4307.0,-1843.0,-49.0,2.0,7.0,2.0,2.0,2.0,2.0,0.0,-1349.0,-897.0,-268.0,-1389.0,-404.0,-249.0,-718.0,-337.0,-550.0,11.0


### Fairness Analysis

In [194]:
# TODO