# Your Title Here

**Name(s)**: (your name(s) here)

**Website Link**: (your website link)

## Code

In [2]:
import pandas as pd
import numpy as np
import os

import plotly.express as px
pd.options.plotting.backend = 'plotly'

### Cleaning and EDA

In [115]:
# Reading the data
from tqdm.notebook import tqdm

pd.set_option('display.max_columns', None)

directory = r'data'
# Path of the first yearly export; kept for reference only. The file is read together
# with every other export in the loop below, so it is not loaded separately here.
fp = os.path.join(directory, '2014_LoL_esports_match_data_from_OraclesElixir.csv')

# Only the columns used by the analysis are loaded from each export.
usecols = ['gameid','datacompleteness', 'league', 'year', 'date', 'patch', 
           'side', 'position', 'playername', 'teamname', 'champion', 'gamelength', 'result',
           'barons', 'opp_barons', 'dragons', 'elders', 'opp_elders']

# Read each file exactly once and concatenate a single time: growing a DataFrame with
# pd.concat inside the loop re-copies all previously loaded rows on every iteration.
# sorted() makes the row order deterministic (os.listdir order is unspecified).
frames = [
    pd.read_csv(os.path.join(directory, filename), usecols=usecols)
    for filename in tqdm(sorted(os.listdir(directory)))
]

# An empty directory yields an empty frame that still carries the expected columns.
df = pd.concat(frames) if frames else pd.DataFrame(columns=usecols)

  0%|          | 0/10 [00:00<?, ?it/s]

In [116]:
# Data Cleaning
# Remove games after October 2023 (the current league is still ongoing)
time_cutoff = pd.to_datetime('2023-10-01')

# One chained expression instead of assigning into a filtered frame, which avoids
# pandas' SettingWithCopyWarning when the 'Win?' column is added.
df = (
    df
    # Convert date to datetime so it can be compared against the cutoff
    .assign(date=lambda d: pd.to_datetime(d['date']))
    # Keep games before the cutoff whose rows are marked as complete
    .loc[lambda d: (d['date'] < time_cutoff) & (d['datacompleteness'] == 'complete')]
    # Human-readable version of the 0/1 result column
    .assign(**{'Win?': lambda d: d['result'].map({0: 'Loss', 1: 'Win'})})
    # These columns are only needed for the filtering above
    .drop(columns=['datacompleteness', 'year', 'date'])
)

# Team-level summary rows (position == "team"), without the player-specific columns
df_teams = (
    df
    .query('position == "team"')
    .drop(columns=['gameid','position', 'playername', 'champion', 'teamname'])
)

# Individual player rows (every position other than "team")
df_players = df.query('position != "team"')


In [117]:
# Inspect the combined frame after cleaning (pandas truncates the display to the first/last rows)
df

Unnamed: 0,gameid,league,patch,side,position,playername,teamname,champion,gamelength,result,dragons,elders,opp_elders,barons,opp_barons,Win?
0,TRLH3/33,EU LCS,3.15,Blue,top,sOAZ,Fnatic,Trundle,1924,1,,,,,,Win
1,TRLH3/33,EU LCS,3.15,Blue,jng,Cyanide,Fnatic,Vi,1924,1,,,,,,Win
2,TRLH3/33,EU LCS,3.15,Blue,mid,xPeke,Fnatic,Orianna,1924,1,,,,,,Win
3,TRLH3/33,EU LCS,3.15,Blue,bot,Rekkles,Fnatic,Jinx,1924,1,,,,,,Win
4,TRLH3/33,EU LCS,3.15,Blue,sup,YellOwStaR,Fnatic,Annie,1924,1,,,,,,Win
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122635,ESPORTSTMNT05_3242449,CDF,13.18,Red,mid,Peng,Solary,Lucian,1550,0,,,,0.0,0.0,Loss
122636,ESPORTSTMNT05_3242449,CDF,13.18,Red,bot,TakeSet,Solary,Kai'Sa,1550,0,,,,0.0,0.0,Loss
122637,ESPORTSTMNT05_3242449,CDF,13.18,Red,sup,Steeelback,Solary,Alistar,1550,0,,,,0.0,0.0,Loss
122638,ESPORTSTMNT05_3242449,CDF,13.18,Blue,team,,Team du Sud,,1550,1,3.0,0.0,0.0,1.0,0.0,Win


In [121]:
# Render the first team rows as a markdown table and print the resulting text
teams_preview_md = df_teams.head().to_markdown()
print(teams_preview_md)

|    | gameid   | league   |   patch | side   |   gamelength |   result |   dragons |   elders |   opp_elders |   barons |   opp_barons | Win?   |
|---:|:---------|:---------|--------:|:-------|-------------:|---------:|----------:|---------:|-------------:|---------:|-------------:|:-------|
| 10 | TRLH3/33 | EU LCS   |    3.15 | Blue   |         1924 |        1 |         0 |        0 |            0 |        1 |            0 | Win    |
| 11 | TRLH3/33 | EU LCS   |    3.15 | Red    |         1924 |        0 |         0 |        0 |            0 |        0 |            1 | Loss   |
| 22 | TRLH3/44 | EU LCS   |    3.15 | Blue   |         2474 |        1 |         0 |        0 |            0 |        1 |            0 | Win    |
| 23 | TRLH3/44 | EU LCS   |    3.15 | Red    |         2474 |        0 |         0 |        0 |            0 |        0 |            1 | Loss   |
| 34 | TRLH3/76 | EU LCS   |    3.15 | Blue   |         2629 |        0 |         0 |        0 |            0 |       

In [118]:
# Save the markdown preview of the first team rows to df_teams.md in the working directory
teams_preview = df_teams.head()
teams_preview.to_markdown(buf='df_teams.md')

In [106]:
# Flag team rows that have no dragon count recorded
df_teams['missing_dragons'] = df_teams['dragons'].isna()

# Share of rows with a missing dragon count in each league, lowest first
(
    df_teams
    .groupby('league')['missing_dragons']
    .mean()
    .sort_values()
)

league
AL        0.000000
NASG      0.000000
NACL      0.000000
NA LCS    0.000000
NA CS     0.000000
            ...   
EU LCS    0.000000
ESLOL     0.000000
WLDs      0.000000
OTBLX     0.122222
EGL       1.000000
Name: missing_dragons, Length: 92, dtype: float64

In [5]:
# Write the markdown preview of the first team rows to plots/df_teams.md.
# to_markdown() returns None when it is given a destination, so the call is not
# wrapped in print() (that would only print "None").
os.makedirs('plots', exist_ok=True)  # make sure the output folder exists
df_teams.head().to_markdown('plots/df_teams.md')

None


In [64]:
# Save the markdown preview of the first team rows under plots/
teams_preview = df_teams.head()
teams_preview.to_markdown(buf='plots/df_teams.md')

In [125]:
# Mean of the 0/1 result per side, expressed as a percentage
univ = df_teams.groupby('side')['result'].mean().mul(100)

# Pie chart with one slice per side
fig = px.pie(
    univ,
    values='result',
    names=univ.index,
    title='Win Rate by Side',
    template='plotly_dark',
)
# Labels are drawn inside the slices, so the legend is hidden
fig.update_traces(
    textposition='inside',
    textinfo='percent+label',
    hovertemplate='%{label} side winrate: %{value:.2f}%',
)
fig.update_layout(showlegend=False, hovermode='x')

# Export a standalone HTML copy (plotly.js loaded from the CDN), then display inline
fig.write_html('plots/univariate.html', include_plotlyjs='cdn')
fig.show()

In [8]:
def convert_minutes(s):
    """Format a duration given in seconds as a ``'M:SS'`` string.

    Parameters
    ----------
    s : int or float
        Duration in seconds. The value is converted with ``int()`` first, so a
        float such as ``125.0`` is accepted (any fractional part is truncated).

    Returns
    -------
    str
        Whole minutes, a colon, and the remaining seconds zero-padded to two
        digits, e.g. ``1924 -> '32:04'``.
    """
    # int() keeps the ':02d' format below valid for float inputs, which would
    # otherwise raise ValueError.
    minutes, seconds = divmod(int(s), 60)
    return f'{minutes}:{seconds:02d}'

In [None]:
# Show the plot in r'plots/univariate.html' 


In [124]:
# Split the gamelength column into 3 bins (gamelength is in seconds; values that fall
# outside the bin edges, i.e. 900 seconds or less, become NaN)
gamelength_bins = pd.cut(df_teams['gamelength'], bins=[900, 1800, 2700, np.inf], labels=['15-30 Minutes', '31-45 Minutes', '45+ Minutes'])

# Blue winrate minus Red winrate (both in %, rounded to 2 decimals) for each bin.
# observed=False is spelled out so every category is kept regardless of the pandas default.
wr_length = (df_teams.assign(gamelength_bins=gamelength_bins)
 .groupby(["gamelength_bins", 'side'], observed=False)
 .agg({'result': lambda x: np.round(x.mean()*100,2)})
 .unstack()
 .assign(blue_wr_advantage = lambda x: (x['result']['Blue'] - x['result']['Red']))
 ['blue_wr_advantage']
 )

# value_counts() orders its result by frequency, not by bin, so the counts are reindexed
# to the bar order; otherwise each bar's hover text would show another bin's numbers.
games_per_bin = gamelength_bins.value_counts().reindex(wr_length.index)
pct_per_bin = gamelength_bins.value_counts(normalize=True).reindex(wr_length.index) * 100
customdata = np.stack([games_per_bin, pct_per_bin], axis=-1)

fig = px.bar(wr_length, title='Blue Side Winrate Advantage by Game Length', template='plotly_dark')
# Change it to categorical
fig.update_xaxes(type='category', title='Game Length')
fig.update_yaxes(title='Blue Side Winrate % Difference')
fig.update_layout(hovermode='x')
# Update the hover label text to show the game-length bin, the winrate advantage,
# and the number / share of games in that bin (taken from customdata)
fig.update_traces(customdata=customdata, 
                  hovertemplate='Length: %{x}<br>Blue Winrate Advantage: %{y}%<br>%{customdata[0]:,} Games<br>(%{customdata[1]:.2f}% of all Games)')
# Add a text label above each bar stating the winrate advantage for that bin
fig.update_layout(showlegend=False, annotations=[
    dict(
        x=length_bin,
        y=advantage,
        text=f"{advantage:.2f}% Higher Winrate",
        showarrow=False,
        font=dict(size=12, color='black'),
        xanchor='center',
        yanchor='bottom'
    )
    for length_bin, advantage in zip(wr_length.index, wr_length.values)
])
fig.write_html('plots/bivariate.html', include_plotlyjs='cdn')
fig.show()
print(wr_length)

gamelength_bins
15-30 Minutes    10.49
31-45 Minutes     4.34
45+ Minutes       0.72
Name: blue_wr_advantage, dtype: float64


In [39]:
# Objective columns for every team row, tagged with its game-length bin, plus the
# difference between the team's and the opponent's elder / baron counts
df_obj_by_gl = df_teams[['side', 'result', 'patch', 'elders', 'barons', 'opp_elders', 'opp_barons']].assign(
    gamelength_bins=gamelength_bins,
    **{
        'elder advantage': lambda t: t['elders'] - t['opp_elders'],
        'baron advantage': lambda t: t['barons'] - t['opp_barons'],
    },
)

# Shared bin edges: below 0, (−0.01, 0.99], (0.99, 1.99], above 1.99
# (presumably the advantages are whole numbers, i.e. <0, 0, 1, 2+ — verify against the data)
advantage_edges = [-np.inf, -0.01, 0.99, 1.99, np.inf]
baron_bins = pd.cut(
    df_obj_by_gl['baron advantage'],
    bins=advantage_edges,
    labels=['Less Barons', 'Same Barons', '1 More Baron', '2+ More Barons'],
    right=True,
)
elder_bins = pd.cut(
    df_obj_by_gl['elder advantage'],
    bins=advantage_edges,
    labels=['Less Elders', 'Same Elders', '1 More Elder', '2+ More Elders'],
    right=True,
)

# Attach the bins, drop every row with any missing value, then keep the analysis columns
df_obj_by_gl = (
    df_obj_by_gl
    .assign(baron_bins=baron_bins, elder_bins=elder_bins)
    .dropna(axis=0, how='any')
    [['side', 'result', 'patch', 'gamelength_bins', 'baron advantage','baron_bins', 'elder advantage', 'elder_bins']]
)

# Mean result (win rate) per game-length bin, one column per side
(
    df_obj_by_gl
    .groupby(['gamelength_bins', 'side'])[['result']]
    .mean()
    .unstack()
)

Unnamed: 0_level_0,result,result
side,Blue,Red
gamelength_bins,Unnamed: 1_level_2,Unnamed: 2_level_2
15-30 Minutes,0.54899,0.450715
31-45 Minutes,0.519024,0.480976
45+ Minutes,0.492165,0.507835


In [40]:
# Order the rows from the largest elder deficit to the largest elder lead
df_obj_by_gl.sort_values(by='elder advantage')

Unnamed: 0,side,result,patch,gamelength_bins,baron advantage,baron_bins,elder advantage,elder_bins
7078,Blue,0,8.02,45+ Minutes,-4.0,Less Barons,-4.0,Less Elders
1534,Blue,1,8.01,45+ Minutes,-3.0,Less Barons,-4.0,Less Elders
26950,Blue,0,10.05,45+ Minutes,-1.0,Less Barons,-3.0,Less Elders
21190,Blue,0,10.04,45+ Minutes,-1.0,Less Barons,-3.0,Less Elders
45046,Blue,1,7.12,45+ Minutes,-1.0,Less Barons,-3.0,Less Elders
...,...,...,...,...,...,...,...,...
45047,Red,0,7.12,45+ Minutes,1.0,1 More Baron,3.0,2+ More Elders
6383,Red,1,9.02,45+ Minutes,-1.0,Less Barons,3.0,2+ More Elders
68267,Red,1,12.09,45+ Minutes,1.0,1 More Baron,3.0,2+ More Elders
7079,Red,1,8.02,45+ Minutes,4.0,2+ More Barons,4.0,2+ More Elders


In [42]:
# Number of team rows left in each game-length bin after rows with missing values were dropped
df_obj_by_gl['gamelength_bins'].value_counts()

gamelength_bins
31-45 Minutes    59608
15-30 Minutes    40580
45+ Minutes       3446
Name: count, dtype: int64

In [59]:
# Average baron / elder advantage of the winning side, per game-length bin
(
    df_obj_by_gl
    .loc[lambda t: t['result'] == 1]
    .groupby(['gamelength_bins'])[['baron advantage', 'elder advantage']]
    .mean()
    .rename(columns={'baron advantage': 'Winner baron advantage', 'elder advantage': 'Winner elder advantage'})
)

Unnamed: 0_level_0,Winner baron advantage,Winner elder advantage
gamelength_bins,Unnamed: 1_level_1,Unnamed: 2_level_1
15-30 Minutes,0.890801,0.00281
31-45 Minutes,0.97195,0.109482
45+ Minutes,0.658154,0.325595


In [62]:
# Win rate (%) for each baron-advantage bin, one column per side
(
    df_obj_by_gl
    .groupby(['baron_bins', 'side'])[['result']]
    .mean()
    .unstack()
    .rename(columns={'result': 'Winrate'})
    .mul(100)
    .round(2)
)

In [61]:
# Win rate (%) for each elder-advantage bin, one column per side
(
    df_obj_by_gl
    .groupby(['elder_bins', 'side'])[['result']]
    .mean()
    .unstack()
    .rename(columns={'result': 'Winrate'})
    .mul(100)
    .round(2)
)

Unnamed: 0_level_0,Winrate,Winrate
side,Blue,Red
elder_bins,Unnamed: 1_level_2,Unnamed: 2_level_2
Less Elders,16.76,19.4
Same Elders,53.61,46.37
1 More Elder,80.42,83.23
2+ More Elders,83.0,83.33


### Assessment of Missingness

In [None]:
# TODO
# We believe the 'champion' column is missing by design, since each game contains 12 rows:
# 10 of those rows are for the individual players, and 2 are for the teams. The player rows have champions while the
# team rows do not, since a team's champions can be inferred from its player rows.
# We believe that the missingness found in the 'side' ... (TODO: unfinished sentence; complete this assessment)
df.columns


Index(['gameid', 'datacompleteness', 'league', 'year', 'date', 'game', 'patch',
       'side', 'position', 'playername', 'teamname', 'champion', 'gamelength',
       'result', 'dragons', 'elders', 'opp_elders', 'heralds', 'barons',
       'opp_barons', 'towers', 'inhibitors', 'vspm', 'Win?'],
      dtype='object')

### Hypothesis Testing

In [None]:
# TODO