# Your Title Here

**Name(s)**: (your name(s) here)

**Website Link**: (your website link)

## Code

In [1]:
import pandas as pd
import numpy as np
import os

import plotly.express as px
pd.options.plotting.backend = 'plotly'

['.git',
 '2014_LoL_esports_match_data_from_OraclesElixir.csv',
 '2015_LoL_esports_match_data_from_OraclesElixir.csv',
 'README.md',
 'template (1).ipynb',
 'template (2).ipynb',
 'template.ipynb',
 '_config.yml']

In [2]:
# Reading the data
fp = r"data\2014_LoL_esports_match_data_from_OraclesElixir.csv"
df = pd.read_csv(fp, index_col=0)
from tqdm.notebook import tqdm

pd.set_option('display.max_columns', None)

directory = r'data'
usecols = ['gameid','datacompleteness', 'league', 'year', 'date', 'game', 'patch', 
           'side', 'position', 'playername', 'teamname', 'champion', 'gamelength', 'result']

df = pd.DataFrame()
for filename in tqdm(os.listdir(directory)):
    new_df = pd.read_csv(os.path.join(directory, filename), usecols=usecols)
    df = pd.concat([df, new_df])
df

FileNotFoundError: [Errno 2] No such file or directory: 'data\\2014_LoL_esports_match_data_from_OraclesElixir.csv'

### Cleaning and EDA

In [None]:
# Data Cleaning
# Convert date to datetime
df['date'] = pd.to_datetime(df['date'])

# Remove games after October 2023 (the current league is still ongoing)
time_cutoff = pd.to_datetime('2023-10-01')
df = df[df['date'] < time_cutoff]

# Filter out incomplete data, and fill in NA player names with 'unknown player'
df = df[df['datacompleteness'] == 'complete']
df['playername'] = df['playername'].fillna('unknown player')
df

Unnamed: 0,gameid,datacompleteness,league,year,date,game,patch,side,position,playername,teamname,champion,gamelength,result
0,TRLH3/33,complete,EU LCS,2014,2014-01-14 17:52:02,1.0,3.15,Blue,top,sOAZ,Fnatic,Trundle,1924,1
1,TRLH3/33,complete,EU LCS,2014,2014-01-14 17:52:02,1.0,3.15,Blue,jng,Cyanide,Fnatic,Vi,1924,1
2,TRLH3/33,complete,EU LCS,2014,2014-01-14 17:52:02,1.0,3.15,Blue,mid,xPeke,Fnatic,Orianna,1924,1
3,TRLH3/33,complete,EU LCS,2014,2014-01-14 17:52:02,1.0,3.15,Blue,bot,Rekkles,Fnatic,Jinx,1924,1
4,TRLH3/33,complete,EU LCS,2014,2014-01-14 17:52:02,1.0,3.15,Blue,sup,YellOwStaR,Fnatic,Annie,1924,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122635,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Red,mid,Peng,Solary,Lucian,1550,0
122636,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Red,bot,TakeSet,Solary,Kai'Sa,1550,0
122637,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Red,sup,Steeelback,Solary,Alistar,1550,0
122638,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Blue,team,unknown player,Team du Sud,,1550,1


In [None]:
# Group winrate by year and side
wr_year = (df.groupby(["year", 'side'])
 .agg({'result': lambda x: np.round(x.mean()*100,2)})
 .unstack()
 .assign(blue_wr_advantage = lambda x: (x['result']['Blue'] - x['result']['Red']))
 ['blue_wr_advantage']
 )
fig = px.line(wr_year, title='Blue Side Winrate Advantage by Year')
# Change the x axis to categorical
fig.update_xaxes(type='category')
fig.update_yaxes(title='Winrate %')
fig.update_layout(hovermode='x')
# Update the hover label text to show the year, and the winrate advantage
fig.update_traces(hovertemplate='Year: %{x}<br>Blue Winrate Advantage: %{y}%')
# Hide legend
fig.update_layout(showlegend=False)
fig.show()
wr_year

year
2014    16.46
2015    10.02
2016     5.84
2017    10.12
2018     7.69
2019     5.98
2020     5.42
2021     6.29
2022     4.58
2023     6.00
2024     8.76
Name: blue_wr_advantage, dtype: float64

In [None]:
wr_year = []

In [None]:
def convert_minutes(s):
    m = s // 60
    s = s % 60
    return f'{m}:{s:02d}'

In [None]:
# Split the gamelength column into 3 bins
gamelength_bins = pd.cut(df['gamelength'], bins=[900, 1800, 2700, np.inf], labels=['15-30 Minutes', '31-45 Minutes', '45+ Minutes'])
gamelength_bins


0         31-45 Minutes
1         31-45 Minutes
2         31-45 Minutes
3         31-45 Minutes
4         31-45 Minutes
              ...      
122635    15-30 Minutes
122636    15-30 Minutes
122637    15-30 Minutes
122638    15-30 Minutes
122639    15-30 Minutes
Name: gamelength, Length: 723264, dtype: category
Categories (3, object): ['15-30 Minutes' < '31-45 Minutes' < '45+ Minutes']

In [None]:
wr_length = (df.assign(gamelength_bins=gamelength_bins)
 .groupby(["gamelength_bins", 'side'])
 .agg({'result': lambda x: np.round(x.mean()*100,2)})
 .unstack()
 .assign(blue_wr_advantage = lambda x: (x['result']['Blue'] - x['result']['Red']))
 ['blue_wr_advantage']
 )

customdata = np.stack((wr_length.groupby('gamelength_bins').size(), wr_length.groupby('gamelength_bins').size() / len(df) * 100), axis=-1)
fig = px.bar(wr_length, title='Blue Side Winrate Advantage by Game Length')
# Change it to categorical
fig.update_xaxes(type='category', title='Game Length')
fig.update_yaxes(title='Blue Side Winrate % Difference')
fig.update_layout(hovermode='x')
# Update the hover label text to show the year, and the winrate advantage
fig.update_traces(customdata=customdata, hovertemplate='Length: %{x}<br>Blue Winrate Advantage: %{y}%<br>%{customdata[0]:,} Games<br>(%{customdata[1]:.2f}% of all Games)')
# Add a text above each bar with the number of games in that bin, as stored in customdata
fig.update_layout(showlegend=False, annotations=[
    dict(
        x=bin,
        y=advantage,
        text=f"{int(games):,} Games",
        showarrow=False,
        font=dict(size=12, color='black'),
        xanchor='center',
        yanchor='bottom'
    )
    for bin, advantage, games in zip(wr_length.index, wr_length.values, customdata[:, 0])
])
fig.show()
wr_length

gamelength_bins
15-30 Minutes    10.49
31-45 Minutes     4.34
45+ Minutes       0.72
Name: blue_wr_advantage, dtype: float64

In [None]:
df.columns

Index(['gameid', 'datacompleteness', 'league', 'year', 'date', 'game', 'patch',
       'side', 'position', 'playername', 'teamname', 'champion', 'gamelength',
       'result'],
      dtype='object')

In [None]:
df.groupby('side')[['result']].mean()

Unnamed: 0_level_0,result
side,Unnamed: 1_level_1
Blue,0.532055
Red,0.467846


In [None]:
wr_length = []

### Assessment of Missingness

In [None]:
# TODO
# We believe the Champion column to be missing by design, since each game contains 12 rows. 
#10 of those rows are for the individual players, and 2 are for the teams. The player columns have champions while the teams
#do not, since they can be infered from the player rows.
# We believe that the missingness found in the 'side' 
df.columns


Index(['gameid', 'datacompleteness', 'league', 'year', 'date', 'game', 'patch',
       'side', 'position', 'playername', 'teamname', 'champion', 'gamelength',
       'result'],
      dtype='object')


### Hypothesis Testing

In [None]:
# Does blue side actually have an advantage in pro matches?
#null hyptothesis: blue side wr <= 0.5
#alternative hypothesis: blue side wr > 0.5

number_games = df['gameid'].nunique()
blue_wr = df.groupby('side')['result']["Blue"].mean()
random_generations = np.random.multinomial(number_games, [0.5, 0.5], size=100_000)[:, 0]
null_wins = blue_wr * number_games
p_value_overall = (random_generations >= null_wins).mean()
print(f"P value: {p_value_overall}, Maximum blue wr: {max(random_generations) / number_games}, Actual blue wr: {blue_wr}")
(p_value_overall, max(random_generations) / number_games, blue_wr)

#Our p_value is 0.0, which means we reject the null hypothesis that blue side wr = 0.5. This means our data 
#supports the alternative hypothesis that blue side wins more often than red side. 


P value: 0.0, Maximum blue wr: 0.5090598347326851, Actual blue wr: 0.5320546854260685


(0.0, 0.5090598347326851, 0.5320546854260685)

In [None]:
#For the second hypothesis test, we wanted to see if the side winrate distribution was 
#different for games before the rift herald was introduced. 

In [None]:
df.get('patch')

0          3.15
1          3.15
2          3.15
3          3.15
4          3.15
          ...  
122635    13.18
122636    13.18
122637    13.18
122638    13.18
122639    13.18
Name: patch, Length: 723264, dtype: float64

In [None]:
df.isna().sum()

gameid                  72
datacompleteness         0
league                   0
year                     0
date                     0
game                   684
patch                 2148
side                     0
position                 0
playername               0
teamname               162
champion            120544
gamelength               0
result                   0
dtype: int64

In [None]:
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df[df.get('patch').isna() == True])


                gameid datacompleteness  league  year                date  \
144   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   
145   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   
146   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   
147   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   
148   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   
149   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   
150   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   
151   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   
152   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   
153   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   
154   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   
155   TRLH1/1000050045         complete  NA LCS  2014 2014-01-17 20:04:22   

In [None]:
df

Unnamed: 0,gameid,datacompleteness,league,year,date,game,patch,side,position,playername,teamname,champion,gamelength,result
0,TRLH3/33,complete,EU LCS,2014,2014-01-14 17:52:02,1.0,3.15,Blue,top,sOAZ,Fnatic,Trundle,1924,1
1,TRLH3/33,complete,EU LCS,2014,2014-01-14 17:52:02,1.0,3.15,Blue,jng,Cyanide,Fnatic,Vi,1924,1
2,TRLH3/33,complete,EU LCS,2014,2014-01-14 17:52:02,1.0,3.15,Blue,mid,xPeke,Fnatic,Orianna,1924,1
3,TRLH3/33,complete,EU LCS,2014,2014-01-14 17:52:02,1.0,3.15,Blue,bot,Rekkles,Fnatic,Jinx,1924,1
4,TRLH3/33,complete,EU LCS,2014,2014-01-14 17:52:02,1.0,3.15,Blue,sup,YellOwStaR,Fnatic,Annie,1924,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122635,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Red,mid,Peng,Solary,Lucian,1550,0
122636,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Red,bot,TakeSet,Solary,Kai'Sa,1550,0
122637,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Red,sup,Steeelback,Solary,Alistar,1550,0
122638,ESPORTSTMNT05_3242449,complete,CDF,2023,2023-09-29 17:15:44,3.0,13.18,Blue,team,unknown player,Team du Sud,,1550,1


In [None]:
#For this question, we are looking at if the introduction of the rift herald in patch 5.22 lowered the blue side winrate.
#Which is the reason why it was added to the game, we will check if it succeded in this goal
#Null hypothesis, the blue side winrate distribution is the same before and after the rift herald was introduced
#Alternative hypthesis, the blue side winrate is lower after the rift herald was introduced
only_results = df[df.get('champion').isna() == True]
only_results_blue = only_results[only_results.get('side') == 'Blue']
new = only_results_blue.copy()
new['is_before_rift_herald'] = new['patch'].apply(lambda x: True if x < 5.22 else False)

g = new.groupby(['is_before_rift_herald', 'side'])['result'].mean()
wr_b4_rift = g[1]
wr_after_rift = g[0]
perms = []
obs_rift_diff = wr_after_rift - wr_b4_rift

In [None]:
for i in tqdm(range(10000)):
    with_shuffled = new.assign(Shuffled_Weights=np.random.permutation(new['is_before_rift_herald']))
    group_means = with_shuffled.groupby(['Shuffled_Weights'])['result'].mean()
    difference = group_means[False] - group_means[True]
    perms.append(difference)

p_value_rift = (perms <= obs_rift_diff).mean()
p_value_rift
"""
The P value is 0.0052 which is less than our significance threshold of 0.01, so we can reject the null hypothesis. 
This means that the win rate distribution is different before and after the rift herald was introduced, 
suggesting that the rift herald did indeed have an effect on the win rate between the two sides."""

  0%|          | 0/10000 [00:00<?, ?it/s]

0.004947133572606461