In [1]:
import os
import pandas as pd
from pathlib import Path

In [2]:
"""
Pulls data from the Data directories, reads the selected CSV into a pandas DataFrame, and drops any columns that contain only NaNs.

parameters:
    year -- year of data being pulled
    division -- division whose data is being pulled
    stat -- determines which of the three sheets in the directory will be pulled (0 = champion, 1 = player, 2 = team)
"""

#TODO: account for inconsistencies in division names

def get_dat(year, division, stat):
    """
    Load one stat sheet for a year/division from ./Data as a pandas DataFrame.

    The CSV files in the target folder are ordered by file name
    (case-insensitively) and the one at position `stat` is read. Columns that
    contain only NaNs are dropped from the result.

    parameters:
        year -- year of data being pulled; str(year) must match the folder name
        division -- division whose data is being pulled (folder name under the year)
        stat -- position of the sheet to pull among the folder's CSVs
                (0 = champion, 1 = player, 2 = team -- this assumes the file
                names sort in that order; TODO confirm against the data files)

    returns:
        The selected sheet as a DataFrame, or an empty DataFrame when the
        folder is missing or holds no CSV at position `stat`.
    """
    sheet_dir = Path("./Data") / str(year) / division

    # Check the base folder before touching int(year): callers may pass
    # non-numeric directory names, which must yield an empty frame, not a crash.
    if not sheet_dir.exists():
        return pd.DataFrame()

    # MSI data for 2017-2020 sits one level deeper, in a "main-event" subfolder.
    if division == "MSI" and 2017 <= int(year) < 2021:
        sheet_dir = sheet_dir / "main-event"

    if not sheet_dir.is_dir():
        return pd.DataFrame()

    # Directory listing order is arbitrary, so sort explicitly; skip anything
    # that is not a CSV file (hidden files, checkpoints, subfolders) so those
    # entries cannot shift the meaning of `stat`.
    csv_files = sorted(
        (entry for entry in sheet_dir.iterdir()
         if entry.is_file() and entry.suffix.lower() == ".csv"),
        key = lambda entry: entry.name.lower(),
    )

    try:
        sheet_path = csv_files[stat]
    except IndexError:
        # Same convention as a missing folder: no data -> empty frame.
        return pd.DataFrame()

    return pd.read_csv(sheet_path).dropna(axis = 1, how = "all")

In [3]:
# Divisions to aggregate; each name must match a folder under ./Data/<year>/.
divs = ["CBLOL", "LCK", "LCL", "LCO", "LCS", "LEC", "LJL", "LLA", "LPL", "MSI", "PCS", "TCL", "VCS"]

# One frame of team stats per (year, division) that has data on disk.
team_frames = []

# os.listdir order is arbitrary; sorting keeps the load order (and therefore
# the row labels produced by reset_index below) reproducible across machines.
for year in sorted(os.listdir("./Data")):
    for div in divs:
        # stat index 2 selects the team sheet. Read each CSV once and reuse it.
        df = get_dat(year, div, 2)
        if df.empty:
            continue

        team_frames.append(
            df.assign(
                # Win percentage over decided games, rounded to 2 decimals.
                win_rate = ((df["W"] / (df["W"] + df["L"])) * 100).round(2),
                year = year,
                division = div,
            )
        )

if not team_frames:
    raise FileNotFoundError("no team stat sheets found under ./Data")

# Concatenate once instead of growing a frame inside the loop. The list is
# reversed so the most recently loaded sheet comes first, which is the row
# order the earlier prepend-in-loop version produced.
# NOTE(review): fillna(0) also writes a numeric 0 into percentage-string
# columns (e.g. "40%"), leaving mixed types there -- confirm this is intended.
res_df = (
    pd.concat(team_frames[::-1])
    .reset_index(drop = True)
    .sort_values(by = ["year", "Team"])
    .fillna(0)
)
res_df.head()

Unnamed: 0,Team,GP,W,L,AGT,K,D,KD,CKPM,GPR,...,FBN%,BN%,LNE%,JNG%,WPM,CWPM,WCPM,win_rate,year,division
371,7th heaven,13,7,6,42.7,239,235,1.02,0.85,0.0,...,0,40%,0,0,0.0,0.0,0.0,53.85,2015,LJL
341,ATLAS eSports Team,14,6,8,35.8,214,269,0.8,0.96,-0.89,...,43%,53%,49.5%,44.6%,2.21,0.32,0.69,42.86,2015,TCL
367,Bencheados,6,1,5,39.3,80,121,0.66,0.85,0.0,...,0,0,0,0,0.0,0.0,0.0,16.67,2015,LLA
342,Beşiktaş Esports,14,12,2,33.9,291,175,1.66,0.98,1.47,...,64%,88%,52.0%,50.6%,2.37,0.34,0.82,85.71,2015,TCL
354,Beşiktaş Esports,5,0,5,27.6,27,110,0.25,0.99,-4.84,...,0%,0%,45.7%,38.7%,2.82,0.27,0.6,0.0,2015,MSI


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of the combined table.
res_df.describe()

Unnamed: 0,GP,W,L,AGT,K,D,KD,CKPM,GPR,EGR,MLR,GD15,WPM,CWPM,WCPM,win_rate
count,399.0,399.0,399.0,399.0,399.0,399.0,399.0,399.0,399.0,399.0,399.0,399.0,399.0,399.0,399.0,399.0
mean,9.719298,4.859649,4.859649,34.390476,125.087719,125.428571,1.012005,0.746416,-0.155388,32.896491,-2.355388,-135.273183,3.219173,0.975764,1.288195,44.597018
std,6.239073,4.208791,3.513842,3.457307,92.600155,84.89899,0.53649,0.146694,1.053208,24.840955,15.627106,1237.030261,1.076066,0.439101,0.473416,25.363616
min,2.0,0.0,0.0,24.5,5.0,8.0,0.17,0.33,-4.84,0.0,-56.3,-5932.0,0.0,0.0,0.0,0.0
25%,5.0,2.0,3.0,32.05,54.0,66.0,0.72,0.65,-0.605,0.0,-8.95,-744.5,3.075,0.695,1.14,26.39
50%,8.0,4.0,4.0,34.3,104.0,95.0,0.95,0.74,0.0,41.1,0.0,0.0,3.44,1.1,1.39,42.86
75%,13.5,7.0,6.0,36.4,176.5,161.5,1.22,0.85,0.425,53.2,1.2,558.0,3.77,1.3,1.575,60.555
max,28.0,26.0,25.0,48.5,477.0,460.0,7.0,1.13,3.21,82.3,53.4,3129.0,5.41,1.78,2.14,100.0


In [5]:
# Save the combined table to 'All Data.csv' (path relative to the working directory); index = False leaves the row labels out of the file.
res_df.to_csv('All Data.csv', index = False)