# Imports

In [30]:
import pandas as pd
import numpy as np
import selenium
import requests
from bs4 import BeautifulSoup
import re

# Match stats table

### Building the function

In [31]:
def scraping(end_point):
    """Scrape the match-history table of one tournament page.

    Downloads ``https://lol.fandom.com/wiki/{end_point}/Match_History``,
    finds the match table by its CSS classes and converts every red/blue
    game row into one DataFrame row.

    Args:
        end_point: Wiki path of the tournament, e.g.
            ``'2022_Season_World_Championship/Main_Event'``.

    Returns:
        pandas.DataFrame with the 13 columns listed in ``initial_columns``.
        Ban and pick columns hold comma-separated names; team columns hold
        the identifier taken from the cell's link, or ``None`` when the cell
        has no matching link.

    Raises:
        requests.RequestException: The page could not be downloaded
            (network error, timeout or HTTP error status).
        ValueError: The page does not contain the match-history table.
    """
    url = f"https://lol.fandom.com/wiki/{end_point}/Match_History"
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # The table is located by its tag and its exact class string.
    table = soup.find("table", {"class": "wikitable hoverable-multirows mhgame sortable plainlinks column-show-hide-1"})
    if table is None:
        raise ValueError(f"Match history table not found at {url}")
    table_rows = table.find_all('tr', class_=['mhgame-red multirow-highlighter', 'mhgame-blue multirow-highlighter'])

    # Value of an attribute shaped like xxxx-xx-xx="..." on the cell's link
    # (presumably data-to-id -- confirm against the page markup).
    link_target = re.compile(r'''[a-z]{4}-[a-z]{2}-[a-z]{2}="([a-zA-Z0-9_\[-]+)"''')
    # Value of a five-letter attribute (presumably title="Champion Name").
    # The dot is allowed for every span, not only the non-final ones.
    champion_title = re.compile(r'''[a-z]{5}="([A-Z a-z.']+)"''')

    table_list = []
    for tr in table_rows:
        row = []
        # Columns hold different kinds of data, so each group is parsed
        # differently; `col` is the 1-based position of the cell.
        for col, td in enumerate(tr, start=1):
            if col in (3, 4, 5):
                match = link_target.search(str(td.find('a')))
                # Always append a value so later columns stay aligned.
                row.append(match.group(1) if match else None)
            elif col in (6, 7, 8, 9):
                names = []
                for span in td.find_all('span'):
                    match = champion_title.search(str(span))
                    if match:
                        names.append(match.group(1))
                row.append(','.join(names))
            else:
                row.append(td.text)
        table_list.append(row)

    initial_columns = [
        'date',
        'patch',
        'blue_team',
        'red_team',
        'winner',
        'bans_blue_team',
        'bans_red_team',
        'picks_blue_team',
        'picks_red_team',
        'blue_team_roster',
        'red_team_roster',
        'sb',
        'vod',
    ]
    return pd.DataFrame(data=table_list, columns=initial_columns)

In [32]:
def _expand_csv_column(df, source, targets):
    """Split ``df[source]`` on commas into the ``targets`` columns and drop ``source``.

    Missing trailing pieces become NaN, so a column in which every row has
    fewer values than ``len(targets)`` no longer fails. More pieces than
    targets still raises ``ValueError``, as the plain assignment did.
    """
    parts = df[source].str.split(',', expand=True)
    if parts.shape[1] > len(targets):
        raise ValueError(
            f"'{source}' holds up to {parts.shape[1]} comma-separated values, "
            f"expected at most {len(targets)}"
        )
    # Pad with empty columns up to the expected width.
    parts = parts.reindex(columns=range(len(targets)))
    for position, name in enumerate(targets):
        df[name] = parts[position]
    return df.drop(columns=source)


def df_organize(df, end_point):
    """Expand the comma-separated ban, pick and roster columns.

    Args:
        df: Frame with the columns ``bans_blue_team``, ``bans_red_team``,
            ``picks_blue_team``, ``picks_red_team``, ``blue_team_roster``
            and ``red_team_roster``, each holding comma-separated strings.
            The frame is not modified.
        end_point: Tournament wiki path; the end points listed in
            ``end_points_3_bans`` get three ban columns per team, all
            others get five.

    Returns:
        A new DataFrame in which the six source columns are replaced by
        ``ban_N_*``, ``pick_N_*`` and per-role roster columns appended
        after the remaining original columns.

    Raises:
        ValueError: A source column holds more values than it has
            target columns.
    """
    end_points_3_bans = {
        'Season_1_World_Championship',
        'Season_2_World_Championship',
        'Season_3_World_Championship',
        '2014_Season_World_Championship',
        '2015_Season_World_Championship',
        '2016_Season_World_Championship',
    }
    n_bans = 3 if end_point in end_points_3_bans else 5
    roles = ('top', 'jungler', 'mid', 'adc', 'support')

    # Work on a copy so the caller's frame keeps its original columns.
    df = df.copy()

    ## Bans
    for side in ('blue', 'red'):
        targets = [f'ban_{n}_{side}_team' for n in range(1, n_bans + 1)]
        df = _expand_csv_column(df, f'bans_{side}_team', targets)

    ## Picks
    for side in ('blue', 'red'):
        targets = [f'pick_{n}_{side}_team' for n in range(1, 6)]
        df = _expand_csv_column(df, f'picks_{side}_team', targets)

    ## Team roster
    for side in ('blue', 'red'):
        targets = [f'{role}_{side}_team' for role in roles]
        df = _expand_csv_column(df, f'{side}_team_roster', targets)

    return df

In [33]:
def scrape_table(end_point):
    """Scrape one tournament's match history and expand its list columns.

    Args:
        end_point: Tournament wiki path passed to ``scraping`` and
            ``df_organize``.

    Returns:
        The DataFrame produced by ``df_organize`` for this end point.

    Raises:
        RuntimeError: Scraping or reorganising failed; the original
            exception is attached as ``__cause__``.
    """
    try:
        original_df = scraping(end_point)
        return df_organize(original_df, end_point)
    except Exception as err:
        # Raising a plain string is itself a TypeError and hides the real
        # failure, so raise a proper exception chained to the cause.
        raise RuntimeError(
            f'Error, please verify functions (end point: {end_point!r})'
        ) from err

### Getting Data of match history table

In [34]:
# Each entry is (wiki page path, season number, event label).
# Seasons 1-3 use the "Season_N" naming, 2014-2016 have a single page per
# year, and from 2017 on every year has a Play-In and a Main_Event page.
end_points = [
    *[
        (f'Season_{season}_World_Championship', season, 'Main')
        for season in (1, 2, 3)
    ],
    *[
        (f'{year}_Season_World_Championship', year - 2010, 'Main')
        for year in (2014, 2015, 2016)
    ],
    *[
        (f'{year}_Season_World_Championship/{page}', year - 2010, event)
        for year in range(2017, 2023)
        for page, event in (('Play-In', 'Play in'), ('Main_Event', 'Main'))
    ],
]

In [35]:
# Final column order of the combined match table: metadata first, then the
# bans, picks and rosters of the blue side followed by the red side.
columns = (
    ['date', 'season', 'event', 'patch', 'blue_team', 'red_team', 'winner']
    + [f'ban_{n}_{side}_team' for side in ('blue', 'red') for n in range(1, 6)]
    + [f'pick_{n}_{side}_team' for side in ('blue', 'red') for n in range(1, 6)]
    + [
        f'{role}_{side}_team'
        for side in ('blue', 'red')
        for role in ('top', 'jungler', 'mid', 'adc', 'support')
    ]
)

# Scrape every tournament page and tag its rows with season and event.
appended_data = []
for end_point in end_points:
    data = scrape_table(end_point[0]).assign(
        season=end_point[1],
        event=end_point[2],
    )
    appended_data.append(data)

# Stack all tournaments, keep only the wanted columns, index by season.
df_worlds = pd.concat(appended_data)[columns].set_index('season')

In [36]:
# Parse the scraped date strings into datetime values.
df_worlds['date'] = pd.to_datetime(df_worlds['date'])

In [37]:
# Display the column dtypes to check the conversion above (notebook output only).
df_worlds.dtypes

date                 datetime64[ns]
event                        object
patch                        object
blue_team                    object
red_team                     object
winner                       object
ban_1_blue_team              object
ban_2_blue_team              object
ban_3_blue_team              object
ban_4_blue_team              object
ban_5_blue_team              object
ban_1_red_team               object
ban_2_red_team               object
ban_3_red_team               object
ban_4_red_team               object
ban_5_red_team               object
pick_1_blue_team             object
pick_2_blue_team             object
pick_3_blue_team             object
pick_4_blue_team             object
pick_5_blue_team             object
pick_1_red_team              object
pick_2_red_team              object
pick_3_red_team              object
pick_4_red_team              object
pick_5_red_team              object
top_blue_team                object
jungler_blue_team           

### Changing Dtypes

### Save to CSV file

In [38]:
# Write the match table (indexed by season) to a CSV file in csv_files/.
df_worlds.to_csv(r'csv_files/matchs_stats.csv')


# Player Stats table

### Build function

In [39]:
def scrape_players(end_point):
    """Scrape the player-statistics table of one tournament page.

    Downloads ``https://lol.fandom.com/wiki/{end_point}/Player_Statistics``
    and converts every table row after the first five into one DataFrame
    row.

    Args:
        end_point: Wiki path of the tournament.

    Returns:
        pandas.DataFrame with the 21 columns listed in ``initial_columns``.
        ``team`` holds the identifier taken from the first cell's link, or
        ``None`` when no matching link is present; every other column holds
        the cell text as scraped.

    Raises:
        requests.RequestException: The page could not be downloaded
            (network error, timeout or HTTP error status).
        ValueError: The page does not contain the statistics table.
    """
    url = f"https://lol.fandom.com/wiki/{end_point}/Player_Statistics"
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Search for the table with the stats.
    table = soup.find("table", {"class": "wikitable sortable spstats plainlinks hoverable-rows"})
    if table is None:
        raise ValueError(f"Player statistics table not found at {url}")
    # Skip the first five rows (presumably title/header rows -- confirm).
    table_rows = table.find_all('tr')[5:]

    # Value of an attribute shaped like xxxx-xx-xx="..." on the cell's link
    # (presumably data-to-id -- confirm against the page markup).
    link_target = re.compile(r'''[a-z]{4}-[a-z]{2}-[a-z]{2}="([a-zA-Z0-9_\[-]+)"''')

    table_list = []
    for tr in table_rows:
        row = []
        for col, td in enumerate(tr, start=1):
            if col == 1:
                match = link_target.search(str(td.find('a')))
                # Always append a value so later columns stay aligned.
                row.append(match.group(1) if match else None)
            else:
                row.append(td.text)
        table_list.append(row)

    initial_columns = [
        'team',
        'player',
        'games_played',
        'wins',
        'loses',
        'win_rate',
        'kills',
        'deaths',
        'assists',
        'kill_death_assist_ratio',
        'creep_score',
        'cs/min',
        'gold',
        'gold/min',
        'damage',
        'damage/min',
        'kill_participation',
        'kill_share',
        'gold_share',
        'champions_played',
        'champions',
    ]
    return pd.DataFrame(data=table_list, columns=initial_columns)
            

### Getting Data of player stats table

In [40]:
# Each entry is (wiki page path, season number, event label).
# Seasons 1-3 use the "Season_N" naming, 2014-2016 have a single page per
# year, and from 2017 on every year has a Play-In and a Main_Event page.
end_points = [
    *[
        (f'Season_{season}_World_Championship', season, 'Main')
        for season in (1, 2, 3)
    ],
    *[
        (f'{year}_Season_World_Championship', year - 2010, 'Main')
        for year in (2014, 2015, 2016)
    ],
    *[
        (f'{year}_Season_World_Championship/{page}', year - 2010, event)
        for year in range(2017, 2023)
        for page, event in (('Play-In', 'Play in'), ('Main_Event', 'Main'))
    ],
]

In [41]:
# Columns kept in the combined player table ('champions' is left out).
columns = [
    'season', 'event', 'team', 'player',
    'games_played', 'wins', 'loses', 'win_rate',
    'kills', 'deaths', 'assists', 'kill_death_assist_ratio',
    'creep_score', 'cs/min',
    'gold', 'gold/min',
    'damage', 'damage/min',
    'kill_participation', 'kill_share', 'gold_share',
    'champions_played',
]

# Scrape every tournament page and tag its rows with season and event.
appended_data = []
for end_point in end_points:
    data = scrape_players(end_point[0]).assign(
        season=end_point[1],
        event=end_point[2],
    )
    appended_data.append(data)

# Stack all tournaments, keep only the wanted columns, index by season.
df_players = pd.concat(appended_data)[columns].set_index('season')

### Changing Dtypes

In [42]:
# Replace cells whose whole value is '-' with an empty string, in place
# (presumably '-' is the wiki's marker for a missing stat -- confirm).
df_players.replace('-', '', inplace=True)

In [43]:
# Drop the last character of every value in the ratio columns
# (presumably a trailing '%' -- confirm against the scraped text).
for col in ('win_rate', 'kill_participation', 'kill_share', 'gold_share'):
    df_players[col] = df_players[col].str.slice(stop=-1)

In [44]:
# Strip the last character of the damage text (presumably a 'k' thousands
# suffix -- confirm), convert to a number (unparseable -> NaN) and scale
# by 1000.
df_players['damage'] = (
    pd.to_numeric(df_players['damage'].str[:-1], errors='coerce') * 1000
)

In [45]:
# Text columns to convert to numbers; values that cannot be parsed
# become NaN. 'damage' is converted separately.
col = [
    'games_played', 'wins', 'loses', 'win_rate',
    'kills', 'deaths', 'assists', 'kill_death_assist_ratio',
    'creep_score', 'cs/min',
    'gold', 'gold/min',
    'damage/min',
    'kill_participation', 'kill_share', 'gold_share',
    'champions_played',
]
df_players[col] = df_players[col].apply(pd.to_numeric, errors='coerce')

In [46]:
# Display the column dtypes to check the conversions above (notebook output only).
df_players.dtypes

event                       object
team                        object
player                      object
games_played                 int64
wins                         int64
loses                        int64
win_rate                   float64
kills                      float64
deaths                     float64
assists                    float64
kill_death_assist_ratio    float64
creep_score                float64
cs/min                     float64
gold                       float64
gold/min                     int64
damage                     float64
damage/min                 float64
kill_participation         float64
kill_share                 float64
gold_share                 float64
champions_played             int64
dtype: object

### Save to CSV and Excel files

In [47]:
# Remove the champions_played column in place before exporting.
df_players.drop('champions_played',axis=1, inplace=True)

In [48]:
# Write the player table (indexed by season) to a CSV file in csv_files/.
df_players.to_csv(r'csv_files/players_stats.csv')

  values = values.astype(str)


In [49]:
# Write the same player table to an Excel workbook in csv_files/.
df_players.to_excel(r'csv_files/players_stats.xlsx')

# Champion stats table

In [50]:
def scrape_champions(end_point):
    """Scrape the champion-statistics table of one tournament page.

    Downloads ``https://lol.fandom.com/wiki/{end_point}/Champion_Statistics``
    and converts every table row after the first five into one DataFrame
    row holding the text of each cell.

    Args:
        end_point: Wiki path of the tournament.

    Returns:
        pandas.DataFrame with the 23 columns listed in ``initial_columns``;
        all values are the cell text as scraped.

    Raises:
        requests.RequestException: The page could not be downloaded
            (network error, timeout or HTTP error status).
        ValueError: The page does not contain the statistics table.
    """
    url = f"https://lol.fandom.com/wiki/{end_point}/Champion_Statistics"
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Search for the table with the stats.
    table = soup.find("table", {"class": "wikitable sortable spstats plainlinks hoverable-rows"})
    if table is None:
        raise ValueError(f"Champion statistics table not found at {url}")
    # Skip the first five rows (presumably title/header rows -- confirm).
    table_rows = table.find_all('tr')[5:]

    # One list of cell texts per table row.
    table_list = [[td.text for td in tr] for tr in table_rows]

    initial_columns = [
        'champion',
        'games_contests',
        'pick_ban_ratio',
        'banned_games',
        'played_games',
        'played_by_number_of_players',
        'win',
        'lose',
        'win_rate',
        'kills',
        'deaths',
        'assists',
        'kill_death_assist_ratio',
        'creep_score',
        'cs/min',
        'gold',
        'gold/min',
        'damage',
        'damage/min',
        'kill_participation',
        'kill_share',
        'gold_share',
        'played_as',
    ]
    return pd.DataFrame(data=table_list, columns=initial_columns)

In [51]:
# Each entry is (wiki page path, season number, event label).
# Seasons 1-3 use the "Season_N" naming, 2014-2016 have a single page per
# year, and from 2017 on every year has a Play-In and a Main_Event page.
end_points = [
    *[
        (f'Season_{season}_World_Championship', season, 'Main')
        for season in (1, 2, 3)
    ],
    *[
        (f'{year}_Season_World_Championship', year - 2010, 'Main')
        for year in (2014, 2015, 2016)
    ],
    *[
        (f'{year}_Season_World_Championship/{page}', year - 2010, event)
        for year in range(2017, 2023)
        for page, event in (('Play-In', 'Play in'), ('Main_Event', 'Main'))
    ],
]

In [52]:
# Column order of the combined champion table.
columns = [
    'season', 'event', 'champion',
    'games_contests', 'pick_ban_ratio', 'banned_games', 'played_games',
    'played_by_number_of_players',
    'win', 'lose', 'win_rate',
    'kills', 'deaths', 'assists', 'kill_death_assist_ratio',
    'creep_score', 'cs/min',
    'gold', 'gold/min',
    'damage', 'damage/min',
    'kill_participation', 'kill_share', 'gold_share',
    'played_as',
]

# Scrape every tournament page and tag its rows with season and event.
appended_data = []
for end_point in end_points:
    data = scrape_champions(end_point[0]).assign(
        season=end_point[1],
        event=end_point[2],
    )
    appended_data.append(data)

# Stack all tournaments, apply the column order, index by season.
df_champions = pd.concat(appended_data)[columns].set_index('season')

### Changing Dtypes

In [53]:
# Drop the last character of every value in the ratio columns
# (presumably a trailing '%' -- confirm against the scraped text).
for col in ('pick_ban_ratio', 'win_rate', 'kill_participation', 'kill_share', 'gold_share'):
    df_champions[col] = df_champions[col].str.slice(stop=-1)


In [54]:
# Replace cells whose whole value is '-' with an empty string, in place
# (presumably '-' is the wiki's marker for a missing stat -- confirm).
df_champions.replace('-', '', inplace=True)

In [55]:
# Strip the last character of the damage text (presumably a 'k' thousands
# suffix -- confirm), convert to a number (unparseable -> NaN) and scale
# by 1000.
df_champions['damage'] = (
    pd.to_numeric(df_champions['damage'].str[:-1], errors='coerce') * 1000
)

In [56]:
# Text columns to convert to numbers; values that cannot be parsed
# become NaN. 'damage' is converted separately.
col = [
    'games_contests', 'pick_ban_ratio', 'banned_games', 'played_games',
    'played_by_number_of_players',
    'win', 'lose', 'win_rate',
    'kills', 'deaths', 'assists', 'kill_death_assist_ratio',
    'creep_score', 'cs/min',
    'gold', 'gold/min',
    'damage/min',
    'kill_participation', 'kill_share', 'gold_share',
]
df_champions[col] = df_champions[col].apply(pd.to_numeric, errors='coerce')

In [57]:
# Remove the played_as column in place before exporting.
df_champions.drop('played_as', axis = 1, inplace=True)

In [58]:
# Write the champion table (indexed by season) to a CSV file in csv_files/.
df_champions.to_csv(r'csv_files/champions_stats.csv')

  values = values.astype(str)
