In [1]:
import os
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd

#first LoL db scraping platinum+ ranking statistics
url = 'https://champion.gg/statistics/overview?queue=ranked-solo-duo&rank=platinum_plus&region=world'
#a timeout keeps the cell from hanging indefinitely if the site never answers
response = requests.get(url, timeout=30)
#fail loudly on a 4xx/5xx reply instead of silently parsing an error page
response.raise_for_status()
soup = bs(response.text, 'html.parser')
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <meta content="text/html; charset=utf-8" http-equiv="content-type"/>
  <!--CHAMPIONGG-SPECIFIC. THIS MUST NOT MERGE INTO MASTER-->
  <link href="https://blitz-cdn.blitz.gg/championgg/chgg_favicon.png" rel="shortcut icon"/>
  <!--
      <link
      rel="shortcut icon"
      href="https://blitz-cdn-plain.blitz.gg/favicon-new.ico.png"
    />


    <!-- https://i.gyazo.com/74ea5a0af7ed9abd01dec68d0b37cd31.png -->
  <!-- https://app.clubhouse.io/blitz/story/10554/add-dns-prefetch-small-task-should-be-quick -->
  <link href="https://config.playwire.com/" rel="dns-prefetch"/>
  <link href="https://google-analytics.com/" rel="dns-prefetch"/>
  <link href="https://mb.moatads.com/" rel="dns-prefetch"/>
  <link href="https://securepubads.g.doubleclick.net/" rel="dns-prefetch"/>
  <link href="https://adservice.google.com/" rel="dns-prefetch"/>
  <link href="https://cdn.intergi.com/"

In [2]:
#locate the table-header container; iterating it yields its child nodes
results_columns = soup.find('div', class_="StickyTableHeader-sc-111omlw-0 ldshpo")
print(f'Number of columns initially extracted: {len(results_columns)}')
print('-' * 20)

#collect the text of every child of the header, in page order
list_columns = [header_cell.text for header_cell in results_columns]
print(list_columns)

Number of columns initially extracted: 9
--------------------
['Rank', 'Role', 'Champion', 'helpTierarrow-dropdown', 'Win Rate', 'Ban Rate', 'Pick Rate', 'Counters', 'Matches']


In [3]:
#build an empty frame from the scraped header names, normalise the names,
#then keep only the columns used later (Rank, Role, Champion, Win_Rate)
df_1 = (
    pd.DataFrame(columns=list_columns)
    .rename(columns={
        "helpTierarrow-dropdown": "Tier",
        "Win Rate": "Win_Rate",
        "Ban Rate": "Ban_Rate",
        "Pick Rate": "Pick_Rate",
    })
    .drop(columns=['Counters', 'Tier', 'Matches', 'Ban_Rate', 'Pick_Rate'])
)

df_1

Unnamed: 0,Rank,Role,Champion,Win_Rate


In [4]:
#find all rows to iterate through
results_rows = soup.find_all('div', class_="champion-row")
print(f'Number of Rows: {len(results_rows)}')

#collect one record per row and build the frame once afterwards:
#DataFrame.append inside a loop is quadratic and was removed in pandas 2.0
rank = "Platinum plus"
records = []
for result in results_rows:
    try:
        role = result.find('div', class_="champion-role").text
        role = role.replace("role-", "").capitalize()
        champion = result.find('span', class_="champion-name").text
        win_rate = result.find('div', class_="champion-win-rate").text
    except AttributeError as e:
        #find() returned None because an element is missing: report and skip the row
        print(e)
        continue
    records.append({'Rank': rank, 'Champion': champion, 'Role': role, 'Win_Rate': win_rate})

#explicit column list fixes the column order even when no rows were found
df_1 = pd.DataFrame(records, columns=['Rank', 'Champion', 'Role', 'Win_Rate'])

#sort df by descending champion win rates & reset index
#Win_Rate holds strings such as "56.3%"; order by the numeric value so that
#e.g. "9.8%" does not sort above "10.1%" (the stored strings stay unchanged)
win_rate_pct = pd.to_numeric(df_1['Win_Rate'].astype(str).str.rstrip('%'), errors='coerce')
df_1 = df_1.loc[win_rate_pct.sort_values(ascending=False).index].reset_index(drop=True)

df_1

Number of Rows: 192


Unnamed: 0,Rank,Champion,Role,Win_Rate
0,Platinum plus,Gragas,Mid,56.3%
1,Platinum plus,Swain,Bot,55.4%
2,Platinum plus,Swain,Mid,55.2%
3,Platinum plus,Maokai,Support,54.8%
4,Platinum plus,Aurelion Sol,Mid,54.8%
...,...,...,...,...
187,Platinum plus,Senna,Support,45.2%
188,Platinum plus,Dr. Mundo,Top,44.9%
189,Platinum plus,Irelia,Mid,44.8%
190,Platinum plus,Sett,Support,44.5%


In [5]:
#write the platinum plus table (index included) to platinum_plus.csv
df_1.to_csv(path_or_buf='platinum_plus.csv')

In [6]:
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

#install (or reuse) a chromedriver via webdriver_manager and open a visible Chrome window
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

#second LoL db scraping iron+ ranking statistics
url = 'https://www.leagueofgraphs.com/champions/builds/iron/sr-ranked'
browser.visit(url)

[WDM] - Current google-chrome version is 87.0.4280
[WDM] - Get LATEST driver version for 87.0.4280
[WDM] - Driver [C:\Users\Robert Anthony\.wdm\drivers\chromedriver\win32\87.0.4280.88\chromedriver.exe] found in cache


 


In [7]:
#take the browser's current page HTML and parse it; this rebinds `soup`
html = browser.html
soup = bs(html, features='html.parser')

In [8]:
#create separate dataframe-column templates
#column labels are passed as lists: a set has no defined order and
#pandas 2.x raises when a set is given for `columns`
rank_df = pd.DataFrame(columns=["Rank"])

champion_df = pd.DataFrame(columns=["Champion"])

role_df = pd.DataFrame(columns=["Role"])

pick_rate_df = pd.DataFrame(columns=["Pick_Rate"])

win_rate_df = pd.DataFrame(columns=["Win_Rate"])

ban_rate_df = pd.DataFrame(columns=["Ban_Rate"])

champion_df

Unnamed: 0,Champion


In [9]:
#build champion/role dataframes & join together
#each frame is created in one call from a list: DataFrame.append in a loop is
#quadratic and was removed in pandas 2.0; building fresh also makes the cell
#safe to re-run without duplicating rows
champions = soup.find_all('span', class_='name')
champion_df = pd.DataFrame({'Champion': [champion.text.strip() for champion in champions]})

#every <i> tag whose text is non-empty is taken as a role label
roles = soup.find_all('i')
role_df = pd.DataFrame({'Role': [role.text.strip() for role in roles if str(role.text) != ""]})

#index-based join: row n of champion_df is paired with row n of role_df
first_join_df = champion_df.join(role_df)

In [10]:
#build pick rate dataframe & join to previous dataframe
pick_rates = soup.find_all('div', class_="progressBarTxt")

#the progressBarTxt cells are read in groups of three; positions 0, 3, 6, ...
#are used as the pick rate. The frame is built in one call because
#DataFrame.append in a loop is quadratic and was removed in pandas 2.0
pick_rate_df = pd.DataFrame({'Pick_Rate': [cell.text.strip() for cell in pick_rates[0::3]]})

second_join_df = first_join_df.join(pick_rate_df)

In [11]:
#build win rate dataframe & join to previous dataframe
win_rates = soup.find_all('div', class_="progressBarTxt")

#the progressBarTxt cells are read in groups of three; positions 1, 4, 7, ...
#are used as the win rate. The frame is built in one call because
#DataFrame.append in a loop is quadratic and was removed in pandas 2.0
win_rate_df = pd.DataFrame({'Win_Rate': [cell.text.strip() for cell in win_rates[1::3]]})

third_join_df = second_join_df.join(win_rate_df)

In [12]:
#build ban rate dataframe & join to previous dataframe
ban_rates = soup.find_all('div', class_="progressBarTxt")

#the progressBarTxt cells are read in groups of three; positions 2, 5, 8, ...
#are used as the ban rate. The frame is built in one call because
#DataFrame.append in a loop is quadratic and was removed in pandas 2.0
ban_rate_df = pd.DataFrame({'Ban_Rate': [cell.text.strip() for cell in ban_rates[2::3]]})

fourth_join_df = third_join_df.join(ban_rate_df)

In [13]:
#build rank dataframe & join to previous dataframe
ranks = soup.find_all('div', class_="progressBarTxt")

#one "Iron plus" label per group of three progressBarTxt cells; built in one
#call because DataFrame.append in a loop was removed in pandas 2.0
rank_df = pd.DataFrame({'Rank': ["Iron plus"] * len(ranks[0::3])})

fifth_join_df = fourth_join_df.join(rank_df)

#keep only the columns shared with the other rank tables
#(decided to drop Ban & Pick Rates)
df_2 = fifth_join_df[['Rank', 'Champion', 'Role', 'Win_Rate']]

#sort df by descending champion win rates & reset index
#Win_Rate holds strings such as "53.6%"; order by the numeric value so that
#e.g. "9.8%" does not sort above "10.1%" (the stored strings stay unchanged)
win_rate_pct = pd.to_numeric(df_2['Win_Rate'].astype(str).str.rstrip('%'), errors='coerce')
df_2 = df_2.loc[win_rate_pct.sort_values(ascending=False).index].reset_index(drop=True)
df_2

Unnamed: 0,Rank,Champion,Role,Win_Rate
0,Iron plus,Rammus,Jungler,53.6%
1,Iron plus,Malzahar,Mid,53.3%
2,Iron plus,Skarner,Jungler,53.2%
3,Iron plus,Swain,Support,52.9%
4,Iron plus,Maokai,Support,52.9%
...,...,...,...,...
148,Iron plus,Karma,Support,46.5%
149,Iron plus,Nidalee,Jungler,46.0%
150,Iron plus,Gangplank,Top,45.8%
151,Iron plus,Azir,Mid,44.3%


In [14]:
#shut down the browser session that was opened for the iron+ scrape
browser.quit()

In [15]:
#write the iron plus table (index included) to iron_plus.csv
df_2.to_csv(path_or_buf='iron_plus.csv')

In [16]:
#first LoL db again scraping master ranking statistics
url = 'https://champion.gg/statistics/overview?queue=ranked-solo-duo&rank=master&region=world'
#a timeout keeps the cell from hanging indefinitely if the site never answers
response = requests.get(url, timeout=30)
#fail loudly on a 4xx/5xx reply instead of silently parsing an error page
response.raise_for_status()
soup = bs(response.text, 'html.parser')
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <meta content="text/html; charset=utf-8" http-equiv="content-type"/>
  <!--CHAMPIONGG-SPECIFIC. THIS MUST NOT MERGE INTO MASTER-->
  <link href="https://blitz-cdn.blitz.gg/championgg/chgg_favicon.png" rel="shortcut icon"/>
  <!--
      <link
      rel="shortcut icon"
      href="https://blitz-cdn-plain.blitz.gg/favicon-new.ico.png"
    />


    <!-- https://i.gyazo.com/74ea5a0af7ed9abd01dec68d0b37cd31.png -->
  <!-- https://app.clubhouse.io/blitz/story/10554/add-dns-prefetch-small-task-should-be-quick -->
  <link href="https://config.playwire.com/" rel="dns-prefetch"/>
  <link href="https://google-analytics.com/" rel="dns-prefetch"/>
  <link href="https://mb.moatads.com/" rel="dns-prefetch"/>
  <link href="https://securepubads.g.doubleclick.net/" rel="dns-prefetch"/>
  <link href="https://adservice.google.com/" rel="dns-prefetch"/>
  <link href="https://cdn.intergi.com/"

In [17]:
#locate the table-header container; iterating it yields its child nodes
results_columns_2 = soup.find('div', class_="StickyTableHeader-sc-111omlw-0 ldshpo")
print(f'Number of columns initially extracted: {len(results_columns_2)}')
print('-' * 20)

#collect the text of every child of the header, in page order
list_columns_2 = [header_cell.text for header_cell in results_columns_2]
print(list_columns_2)

Number of columns initially extracted: 9
--------------------
['Rank', 'Role', 'Champion', 'helpTierarrow-dropdown', 'Win Rate', 'Ban Rate', 'Pick Rate', 'Counters', 'Matches']


In [18]:
#build an empty frame from the scraped header names, normalise the names,
#then keep only the columns used later (Rank, Role, Champion, Win_Rate)
df_3 = (
    pd.DataFrame(columns=list_columns_2)
    .rename(columns={
        "helpTierarrow-dropdown": "Tier",
        "Win Rate": "Win_Rate",
        "Ban Rate": "Ban_Rate",
        "Pick Rate": "Pick_Rate",
    })
    .drop(columns=['Counters', 'Tier', 'Matches', 'Ban_Rate', 'Pick_Rate'])
)
df_3

Unnamed: 0,Rank,Role,Champion,Win_Rate


In [19]:
#find all rows to iterate through
results_rows_2 = soup.find_all('div', class_="champion-row")
print(f'Number of Rows: {len(results_rows_2)}')

#collect one record per row and build the frame once afterwards:
#DataFrame.append inside a loop is quadratic and was removed in pandas 2.0
rank = "Master"
records = []
for result in results_rows_2:
    try:
        role = result.find('div', class_="champion-role").text
        role = role.replace("role-", "").capitalize()
        champion = result.find('span', class_="champion-name").text
        win_rate = result.find('div', class_="champion-win-rate").text
    except AttributeError as e:
        #find() returned None because an element is missing: report and skip the row
        print(e)
        continue
    records.append({'Rank': rank, 'Champion': champion, 'Role': role, 'Win_Rate': win_rate})

#explicit column list fixes the column order even when no rows were found
df_3 = pd.DataFrame(records, columns=['Rank', 'Champion', 'Role', 'Win_Rate'])

#sort df by descending champion win rates & reset index
#Win_Rate holds strings such as "74.3%"; order by the numeric value so that
#e.g. "9.8%" does not sort above "10.1%" (the stored strings stay unchanged)
win_rate_pct = pd.to_numeric(df_3['Win_Rate'].astype(str).str.rstrip('%'), errors='coerce')
df_3 = df_3.loc[win_rate_pct.sort_values(ascending=False).index].reset_index(drop=True)
df_3

Number of Rows: 196


Unnamed: 0,Rank,Champion,Role,Win_Rate
0,Master,Neeko,Top,74.3%
1,Master,Rumble,Top,66.7%
2,Master,Nocturne,Jungle,66.7%
3,Master,Kog'Maw,Bot,61.4%
4,Master,Rumble,Mid,60.1%
...,...,...,...,...
191,Master,Heimerdinger,Mid,35.7%
192,Master,Ziggs,Mid,35.3%
193,Master,Amumu,Jungle,33.3%
194,Master,Varus,Bot,31.3%


In [20]:
#write the master table (index included) to master.csv
df_3.to_csv(path_or_buf='master.csv')

In [21]:
#join all df's
#ignore_index gives the stacked frame unique row labels, which the
#label-based reordering below relies on
df_4 = pd.concat([df_1, df_2, df_3], ignore_index=True)

#sort by descending win rate & reset index
#Win_Rate holds strings such as "74.3%"; order by the numeric value so that
#e.g. "9.8%" does not sort above "10.1%" (the stored strings stay unchanged)
#NOTE(review): the displayed tables show Role as "Jungle" for the champion.gg
#frames and "Jungler" for the leagueofgraphs frame; confirm whether these
#should be unified before comparing roles across ranks
win_rate_pct = pd.to_numeric(df_4['Win_Rate'].astype(str).str.rstrip('%'), errors='coerce')
df_4 = df_4.loc[win_rate_pct.sort_values(ascending=False).index].reset_index(drop=True)
df_4

Unnamed: 0,Rank,Champion,Role,Win_Rate
0,Master,Neeko,Top,74.3%
1,Master,Rumble,Top,66.7%
2,Master,Nocturne,Jungle,66.7%
3,Master,Kog'Maw,Bot,61.4%
4,Master,Rumble,Mid,60.1%
...,...,...,...,...
536,Master,Heimerdinger,Mid,35.7%
537,Master,Ziggs,Mid,35.3%
538,Master,Amumu,Jungle,33.3%
539,Master,Varus,Bot,31.3%


In [22]:
#write the combined table (index included) to league_winrates.csv
df_4.to_csv(path_or_buf='league_winrates.csv')