In [29]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

For the data extraction process, we will use the `requests` and `bs4` (BeautifulSoup) libraries to grab our desired dataset.

In [None]:
# Send a GET request to the website (the URL targets patch 12.18, ranks=challenger).
# The timeout keeps the cell from hanging indefinitely if the site stops responding.
response = requests.get('https://www.metasrc.com/5v5/12.18/stats?ranks=challenger', timeout=30)

# Fail loudly on an HTTP error status (4xx/5xx) instead of parsing an error page.
response.raise_for_status()

# Parse the HTML content
soup = BeautifulSoup(response.text, 'html.parser')

# Find the first <table> in the HTML; soup.find returns None when there is none,
# so stop here with a clear message rather than an AttributeError further down.
table = soup.find('table')
if table is None:
  raise ValueError('No <table> element found in the downloaded page')

# Print the text of every data cell (<td>) as a quick check of what was scraped
for row in table.find_all('tr'):
  for cell in row.find_all('td'):
    print(cell.text)

In [None]:
# Collect the text of every data cell (<td>) for each table row (<tr>).
# A row with no <td> cells (e.g. a header row made of <th> cells) yields an
# empty list; pandas pads it with missing values, so that row still keeps its
# position in the index.
table_rows = [
  [cell.text for cell in row.find_all('td')]
  for row in table.find_all('tr')
]

# Build the frame in one call. DataFrame.append was removed in pandas 2.0, and
# appending inside a loop copied the whole frame again on every iteration.
df3 = pd.DataFrame(table_rows)

# Print the resulting DataFrame
print(df3)

Unfortunately, with BeautifulSoup some of the column headers were converted into 'NaN', so we will rename the columns to match the dataset seen online.

In [52]:
# Column names to apply to the scraped table, in left-to-right order.
column_labels = [
    'Name',
    'Role',
    'Tier',
    'Score',
    'Trend',
    'Win%',
    'Role%',
    'Pick%',
    'Ban%',
    'KDA',
]

In [53]:
# Pair each existing column with the label at the same position and rename accordingly.
df3 = df3.rename(
    columns={current: label for current, label in zip(df3.columns, column_labels)}
)

In [None]:
# Preview the first five rows to check the new column labels.
df3.head(n=5)

Once the columns have been relabeled, the NaN values remain as the top row of the DataFrame. We will now drop that first row, since it only held the previous NaN titles.

In [54]:
# Remove the row labeled 0 (the leftover placeholder row at the top of the frame).
# errors='ignore' keeps the cell safe to re-run: once label 0 is gone, running it
# again is a no-op instead of raising KeyError.
df3 = df3.drop(index=0, errors='ignore')

In [None]:
# Preview the first five rows to confirm the top row has been removed.
df3.head(n=5)

We see that the champion names are repeated twice when we want each to appear only once, i.e. 'AatroxAatrox' --> 'Aatrox'. This is likely because the website uses an image of the champion that is labeled with the champion's name. The line of code below goes through the Name column and collapses each repeated value into a single one.

In [55]:
# Each scraped name arrives doubled (e.g. 'AatroxAatrox'); collapse it to one copy.
# regex=True is required: Series.str.replace defaults to regex=False in pandas 2.0+,
# which would treat the pattern as a literal string and replace nothing.
# The pattern is anchored (^...$) and captures any characters, so a name containing
# spaces or punctuation is collapsed as well when the whole value is an exact repeat.
df3['Name'] = df3['Name'].str.replace(r'^(.+)\1$', r'\1', regex=True)


  df3['Name'] = df3['Name'].str.replace(r'(\b\w+)\1', r'\1')


In [56]:
# Preview the first five rows to verify the cleaned Name column.
df3.head(n=5)

Unnamed: 0,Name,Role,Tier,Score,Trend,Win%,Role%,Pick%,Ban%,KDA
1,Aatrox,TOP,God / S+,87.01,2.37,52.00%,93.22%,21.88%,32.96%,2.12
2,Ahri,MID,Strong / S,57.3,-4.49,51.54%,98.48%,7.33%,2.27%,2.54
3,Akali,MID,Strong / S,57.7,0.69,48.07%,73.73%,8.10%,10.24%,2.56
4,Akali,TOP,Fair / B,47.84,10.61,48.78%,25.95%,3.05%,10.24%,2.28
5,Akshan,MID,Strong / S,59.47,12.85,64.71%,55.28%,3.01%,5.33%,2.74


df3.to_csv('challenger.csv', index=False)  # Run this line in a code cell to write the table to challenger.csv (index=False leaves the row index out of the file)