In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
# Wikipedia article slugs for the eight 2018 franchises; each one is
# interpolated into the season-page URL when scraping squads.
team_list_2018 = [
    'Chennai_Super_Kings',
    'Delhi_Daredevils',
    'Kings_XI_Punjab',
    'Kolkata_Knight_Riders',
    'Mumbai_Indians',
    'Rajasthan_Royals',
    'Royal_Challengers_Bangalore',
    'Sunrisers_Hyderabad',
]

In [34]:
# Scrape the 2018 squad table for every team and collect one DataFrame per
# team (columns after reset_index: 'index' = player name, 'Nationality',
# 'Role', 'Team').
team_df_list_2018 = []

for team in team_list_2018:
    # URL of the Wikipedia page
    url = f'https://en.wikipedia.org/wiki/{team}_in_2018#Squad'

    # A timeout keeps the cell from hanging forever on a stalled connection,
    # and raise_for_status() surfaces HTTP errors here instead of as a
    # confusing parsing failure further down.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Create a BeautifulSoup object to parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # soup.find returns the first matching table (or None); that first
    # 'wikitable' is treated as the squad table.
    table = soup.find('table', class_='wikitable')
    if table is None:
        raise ValueError(f'No wikitable found at {url}')

    # Maps player name -> details. Keying by name means a repeated name
    # within one team keeps only the last row seen.
    player_details = {}

    # Role heading most recently encountered while walking the rows
    current_role = ''

    for row in table.find_all('tr'):
        # A header cell carrying a colspan attribute is treated as a role
        # heading that applies to the rows following it.
        header = row.find('th')
        if header and header.has_attr('colspan'):
            current_role = header.text.strip()
            continue

        cells = row.find_all('td')
        # cells[2] is read below, so at least three cells are required
        # (the previous `>= 2` guard raised IndexError on two-cell rows).
        if len(cells) < 3:
            continue

        # The name and nationality are taken from the first link in the
        # second and third cells; rows lacking either link are skipped.
        name_cell = cells[1].find('a')
        nationality_cell = cells[2].find('a')
        if name_cell and nationality_cell:
            player_name = name_cell.text.strip()
            nationality = nationality_cell.text.strip()

            # Add the player details to the dictionary
            player_details[player_name] = {'Nationality': nationality, 'Role': current_role, 'Team': team.replace('_', ' ')}

    # Convert the dictionary to a DataFrame, then move the player name out of
    # the index into a regular column (named 'index' by reset_index).
    df = pd.DataFrame.from_dict(player_details, orient='index')
    df = df.reset_index()

    team_df_list_2018.append(df)

In [36]:
# Stack the per-team frames into one table with a fresh 0..n-1 index, order
# the rows by nationality and then role, and give the player-name column
# (called 'index' after the per-team reset_index) a meaningful name.
# The index is reset before sorting, so the row labels shown are positions in
# the unsorted concatenation.
combined_df_2018 = (
    pd.concat(team_df_list_2018)
    .reset_index(drop=True)
    .sort_values(['Nationality', 'Role'])
    .rename(columns={'index': 'player_name'})
)

# Display only the rows whose nationality is England
combined_df_2018.loc[combined_df_2018['Nationality'] == 'England']

Unnamed: 0,player_name,Nationality,Role,Team
11,David Willey,England,All-rounders,Chennai Super Kings
124,Ben Stokes,England,All-rounders,Rajasthan Royals
146,Moeen Ali,England,All-rounders,Royal Challengers Bangalore
147,Chris Woakes,England,All-rounders,Royal Challengers Bangalore
164,Alex Hales,England,Batsmen,Sunrisers Hyderabad
16,Mark Wood,England,Bowlers,Chennai Super Kings
46,Liam Plunkett,England,Bowlers,Delhi Daredevils
130,Jofra Archer,England,Bowlers,Rajasthan Royals
186,Chris Jordan,England,Bowlers,Sunrisers Hyderabad
14,Sam Billings,England,Wicket-keepers,Chennai Super Kings


In [27]:
# Wikipedia article slugs for the eight 2019 franchises; each one is
# interpolated into the season-page URL when scraping squads.
team_list_2019 = [
    'Chennai_Super_Kings',
    'Delhi_Capitals',
    'Kings_XI_Punjab',
    'Kolkata_Knight_Riders',
    'Mumbai_Indians',
    'Rajasthan_Royals',
    'Royal_Challengers_Bangalore',
    'Sunrisers_Hyderabad',
]

In [32]:
# Scrape the 2019 squad table for every team and collect one DataFrame per
# team (columns after reset_index: 'index' = player name, 'Nationality',
# 'Role', 'Team').
team_df_list_2019 = []

for team in team_list_2019:
    # URL of the Wikipedia page
    url = f'https://en.wikipedia.org/wiki/{team}_in_2019#Squad'

    # A timeout keeps the cell from hanging forever on a stalled connection,
    # and raise_for_status() surfaces HTTP errors here instead of as a
    # confusing parsing failure further down.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Create a BeautifulSoup object to parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # soup.find returns the first matching table (or None); that first
    # 'wikitable' is treated as the squad table.
    table = soup.find('table', class_='wikitable')
    if table is None:
        raise ValueError(f'No wikitable found at {url}')

    # Maps player name -> details. Keying by name means a repeated name
    # within one team keeps only the last row seen.
    player_details = {}

    # Role heading most recently encountered while walking the rows
    current_role = ''

    for row in table.find_all('tr'):
        # A header cell carrying a colspan attribute is treated as a role
        # heading that applies to the rows following it.
        header = row.find('th')
        if header and header.has_attr('colspan'):
            current_role = header.text.strip()
            continue

        cells = row.find_all('td')
        # cells[2] is read below, so at least three cells are required
        # (the previous `>= 2` guard raised IndexError on two-cell rows).
        if len(cells) < 3:
            continue

        # The name and nationality are taken from the first link in the
        # second and third cells; rows lacking either link are skipped.
        name_cell = cells[1].find('a')
        nationality_cell = cells[2].find('a')
        if name_cell and nationality_cell:
            player_name = name_cell.text.strip()
            nationality = nationality_cell.text.strip()

            # Add the player details to the dictionary
            player_details[player_name] = {'Nationality': nationality, 'Role': current_role, 'Team': team.replace('_', ' ')}

    # Convert the dictionary to a DataFrame, then move the player name out of
    # the index into a regular column (named 'index' by reset_index).
    df = pd.DataFrame.from_dict(player_details, orient='index')
    df = df.reset_index()

    team_df_list_2019.append(df)

In [37]:
# Stack the per-team frames into one table with a fresh 0..n-1 index, order
# the rows by nationality and then role, and give the player-name column
# (called 'index' after the per-team reset_index) a meaningful name.
# The index is reset before sorting, so the row labels shown are positions in
# the unsorted concatenation.
combined_df_2019 = (
    pd.concat(team_df_list_2019)
    .reset_index(drop=True)
    .sort_values(['Nationality', 'Role'])
    .rename(columns={'index': 'player_name'})
)

# Display only the rows whose nationality is Australia
combined_df_2019.loc[combined_df_2019['Nationality'] == 'Australia']

Unnamed: 0,player_name,Nationality,Role,Team
9,Shane Watson,Australia,All-rounders,Chennai Super Kings
33,Moises Henriques,Australia,All-rounders,Kings XI Punjab
82,Ben Cutting,Australia,All-rounders,Mumbai Indians
135,Marcus Stoinis,Australia,All-rounders,Royal Challengers Bangalore
53,Chris Lynn,Australia,Batsmen,Kolkata Knight Riders
106,Steve Smith,Australia,Batsmen,Rajasthan Royals
108,Ashton Turner,Australia,Batsmen,Rajasthan Royals
153,David Warner,Australia,Batsmen,Sunrisers Hyderabad
44,Andrew Tye,Australia,Bowlers,Kings XI Punjab
72,Matt Kelly,Australia,Bowlers,Kolkata Knight Riders
