In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
# Franchise names for the 2018 season, spelled with underscores because they
# are interpolated directly into Wikipedia page URLs.
team_list_2018 = [
    'Chennai_Super_Kings',
    'Delhi_Daredevils',
    'Kings_XI_Punjab',
    'Kolkata_Knight_Riders',
    'Mumbai_Indians',
    'Rajasthan_Royals',
    'Royal_Challengers_Bangalore',
    'Sunrisers_Hyderabad',
]

In [4]:
# One DataFrame per team, each holding that team's 2018 squad.
team_df_list_2018 = []

for team in team_list_2018:
    # Season page for this franchise. The '#Squad' fragment is handled by the
    # browser only and does not change what the server returns.
    url = f'https://en.wikipedia.org/wiki/{team}_in_2018#Squad'

    # The timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() reports a missing page here rather than as a
    # confusing parsing failure further down.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # First table on the page carrying the 'wikitable' class.
    # NOTE(review): assumed to be the squad table -- confirm for each page.
    table = soup.find('table', class_='wikitable')
    if table is None:
        raise ValueError(f'No wikitable found on {url}')

    # Maps player name -> details. Because it is keyed by name, a name that
    # appears twice in the table keeps only its last row.
    player_details = {}

    # Text of the most recent colspan header row (e.g. 'Batsmen'); it labels
    # every player row that follows until the next such header.
    current_role = ''

    for row in table.find_all('tr'):
        header = row.find('th')
        if header and header.has_attr('colspan'):
            current_role = header.text.strip()
            continue

        cells = row.find_all('td')
        # cells[2] is read below, so a row needs at least three cells.
        if len(cells) < 3:
            continue

        # Only rows where both the name and the nationality are links count.
        name_cell = cells[1].find('a')
        nationality_cell = cells[2].find('a')
        if name_cell and nationality_cell:
            player_name = name_cell.text.strip()
            nationality = nationality_cell.text.strip()
            player_details[player_name] = {'Nationality': nationality, 'Role': current_role, 'Team': team.replace('_', ' ')}

    # Player names become the index; reset_index() moves them into a regular
    # column called 'index'.
    df = pd.DataFrame.from_dict(player_details, orient='index')
    df = df.reset_index()

    team_df_list_2018.append(df)

In [5]:
# Stack the per-team frames into one table with a fresh 0..n-1 index, order
# the rows by nationality and then role, and give the column that holds the
# player names (called 'index' after the earlier reset) a proper name.
combined_df_2018 = (
    pd.concat(team_df_list_2018)
    .reset_index(drop=True)
    .sort_values(['Nationality', 'Role'])
    .rename(columns={'index': 'player_name'})
)

# Last expression of the cell, so the notebook renders it: English players only.
combined_df_2018.loc[combined_df_2018['Nationality'].eq('England')]

Unnamed: 0,player_name,Nationality,Role,Team
11,David Willey,England,All-rounders,Chennai Super Kings
124,Ben Stokes,England,All-rounders,Rajasthan Royals
146,Moeen Ali,England,All-rounders,Royal Challengers Bangalore
147,Chris Woakes,England,All-rounders,Royal Challengers Bangalore
164,Alex Hales,England,Batsmen,Sunrisers Hyderabad
16,Mark Wood,England,Bowlers,Chennai Super Kings
46,Liam Plunkett,England,Bowlers,Delhi Daredevils
130,Jofra Archer,England,Bowlers,Rajasthan Royals
186,Chris Jordan,England,Bowlers,Sunrisers Hyderabad
14,Sam Billings,England,Wicket-keepers,Chennai Super Kings


In [6]:
# Franchise names for the 2019 season, spelled with underscores because they
# are interpolated directly into Wikipedia page URLs.
team_list_2019 = [
    'Chennai_Super_Kings',
    'Delhi_Capitals',
    'Kings_XI_Punjab',
    'Kolkata_Knight_Riders',
    'Mumbai_Indians',
    'Rajasthan_Royals',
    'Royal_Challengers_Bangalore',
    'Sunrisers_Hyderabad',
]

In [7]:
# One DataFrame per team, each holding that team's 2019 squad.
team_df_list_2019 = []

for team in team_list_2019:
    # Season page for this franchise. The '#Squad' fragment is handled by the
    # browser only and does not change what the server returns.
    url = f'https://en.wikipedia.org/wiki/{team}_in_2019#Squad'

    # Bound the wait with a timeout and fail fast on an HTTP error, so a bad
    # page is reported here and not as a parsing failure later.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # First table on the page carrying the 'wikitable' class.
    # NOTE(review): assumed to be the squad table -- confirm for each page.
    table = soup.find('table', class_='wikitable')
    if table is None:
        raise ValueError(f'No wikitable found on {url}')

    # Maps player name -> details. Because it is keyed by name, a name that
    # appears twice in the table keeps only its last row.
    player_details = {}

    # Text of the most recent colspan header row; it labels every player row
    # that follows until the next such header.
    current_role = ''

    for row in table.find_all('tr'):
        header = row.find('th')
        if header and header.has_attr('colspan'):
            current_role = header.text.strip()
            continue

        cells = row.find_all('td')
        # cells[2] is read below, so a row needs at least three cells.
        if len(cells) < 3:
            continue

        # Only rows where both the name and the nationality are links count.
        name_cell = cells[1].find('a')
        nationality_cell = cells[2].find('a')
        if name_cell and nationality_cell:
            player_name = name_cell.text.strip()
            nationality = nationality_cell.text.strip()
            player_details[player_name] = {'Nationality': nationality, 'Role': current_role, 'Team': team.replace('_', ' ')}

    # Player names become the index; reset_index() moves them into a regular
    # column called 'index'.
    df = pd.DataFrame.from_dict(player_details, orient='index')
    df = df.reset_index()

    team_df_list_2019.append(df)

In [8]:
# Stack the per-team frames into one table with a fresh 0..n-1 index, order
# the rows by nationality and then role, and give the column that holds the
# player names (called 'index' after the earlier reset) a proper name.
combined_df_2019 = (
    pd.concat(team_df_list_2019)
    .reset_index(drop=True)
    .sort_values(['Nationality', 'Role'])
    .rename(columns={'index': 'player_name'})
)

# Last expression of the cell, so the notebook renders it: Australian players only.
combined_df_2019.loc[combined_df_2019['Nationality'].eq('Australia')]

Unnamed: 0,player_name,Nationality,Role,Team
9,Shane Watson,Australia,All-rounders,Chennai Super Kings
33,Moises Henriques,Australia,All-rounders,Kings XI Punjab
82,Ben Cutting,Australia,All-rounders,Mumbai Indians
135,Marcus Stoinis,Australia,All-rounders,Royal Challengers Bangalore
53,Chris Lynn,Australia,Batsmen,Kolkata Knight Riders
106,Steve Smith,Australia,Batsmen,Rajasthan Royals
108,Ashton Turner,Australia,Batsmen,Rajasthan Royals
153,David Warner,Australia,Batsmen,Sunrisers Hyderabad
44,Andrew Tye,Australia,Bowlers,Kings XI Punjab
72,Matt Kelly,Australia,Bowlers,Kolkata Knight Riders


In [9]:
# One DataFrame per team, each holding that team's 2020 squad.
team_df_list_2020 = []

# The 2020 pages are looked up with the same team names as the 2019 list.
for team in team_list_2019:
    # Season page for this franchise. The '#Squad' fragment is handled by the
    # browser only and does not change what the server returns.
    url = f'https://en.wikipedia.org/wiki/{team}_in_2020#Squad'

    # Bound the wait with a timeout and fail fast on an HTTP error, so a bad
    # page is reported here and not as a parsing failure later.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # First table on the page carrying the 'wikitable' class.
    # NOTE(review): assumed to be the squad table -- confirm for each page.
    table = soup.find('table', class_='wikitable')
    if table is None:
        raise ValueError(f'No wikitable found on {url}')

    # Maps player name -> details. Because it is keyed by name, a name that
    # appears twice in the table keeps only its last row.
    player_details = {}

    # Text of the most recent colspan header row; it labels every player row
    # that follows until the next such header.
    current_role = ''

    for row in table.find_all('tr'):
        header = row.find('th')
        if header and header.has_attr('colspan'):
            current_role = header.text.strip()
            continue

        cells = row.find_all('td')
        # cells[2] is read below, so a row needs at least three cells.
        if len(cells) < 3:
            continue

        # Only rows where both the name and the nationality are links count.
        name_cell = cells[1].find('a')
        nationality_cell = cells[2].find('a')
        if name_cell and nationality_cell:
            player_name = name_cell.text.strip()
            nationality = nationality_cell.text.strip()
            player_details[player_name] = {'Nationality': nationality, 'Role': current_role, 'Team': team.replace('_', ' ')}

    # Player names become the index; reset_index() moves them into a regular
    # column called 'index'.
    df = pd.DataFrame.from_dict(player_details, orient='index')
    df = df.reset_index()

    team_df_list_2020.append(df)

In [15]:
# Stack the per-team frames into one table with a fresh 0..n-1 index, order
# the rows by nationality and then role, and give the column that holds the
# player names (called 'index' after the earlier reset) a proper name.
combined_df_2020 = (
    pd.concat(team_df_list_2020)
    .reset_index(drop=True)
    .sort_values(['Nationality', 'Role'])
    .rename(columns={'index': 'player_name'})
)

# Last expression of the cell, so the notebook renders it: Indian players of
# Chennai Super Kings, ordered by team and then role.
combined_df_2020.loc[
    combined_df_2020['Nationality'].eq('India')
    & combined_df_2020['Team'].eq('Chennai Super Kings')
].sort_values(['Team', 'Role'])

Unnamed: 0,player_name,Nationality,Role,Team
5,Ravindra Jadeja,India,All-rounders,Chennai Super Kings
0,Suresh Raina,India,Batsmen,Chennai Super Kings
1,Ambati Rayudu,India,Batsmen,Chennai Super Kings
2,Murali Vijay,India,Batsmen,Chennai Super Kings
4,Ruturaj Gaikwad,India,Batsmen,Chennai Super Kings
16,Shardul Thakur,India,Pace Bowlers,Chennai Super Kings
17,Deepak Chahar,India,Pace Bowlers,Chennai Super Kings
19,KM Asif,India,Pace Bowlers,Chennai Super Kings
20,Monu Kumar,India,Pace Bowlers,Chennai Super Kings
9,Karn Sharma,India,Spin Bowlers,Chennai Super Kings


In [34]:
# Same franchises as the 2018 list, except that 'Sunrisers_Hyderabad' is
# swapped for 'Deccan_Chargers'; every other name passes through unchanged.
team_list_2009 = [
    {'Sunrisers_Hyderabad': 'Deccan_Chargers'}.get(team_name, team_name)
    for team_name in team_list_2018
]

In [27]:
# One DataFrame per team, each holding that team's 2009 squad.
team_df_list_2009 = []

for team in team_list_2009:
    # Season page for this franchise. The '#Squad' fragment is handled by the
    # browser only and does not change what the server returns.
    url = f'https://en.wikipedia.org/wiki/{team}_in_2009#Squad'

    # Bound the wait with a timeout and fail fast on an HTTP error, so a bad
    # page is reported here and not as a parsing failure later.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # First table on the page carrying the 'wikitable' class.
    # NOTE(review): assumed to be the squad table -- confirm for each page.
    table = soup.find('table', class_='wikitable')
    if table is None:
        raise ValueError(f'No wikitable found on {url}')

    # Maps player name -> details. Because it is keyed by name, a name that
    # appears twice in the table keeps only its last row.
    player_details = {}

    # Text of the most recent colspan header row; it labels every player row
    # that follows until the next such header.
    current_role = ''

    for row in table.find_all('tr'):
        header = row.find('th')
        if header and header.has_attr('colspan'):
            current_role = header.text.strip()
            continue

        cells = row.find_all('td')
        # This cell reads the name from cells[0] and the nationality from
        # cells[1], so two cells are enough.
        if len(cells) < 2:
            continue

        # Only rows where both the name and the nationality are links count.
        name_cell = cells[0].find('a')
        nationality_cell = cells[1].find('a')
        if name_cell and nationality_cell:
            player_name = name_cell.text.strip()
            nationality = nationality_cell.text.strip()
            player_details[player_name] = {'Nationality': nationality, 'Role': current_role, 'Team': team.replace('_', ' ')}

    # Player names become the index; reset_index() moves them into a regular
    # column called 'index'.
    df = pd.DataFrame.from_dict(player_details, orient='index')
    df = df.reset_index()

    team_df_list_2009.append(df)

In [30]:
# Spot-check the most recently scraped 2009 squad, ordered by the 'index'
# column (which holds the player names at this point).
team_df_list_2009[-1].sort_values(by='index')

Unnamed: 0,index,Nationality,Role,Team
8,Abhishek Nayar,India,All-rounders,Sunrisers Hyderabad
4,Ajinkya Rahane,India,Batsmen,Sunrisers Hyderabad
20,Chetanya Nanda,India,Bowlers,Sunrisers Hyderabad
16,Dhawal Kulkarni,India,Bowlers,Sunrisers Hyderabad
15,Dilhara Fernando,Sri Lanka,Bowlers,Sunrisers Hyderabad
9,Dwayne Bravo,Trinidad and Tobago,All-rounders,Sunrisers Hyderabad
7,Graham Napier,England,All-rounders,Sunrisers Hyderabad
14,Harbhajan Singh,India,Bowlers,Sunrisers Hyderabad
3,Jean-Paul Duminy,South Africa,Batsmen,Sunrisers Hyderabad
19,Kyle Mills,New Zealand,Bowlers,Sunrisers Hyderabad


In [44]:
# Look up one player's role and country on cricmetric.
url = 'http://www.cricmetric.com/playerstats.py?player=Devdutt+Padikkal&role=all&format=all&groupby=year'

# Bound the wait with a timeout and fail fast on an HTTP error instead of
# parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Team names that are kept as a country; anything else is reported as 'Uncapped'.
international_list = ['Afghanistan', 'Australia', 'Bangladesh', 'England', 'India', 'Ireland', 'New Zealand', 'Pakistan', 'South Africa', 'Sri Lanka', 'West Indies', 'Zimbabwe', 'Nepal', 'Netherlands', 'Scotland']

# Create a BeautifulSoup object to parse the HTML
soup = BeautifulSoup(response.text, 'html.parser')

# Bold labels on the page; either lookup returns None when the label is absent.
country_element = soup.find('b', string='Teams played for')
role_element = soup.find('b', string='Role')

# The role is taken from the node right after the 'Role' label. That node is
# only usable when it is plain text: it can also be missing or another tag.
role_sibling = role_element.next_sibling if role_element else None
if isinstance(role_sibling, str):
    role_text = role_sibling.strip(': ')
else:
    role_text = ''
print(role_text)

# Default to an empty string so the name is always bound, mirroring role_text.
player_country = ''

# First link after the 'Teams played for' label.
# NOTE(review): presumably the first team listed for the player -- confirm
# against the page layout.
player_country_element = country_element.find_next_sibling('a') if country_element else None
if player_country_element:
    player_country = player_country_element.text.strip()
    if player_country not in international_list:
        player_country = 'Uncapped'
    print(player_country)


Top-order batsman
India
