In [37]:
from selenium import webdriver
from bs4 import BeautifulSoup
import time

# Scrape the Chennai Super Kings squad page for IPL 2019 and print the players
# grouped by role (batters, all-rounders, bowlers).
url = "https://www.espncricinfo.com/series/ipl-2019-1165643/chennai-super-kings-squad-1166261/series-squads"

# Class strings copied from the page markup. BeautifulSoup compares a
# multi-class string against the full class attribute, so these must match the
# markup exactly.
CATEGORY_HEADING_CLASS = "ds-text-eyebrow-s ds-font-bold ds-uppercase"
PLAYER_GRID_CLASS = "ds-grid lg:ds-grid-cols-2"
PLAYER_NAME_CLASS = (
    "ds-text-compact-s ds-font-bold ds-text-typo ds-underline "
    "ds-decoration-ui-stroke hover:ds-text-typo-primary "
    "hover:ds-decoration-ui-stroke-primary ds-block"
)

# Load the page in a real browser and keep only its HTML. The browser is
# always shut down afterwards, even when loading fails, so no Chrome process
# is left running.
driver = webdriver.Chrome()
try:
    driver.get(url)
    time.sleep(2)  # Fixed pause to give the page time to render
    page_html = driver.page_source
finally:
    driver.quit()

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(page_html, "html.parser")

# Lists that collect the player names for each role
batters = []
allrounders = []
bowlers = []

# Heading text as it appears on the page -> list that receives those players.
# Headings with any other text are ignored.
players_by_category = {
    "BATTERS": batters,
    "ALLROUNDERS": allrounders,
    "BOWLERS": bowlers,
}

# Each role heading is a <span>; the players of that role sit in a grid <div>
# that is a following sibling of the heading's closest enclosing <div>.
category_sections = soup.find_all("span", class_=CATEGORY_HEADING_CLASS)
for section in category_sections:
    category_name = section.get_text(strip=True)

    parent_div = section.find_parent("div")
    if parent_div is None:
        # Heading is not wrapped in a <div>; there is nothing to walk from.
        continue

    player_grid = parent_div.find_next_sibling("div", class_=PLAYER_GRID_CLASS)
    if not player_grid:
        continue

    target_list = players_by_category.get(category_name)
    if target_list is None:
        continue

    player_names = player_grid.find_all("span", class_=PLAYER_NAME_CLASS)
    target_list.extend(player.get_text(strip=True) for player in player_names)

# Print the categorized players
print("=== Chennai Super Kings Squad (IPL 2019) ===")
for heading, players in (
    ("Batters", batters),
    ("All-Rounders", allrounders),
    ("Bowlers", bowlers),
):
    print(f"\n{heading}:")
    for player in players:
        print(f"- {player}")

=== Chennai Super Kings Squad (IPL 2019) ===

Batters:
- MS Dhoni
- Sam Billings
- Faf du Plessis
- Ruturaj Gaikwad
- Narayan Jagadeesan
- Suresh Raina
- Ambati Rayudu
- Dhruv Shorey
- Murali Vijay

All-Rounders:
- Chaitanya Bishnoi
- Dwayne Bravo
- Ravindra Jadeja
- Kedar Jadhav
- Scott Kuggeleijn
- Monu Kumar
- Mitchell Santner
- Shane Watson
- David Willey

Bowlers:
- KM Asif
- Deepak Chahar
- Harbhajan Singh
- Imran Tahir
- Karn Sharma
- Mohit Sharma
- Shardul Thakur
- Lungi Ngidi


In [42]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
import json

# Scrape the squads of every team linked from the IPL 2019 series squads page
# and write them to squads.json, grouped by role for each team.
url = "https://www.espncricinfo.com/series/ipl-2019-1165643/chennai-super-kings-squad-1166261/series-squads"

# Class strings copied from the page markup. BeautifulSoup compares a
# multi-class string against the full class attribute, so these must match the
# markup exactly.
CATEGORY_HEADING_CLASS = "ds-text-eyebrow-s ds-font-bold ds-uppercase"
PLAYER_GRID_CLASS = "ds-grid lg:ds-grid-cols-2"
PLAYER_NAME_CLASS = (
    "ds-text-compact-s ds-font-bold ds-text-typo ds-underline "
    "ds-decoration-ui-stroke hover:ds-text-typo-primary "
    "hover:ds-decoration-ui-stroke-primary ds-block"
)

# Dictionary to store squads for all teams
all_teams_squads = {}

# A single browser is reused for every page instead of launching a new Chrome
# per team, and it is always shut down at the end, even if a page fails.
driver = webdriver.Chrome()
try:
    # Maximize the browser window to ensure the squads panel is displayed
    driver.maximize_window()

    driver.get(url)
    time.sleep(2)  # Wait for the page to load

    # Team links; same selector as
    # document.querySelectorAll('.ds-group.ds-block.ds-px-4.ds-py-2')
    team_elements = driver.find_elements(By.CSS_SELECTOR, '.ds-group.ds-block.ds-px-4.ds-py-2')

    # Copy names and links into plain strings before navigating anywhere:
    # the element handles belong to this page and cannot be used after the
    # browser has moved on to another one.
    team_data = []
    for element in team_elements:
        team_href = element.get_attribute('href')
        if not team_href:
            # Matched element is not a usable link; nothing to navigate to.
            continue
        team_data.append((element.text.strip(), team_href))

    # Loop through each team
    for team_name, team_href in team_data:
        print(f"\nFetching squad for {team_name}...")

        # Navigate to the team's squad page
        driver.get(team_href)
        time.sleep(2)  # Wait for the page to load

        # Parse the page source after loading the team squad
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Lists that collect the player names for the current team
        batters = []
        allrounders = []
        bowlers = []

        # Heading text as it appears on the page -> list that receives those
        # players. Headings with any other text are ignored.
        players_by_category = {
            "BATTERS": batters,
            "ALLROUNDERS": allrounders,
            "BOWLERS": bowlers,
        }

        # Each role heading is a <span>; the players of that role sit in a
        # grid <div> that is a following sibling of the heading's closest
        # enclosing <div>.
        category_sections = soup.find_all("span", class_=CATEGORY_HEADING_CLASS)
        for section in category_sections:
            category_name = section.get_text(strip=True)

            parent_div = section.find_parent("div")
            if parent_div is None:
                continue

            player_grid = parent_div.find_next_sibling("div", class_=PLAYER_GRID_CLASS)
            if not player_grid:
                continue

            target_list = players_by_category.get(category_name)
            if target_list is None:
                continue

            player_names = player_grid.find_all("span", class_=PLAYER_NAME_CLASS)
            target_list.extend(player.get_text(strip=True) for player in player_names)

        # Store the squad for the current team
        all_teams_squads[team_name] = {
            "Batters": batters,
            "All-Rounders": allrounders,
            "Bowlers": bowlers,
        }
finally:
    driver.quit()

# Explicit encoding so the file does not depend on the platform default.
with open("squads.json", "w", encoding="utf-8") as file:
    json.dump(all_teams_squads, file, indent=4)

<a href="/series/ipl-2019-1165643/chennai-super-kings-squad-1166261/series-squads" class="ds-group ds-block ds-px-4 ds-py-2 hover:ds-bg-ui-fill-hover ds-bg-ui-fill-primary hover:ds-bg-ui-fill-primary"><span class="ds-text-tight-m ds-font-regular ds-text-typo-inverse1">Chennai Super Kings Squad</span></a>
<a href="/series/ipl-2019-1165643/delhi-capitals-squad-1166256/series-squads" class="ds-group ds-block ds-px-4 ds-py-2 hover:ds-bg-ui-fill-hover"><span class="ds-text-tight-m ds-font-regular">Delhi Capitals Squad</span></a>
<a href="/series/ipl-2019-1165643/kings-xi-punjab-squad-1166263/series-squads" class="ds-group ds-block ds-px-4 ds-py-2 hover:ds-bg-ui-fill-hover"><span class="ds-text-tight-m ds-font-regular">Kings XI Punjab Squad</span></a>
<a href="/series/ipl-2019-1165643/kolkata-knight-riders-squad-1166262/series-squads" class="ds-group ds-block ds-px-4 ds-py-2 hover:ds-bg-ui-fill-hover"><span class="ds-text-tight-m ds-font-regular">Kolkata Knight Riders Squad</span></a>
<a hre