In [30]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

# Smoke test: launch Chrome, load a page, then shut the browser down again.
driver = webdriver.Chrome()
try:
    driver.get("https://www.google.com")
    print("Browser should be open now")
finally:
    # Quit even when navigation fails so the browser session started above
    # is not left running after the cell errors out.
    driver.quit()

Browser should be open now


In [39]:
import re
import time
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from selenium import webdriver

# Site root against which hrefs found on the schedule page are resolved.
_ESPNCRICINFO_BASE_URL = 'https://www.espncricinfo.com'


def _build_commentary_url(original_url):
    """Return ``original_url`` with its last path segment replaced by
    ``ball-by-ball-commentary``.

    A trailing slash is dropped first so the segment being replaced is the
    page name rather than the empty string that follows the final ``/``.
    """
    url_parts = original_url.rstrip('/').split('/')
    url_parts[-1] = 'ball-by-ball-commentary'
    return '/'.join(url_parts)


def _extract_target_score(match):
    """Return the integer following ``T:`` in a match container, or ``None``.

    Scans every ``ci-team-score`` div inside ``match`` and stops at the first
    one whose score span contains a ``T:<digits>`` token.
    """
    for score_div in match.find_all('div', class_='ci-team-score'):
        # Find the score text container
        score_text_div = score_div.find('div', class_='ds-text-compact-s ds-text-typo ds-text-right ds-whitespace-nowrap')
        if not score_text_div:
            continue

        # Find the span that might contain target info
        target_span = score_text_div.find('span', class_='ds-text-compact-xs ds-mr-0.5')
        if not target_span:
            continue

        span_text = target_span.text.strip()
        if not span_text:
            continue
        print(f"Found span with text: '{span_text}'")

        # Extract numeric target score if present
        target_match = re.search(r'T:(\d+)', target_span.text)
        if target_match:
            target = int(target_match.group(1))
            print(f"Target score extracted: {target}")
            return target  # Found target, no need to continue searching

    return None


def get_match_data(url):
    """Scrape match links and target scores from a schedule page.

    Opens ``url`` in a Selenium-driven Chrome browser, waits a fixed five
    seconds, parses the page source with BeautifulSoup and walks every match
    container found. Progress is reported with ``print`` throughout. The
    browser is always closed (after a five second pause) before returning.

    Args:
        url: Address of the schedule/results page to load.

    Returns:
        A list of dicts, one per match container that contains a link, with
        the keys ``match_id`` (1-based counter over the kept containers),
        ``original_url``, ``commentary_url`` and ``target_score`` (``int`` or
        ``None`` when no ``T:<digits>`` token was found). An empty list is
        returned if any exception is raised while loading or parsing.

    Raises:
        Whatever ``webdriver.Chrome()`` raises: the driver is created before
        the ``try`` block, so start-up failures propagate to the caller.
    """
    print("Starting script...")

    # Initialize ChromeDriver
    print("Initializing ChromeDriver...")
    driver = webdriver.Chrome()
    print("ChromeDriver initialized successfully. Browser should open now.")

    try:
        # Load the page
        print(f"Loading URL: {url}")
        driver.get(url)
        time.sleep(5)  # Wait for page to load
        print("Paused for 5 seconds. Parsing page now...")

        # Parse with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Find all match containers
        matches = soup.find_all('div', class_='ds-p-4 ds-border-y ds-border-line')
        print(f"Number of match containers found: {len(matches)}")

        match_data = []
        match_id = 1

        for match in matches:
            print(f"\nProcessing match {match_id}")

            # Extract the match link
            link_tag = match.find('a', href=True)
            if not link_tag:
                print("No link tag found in this match container")
                continue
            # urljoin leaves absolute hrefs untouched and inserts the missing
            # slash for relative ones; plain concatenation mangled both cases.
            original_url = urljoin(_ESPNCRICINFO_BASE_URL, link_tag['href'])
            print(f"Original URL: {original_url}")

            # Construct commentary URL
            commentary_url = _build_commentary_url(original_url)
            print(f"Commentary URL: {commentary_url}")

            # Look through the team score containers for a target score
            target = _extract_target_score(match)
            if target is None:
                print("No target score found for this match")

            # Store the data
            match_data.append({
                'match_id': match_id,
                'original_url': original_url,
                'commentary_url': commentary_url,
                'target_score': target
            })
            match_id += 1

        print(f"\nTotal matches processed: {len(match_data)}")
        return match_data

    except Exception as e:
        # Deliberate best-effort: report the failure and hand back no data.
        print(f"An error occurred: {e}")
        return []
    finally:
        print("Processing complete. Browser will close in 5 seconds...")
        time.sleep(5)
        driver.quit()
        print("Browser closed.")

# Example usage
def _show_match_data(records):
    """Print a header line followed by one scraped record per line."""
    print("\nFinal Match Data:")
    for record in records:
        print(record)


if __name__ == "__main__":
    # Schedule page to scrape; the result stays in `data` for later use.
    schedule_url = "https://www.espncricinfo.com/series/indian-premier-league-2023-1345038/match-schedule-fixtures-and-results"
    data = get_match_data(schedule_url)

    # Print the results
    _show_match_data(data)

Starting script...
Initializing ChromeDriver...
ChromeDriver initialized successfully. Browser should open now.
Loading URL: https://www.espncricinfo.com/series/indian-premier-league-2023-1345038/match-schedule-fixtures-and-results
Paused for 5 seconds. Parsing page now...
Number of match containers found: 74

Processing match 1
Original URL: https://www.espncricinfo.com/series/indian-premier-league-2023-1345038/gujarat-titans-vs-chennai-super-kings-1st-match-1359475/full-scorecard
Commentary URL: https://www.espncricinfo.com/series/indian-premier-league-2023-1345038/gujarat-titans-vs-chennai-super-kings-1st-match-1359475/ball-by-ball-commentary
Found span with text: '(19.2/20 ov, T:179)'
Target score extracted: 179

Processing match 2
Original URL: https://www.espncricinfo.com/series/indian-premier-league-2023-1345038/punjab-kings-vs-kolkata-knight-riders-2nd-match-1359476/full-scorecard
Commentary URL: https://www.espncricinfo.com/series/indian-premier-league-2023-1345038/punjab-king