In [1]:
!pip install selenium



In [2]:
!pip install bs4



In [3]:
!pip install webdriver-manager



In [10]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
from IPython.display import display

# Define the URL
url = "https://www.espncricinfo.com/series/indian-premier-league-2022-1298423/gujarat-titans-vs-rajasthan-royals-final-1312200/ball-by-ball-commentary"

# Define headers
headers = {"User-Agent": "Mozilla/5.0"}

# Request the webpage
response = requests.get(url, headers=headers)

if response.status_code != 200:
    print(f"❌ Failed to retrieve data. Status code: {response.status_code}")
else:
    print("✅ Successfully retrieved the webpage!")

    # Parse the page content
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract commentary
    commentary_divs = soup.find_all("div", class_="ds-text-tight-m")
    data = []

    # Dummy match details (These should ideally be scraped separately)
    match_name = "Gujarat Titans vs Rajasthan Royals - Final"
    match_winner = "Gujarat Titans"
    team1_score = "133/3"
    team2_score = "130/9"
    match_venue = "Narendra Modi Stadium, Ahmedabad"
    match_date = "May 29, 2022"

    for comment in commentary_divs:
        text = comment.get_text(strip=True)

        # Extract ball number and details
        if "." in text[:4]:  
            ball_no = text.split()[0]
            over, ball = ball_no.split(".")

            # Extract bowler and batter
            details = text.split(",")
            bowler_batter = details[0].split("to")

            if len(bowler_batter) == 2:
                bowler_name = bowler_batter[0].strip()
                batter_name = bowler_batter[1].strip()
            else:
                continue  # Skip invalid entries

            # Determine shot type and runs scored
            if "SIX" in text:
                shot_type = "boundary"
                runs_scored = 6
            elif "FOUR" in text:
                shot_type = "boundary"
                runs_scored = 4
            elif "1 run" in text:
                shot_type = "single"
                runs_scored = 1
            elif "2 runs" in text:
                shot_type = "double"
                runs_scored = 2
            elif "3 runs" in text:
                shot_type = "triple"
                runs_scored = 3
            elif "no run" in text:
                shot_type = "dot"
                runs_scored = 0
            else:
                shot_type = "other"
                runs_scored = "unknown"

            ball_type = "Unknown"
            speed = "N/A"

            # Append 15 values to match column count
            data.append([
                ball_no, over, bowler_name, batter_name, ball_type, shot_type, speed, runs_scored,
                match_name, match_winner, team1_score, team2_score, match_venue, match_date
            ])

    # Define column names (must match number of values in data)
    columns = [
        "Ball No", "Over", "Bowler Name", "Batter Name", "Ball Type",
        "Shot Type", "Speed of Ball", "Runs Scored", "Match Name",
        "Match Won By", "Team 1 Score", "Team 2 Score", "Match Venue", "Match Date"
    ]

    # Create DataFrame
    df = pd.DataFrame(data, columns=columns)
    
    # Display first 5 rows
    display(df.head())

    # Save to CSV
    file_name = "IPL_BALLBYBALL_COMMENTARY.csv"
    df.to_csv(file_name, index=False)

    # Print file path
    file_path = os.path.abspath(file_name)
    print(f"✅ Data saved successfully at: {file_path}")


✅ Successfully retrieved the webpage!


Unnamed: 0,Ball No,Over,Bowler Name,Batter Name,Ball Type,Shot Type,Speed of Ball,Runs Scored,Match Name,Match Won By,Team 1 Score,Team 2 Score,Match Venue,Match Date
0,18.16McCoy,18,18.16McCoy,Gill,Unknown,boundary,,6,Gujarat Titans vs Rajasthan Royals - Final,Gujarat Titans,133/3,130/9,"Narendra Modi Stadium, Ahmedabad","May 29, 2022"
1,17.62Ashwin,17,17.62Ashwin,Miller,Unknown,double,,2,Gujarat Titans vs Rajasthan Royals - Final,Gujarat Titans,133/3,130/9,"Narendra Modi Stadium, Ahmedabad","May 29, 2022"
2,17.51Ashwin,17,17.51Ashwin,Gill,Unknown,single,,1,Gujarat Titans vs Rajasthan Royals - Final,Gujarat Titans,133/3,130/9,"Narendra Modi Stadium, Ahmedabad","May 29, 2022"
3,17.4•Ashwin,17,17.4•Ashwin,Gill,Unknown,dot,,0,Gujarat Titans vs Rajasthan Royals - Final,Gujarat Titans,133/3,130/9,"Narendra Modi Stadium, Ahmedabad","May 29, 2022"
4,17.31Ashwin,17,17.31Ashwin,Miller,Unknown,single,,1,Gujarat Titans vs Rajasthan Royals - Final,Gujarat Titans,133/3,130/9,"Narendra Modi Stadium, Ahmedabad","May 29, 2022"


✅ Data saved successfully at: C:\Users\sahan\IPL_BALLBYBALL_COMMENTARY.csv
