In [None]:
import re
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Matches the ordinal in link texts such as "..., 1st Match" or "..., 22nd Match".
_MATCH_NO_RE = re.compile(r'\b(\d+)(?:st|nd|rd|th)\s+Match\b', re.IGNORECASE)

# 12-hour clock: %p only influences the parsed hour when combined with %I,
# so %H here would silently drop the AM/PM marker.
_START_FORMAT = "%b %d, %Y, %I:%M %p"

# Upper bound (seconds) for the HTTP request so a stalled server cannot hang us.
_REQUEST_TIMEOUT_SECONDS = 30


def _extract_match_no(match_name):
    """Return the match number in *match_name* as a digit string, or "N/A"."""
    leading = match_name.split(' ')[0]
    if leading.isdigit():
        return leading
    found = _MATCH_NO_RE.search(match_name)
    return found.group(1) if found else "N/A"


def _classify_match(match_name):
    """Return the playoff stage named in *match_name*, defaulting to "League"."""
    for stage in ("Qualifier", "Eliminator", "Final"):
        if stage in match_name:
            return stage
    return "League"


def _split_teams(match_name):
    """Split a "TEAM A vs TEAM B, ..." link text into its two team names."""
    fixture = match_name.split(',', 1)[0]
    first, separator, second = fixture.partition(' vs ')
    if not separator:
        return "N/A", "N/A"
    return first.strip() or "N/A", second.strip() or "N/A"


def _text_or_na(card, tag, css_class):
    """Return the stripped text of the first matching child of *card*, or "N/A"."""
    node = card.find(tag, class_=css_class)
    return "N/A" if node is None else node.text.strip()


def _parse_start(date_time):
    """Parse *date_time* into (start datetime, end date); ("N/A", "N/A") on failure."""
    try:
        start = datetime.strptime(date_time, _START_FORMAT)
    except ValueError:
        return "N/A", "N/A"
    return start, start.date()


def scrape_ipl_2021_matches():
    """Scrape the IPL 2021 match list from Cricbuzz into a DataFrame.

    Returns:
        A ``pandas.DataFrame`` with one row per match card that contains a
        link, with the columns year, series_type, series_name, match_no,
        match_type, match_name, match_href, match_team1, match_team2,
        match_datetime_start, match_date_end and match_venue. Fields that
        cannot be extracted hold the string "N/A". Returns ``None`` (after
        printing the error) when the page cannot be fetched.
    """
    base_url = "https://www.cricbuzz.com/cricket-series/3472/indian-premier-league-2021/matches"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        response = requests.get(
            base_url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the page: {e}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    columns = (
        'year',
        'series_type',
        'series_name',
        'match_no',
        'match_type',
        'match_name',
        'match_href',
        'match_team1',
        'match_team2',
        'match_datetime_start',
        'match_date_end',
        'match_venue',
    )
    # Column-oriented storage keeps the column set even when no card is found.
    data = {column: [] for column in columns}

    series_year = "2021"
    series_type = "T20"
    series_name = "Indian Premier League"

    for card in soup.find_all('div', class_='cb-col-75 cb-col'):
        match_link_tag = card.find('a', href=True)
        if match_link_tag is None:
            continue
        match_name = match_link_tag.text.strip()

        # Prefer dedicated team spans; fall back to the "A vs B, ..." link
        # text because the spans are not always present inside the card.
        teams = card.find_all('span', class_='cb-team-name')
        name_team1, name_team2 = _split_teams(match_name)
        team1 = teams[0].text.strip() if len(teams) > 0 else name_team1
        team2 = teams[1].text.strip() if len(teams) > 1 else name_team2

        date_time = _text_or_na(card, 'div', 'cb-match-time')
        match_datetime_start, match_date_end = _parse_start(date_time)

        row = {
            'year': series_year,
            'series_type': series_type,
            'series_name': series_name,
            'match_no': _extract_match_no(match_name),
            'match_type': _classify_match(match_name),
            'match_name': match_name,
            'match_href': match_link_tag['href'],
            'match_team1': team1,
            'match_team2': team2,
            'match_datetime_start': match_datetime_start,
            'match_date_end': match_date_end,
            'match_venue': _text_or_na(card, 'div', 'cb-match-venue'),
        }
        for column in columns:
            data[column].append(row[column])

    return pd.DataFrame(data)
# Run the scraper once; the result is None when the page could not be fetched.
ipl_2021_matches = scrape_ipl_2021_matches()
if ipl_2021_matches is None:
    print("Failed to scrape data.")
else:
    # Show a preview, then persist the full table without the index column.
    print(ipl_2021_matches.head())
    ipl_2021_matches.to_csv('ipl_2021_matches.csv', index=False)

   year series_type            series_name match_no match_type  \
0  2021         T20  Indian Premier League      N/A     League   
1  2021         T20  Indian Premier League      N/A     League   
2  2021         T20  Indian Premier League      N/A     League   
3  2021         T20  Indian Premier League      N/A     League   
4  2021         T20  Indian Premier League      N/A     League   

                                          match_name  \
0  MUMBAI INDIANS vs ROYAL CHALLENGERS BENGALURU,...   
1   CHENNAI SUPER KINGS vs DELHI CAPITALS, 2nd Match   
2  SUNRISERS HYDERABAD vs KOLKATA KNIGHT RIDERS, ...   
3        RAJASTHAN ROYALS vs PUNJAB KINGS, 4th Match   
4  KOLKATA KNIGHT RIDERS vs MUMBAI INDIANS, 5th M...   

                                          match_href match_team1 match_team2  \
0  /cricket-scores/35612/mi-vs-rcb-1st-match-indi...         N/A         N/A   
1  /cricket-scores/35617/csk-vs-dc-2nd-match-indi...         N/A         N/A   
2  /cricket-scores/35618/s