# Scraping the Time of Every Match:

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
import re
from bs4 import BeautifulSoup as bs
import time
import pandas as pd

In [3]:
# Month label shown on a match card -> zero-padded month number.
_MONTH_NUMBERS = {
    'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04', 'MAY': '05', 'JUN': '06',
    'JUL': '07', 'AUG': '08', 'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12',
}

# Clock time such as "8:00 pm"; any trailing zone label is not captured.
_MATCH_TIME_RE = re.compile(r'(\d{1,2}:\d{2}\s*[ap]m)', re.IGNORECASE)


def _parse_match_datetime(datetime_text, season):
    """Split a card's date/time label into ``(match_date, match_time)``.

    The label is expected to look like ``"JUN, SUN 1 , 8:00 pm IST"``:
    month, then weekday plus day number, then the clock time, separated
    by commas.

    Args:
        datetime_text: Raw text of the date/time element ("" if absent).
        season: Year used as the year component of the date.

    Returns:
        A tuple ``(match_date, match_time)``. ``match_date`` is
        ``"YYYY-MM-DD"``, or ``""`` when the month or the day number
        cannot be recognised. ``match_time`` is e.g. ``"8:00 pm"``, or
        ``""`` when no time is found.
    """
    cleaned = re.sub(r'\s+', ' ', datetime_text.strip())
    parts = [part.strip() for part in cleaned.split(',')]
    if len(parts) < 2:
        return "", ""

    # Only the first three letters are looked up, so "JUNE" resolves like
    # "JUN". An unknown label gives no date instead of a made-up January.
    month_num = _MONTH_NUMBERS.get(parts[0].upper()[:3])

    # The second field mixes weekday and day number ("SUN 1"); keep the
    # first purely numeric token.
    day_num = next((tok for tok in parts[1].split() if tok.isdigit()), None)

    if month_num and day_num:
        match_date = f"{season}-{month_num}-{day_num.zfill(2)}"
    else:
        match_date = ""

    time_part = parts[2] if len(parts) > 2 else ""
    time_match = _MATCH_TIME_RE.search(time_part)
    match_time = time_match.group(1) if time_match else ""

    return match_date, match_time


def scrape_time(season):
    """Scrape match number, teams, date and time for one IPL season.

    Opens the season's results page in Chrome, waits a fixed ten seconds,
    and parses the resulting HTML for match cards.

    Args:
        season: Season year; used in the URL, as the year of every parsed
            date, and as the value of the "Season" column.

    Returns:
        A pandas DataFrame with columns "Match", "Team 1", "Team 2",
        "Date", "Time" and "Season", one row per successfully parsed
        card, in the reverse of the order the cards appear on the page.
        Cards that fail to parse are reported on stdout and skipped.
    """
    url = f'https://www.iplt20.com/matches/results/{season}'
    print(f"Fetching IPL {season} results from {url} ...")

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)
    try:
        driver.get(url)
        # Fixed pause before reading the DOM; presumably gives the page's
        # client-side rendering time to populate the match list.
        time.sleep(10)
        page_html = driver.page_source
    finally:
        # Always shut the browser down, even when loading the page fails,
        # so repeated calls do not leave Chrome processes behind.
        driver.quit()

    soup = bs(page_html, 'html.parser')

    match_cards = soup.find_all("li", attrs={"ng-repeat": "list in resultList | orderBy:'-timestamp'"})
    print(f"‚úÖ Found {len(match_cards)} match cards")

    matches_data = []

    for i, card in enumerate(match_cards, 1):
        try:
            order_tag = card.find('span', class_='vn-matchOrder')
            match_number = order_tag.get_text(strip=True) if order_tag else f"Match {i}"

            team_names = [t.get_text(strip=True) for t in card.find_all('h3') if t.get_text(strip=True)]
            # NOTE(review): team 2 is read from the third non-empty <h3>;
            # presumably the heading in between is not a team name --
            # confirm against the page markup.
            team1, team2 = team_names[0], team_names[2]

            datetime_element = card.find('div', class_='vn-matchDateTime')
            datetime_text = datetime_element.get_text(strip=True) if datetime_element else ""
            match_date, match_time = _parse_match_datetime(datetime_text, season)

            matches_data.append({
                "Match": match_number,
                "Team 1": team1,
                "Team 2": team2,
                "Date": match_date,
                "Time": match_time,
                "Season": season
            })

            print(f"{match_number}: {team1} vs {team2} | {match_date} | {match_time}")

        except Exception as e:
            # Best effort: one malformed card must not abort the season.
            print(f"‚ö†Ô∏è Error processing match {i}: {e}")
            continue

    df = pd.DataFrame(matches_data)
    # Flip the page order before returning.
    df = df.iloc[::-1].reset_index(drop=True)
    return df

In [4]:
def scrape_all_seasons(start_year=2008, end_year=2024, output_csv='ipl_all_matches.csv'):
    """Scrape every season in a year range and write the result to one CSV.

    Calls ``scrape_time`` once per season from ``start_year`` through
    ``end_year`` (inclusive). A season that raises or returns an empty
    frame is reported on stdout and skipped. If at least one season
    produced rows, the frames are concatenated in season order and saved
    to ``output_csv`` without the index column; otherwise nothing is
    written.

    Args:
        start_year: First season to scrape.
        end_year: Last season to scrape (inclusive).
        output_csv: Path of the CSV file to write.

    Returns:
        None. Results are delivered through the CSV file and stdout.
    """
    season_frames = []

    for season in range(start_year, end_year + 1):
        try:
            df = scrape_time(season)
            if df.empty:
                print(f"‚ö†Ô∏è No data found for {season}")
                continue
            season_frames.append(df)
            print(f"‚úÖ Season {season} added ({len(df)} matches)")
        except Exception as e:
            # Keep going: one failed season should not lose the others.
            print(f"‚ùå Error scraping season {season}: {e}")

    if not season_frames:
        print("No data collected!")
        return

    final_df = pd.concat(season_frames, ignore_index=True)
    final_df.to_csv(output_csv, index=False)
    print(f"\nüéâ All IPL match data saved to '{output_csv}' ({len(final_df)} total matches)")

In [5]:
# Run the full scrape for seasons 2008 through 2024 (default CSV path).
scrape_all_seasons(start_year=2008, end_year=2024)

Fetching IPL 2008 results from https://www.iplt20.com/matches/results/2008 ...
‚úÖ Found 59 match cards
Match 59: Chennai Super Kings vs Rajasthan Royals | 2008-06-01 | 8:00 pm
Match 58: Chennai Super Kings vs Punjab Kings | 2008-05-31 | 8:00 pm
Match 57: Delhi Daredevils vs Rajasthan Royals | 2008-05-30 | 8:00 pm
Match 56: Punjab Kings vs Rajasthan Royals | 2008-05-28 | 8:00 pm
Match 55: Royal Challengers Bangalore vs Mumbai Indians | 2008-05-28 | 4:00 pm
Match 54: Deccan Chargers vs Chennai Super Kings | 2008-05-27 | 8:00 pm
Match 53: Rajasthan Royals vs Mumbai Indians | 2008-05-26 | 8:00 pm
Match 52: Kolkata Knight Riders vs Punjab Kings | 2008-05-25 | 8:00 pm
Match 51: Deccan Chargers vs Royal Challengers Bangalore | 2008-05-25 | 4:00 pm
Match 50: Delhi Daredevils vs Mumbai Indians | 2008-05-24 | 8:00 pm
Match 49: Chennai Super Kings vs Rajasthan Royals | 2008-05-24 | 4:00 pm
Match 48: Punjab Kings vs Deccan Chargers | 2008-05-23 | 8:00 pm
Match 47: Delhi Daredevils vs Kolkata Knig