# Extracting IPL Data from Cricbuzz

In [210]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time

In [216]:
# Start Chrome through the chromedriver binary at this local path.
service = Service(executable_path='/Applications/Chdriver/chromedriver')
driver = webdriver.Chrome(service=service)

# Series page whose 'cb-series-matches' rows each link to one match.
url = 'https://www.cricbuzz.com/cricket-series/5945/indian-premier-league-2023/matches'

driver.get(url)

# Build the list of scorecard URLs, one per match row that carries a link.
match_urls = []
matches = driver.find_elements(By.CLASS_NAME, 'cb-series-matches')
for match in matches:
    # find_elements returns an empty list (instead of raising
    # NoSuchElementException) when a row has no link, so one odd row
    # cannot abort the whole harvest and leave the browser open.
    link_elements = match.find_elements(By.CLASS_NAME, 'text-hvr-underline')
    if not link_elements:
        continue
    link = link_elements[0].get_attribute('href')
    if not link:
        # An anchor without an href yields None; there is nothing to visit.
        continue
    # The listing links to the live-score page; the scorecard page uses the
    # same URL with this path segment swapped.
    link = link.replace("cricket-scores", "live-cricket-scorecard")
    match_urls.append(link)

# Column accumulators for the batting table: one entry per batter per innings.
# Each target receives its own, independent empty list.
(
    batter_name,
    batter_team,
    batter_date,
    runs_scored,
    balls_faced,
    fours,
    sixes,
    strike_rate,
) = ([] for _ in range(8))

# Column accumulators for the bowling table: one entry per bowler per innings.
(
    bowler_name,
    bowler_team,
    bowler_date,
    overs,
    maiden,
    runs,
    wicket,
    noball,
    wides,
    economy,
) = ([] for _ in range(10))

def _scorecard_date(browser):
    """Return the match date of the currently open scorecard as 'YYYY-MM-DD'.

    Raises NoSuchElementException if the page has no ``span.schedule-date``
    element, and ValueError if its text does not match '%A, %B %d, %Y'.
    """
    date = browser.find_element(By.CSS_SELECTOR, 'span.schedule-date')
    date_object = pd.to_datetime(date.text, format='%A, %B %d, %Y')
    return date_object.strftime('%Y-%m-%d')


def _innings_team(browser, innings_id):
    """Return the team name from the header of the given innings block.

    The header text is split on spaces and its last word is dropped; the
    remaining words are the team name. Raises NoSuchElementException when
    the innings block (or its header) is not on the page.
    """
    header = browser.find_element(
        By.XPATH, f'//*[@id="{innings_id}"]/div[1]/div[1]/span[1]')
    return ' '.join(header.text.split(' ')[:-1])


def _player_rows(browser, table_xpath, stat_count, trailing_rows=0):
    """Return ``[(name, [stat, ...]), ...]`` for one scorecard table.

    Only rows that contain a player link and at least ``stat_count``
    right-aligned cells are returned, so every returned row is complete.
    ``trailing_rows`` rows are dropped from the end of the table first.
    Raises NoSuchElementException when the table itself is missing.
    """
    table = browser.find_element(By.XPATH, table_xpath)
    rows = table.find_elements(By.CLASS_NAME, 'cb-scrd-itms')
    if trailing_rows:
        rows = rows[:-trailing_rows]

    parsed = []
    for row in rows:
        # find_elements never raises; an incomplete row is simply skipped
        # instead of aborting the match half-way through a record.
        links = row.find_elements(By.CLASS_NAME, 'cb-text-link')
        cells = row.find_elements(By.CLASS_NAME, 'text-right')
        if not links or len(cells) < stat_count:
            continue
        parsed.append((links[0].text, [cell.text for cell in cells[:stat_count]]))
    return parsed


def _store_rows(rows, team, played_on, names, teams, dates, stat_columns):
    """Append every parsed row to the column lists, one full record at a time."""
    for name, stats in rows:
        names.append(name)
        teams.append(team)
        dates.append(played_on)
        for column, value in zip(stat_columns, stats):
            column.append(value)


# Stat cells are read left to right into these lists, in this order.
batting_columns = (runs_scored, balls_faced, fours, sixes, strike_rate)
bowling_columns = (overs, maiden, runs, wicket, noball, wides, economy)
innings_ids = ('innings_1', 'innings_2')

for match_url in match_urls:
    driver.get(match_url)
    # NOTE(review): fixed pause kept from the original; an explicit wait on
    # the scorecard element would presumably be more reliable -- confirm.
    time.sleep(.5)

    # The date is a property of the page, so it is read once per match
    # rather than once per player row.
    try:
        played_on = _scorecard_date(driver)
    except NoSuchElementException:
        print(f'Skipping {match_url}: no match date on page')
        continue

    # Team names are likewise read once per innings. A missing innings
    # simply has no entry here.
    team_by_innings = {}
    for innings_id in innings_ids:
        try:
            team_by_innings[innings_id] = _innings_team(driver, innings_id)
        except NoSuchElementException:
            print(f'{match_url}: {innings_id} not found')

    # Batting tables: first innings, then second (same order as before).
    for innings_id in innings_ids:
        if innings_id not in team_by_innings:
            continue
        try:
            # NOTE(review): the last three rows are dropped as in the
            # original; presumably they are summary rows -- confirm against
            # the page layout.
            rows = _player_rows(
                driver, f'//*[@id="{innings_id}"]/div[1]',
                len(batting_columns), trailing_rows=3)
        except NoSuchElementException:
            print(f'{match_url}: no batting table in {innings_id}')
            continue
        _store_rows(rows, team_by_innings[innings_id], played_on,
                    batter_name, batter_team, batter_date, batting_columns)

    # Bowling tables: the bowlers listed inside one innings are labelled with
    # the team named in the header of the *other* innings.
    for innings_id, bowling_side_id in zip(innings_ids, reversed(innings_ids)):
        bowling_team = team_by_innings.get(bowling_side_id)
        if bowling_team is None:
            # Without the other innings there is no team label for these
            # bowlers; the original dropped them in this case as well.
            continue
        try:
            rows = _player_rows(
                driver, f'//*[@id="{innings_id}"]/div[4]',
                len(bowling_columns))
        except NoSuchElementException:
            print(f'{match_url}: no bowling table in {innings_id}')
            continue
        _store_rows(rows, bowling_team, played_on,
                    bowler_name, bowler_team, bowler_date, bowling_columns)

# Assemble the scraped columns into two tables and persist them. The browser
# is closed in ``finally`` so it does not stay open if building a frame or
# writing a file raises (e.g. column lists of unequal length).
try:
    batter_data = pd.DataFrame({
        'Name': batter_name,
        'Runs': runs_scored,
        'Balls': balls_faced,
        'Fours': fours,
        'Sixes': sixes,
        'Strike Rate': strike_rate,
        'Team': batter_team,
        'Date': batter_date
    })

    bowler_data = pd.DataFrame({
        'Name': bowler_name,
        'Overs': overs,
        'Maiden': maiden,
        'Runs': runs,
        'Wicket': wicket,
        'No Balls': noball,
        'Wides': wides,
        'Economy': economy,
        'Team': bowler_team,
        'Date': bowler_date
    })

    # index=False keeps the positional row index out of the files.
    batter_data.to_csv('ipl-2023-batter.csv', index=False)
    bowler_data.to_csv('ipl-2023-bowler.csv', index=False)
finally:
    driver.quit()

In [214]:
# Reload the bowler CSV from disk and display it as the cell's output.
# NOTE(review): the output captured below has an 'Unnamed: 0' column, which a
# file written with index=False would not contain -- presumably the output is
# from an earlier run; re-run this cell to refresh it.
data = pd.read_csv('ipl-2023-bowler.csv')
data


Unnamed: 0,Name,Overs,Maiden,Runs,Wicket,No Balls,Wides,Economy,Team,Date
0,Shami,4.0,0,29,2,1,0,7.2,Gujarat Titans,2023-03-31
1,Hardik Pandya (c),3.0,0,28,0,0,0,9.3,Gujarat Titans,2023-03-31
2,Joshua Little,4.0,0,41,1,0,0,10.2,Gujarat Titans,2023-03-31
3,Rashid Khan,4.0,0,26,2,0,0,6.5,Gujarat Titans,2023-03-31
4,Alzarri Joseph,4.0,0,33,2,0,0,8.2,Gujarat Titans,2023-03-31
...,...,...,...,...,...,...,...,...,...,...
887,Hardik Pandya (c),1.0,0,14,0,0,1,14.0,Gujarat Titans,2023-05-29
888,Rashid Khan,3.0,0,44,0,0,0,14.7,Gujarat Titans,2023-05-29
889,Noor Ahmad,3.0,0,17,2,0,3,5.7,Gujarat Titans,2023-05-29
890,Joshua Little,2.0,0,30,0,0,0,15.0,Gujarat Titans,2023-05-29
