In [11]:
import pandas as pd
from pprint import pprint
from bs4 import BeautifulSoup
import io

# Notebook-wide pandas display settings, so rendered frames are never truncated.
pd.options.display.max_colwidth = 200  # show up to 200 characters per cell
pd.options.display.max_columns = None  # display all columns
pd.options.display.max_rows = None  # display all rows

# Required Input files
# When running for the very first time, the `ipl2025_results.csv` file is required, containing all the team managers and an initial row of 0s.
# The `IPL2025MockAuctionSummary.csv` file is also required, listing each manager with their team and their players.

# Dependencies to install
#  pip3 install beautifulsoup4
#  pip3 install lxml ??? (Double check if required)
#  pip3 install html5lib ??? (Double check if required)
#  pip3 install pywhatkit
#  pip3 install matplotlib
#  pip3 install selenium
#  pip3 install tabulate

In [12]:
# Per-day labels: input and output files are backed up under these names for posterity.

# Bump this number for each new day before running the notebook.
day_num = 3

# Label for the current day and for the day immediately before it.
day = f'day_{day_num}'
prev_day = f'day_{day_num - 1}'

In [13]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Drive a real Chrome session through the IPL stats page and capture the page
# HTML after the "show all" MVP link has been clicked. The result is left in
# `html` for the parsing cell that follows.
driver = webdriver.Chrome()
try:
    url = 'https://www.iplt20.com/stats/2025'
    driver.get(url)

    # One explicit wait (10 s ceiling) reused for every control: the page is
    # rendered by JavaScript, so elements may not exist right after get().
    wait = WebDriverWait(driver, 10)

    # Open the control carrying the "awardsStats" class.
    button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "awardsStats")))
    button.click()

    # Pick the entry carrying the "ups" class
    # (presumably the MVP award option -- confirm against the live page).
    button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "ups")))
    button.click()

    # Expand the full MVP list via the link whose ng-click mentions 'showAllmvp'.
    button = wait.until(
        EC.element_to_be_clickable((By.XPATH, ".//a[contains(@ng-click, 'showAllmvp')]"))
    )

    # Clicked through JavaScript rather than a native click, as in the original cell.
    driver.execute_script("arguments[0].click();", button)

    # NOTE(review): the source is read immediately after the click; if the extra
    # rows load asynchronously an additional wait may be needed -- confirm.
    html = driver.page_source
finally:
    # Always shut the browser down, even when a lookup or click above fails,
    # so no orphaned Chrome/chromedriver process is left behind.
    driver.quit()

In [14]:
# Parse every HTML table on the scraped page and keep the first one that has a 'Pts' column.
tables = pd.read_html(io.StringIO(html))
pts_tables = [candidate for candidate in tables if 'Pts' in candidate]
mvp_df = pts_tables[0]

# Clean up the Player column: the last space-separated token is split off into 'Team'.
name_and_team = mvp_df['Player'].str.rsplit(' ', n=1, expand=True)
mvp_df[['Player', 'Team']] = name_and_team

# Collapse runs of whitespace in the remaining name and lower-case it.
mvp_df['Player'] = mvp_df['Player'].str.replace('\\s+', ' ', regex=True).str.lower()

# Save the snapshot for this day, then show the frame as the cell output.
mvp_df.to_csv(f'./data/mvp_{day}.csv', index=False)
mvp_df

Unnamed: 0,POS,Player,Pts,Mat,Wkts,Dots,4s,6s,Catches,Run outs,Stumpings,Team
0,1,ishan kishan,51.0,1,0,0,11,6,1,0.0,0,SRH
1,2,nicholas pooran,44.5,1,0,0,6,7,2,0.0,0,LSG
2,3,mitchell marsh,36.0,1,0,0,6,6,0,0.0,0,LSG
3,4,dhruv jurel,33.5,1,0,0,5,6,0,0.0,0,RR
4,5,travis head,33.0,1,0,0,9,3,0,0.0,0,SRH
5,6,sanju samson,31.5,1,0,0,7,4,0,0.0,0,RR
6,7,sunil narine,31.5,1,1,5,5,3,0,0.0,0,KKR
7,8,ashutosh sharma,30.0,1,0,0,5,5,0,0.0,0,DC
8,9,phil salt,29.5,1,0,0,9,2,0,0.0,0,RCB
9,10,ajinkya rahane,29.0,1,0,0,6,4,0,0.0,0,KKR


In [15]:
import requests

# Download the IPL 2025 points table from ESPNcricinfo and save it as this
# day's standings snapshot (./data/standings_<day>.csv).

# Headers to mimic a browser request. Defined in this cell so it runs on a
# fresh kernel: previously `headers` was only created by a later cell, which
# made Restart & Run All fail here with a NameError.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

url = 'https://www.espncricinfo.com/series/ipl-2025-1449924/points-table-standings'
# A timeout keeps the notebook from hanging forever if the site stops responding.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

tables = pd.read_html(io.StringIO(response.text))

# First table on the page that has a 'PT' column.
ipl_team_pts_tbl = [table for table in tables if 'PT' in table][0]

# Keep every second row and the first 12 columns
# (presumably the rows in between are per-team detail rows -- confirm against the page).
# .copy() makes this an independent frame so the column assignment below does
# not trigger SettingWithCopyWarning.
ipl_team_pts_tbl = ipl_team_pts_tbl.iloc[::2, :12].copy()

# Normalise team names: collapse whitespace runs, then strip every digit.
ipl_team_pts_tbl['Teams'] = ipl_team_pts_tbl['Teams'].replace('\\s+', ' ', regex=True).replace('\\d', '', regex=True)
ipl_team_pts_tbl.to_csv(f'./data/standings_{day}.csv',index=False)

In [16]:
# Get all players for ipl2025: scrape each team's squad page on iplt20.com and
# pickle the combined list of lower-cased player names to ./data/players.bin.
import requests
import pickle

# Headers to mimic a browser request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# URLs of the IPL team pages (one per franchise)
urls = [
    'https://www.iplt20.com/teams/chennai-super-kings',
    'https://www.iplt20.com/teams/delhi-capitals',
    'https://www.iplt20.com/teams/gujarat-titans',
    'https://www.iplt20.com/teams/kolkata-knight-riders',
    'https://www.iplt20.com/teams/lucknow-super-giants',
    'https://www.iplt20.com/teams/mumbai-indians',
    'https://www.iplt20.com/teams/punjab-kings',
    'https://www.iplt20.com/teams/rajasthan-royals',
    'https://www.iplt20.com/teams/royal-challengers-bangalore',
    'https://www.iplt20.com/teams/sunrisers-hyderabad',
]
players = []
for url in urls:
    # Send GET request; the timeout stops one stalled page from blocking the
    # whole loop indefinitely, and raise_for_status aborts on an HTTP error.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Each player name is read from the <h2> inside a div with class 'ih-p-name'.
    player_divs = soup.find_all('div', class_='ih-p-name')
    players.extend(
        player_div.find('h2').get_text(strip=True).lower() for player_div in player_divs
    )

# Persist the collected names as a pickled list.
with open('./data/players.bin', 'wb') as f:
    pickle.dump(players, f)