In [198]:
import re
import time
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [189]:
def make_request_n_soup(url, tag, class_, timeout=30):
    """Fetch a page and return every element matching a tag and CSS class.

    Parameters
    ----------
    url : str
        Address of the page to download.
    tag : str
        Name of the HTML tag to search for (for example ``'a'`` or ``'div'``).
    class_ : str
        Value of the ``class`` attribute the tag must carry.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` waits indefinitely, which would hang the notebook on a
        stalled connection.

    Returns
    -------
    bs4.element.ResultSet
        The matching elements, in document order (empty if none match).

    Raises
    ------
    requests.exceptions.RequestException
        If the request times out, fails, or the server answers with an
        HTTP error status.
    """
    print('Making request...')
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page and silently
    # returning no matches.
    response.raise_for_status()
    print('Making soup...')
    soup = BeautifulSoup(response.text, 'lxml')
    # A string passed as the second positional argument is treated by
    # BeautifulSoup as a filter on the CSS class attribute.
    return soup.find_all(tag, class_)

In [190]:
def extract_team_info(team_url, bs4_text):
    """Parse one team's link text from the auction page into a record.

    Parameters
    ----------
    team_url : str
        Site-relative link to the team page; it is appended to the
        cricbuzz base URL.
    bs4_text : str
        Text of the team link. Everything before the first ``'Purse'`` is
        taken as the team name; the first three numbers after it are read,
        in order, as starting purse, remaining purse and squad size.

    Returns
    -------
    dict
        Keys ``team_url``, ``team_name``, ``purse_start_at`` (float),
        ``purse_remaining`` (float) and ``squad_size`` (int).

    Raises
    ------
    ValueError
        If fewer than three numbers can be found in the text.
    """
    base_url = 'https://www.cricbuzz.com'
    team_name, separator, details = bs4_text.partition('Purse')
    # Only scan the part after the team name, so a digit inside the name
    # cannot shift the purse/squad fields. If the 'Purse' marker is missing,
    # fall back to scanning the whole text.
    numeric_source = details if separator else bs4_text
    purse_n_squad = re.findall(r'\d+(?:\.\d+)?', numeric_source)
    if len(purse_n_squad) < 3:
        raise ValueError(
            'Expected purse start, purse remaining and squad size in '
            f'{bs4_text!r}, found only {len(purse_n_squad)} number(s)'
        )
    team_info = {
        'team_url': base_url + team_url,
        'team_name': team_name,
        'purse_start_at': float(purse_n_squad[0]),
        'purse_remaining': float(purse_n_squad[1]),
        'squad_size': int(purse_n_squad[2])
    }
    return team_info

In [191]:
def scrape_ipl_2023_teams():
    """Scrape the IPL 2023 auction teams page and save the team table.

    Downloads the teams page, converts every matching team link into a
    record via ``extract_team_info``, and writes the records to
    'IPL 2023 teams.csv' (without the index column).

    Returns
    -------
    dict
        Mapping of team name to the absolute URL of that team's page.
    """
    teams_page = 'https://www.cricbuzz.com/cricket-series/ipl-2023/auction/teams'
    link_class = 'cb-col cb-col-100 cb-font-14'
    team_links = make_request_n_soup(teams_page, 'a', link_class)

    print('Extracting team info from soup...')
    team_records = [
        extract_team_info(link['href'], link.text) for link in team_links
    ]

    print('Saving teams...')
    teams_frame = pd.DataFrame(team_records)
    teams_frame.to_csv('IPL 2023 teams.csv', index=False)
    print('Done.')

    # Later records overwrite earlier ones if two teams share a name.
    name_to_url = {}
    for record in team_records:
        name_to_url[record['team_name']] = record['team_url']
    return name_to_url

In [192]:
def scrape_squads():
    """Scrape the squad of every IPL 2023 team.

    Calls ``scrape_ipl_2023_teams`` to get the team pages, then downloads
    each page and turns every matching player row into a record. Sleeps
    five seconds after each team page.

    Returns
    -------
    list of dict
        One record per player with keys ``player_name``, ``player_type``,
        ``player_auction_price``, ``retained_status``, ``ipl_team`` and
        ``player_country``.
    """
    row_class = 'cb-col cb-col-100 cb-font-14 cb-brdr-thin-btm cb-schdl'
    team_pages = scrape_ipl_2023_teams()
    all_players = []

    for team_name, team_url in team_pages.items():
        print(f'Scraping {team_name}...')
        player_rows = make_request_n_soup(team_url, 'div', row_class)

        print('Extracting player details...')
        for row in player_rows:
            # Drop non-breaking spaces, split the row text on double spaces
            # and discard the empty pieces left by longer runs of spaces.
            cleaned = row.text.strip().replace('\xa0', '')
            fields = [piece for piece in cleaned.split('  ') if piece]

            name = fields[0]
            # Second field holds "<type> • <country>".
            type_and_country = fields[1].split(' • ')
            price_field = fields[2]

            if 'RETAINED' in price_field:
                retained = 'Yes'
            else:
                retained = 'No'

            all_players.append({
                'player_name': name,
                'player_type': type_and_country[0],
                'player_auction_price': price_field.split()[-1],
                'retained_status': retained,
                'ipl_team': team_name,
                'player_country': type_and_country[1]
            })
        print('Done.')
        time.sleep(5)

    return all_players

In [193]:
# Run the full scrape: fetches the team list (which also writes
# 'IPL 2023 teams.csv'), then each team's squad page with a 5-second
# pause after every team. `squads` is a list of per-player dicts.
squads = scrape_squads()

Making request...
Making soup...
Extracting team info from soup...
Saving teams...
Done.
Scraping CSK...
Making request...
Making soup...
Extracting player details...
Done.
Scraping RCB...
Making request...
Making soup...
Extracting player details...
Done.
Scraping DC...
Making request...
Making soup...
Extracting player details...
Done.
Scraping GT...
Making request...
Making soup...
Extracting player details...
Done.
Scraping KKR...
Making request...
Making soup...
Extracting player details...
Done.
Scraping LSG...
Making request...
Making soup...
Extracting player details...
Done.
Scraping MI...
Making request...
Making soup...
Extracting player details...
Done.
Scraping PBKS...
Making request...
Making soup...
Extracting player details...
Done.
Scraping RR...
Making request...
Making soup...
Extracting player details...
Done.
Scraping SRH...
Making request...
Making soup...
Extracting player details...
Done.


In [194]:
# Persist the scraped player records as a CSV, omitting the DataFrame index.
pd.DataFrame(squads).to_csv('IPL 2023 squads.csv', index=False)