In [1]:
import requests
import html5lib
from bs4 import BeautifulSoup
import pandas as pd
import datetime
import numpy as np
from os import path
import pickle
import csv
import os

In [2]:
IPL_ID=117
TEAMS = [
    ['CSK',4343],
    ['deccan chargers',4347],
    ['DD',4344],
    ['GL',5845],
    ['KXIP',4342],
    ['KTK',4788],
    ['KKR',4341],
    ['MI',4346],
    ['PW',4787],
    ['RR',4345],
    ['RPS',5843],
    ['RCB',4340],
    ['SH',5143]]

BATAVG_BASEURL = 'https://stats.espncricinfo.com/ci/engine/records/averages/batting.html?id={};team={};type=trophy'
BOWLAVG_BASEURL = 'https://stats.espncricinfo.com/ci/engine/records/averages/bowling.html?id={};team={};type=trophy'
PLAYER_BASEURL = 'https://www.espncricinfo.com/england/content/player/{}.html'

PLAYER_CSV_PATH = os.path.abspath(os.getcwd()) + '//data//players_data.csv'
PROCESSED_PLAYERS_LOG = os.path.abspath(os.getcwd()) + '//data//processed_players.pkl'

In [3]:
def init():
    global players_df, players_processed, players_column
    # Load Files Processed List
    if(path.isfile(PROCESSED_PLAYERS_LOG) == False):
        players_processed = set()
    else:
        with open(PROCESSED_PLAYERS_LOG, "rb") as file_handle:
            players_processed = pickle.load(file_handle)
            
    players_column = ['player_code', 'player_id', 'full_name', 'country', 'role', 'batting_style', 'bowling_style', 'born']
    if(path.isfile(PLAYER_CSV_PATH) == False):
        players_df = pd.DataFrame(columns=players_column)
    else:
        players_df = pd.read_csv(PLAYER_CSV_PATH)

In [4]:
def save_processed_files():
    with open(PROCESSED_PLAYERS_LOG, "wb") as file_handle:
        pickle.dump(players_processed, file_handle)
        
    players_df.to_csv(PLAYER_CSV_PATH, index = False, header=True)

In [5]:
init()
for team in TEAMS:
    url_list = [BATAVG_BASEURL.format(IPL_ID, team[1]), BOWLAVG_BASEURL.format(IPL_ID, team[1])]
    for url in url_list:
        print("Processing URL: ", url)
        data = requests.get(url).content
        soup = BeautifulSoup(data, 'html5lib')
        rows = soup.findAll('a', attrs={'class':'data-link'})
        player_data = []
        for entry in rows:
            player = {}
            player['player_code'] = entry.string
            player['player_id'] = entry['href'].split('/')[-1][:-5]
            player_data.append(player)
        if(len(player_data) > 0):
            players_df = players_df.append(player_data)
players_df = players_df.drop_duplicates()
players_df.index = players_df['player_id']

# save_processed_files()

Processing URL:  https://stats.espncricinfo.com/ci/engine/records/averages/batting.html?id=117;team=4343;type=trophy
Processing URL:  https://stats.espncricinfo.com/ci/engine/records/averages/bowling.html?id=117;team=4343;type=trophy
Processing URL:  https://stats.espncricinfo.com/ci/engine/records/averages/batting.html?id=117;team=4347;type=trophy
Processing URL:  https://stats.espncricinfo.com/ci/engine/records/averages/bowling.html?id=117;team=4347;type=trophy
Processing URL:  https://stats.espncricinfo.com/ci/engine/records/averages/batting.html?id=117;team=4344;type=trophy
Processing URL:  https://stats.espncricinfo.com/ci/engine/records/averages/bowling.html?id=117;team=4344;type=trophy
Processing URL:  https://stats.espncricinfo.com/ci/engine/records/averages/batting.html?id=117;team=5845;type=trophy
Processing URL:  https://stats.espncricinfo.com/ci/engine/records/averages/bowling.html?id=117;team=5845;type=trophy
Processing URL:  https://stats.espncricinfo.com/ci/engine/record

In [7]:
players_df

Unnamed: 0_level_0,player_code,player_id,full_name,country,role,batting_style,bowling_style,born
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
290629,P Amarnath,290629,,India,,Right-hand bat,Right-arm medium,"\nJune 1, 1982"
26798,S Anirudha,26798,,India,Top-order batsman,Right-hand bat,,"\nApril 14, 1987, Madras (now Chennai), Tamil ..."
319745,KB Arun Karthik,319745,,India,Batsman,Right-hand bat,Legbreak googly,"\nFebruary 15, 1986, Walajapet, Tamil Nadu"
26421,R Ashwin,26421,,India,Bowling allrounder,Right-hand bat,Right-arm offbreak,"\nSeptember 17, 1986, Madras (now Chennai), Ta..."
1083030,KM Asif,1083030,,India,Bowler,Right-hand bat,Right-arm medium,"\nJuly 24, 1993, Edavanna,Malappuram"
...,...,...,...,...,...,...,...,...
25913,Mohammad Nabi,25913,,,,,,
793463,Rashid Khan,793463,,,,,,
319744,X Thalaivan Sargunam,319744,,,,,,
720465,Virat Singh,720465,,,,,,


In [6]:
print(len(players_df))

1167


In [21]:
for index in players_df.index:
    if(index in players_processed):
        continue
    player_url = PLAYER_BASEURL.format(index)
    print("Processing Player: ", players_df['player_code'][index], " URL:", player_url)
    data = requests.get(player_url).content
    soup = BeautifulSoup(data, 'html5lib')
    player_head_div = soup.find('div', attrs={'class':'ciPlayernametxt'})
    full_name = player_head_div.find('h1').string
    country = player_head_div.find('h3', attrs={'class':'PlayersSearchLink'}).string
    players_df['full_name'][index] = full_name
    players_df['country'][index] = country
    player_attributes = soup.findAll('p', attrs={'class':'ciPlayerinformationtxt'})
    for attribute in player_attributes:
        data_desc = attribute.find('b').string
        if data_desc == 'Born':
            players_df['born'][index] = attribute.find('span').string
        elif data_desc == 'Batting style':
            players_df['batting_style'][index] = attribute.find('span').string
        elif data_desc == 'Bowling style':
            players_df['bowling_style'][index] = attribute.find('span').string
        elif data_desc == 'Playing role':
            players_df['role'][index] = attribute.find('span').string
    players_processed.add(index)
save_processed_files()

Processing Player:  DE Bollinger  URL: https://www.espncricinfo.com/england/content/player/4508.html
Processing Player:  DJ Bravo  URL: https://www.espncricinfo.com/england/content/player/51439.html
Processing Player:  DL Chahar  URL: https://www.espncricinfo.com/england/content/player/447261.html
Processing Player:  PP Chawla  URL: https://www.espncricinfo.com/england/content/player/32966.html
Processing Player:  SM Curran  URL: https://www.espncricinfo.com/england/content/player/662973.html
Processing Player:  MS Dhoni  URL: https://www.espncricinfo.com/england/content/player/28081.html
Processing Player:  F du Plessis  URL: https://www.espncricinfo.com/england/content/player/44828.html
Processing Player:  SP Fleming  URL: https://www.espncricinfo.com/england/content/player/37000.html
Processing Player:  A Flintoff  URL: https://www.espncricinfo.com/england/content/player/12856.html
Processing Player:  RD Gaikwad  URL: https://www.espncricinfo.com/england/content/player/1060380.html


Processing Player:  Harmeet Singh  URL: https://www.espncricinfo.com/england/content/player/391128.html
Processing Player:  DJ Harris  URL: https://www.espncricinfo.com/england/content/player/5583.html
Processing Player:  RJ Harris  URL: https://www.espncricinfo.com/england/content/player/5779.html
Processing Player:  IR Jaggi  URL: https://www.espncricinfo.com/england/content/player/279554.html
Processing Player:  Jaskaran Singh  URL: https://www.espncricinfo.com/england/content/player/376102.html
Processing Player:  AA Jhunjhunwala  URL: https://www.espncricinfo.com/england/content/player/29702.html
Processing Player:  D Kalyankrishna  URL: https://www.espncricinfo.com/england/content/player/30719.html
Processing Player:  VVS Laxman  URL: https://www.espncricinfo.com/england/content/player/30750.html
Processing Player:  MJ Lumb  URL: https://www.espncricinfo.com/england/content/player/16406.html
Processing Player:  CA Lynn  URL: https://www.espncricinfo.com/england/content/player/326

Processing Player:  AB McDonald  URL: https://www.espncricinfo.com/england/content/player/6553.html
Processing Player:  GD McGrath  URL: https://www.espncricinfo.com/england/content/player/6565.html
Processing Player:  MF Maharoof  URL: https://www.espncricinfo.com/england/content/player/49638.html
Processing Player:  AD Mathews  URL: https://www.espncricinfo.com/england/content/player/49764.html
Processing Player:  GJ Maxwell  URL: https://www.espncricinfo.com/england/content/player/325026.html
Processing Player:  BMAJ Mendis  URL: https://www.espncricinfo.com/england/content/player/49700.html
Processing Player:  Mohammad Asif  URL: https://www.espncricinfo.com/england/content/player/41411.html
Processing Player:  Mohammed Shami  URL: https://www.espncricinfo.com/england/content/player/481896.html
Processing Player:  M Morkel  URL: https://www.espncricinfo.com/england/content/player/46538.html
Processing Player:  C Munro  URL: https://www.espncricinfo.com/england/content/player/232359

Processing Player:  F Behardien  URL: https://www.espncricinfo.com/england/content/player/44410.html
Processing Player:  BA Bhatt  URL: https://www.espncricinfo.com/england/content/player/433410.html
Processing Player:  Bipul Sharma  URL: https://www.espncricinfo.com/england/content/player/35928.html
Processing Player:  MS Bisla  URL: https://www.espncricinfo.com/england/content/player/27280.html
Processing Player:  RS Bopara  URL: https://www.espncricinfo.com/england/content/player/10582.html
Processing Player:  RR Bose  URL: https://www.espncricinfo.com/england/content/player/27572.html
Processing Player:  KC Cariappa  URL: https://www.espncricinfo.com/england/content/player/777537.html
Processing Player:  SD Chitnis  URL: https://www.espncricinfo.com/england/content/player/337916.html
Processing Player:  SS Cottrell  URL: https://www.espncricinfo.com/england/content/player/495551.html
Processing Player:  P Dharmani  URL: https://www.espncricinfo.com/england/content/player/28067.html

Processing Player:  A Chopra  URL: https://www.espncricinfo.com/england/content/player/27639.html
Processing Player:  TK Curran  URL: https://www.espncricinfo.com/england/content/player/550235.html
Processing Player:  DB Das  URL: https://www.espncricinfo.com/england/content/player/328163.html
Processing Player:  C de Grandhomme  URL: https://www.espncricinfo.com/england/content/player/55395.html
Processing Player:  M de Lange  URL: https://www.espncricinfo.com/england/content/player/393279.html
Processing Player:  JL Denly  URL: https://www.espncricinfo.com/england/content/player/12454.html
Processing Player:  LH Ferguson  URL: https://www.espncricinfo.com/england/content/player/493773.html
Processing Player:  SC Ganguly  URL: https://www.espncricinfo.com/england/content/player/28779.html
Processing Player:  RS Gavaskar  URL: https://www.espncricinfo.com/england/content/player/28792.html
Processing Player:  AN Ghosh  URL: https://www.espncricinfo.com/england/content/player/220435.html

Processing Player:  C Nanda  URL: https://www.espncricinfo.com/england/content/player/32093.html
Processing Player:  GR Napier  URL: https://www.espncricinfo.com/england/content/player/17991.html
Processing Player:  A Nel  URL: https://www.espncricinfo.com/england/content/player/46569.html
Processing Player:  HH Pandya  URL: https://www.espncricinfo.com/england/content/player/625371.html
Processing Player:  KH Pandya  URL: https://www.espncricinfo.com/england/content/player/471342.html
Processing Player:  JL Pattinson  URL: https://www.espncricinfo.com/england/content/player/272465.html
Processing Player:  RV Pawar  URL: https://www.espncricinfo.com/england/content/player/32305.html
Processing Player:  RJ Peterson  URL: https://www.espncricinfo.com/england/content/player/46750.html
Processing Player:  KA Pollard  URL: https://www.espncricinfo.com/england/content/player/230559.html
Processing Player:  SM Pollock  URL: https://www.espncricinfo.com/england/content/player/46774.html
Proces

Processing Player:  BA Stokes  URL: https://www.espncricinfo.com/england/content/player/311158.html
Processing Player:  SW Tait  URL: https://www.espncricinfo.com/england/content/player/8076.html
Processing Player:  O Thomas  URL: https://www.espncricinfo.com/england/content/player/914567.html
Processing Player:  SK Trivedi  URL: https://www.espncricinfo.com/england/content/player/35353.html
Processing Player:  AJ Turner  URL: https://www.espncricinfo.com/england/content/player/500268.html
Processing Player:  A Uniyal  URL: https://www.espncricinfo.com/england/content/player/35586.html
Processing Player:  AC Voges  URL: https://www.espncricinfo.com/england/content/player/8119.html
Processing Player:  SK Warne  URL: https://www.espncricinfo.com/england/content/player/8166.html
Processing Player:  DH Yagnik  URL: https://www.espncricinfo.com/england/content/player/36071.html
Processing Player:  Younis Khan  URL: https://www.espncricinfo.com/england/content/player/43652.html
Processing Pl