In [1]:
import pandas as pd
import ssl 
import os
import logging
from pathlib import Path
import time
import requests
from bs4 import BeautifulSoup


ssl._create_default_https_context = ssl._create_unverified_context

Table: 'career_avg_bowler'

In [2]:
def career_avg_bowler(player_ids):

    career_avg_bowler_merged = pd.DataFrame()
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    for p_id in player_ids:
        url = "https://stats.espncricinfo.com/ci/engine/player/" + p_id + ".html?class=2;template=results;type=bowling"
        response = requests.get(url, headers=headers)

        dismissal_url = "https://stats.espncricinfo.com/ci/engine/player/" + p_id + ".html?class=2;template=results;type=bowling;view=dismissal_summary"
        dismissal_response = requests.get(dismissal_url, headers=headers)

        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            tables = pd.read_html(str(soup))

            if len(tables) > 2 and "Span" in tables[2].columns:
                career_avg_bowler = tables[2]
                career_avg_bowler = career_avg_bowler[["Span","Inns","Overs","Mdns","Runs","Wkts","Ave","Econ","SR"]]
                career_avg_bowler.insert(0, "P_id", [p_id], True)
                
        if dismissal_response.status_code == 200:
            dismissal_soup = BeautifulSoup(dismissal_response.content, 'html.parser')
            dismissal_tables = pd.read_html(str(dismissal_soup))
                    
            if len(dismissal_tables) > 2 and "Grouping" in dismissal_tables[3].columns:
                dismissal = dismissal_tables[3]
                dismissal_types = ["caught", "bowled", "leg before wicket"]
                dismissal.set_index("Grouping", inplace=True)
                dismissal = dismissal.loc[dismissal_types, ["Dis"]]
                dismissal_dict = dismissal.to_dict()
                dismissal = pd.DataFrame.from_dict(dismissal_dict.values())
                        
                career_avg_bowler = pd.concat([career_avg_bowler, dismissal], axis=1)
                
                career_avg_bowler_merged = pd.concat([career_avg_bowler_merged, career_avg_bowler], ignore_index=True)
            time.sleep(0.1)
        else:
            print(f"Failed to retrieve data for player ID {p_id}, status code: {response.status_code}")

    filepath = Path('Csv/Career_avg_bowler.csv')
    career_avg_bowler_merged.to_csv(filepath, index_label="row_id")
    print(career_avg_bowler_merged)


Table: 'vs_country_bowler'

In [3]:
def vs_country_bowler(player_ids):

    vs_country_bowler_merged = pd.DataFrame()
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    for p_id in player_ids:
        url = "https://stats.espncricinfo.com/ci/engine/player/" + p_id + ".html?class=2;template=results;type=bowling"
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            tables = pd.read_html(str(soup))

            if len(tables) > 3:
                vs_country_bowler = tables[3]
                vs_country_bowler = vs_country_bowler[["Grouping","Inns","Overs","Mdns","Runs","Wkts","Ave","Econ","SR"]]
                vs_country_bowler.rename(columns={'Grouping': 'Country'}, inplace=True)
                vs_country_bowler.insert(0, "P_id", p_id, True)

                countries = ['v Pakistan', 'v Australia', 'v England', 'v New Zealand', 'v West Indies', 'v Sri Lanka', 'v Bangladesh', 'v South Africa']
                select_vs_country_bowler = vs_country_bowler.loc[vs_country_bowler['Country'].isin(countries)]
                
                vs_country_bowler_merged = pd.concat([vs_country_bowler_merged, select_vs_country_bowler], ignore_index=True)
            time.sleep(0.1)
        else:
            print(f"Failed to retrieve data for player ID {p_id}, status code: {response.status_code}")

    filepath = Path('Csv/Vs_country_bowler.csv')
    vs_country_bowler_merged.to_csv(filepath, index_label="row_id")
    print(vs_country_bowler_merged)

Table: 'home_vs_away_bowler'

In [4]:
def home_vs_away_bowler(player_ids):

    home_vs_away_bowler_merged = pd.DataFrame()
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    for p_id in player_ids:
        url = "https://stats.espncricinfo.com/ci/engine/player/" + p_id + ".html?class=2;template=results;type=bowling"
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            tables = pd.read_html(str(soup))

            if len(tables) > 3:
                home_vs_away_bowler = tables[3]
                home_vs_away_bowler = home_vs_away_bowler[["Grouping","Inns","Overs","Mdns","Runs","Wkts","Ave","Econ","SR"]]
                home_vs_away_bowler.rename(columns={'Grouping': 'Venue'}, inplace=True)
                home_vs_away_bowler.insert(0, "P_id", p_id, True)

                venues = ['home', 'away', 'neutral']
                select_home_vs_away_bowler = home_vs_away_bowler.loc[home_vs_away_bowler['Venue'].isin(venues)]
                
                home_vs_away_bowler_merged = pd.concat([home_vs_away_bowler_merged, select_home_vs_away_bowler], ignore_index=True)
            time.sleep(0.1)
        else:
            print(f"Failed to retrieve data for player ID {p_id}, status code: {response.status_code}")

    filepath = Path('Csv/Home_vs_away_bowler.csv')
    home_vs_away_bowler_merged.to_csv(filepath, index_label="row_id")
    print(home_vs_away_bowler_merged)

Table: 'yearly_stats_bowler'

In [5]:
def yearly_stats_bowler(player_ids):

    yearly_stats_bowler_merged = pd.DataFrame()
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    for p_id in player_ids:
        url = "https://stats.espncricinfo.com/ci/engine/player/" + p_id + ".html?class=2;template=results;type=bowling"
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            tables = pd.read_html(str(soup))

            if len(tables) > 3:
                yearly_stats_bowler = tables[3]
                yearly_stats_bowler = yearly_stats_bowler[["Grouping","Inns","Overs","Mdns","Runs","Wkts","Ave","Econ","SR"]]
                yearly_stats_bowler.rename(columns={'Grouping': 'Year'}, inplace=True)
                yearly_stats_bowler.insert(0, "P_id", p_id, True)

                select_yearly_stats_bowler = yearly_stats_bowler.loc[yearly_stats_bowler['Year'].str.startswith('year ', na=False)]
                yearly_stats_bowler_merged = pd.concat([yearly_stats_bowler_merged, select_yearly_stats_bowler], ignore_index=True)
            time.sleep(0.1)
        else:
            print(f"Failed to retrieve data for player ID {p_id}, status code: {response.status_code}")

    filepath = Path('Csv/Yearly_stats_bowler.csv')
    yearly_stats_bowler_merged.to_csv(filepath, index_label="row_id")
    print(yearly_stats_bowler_merged)

#Main function below:

In [6]:
#234675: Ravindrasinh Anirudhsinh Jadeja (ALL)
#625371: Hardik Himanshu Pandya (ALL)
#26421: Ravichandran Ashwin (ALL)
#625383: Jasprit Jasbirsingh Bumrah (BOW)
#481896: Mohammed Shami Ahmed (BOW)
#326016: Bhuvneshwar Kumar Singh (BOW)
#376116: Umeshkumar Tilak Yadav (BOW)

player_ids=['234675', '625371', '26421', '625383', '481896', '326016', '376116']

os.system("rm -f Csv/*_bowler.csv")

career_avg_bowler(player_ids)
vs_country_bowler(player_ids)
home_vs_away_bowler(player_ids)
yearly_stats_bowler(player_ids)


  tables = pd.read_html(str(soup))
  dismissal_tables = pd.read_html(str(dismissal_soup))
  tables = pd.read_html(str(soup))
  dismissal_tables = pd.read_html(str(dismissal_soup))
  tables = pd.read_html(str(soup))
  dismissal_tables = pd.read_html(str(dismissal_soup))
  tables = pd.read_html(str(soup))
  dismissal_tables = pd.read_html(str(dismissal_soup))
  tables = pd.read_html(str(soup))
  dismissal_tables = pd.read_html(str(dismissal_soup))
  tables = pd.read_html(str(soup))
  dismissal_tables = pd.read_html(str(dismissal_soup))
  tables = pd.read_html(str(soup))
  dismissal_tables = pd.read_html(str(dismissal_soup))


     P_id       Span  Inns   Overs  Mdns  Runs  Wkts    Ave  Econ    SR  \
0  234675  2009-2023   189  1625.0    56  7936   220  36.07  4.88  44.3   
1  625371  2016-2023    80   533.1    15  2960    84  35.23  5.55  38.0   
2   26421  2010-2023   114  1050.3    37  5180   156  33.20  4.93  40.4   
3  625383  2016-2023    88   763.2    57  3509   149  23.55  4.59  30.7   
4  481896  2013-2023   100   830.5    51  4618   195  23.68  5.55  25.5   
5  326016  2012-2022   120   974.3    68  4951   141  35.11  5.08  41.4   
6  376116  2010-2018    73   593.0    23  3565   106  33.63  6.01  33.5   

   caught  bowled  leg before wicket  
0   103.0    58.0               37.0  
1    71.0     8.0                5.0  
2    84.0    29.0               25.0  
3    90.0    45.0               14.0  
4   122.0    61.0               12.0  
5   105.0    26.0               10.0  
6    76.0    20.0               10.0  


  tables = pd.read_html(str(soup))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vs_country_bowler.rename(columns={'Grouping': 'Country'}, inplace=True)
  tables = pd.read_html(str(soup))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vs_country_bowler.rename(columns={'Grouping': 'Country'}, inplace=True)
  tables = pd.read_html(str(soup))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vs_country_bowler.rename(columns={'Grouping': 'Country'}, inplace=True)
  tables = pd.read_html(str(soup))
A value is trying to

      P_id         Country Inns  Overs Mdns  Runs Wkts    Ave  Econ    SR
0   234675     v Australia   41  378.3    6  1985   37  53.64  5.24  61.3
1   234675    v Bangladesh   13  108.0    2   481   14  34.35  4.45  46.2
2   234675       v England   25  199.3    4   952   39  24.41  4.77  30.6
3   234675   v New Zealand   14  125.0    2   654    8  81.75  5.23  93.7
4   234675      v Pakistan   11   97.5    5   474   12  39.50  4.84  48.9
5   234675  v South Africa   10   89.1    4   402   15  26.80  4.50  35.6
6   234675     v Sri Lanka   28  223.5    8  1081   28  38.60  4.82  47.9
7   234675   v West Indies   32  268.1   16  1315   44  29.88  4.90  36.5
8   625371     v Australia   11   62.0    0   399   12  33.25  6.43  31.0
9   625371    v Bangladesh    3   14.3    0   102    3  34.00  7.03  29.0
10  625371       v England   11   83.3    4   493   12  41.08  5.90  41.7
11  625371   v New Zealand   14   95.0    4   545   15  36.33  5.73  38.0
12  625371      v Pakistan    6   41.5

  tables = pd.read_html(str(soup))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_vs_away_bowler.rename(columns={'Grouping': 'Venue'}, inplace=True)
  tables = pd.read_html(str(soup))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_vs_away_bowler.rename(columns={'Grouping': 'Venue'}, inplace=True)
  tables = pd.read_html(str(soup))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_vs_away_bowler.rename(columns={'Grouping': 'Venue'}, inplace=True)
  tables = pd.read_html(str(soup))
A value is trying to

      P_id    Venue Inns  Overs Mdns  Runs Wkts    Ave  Econ    SR
0   234675     home   77  701.5   25  3393  111  30.56  4.83  37.9
1   234675     away   66  535.5   18  2682   52  51.57  5.00  61.8
2   234675  neutral   46  387.2   13  1861   57  32.64  4.80  40.7
3   625371     home   31  195.3    5  1161   35  33.17  5.93  33.5
4   625371     away   33  211.5    5  1104   33  33.45  5.21  38.5
5   625371  neutral   16  125.5    5   695   16  43.43  5.52  47.1
6    26421     home   45  422.4    9  2129   70  30.41  5.03  36.2
7    26421     away   42  371.1   14  1875   42  44.64  5.05  53.0
8    26421  neutral   27  256.4   14  1176   44  26.72  4.58  35.0
9   625383     home   41  357.3   25  1709   64  26.70  4.78  33.5
10  625383     away   29  250.0   18  1117   55  20.30  4.46  27.2
11  625383  neutral   18  155.5   14   683   30  22.76  4.38  31.1
12  481896     home   44  353.5   27  2062   85  24.25  5.82  24.9
13  481896     away   39  335.3   15  1843   75  24.57  5.49  

  tables = pd.read_html(str(soup))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yearly_stats_bowler.rename(columns={'Grouping': 'Year'}, inplace=True)
  tables = pd.read_html(str(soup))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yearly_stats_bowler.rename(columns={'Grouping': 'Year'}, inplace=True)
  tables = pd.read_html(str(soup))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yearly_stats_bowler.rename(columns={'Grouping': 'Year'}, inplace=True)
  tables = pd.read_html(str(soup))
A value is trying to be

      P_id       Year Inns  Overs Mdns  Runs Wkts    Ave  Econ     SR
0   234675  year 2009   13   97.4    4   506   10  50.60  5.18   58.6
1   234675  year 2010   20  158.0    7   739   19  38.89  4.67   49.8
2   234675  year 2011   13  112.0    5   555   24  23.12  4.95   28.0
3   234675  year 2012    9   71.4    0   390    4  97.50  5.44  107.5
4   234675  year 2013   34  303.3   22  1321   52  25.40  4.35   35.0
..     ...        ...  ...    ...  ...   ...  ...    ...   ...    ...
62  376116  year 2014    9   71.2    3   381   17  22.41  5.34   25.1
63  376116  year 2015   14  110.2    6   632   25  25.28  5.72   26.4
64  376116  year 2016   10   88.5    2   581   15  38.73  6.54   35.5
65  376116  year 2017    8   67.2    3   388   15  25.86  5.76   26.9
66  376116  year 2018    4   39.5    0   275    4  68.75  6.90   59.7

[67 rows x 10 columns]


  tables = pd.read_html(str(soup))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yearly_stats_bowler.rename(columns={'Grouping': 'Year'}, inplace=True)
