In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from io import StringIO



In [3]:


headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}


In [4]:

# Initialize current_df correctly
current_df = pd.DataFrame(columns=["Player"])

year = 2010
for i in range(3):
    url = f'https://stats.espncricinfo.com/ci/engine/stats/index.html?class=3;spanmax1=10+Jul+{year};spanval1=span;template=results;type=batting'
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Check if the request was successful

        # Parse the HTML content
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all tables in the page
        tables = soup.find_all('table')

        # Convert the first table to a pandas DataFrame
        if tables:
            df = pd.DataFrame(pd.read_html(StringIO(str(tables[2])))[0])
            if 'Player' in df.columns and 'Runs' in df.columns:
                df = df[['Player', 'Runs']]
                df.rename(columns={'Runs': f'{year}'}, inplace=True)
                current_df = pd.merge(current_df, df, on='Player', how='outer')

            else:
                print(f"'Player' or 'Runs' column not found for the year {year}")
        else:
            print("No tables found on the page.")
    except requests.HTTPError as e:
        print(f"HTTP error occurred: {e.response.status_code} - {e.response.reason}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

    year = year + 1
    

print(current_df)


                   Player    2010    2011    2012
0         A Symonds (AUS)   337.0   337.0     NaN
1     AB de Villiers (SA)   579.0   604.0   680.0
2      Abdul Razzaq (PAK)     NaN   346.0     NaN
3        BB McCullum (NZ)  1100.0  1100.0  1352.0
4         BJ Haddin (AUS)   300.0   335.0     NaN
5        BRM Taylor (ZIM)     NaN     NaN   373.0
6      C Kieswetter (ENG)     NaN     NaN   411.0
7           CH Gayle (WI)   617.0   617.0   757.0
8      CJ Chibhabha (ZIM)     NaN     NaN   370.0
9          CL White (AUS)   505.0   519.0   632.0
10        DA Warner (AUS)   644.0   706.0   866.0
11          DJ Bravo (WI)   344.0   344.0   470.0
12        DJ Hussey (AUS)   579.0   622.0   752.0
13  DPMD Jayawardene (SL)   760.0   856.0   955.0
14       EJG Morgan (ENG)   360.0   520.0   603.0
15        G Gambhir (IND)   621.0   621.0   746.0
16          GC Smith (SA)   803.0   958.0   982.0
17      H Masakadza (ZIM)   329.0   411.0   559.0
18          HH Gibbs (SA)   400.0   400.0   400.0


In [7]:

# List of countries
countries = pd.read_csv(r'data/country_shortcodes.csv', dtype=str)  

print(countries.tail(5))

# row_index = countries[countries["Code"] == 'IND'].index[0]
# print(row_index)

# Function to get Shortcode based on Code
def get_row(input):
    rows = countries[countries["Code"] == input].index
    if len(rows) > 0:
        row_index = rows[0]
        if len(countries.loc[row_index, "Lowercase shortcode"]) == 2:
            return f'https://public.flourish.studio/country-flags/svg/{countries.loc[row_index, "Lowercase shortcode"]}.svg'
        else:
            return countries.loc[row_index, "Lowercase shortcode"]
    else:
        return 'Unknown'  # Or any default value you want to assign

# Apply function to get Shortcode for each country in current_df
current_df["Flag"] = current_df["Country"].apply(get_row)

print(current_df.head(5))

            Name  Code Shortcode  \
245        Yemen    YE      :YE:   
246       Zambia    ZM      :ZM:   
247     Zimbabwe   ZIM     :ZIM:   
248  West Indies    WI       NaN   
249     Scotland  SCOT       NaN   

                                   Lowercase shortcode  
245                                               :ye:  
246                                               :zm:  
247                                              :zim:  
248  https://upload.wikimedia.org/wikipedia/en/9/9b...  
249  https://upload.wikimedia.org/wikipedia/commons...  


KeyError: 'Country'

In [5]:
excel_file = "output.xlsx" 
 
# Write DataFrame to Excel 
current_df.to_excel(excel_file, index=False) 