In [683]:
import json
import pandas as pd
import re
import time
import requests
import numpy as np

In [684]:
# Base URLs of the two stats feeds, without the dynamic query string.
base_batsman_url = "https://ipl-stats-sports-mechanic.s3.ap-south-1.amazonaws.com/ipl/feeds/stats/203-toprunsscorers.js"
base_bowler_url = "https://ipl-stats-sports-mechanic.s3.ap-south-1.amazonaws.com/ipl/feeds/stats/203-mostwickets.js"

# Each feed is parsed later in this notebook as "<callback>(<json>)" with its own
# callback name, so each URL should request the matching callback.
# `callback_param` keeps its original name and value for the batsman feed.
callback_param = "ontoprunsscorers"
bowler_callback_param = "onmostwickets"

# Current time in milliseconds, sent as the `_` query parameter
# (presumably a cache-buster -- confirm against the feed's behaviour).
timestamp_ms = int(time.time() * 1000)
print("*********** creating batsman url *************")
full_batsman_url = f"{base_batsman_url}?callback={callback_param}&_={timestamp_ms}"
print("*********** creating bowler url **************")
full_bowler_url = f"{base_bowler_url}?callback={bowler_callback_param}&_={timestamp_ms}"

print(full_batsman_url)
print(full_bowler_url)

print("********** started fetching data for batsman ************* ")
# A timeout keeps a stalled connection from hanging the notebook indefinitely.
batsman_response = requests.get(full_batsman_url, timeout=30)
if batsman_response.status_code == 200:
    batsman_data = batsman_response.text  # raw JSONP text; the JSON is extracted below
    print("********** fetched successfully **********")
else:
    print(f"Request failed with status code {batsman_response.status_code}")
    # Stop here: continuing would either raise a NameError or silently reuse
    # `batsman_data` left in the kernel by an earlier run.
    raise RuntimeError(
        f"Batsman feed request failed with status code {batsman_response.status_code}"
    )

print("extracting data into json")
# The feed is JSONP: ontoprunsscorers({...}). DOTALL lets the payload span lines.
batsman_match = re.search(r'ontoprunsscorers\((\{.*\})\)', batsman_data, re.DOTALL)
if batsman_match is None:
    raise ValueError("Batsman feed did not contain an ontoprunsscorers({...}) payload")
batsman_json = json.loads(batsman_match.group(1))
batsman_df = pd.DataFrame(batsman_json["toprunsscorers"])

print("********** dropping irrelevant columns **********")
# drop() raises KeyError if the feed stops providing any of these columns,
# which surfaces upstream schema changes instead of hiding them.
batsman_df = batsman_df.drop(columns=[
    'PlayerId', 'PlayerDOB', 'Nationality', 'TCompetitionID', 'TStrikerID', 'TTeamID',
    'TeamName', 'CompetitionID', 'TeamID', 'StrikerID', 'ClientPlayerID', 'Extras', 'DBFreq', 'BdryFreq',
    'RPSS', 'ScoringBalls', 'Ones', 'Twos', 'Threes', 'NotOuts', 'BattingAveragesss', 'Dotballs',
    'DoubleCenturies', 'HighestScore', 'Centuries', 'Fours', 'Sixes', "RightHandedBat",
])

# Suffix the batting metrics with "_Bat" so they stay distinguishable from the
# "_Bowler"-suffixed bowling metrics once the two tables are merged.
cols_to_rename_bat = ['Innings', 'TotalRuns', 'StrikeRate', 'Balls',
                      'Outs', 'FiftyPlusRuns', 'BdryPercent', 'DBPercent']
batsman_df = batsman_df.rename(columns={col: f"{col}_Bat" for col in cols_to_rename_bat})

print("**** writing data to batsman_performance_file ****")
batsman_df.to_csv("batsman_performance.csv", index=False)


print("********** started fetching data for bowler ************* ")
# A timeout keeps a stalled connection from hanging the notebook indefinitely.
bowler_response = requests.get(full_bowler_url, timeout=30)
if bowler_response.status_code == 200:
    bowler_data = bowler_response.text  # raw JSONP text; the JSON is extracted below
    print("********** fetched successfully **********")
else:
    print(f"Request failed with status code {bowler_response.status_code}")
    # Stop here: continuing would either raise a NameError or silently reuse
    # `bowler_data` left in the kernel by an earlier run.
    raise RuntimeError(
        f"Bowler feed request failed with status code {bowler_response.status_code}"
    )

print("extracting bowler data into json")
# The feed is JSONP: onmostwickets({...}). DOTALL lets the payload span lines.
bowler_match = re.search(r'onmostwickets\((\{.*\})\)', bowler_data, re.DOTALL)
if bowler_match is None:
    raise ValueError("Bowler feed did not contain an onmostwickets({...}) payload")
bowler_json = json.loads(bowler_match.group(1))
bowler_df = pd.DataFrame(bowler_json["mostwickets"])

print(" dropping irrelevant columns ")
# drop() raises KeyError if the feed stops providing any of these columns,
# which surfaces upstream schema changes instead of hiding them.
bowler_df = bowler_df.drop(columns=[
    'Nationality', 'TeamName', 'TeamID', 'CompetitionID', 'LegalBallsBowled', 'DotBallsBowled',
    'ScoringBallsBowled', 'Ones', 'Twos', 'Threes', 'Wides', 'NoBalls', 'Byes', 'LegBye', 'InningsRuns', 'BowlingSR',
    'Fours', 'Sixes', 'InningsWickets', 'MatchRuns', 'MatchWickets', 'BBIW', 'BBMW', 'Maidens', 'MaidenWickets',
    'TenWickets', 'ClientPlayerID', 'FiveWickets'])

# BowlerID becomes PlayerId; the bowling metrics get a "_Bowler" suffix so they
# stay distinguishable from the "_Bat"-suffixed batting metrics after merging.
cols_to_rename = ['Innings', 'DotBallPercent', 'BoundaryPercentage', 'Wickets', 'StrikeRate']
bowler_renames = {'BowlerID': 'PlayerId'}
bowler_renames.update({col: f"{col}_Bowler" for col in cols_to_rename})
bowler_df = bowler_df.rename(columns=bowler_renames)

print(" writing data to bowler_performance_file")
# The file name is misspelled ("bawler") but is kept as is, because a later
# cell in this notebook reads the file back under exactly this name.
bowler_df.to_csv("bawler_performance.csv", index=False)

*********** creating batsman url *************
*********** creating bowler url **************
https://ipl-stats-sports-mechanic.s3.ap-south-1.amazonaws.com/ipl/feeds/stats/203-toprunsscorers.js?callback=ontoprunsscorers&_=1748093401801
https://ipl-stats-sports-mechanic.s3.ap-south-1.amazonaws.com/ipl/feeds/stats/203-mostwickets.js?callback=ontoprunsscorers&_=1748093401801
********** started fetching data for batsman ************* 
********** fetched successfully **********
extracting data into json
********** dropping irrelevant columns **********
**** writing data to batsman_performance_file ****
********** started fetching data for bowler ************* 
********** fetched successfully **********
extracting bowler data into json
 dropping irrelevant columns 
 writing data to batsman_performance_file


In [685]:
# Read the roster file and the two performance exports from the working directory.
role_df = pd.read_csv("Batsman.csv")
bat_performance_df = pd.read_csv("batsman_performance.csv")
bowl_performance_df = pd.read_csv("bawler_performance.csv")
print("shape of unfiltered df ")
print(bat_performance_df.shape, bowl_performance_df.shape)

# Only rows whose TeamCode appears in this list are kept.
# NOTE(review): nine team codes are listed -- confirm this is the intended set.
team_list = ['MI', 'DC', 'RR', 'CSK', 'RCB', 'LSG', 'SRH', 'GT', 'PBKS']
bowl_performance_df = bowl_performance_df.loc[
    lambda frame: frame["TeamCode"].isin(team_list)
]
bat_performance_df = bat_performance_df.loc[
    lambda frame: frame["TeamCode"].isin(team_list)
]
print(bat_performance_df.shape, bowl_performance_df.shape)

shape of unfiltered df 
(149, 14) (100, 16)
(134, 14) (91, 16)


In [686]:
# Build the join key in all three frames: the player's name with spaces
# removed and lower-cased, stored in a new "PlayerKey" column.
for frame, name_column in (
    (role_df, "Player_Name"),
    (bat_performance_df, "StrikerName"),
    (bowl_performance_df, "BowlerName"),
):
    frame["PlayerKey"] = frame[name_column].str.replace(" ", "").str.lower()

In [687]:
# Show (rows, columns) of the roster frame at this point, for comparison with the merged frames below.
role_df.shape

(164, 7)

In [688]:
# Inner join on the normalised key: only roster players that also appear in
# the batting stats are kept. Rows without a StrikerName are then discarded.
merged_bat_df = (
    role_df
    .merge(bat_performance_df, on="PlayerKey", how="inner")
    .dropna(subset=["StrikerName"])
)
print(merged_bat_df.shape)
merged_bat_df.columns

(129, 21)


Index(['Id', 'Player_Name', 'Team', 'Credit_Point', 'Role', 'Speciality',
       'PlayerKey', 'StrikerName', 'Matches', 'TeamCode', 'Innings_Bat',
       'TotalRuns_Bat', 'Balls_Bat', 'StrikeRate_Bat', 'DBPercent_Bat',
       'BdryPercent_Bat', 'Outs_Bat', 'FiftyPlusRuns_Bat', 'BattingAverage',
       'Catches', 'Stumpings'],
      dtype='object')

In [689]:
# Inner join on the normalised key: only roster players that also appear in
# the bowling stats are kept.
merged_bowl_df = role_df.merge(bowl_performance_df, on="PlayerKey", how="inner")
merged_bowl_df.columns

Index(['Id', 'Player_Name', 'Team', 'Credit_Point', 'Role', 'Speciality',
       'PlayerKey', 'BowlerName', 'RightHandedBat', 'TeamCode', 'Matches',
       'Innings_Bowler', 'PlayerId', 'TotalRunsConceded',
       'DotBallPercent_Bowler', 'BowlingAverage', 'StrikeRate_Bowler',
       'BoundaryPercentage_Bowler', 'BoundaryFrequency', 'EconomyRate',
       'OversBowled', 'Wickets_Bowler', 'FourWickets'],
      dtype='object')

In [690]:
# Show (rows, columns) of the roster-with-bowling-stats frame.
merged_bowl_df.shape

(83, 23)

In [691]:
# Outer join keeps every player found in either merged frame. Columns present
# on both sides come back with pandas' default "_x" (batting side) and "_y"
# (bowling side) suffixes.
final_df = merged_bat_df.merge(merged_bowl_df, on="PlayerKey", how="outer")
final_df.shape

(156, 43)

In [692]:
# List the merged columns, including the "_x"/"_y" suffixed duplicates produced by the outer merge.
final_df.columns

Index(['Id_x', 'Player_Name_x', 'Team_x', 'Credit_Point_x', 'Role_x',
       'Speciality_x', 'PlayerKey', 'StrikerName', 'Matches_x', 'TeamCode_x',
       'Innings_Bat', 'TotalRuns_Bat', 'Balls_Bat', 'StrikeRate_Bat',
       'DBPercent_Bat', 'BdryPercent_Bat', 'Outs_Bat', 'FiftyPlusRuns_Bat',
       'BattingAverage', 'Catches', 'Stumpings', 'Id_y', 'Player_Name_y',
       'Team_y', 'Credit_Point_y', 'Role_y', 'Speciality_y', 'BowlerName',
       'RightHandedBat', 'TeamCode_y', 'Matches_y', 'Innings_Bowler',
       'PlayerId', 'TotalRunsConceded', 'DotBallPercent_Bowler',
       'BowlingAverage', 'StrikeRate_Bowler', 'BoundaryPercentage_Bowler',
       'BoundaryFrequency', 'EconomyRate', 'OversBowled', 'Wickets_Bowler',
       'FourWickets'],
      dtype='object')

In [693]:
# final_df[['PlayerKey', 'StrikeRate_x', 'StrikeRate_y']]

In [694]:
# Collapse each "<name>_x" / "<name>_y" pair produced by the outer merge into
# a single "<name>" column, preferring the "_x" value and falling back to "_y".


def _strip_merge_suffix(column_name):
    """Return column_name without a trailing "_x"/"_y"; other names are returned unchanged."""
    # Only a trailing suffix is removed, so a name that merely contains
    # "_x" or "_y" somewhere in the middle is left intact.
    if column_name.endswith(("_x", "_y")):
        return column_name[:-2]
    return column_name


# Base names in first-seen order, without duplicates.
clean_col_name = list(dict.fromkeys(_strip_merge_suffix(col) for col in final_df.columns))

print(clean_col_name)
for col in clean_col_name:
    left_col, right_col = col + '_x', col + '_y'
    # Coalesce only when both halves of the pair are actually present.
    if left_col in final_df.columns and right_col in final_df.columns:
        final_df[col] = final_df[left_col].combine_first(final_df[right_col])
        final_df = final_df.drop(columns=[left_col, right_col])

final_df.shape

['Id', 'Player_Name', 'Team', 'Credit_Point', 'Role', 'Speciality', 'PlayerKey', 'StrikerName', 'Matches', 'TeamCode', 'Innings_Bat', 'TotalRuns_Bat', 'Balls_Bat', 'StrikeRate_Bat', 'DBPercent_Bat', 'BdryPercent_Bat', 'Outs_Bat', 'FiftyPlusRuns_Bat', 'BattingAverage', 'Catches', 'Stumpings', 'BowlerName', 'RightHandedBat', 'Innings_Bowler', 'PlayerId', 'TotalRunsConceded', 'DotBallPercent_Bowler', 'BowlingAverage', 'StrikeRate_Bowler', 'BoundaryPercentage_Bowler', 'BoundaryFrequency', 'EconomyRate', 'OversBowled', 'Wickets_Bowler', 'FourWickets']


(156, 35)

In [695]:
# Zero balls faced becomes NaN so the division below yields NaN for those rows.
final_df['Balls_Bat'] = final_df['Balls_Bat'].replace(0, np.nan)

# Overwrites StrikeRate_Bat with runs per ball.
# NOTE(review): this is a per-ball ratio, not per 100 balls -- confirm the scale is intended.
final_df['StrikeRate_Bat'] = final_df['TotalRuns_Bat'].div(final_df['Balls_Bat'])

# SRRI: strike rate weighted by log10 of (balls + 1) * (innings + 1), rounded to 2 decimals.
balls_innings_product = (final_df['Balls_Bat'] + 1) * (final_df['Innings_Bat'] + 1)
final_df['SRRI'] = (final_df['StrikeRate_Bat'] * np.log10(balls_innings_product)).round(2)

# DBBI_Bat: (1 - dot-ball share) plus twice the boundary share, rounded to 2 decimals.
non_dot_share = 1 - (final_df['DBPercent_Bat'] / 100)
boundary_share = final_df['BdryPercent_Bat'] / 100
final_df['DBBI_Bat'] = (non_dot_share + boundary_share * 2).round(2)

# Missing values (e.g. rows where Balls_Bat was 0) are shown as 0 in these two columns.
filled_columns = ['StrikeRate_Bat', 'SRRI']
final_df[filled_columns] = final_df[filled_columns].fillna(0)

In [696]:
# Zero denominators are treated as missing so the indices come out as NaN
# rather than +/-inf, matching how the batting cell handles zero balls faced.
innings_bowled = final_df['Innings_Bowler'].replace(0, np.nan)
overs_bowled = final_df['OversBowled'].replace(0, np.nan)
cost_denominator = (
    final_df['EconomyRate'] * 1.5
    + final_df['StrikeRate_Bowler']
    + final_df['BowlingAverage'] * 0.5
).replace(0, np.nan)

# BRRI: wickets per innings, scaled by (6 + dot-ball percent) over the weighted
# economy / strike-rate / average cost term; rounded to 2 decimals.
final_df['BRRI'] = (
    (final_df['Wickets_Bowler'] / innings_bowled)
    * ((6 + final_df['DotBallPercent_Bowler']) / cost_denominator)
).round(2)

# DBBI_Bowler: dot-ball share plus twice the wickets per over, minus 1.5 times
# the boundary share; rounded to 2 decimals.
final_df['DBBI_Bowler'] = (
    (final_df['DotBallPercent_Bowler'] / 100)
    + (final_df['Wickets_Bowler'] / overs_bowled) * 2
    - (final_df['BoundaryPercentage_Bowler'] / 100) * 1.5
).round(2)

In [697]:
# Remove the raw inputs of the derived indices together with the name/id
# columns that are not carried into the exported table.
final_df = final_df.drop(columns=[
    # batting inputs
    "Balls_Bat", "Innings_Bat", "StrikeRate_Bat",
    # bowling inputs
    'Wickets_Bowler', 'Innings_Bowler', 'DotBallPercent_Bowler',
    'EconomyRate', 'StrikeRate_Bowler', 'BowlingAverage',
    # name / identifier columns
    'StrikerName', 'BowlerName', 'Player_Name', 'Id', 'Team',
])

In [698]:
# List the columns that remain after the derived indices were added and their inputs dropped.
final_df.columns

Index(['PlayerKey', 'TotalRuns_Bat', 'DBPercent_Bat', 'BdryPercent_Bat',
       'Outs_Bat', 'FiftyPlusRuns_Bat', 'BattingAverage', 'Catches',
       'Stumpings', 'RightHandedBat', 'PlayerId', 'TotalRunsConceded',
       'BoundaryPercentage_Bowler', 'BoundaryFrequency', 'OversBowled',
       'FourWickets', 'Credit_Point', 'Role', 'Speciality', 'Matches',
       'TeamCode', 'SRRI', 'DBBI_Bat', 'BRRI', 'DBBI_Bowler'],
      dtype='object')

In [702]:
# Write the combined table to final_data.csv in the working directory, without the index column.
final_df.to_csv("final_data.csv", index=False)

In [703]:
def playing_team(team1, team2, role_df, final_df):
    """Select the rows of ``final_df`` for two teams and save them to a CSV.

    Parameters
    ----------
    team1, team2 : str
        Team codes, matched against ``final_df['TeamCode']`` and
        ``role_df['Team']``.
    role_df : pandas.DataFrame
        Roster table; must have ``Team`` and ``PlayerKey`` columns.
    final_df : pandas.DataFrame
        Stats table; must have ``TeamCode`` and ``PlayerKey`` columns.

    Returns
    -------
    pandas.DataFrame
        A copy of the ``final_df`` rows whose ``TeamCode`` is one of the two
        teams.

    Side effects
    ------------
    Prints progress messages, including the roster players of the two teams
    that have no row in the selection, and writes the selection to
    ``"<team1>_vs_<team2>_data.csv"`` in the current working directory.
    """
    teams = [team1, team2]

    # Rows of the stats table that belong to either team.
    in_match = final_df['TeamCode'].isin(teams)
    combined_df = final_df.loc[in_match].copy()
    print(f"[INFO] Filtered {len(combined_df)} records from final_df for teams {team1} and {team2}")

    # Roster players of the two teams that have no row in the selection.
    rostered_keys = set(role_df.loc[role_df['Team'].isin(teams), 'PlayerKey'])
    selected_keys = set(combined_df['PlayerKey'])
    missing_players = rostered_keys - selected_keys
    print(f"[INFO] Missing players (in role_df but not in final_df): {missing_players}")

    # The output file is named after the match.
    match_name = f"{team1}_vs_{team2}"
    output_filename = f"{match_name}_data.csv"
    combined_df.to_csv(output_filename, index=False)
    print(f"[INFO] Saved final data to {output_filename}")
    return combined_df


In [707]:
# Select the PBKS and DC rows of final_df; playing_team also writes them to PBKS_vs_DC_data.csv.
df_t = playing_team('PBKS', 'DC', role_df, final_df)

[INFO] Filtered 33 records from final_df for teams PBKS and DC
[INFO] Missing players (in role_df but not in final_df): {'tnatarajan'}
[INFO] Saved final data to PBKS_vs_DC_data.csv


In [712]:
# PlayerKey values (lower-case, no spaces) used to filter df_t in the next cell.
# One name per line keeps per-match edits easy to review.
playing_ls = [
    'arshdeepsingh',
    'ashutoshsharma',
    'azmatullahomarzai',
    'fafduplessis',
    'harpreetbrar',
    'joshinglis',
    'karunnair',
    'klrahul',
    'kuldeepyadav',
    'marcojansen',
    'marcusstoinis',
    'mohitsharma',
    'mukeshkumar',
    'mustafizurrahman',
    'nehalwadhera',
    'prabhsimransingh',
    'priyansharya',
    'sameerrizvi',
    'shashanksingh',
    'shreyasiyer',
    'tristanstubbs',
    'viprajnigam',
]
  

In [713]:
# Names in playing_ls that have no matching PlayerKey would otherwise be
# dropped without any trace (e.g. a misspelt key), so report them first.
unmatched_players = sorted(set(playing_ls) - set(df_t["PlayerKey"]))
if unmatched_players:
    print(f"[WARN] Players in playing_ls not found in df_t: {unmatched_players}")

# Keep only the rows whose PlayerKey is listed in playing_ls and export them.
df_t = df_t[df_t["PlayerKey"].isin(playing_ls)]
df_t.to_csv("df_t.csv", index=False)