In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
import warnings
warnings.filterwarnings('ignore')
from ipl_parser import IPLParser
pd.set_option('display.max_rows', 200)

In [2]:
# Directory holding the raw input for this match; passed to IPLParser below.
# NOTE(review): absolute, machine-specific Windows path — the notebook will not
# run on another machine without editing it.
Input_Filepath = 'D:\\Datasets\\IPL_2024\\RCB VS CSK'
# Output directory handed to IPLParser. How the parser uses it is defined in
# ipl_parser, which is not visible from this notebook.
Output_FilePath = 'D:\\Datasets\\IPL_2024\\Data Files'
# Venue label for this match; presumably what ends up in the 'venue' column
# used later — TODO confirm in ipl_parser.
stadium = 'MA Chidambaram Stadium, Chepauk, Chennai'

# Project-local parser configured for this single match.
parser = IPLParser(Input_Filepath, Output_FilePath,stadium)

In [3]:
# Short player token (as captured from the 'batter_bowler' text) -> full name.
# Kept as an ordered list of pairs so each row reads "token, full name".
name_mapping = dict([
    ('Deshpande', 'Tushar Deshpande'),
    ('Mustafizur', 'Mustafizur Rahman'),
    ('Theekshana', 'Maheesh Theekshana'),
    ('Chahar', 'Deepak Chahar'),
    ('Jadeja', 'Ravindra Jadeja'),
    ('Alzarri', 'Alzarri Joseph'),
    ('Siraj', 'Mohammed Siraj'),
    ('Green', 'Cameron Green'),
    ('Maxwell', 'Glenn Maxwell'),
    ('Dagar', 'Mayank Dagar'),
    ('Karn', 'Karn Sharma'),
    ('Yash', 'Yash Dayal'),
    ('Karthik', 'Dinesh Karthik'),
    ('Rawat', 'Anuj Rawat'),
    ('Kohli', 'Virat Kohli'),
    ('Patidar', 'Rajat Patidar'),
    # Only the first word after "to" is captured, hence the bare 'du'.
    ('du', 'Faf du Plessis'),
    ('Dube', 'Shivam Dube'),
    ('Mitchell', 'Daryl Mitchell'),
    ('Rahane', 'Ajinkya Rahane'),
    ('Ravindra', 'Rachin Ravindra'),
    ('Gaikwad', 'Ruturaj Gaikwad'),
])


# Hand-entered (full name, batting side, bowling style) rows.
# NOTE(review): these values are typed in by hand and a few look questionable
# (e.g. Maheesh Theekshana as leg-spin, Yash Dayal as right-arm) — verify
# against an authoritative player profile before relying on them.
_PLAYER_STYLE_ROWS = [
    ('Tushar Deshpande', 'Right-handed', 'Right-arm fast-medium'),
    ('Mustafizur Rahman', 'Left-handed', 'Left-arm fast-medium'),
    ('Maheesh Theekshana', 'Right-handed', 'Right-arm leg-spin'),
    ('Deepak Chahar', 'Right-handed', 'Right-arm medium-fast'),
    ('Ravindra Jadeja', 'Left-handed', 'Left-arm orthodox'),
    ('Alzarri Joseph', 'Right-handed', 'Right-arm fast'),
    ('Mohammed Siraj', 'Right-handed', 'Right-arm fast-medium'),
    ('Cameron Green', 'Right-handed', 'Right-arm fast-medium'),
    ('Glenn Maxwell', 'Right-handed', 'Right-arm off-spin'),
    ('Mayank Dagar', 'Left-handed', 'Slow left-arm orthodox'),
    ('Karn Sharma', 'Right-handed', 'Right-arm leg-spin'),
    ('Yash Dayal', 'Right-handed', 'Right-arm medium'),
    ('Dinesh Karthik', 'Right-handed', 'Right-arm off-spin'),
    # 'None' is stored as a string, not the None object.
    ('Anuj Rawat', 'Right-handed', 'None'),
    ('Virat Kohli', 'Right-handed', 'Right-arm medium'),
    ('Rajat Patidar', 'Right-handed', 'Right-arm medium'),
    ('Faf du Plessis', 'Right-handed', 'Right-arm leg-spin'),
    ('Shivam Dube', 'Left-handed', 'Right-arm medium-fast'),
    ('Daryl Mitchell', 'Right-handed', 'Right-arm medium-fast'),
    ('Ajinkya Rahane', 'Right-handed', 'Right-arm medium'),
    ('Rachin Ravindra', 'Left-handed', 'Slow left-arm orthodox'),
    ('Ruturaj Gaikwad', 'Right-handed', 'Right-arm off-spin'),
]

# Full player name -> {'batting_side': ..., 'bowling_side': ...}.
player_details = {
    full_name: {'batting_side': bat_side, 'bowling_side': bowl_style}
    for full_name, bat_side, bowl_style in _PLAYER_STYLE_ROWS
}

# Whole-cell 'Runs' tokens that count as zero runs off the bat.
number_mapping = {
    'W' : '0',
    # Dot-ball marker as it appears when UTF-8 text is decoded as cp1252
    # (the mojibake form of the bullet character).
    'â€¢' : '0',
    # The same dot-ball marker when the text is decoded correctly (U+2022),
    # so the mapping keeps working if the upstream encoding is fixed.
    '\u2022' : '0'
}

In [4]:
# Load this match's data through the project parser.
# NOTE(review): the return type is not visible here; the following cells use
# it as a pandas DataFrame with 'batter_bowler', 'ball_details' and 'overs'
# columns — confirm against ipl_parser.
mydata = parser.getMatchData()

In [5]:
# Sanity check: (rows, columns) of the freshly parsed data.
mydata.shape

(249, 6)

In [6]:
def get_bowler_batter_outcom(x):
    """Split one 'batter_bowler' text into (bowler, batter, outcome).

    Parameters
    ----------
    x : str
        Text that begins with the bowler token, contains the standalone word
        ``to`` followed by the batter token, and carries the outcome inside a
        ``<span>...</span>`` element (illustrative shape:
        ``'Siraj to Dube, <span>FOUR</span>'``).

    Returns
    -------
    tuple of (str, str, str)
        ``(bowler, batter, outcome)`` where ``bowler`` is the leading word,
        ``batter`` is the first word after ``to`` and ``outcome`` is the
        content of the first ``<span>`` element.

    Raises
    ------
    AttributeError
        If any of the three patterns is missing from ``x`` (``re.search``
        returns ``None``).
    """
    bowler = re.search(r'^\w+', x).group()
    # \b makes sure we anchor on the standalone word "to"; without it a bowler
    # token that itself ends in "to" (e.g. "Shanto to ...") would match first
    # and the batter would come back as the literal word "to".
    batter = re.search(r'\bto\s+(\w+)', x).group(1)
    outcome = re.search(r'<span>(.*?)<\/span>', x).group(1)
    return bowler, batter, outcome

def get_ball_details(x):
    """Return the content of the first ``<span>...</span>`` element in ``x``.

    ``x`` is converted with ``str()`` before searching, so non-string values
    are accepted. Raises ``AttributeError`` when no ``<span>`` element is
    present (``re.search`` returns ``None``).
    """
    markup = str(x)
    span_match = re.search(r'<span>(.*?)<\/span>', markup)
    return span_match.group(1)

In [7]:
# Split each 'batter_bowler' text into its (bowler, batter, outcome) tuple,
# then expand the tuples into three positional columns.
parsed_parts = mydata['batter_bowler'].apply(get_bowler_batter_outcom).apply(pd.Series)
mydata[['Bowler','Batter','Outcome']] = parsed_parts

# Raw runs token: the first <span> content of 'ball_details'.
mydata['Runs'] = mydata['ball_details'].apply(get_ball_details)

# Swap the short tokens captured above for full player names; values that are
# not keys of name_mapping are left unchanged.
for role in ('Batter', 'Bowler'):
    mydata[role] = mydata[role].replace(name_mapping)

In [8]:
# Attach each player's batting / bowling style by name lookup.
#
# Flat name -> style lookups are built once and applied with Series.map, which
# works positionally on the column. The previous row loop wrote through
# .loc[index, ...], which is slow and, if the index labels are not unique,
# overwrites every row that shares a label.
batting_style_by_player = {
    name: details['batting_side'] for name, details in player_details.items()
}
bowling_style_by_player = {
    name: details['bowling_side'] for name, details in player_details.items()
}

# Players missing from player_details get an empty string.
mydata['Batting Style'] = mydata['Batter'].map(batting_style_by_player).fillna('')
mydata['Bowling Style'] = mydata['Bowler'].map(bowling_style_by_player).fillna('')

In [9]:
# Turn the raw 'Runs' token into an integer.
# Step 1: whole-cell tokens listed in number_mapping become '0'.
mydata['Runs'] = mydata['Runs'].replace(number_mapping)

# Step 2: remove any remaining ASCII letters so only the digits are left.
# regex=True must be explicit: from pandas 2.0 the default is regex=False,
# which would treat '[A-Za-z]' as a literal substring, strip nothing, and make
# the int() conversion below fail on any value that still contains letters.
mydata['Runs'] = mydata['Runs'].str.replace('[A-Za-z]', '', regex=True)

# Step 3: convert to int; raises ValueError if a value has no digits left.
mydata['Runs'] = mydata['Runs'].apply(int)

# 'overs' is a "<over>.<ball>" string: the part after the dot is the ball
# number, the part before it is the over number.
mydata['ball'] = mydata['overs'].apply(lambda x : int(x.split('.')[1]))

mydata['over'] = mydata['overs'].apply(lambda x : int(x.split('.')[0]))

In [10]:
# Final column layout. Selecting these columns also discards the intermediate
# 'ball_details', 'batter_bowler' and 'overs' columns, so no separate drop is
# needed. The earlier in-place drop made this cell raise KeyError when re-run.
final_columns = [
    'innings', 'over', 'ball', 'Batter', 'Bowler', 'Runs', 'Outcome',
    'Batting Style', 'Bowling Style', 'comments', 'venue',
]

# Select and sort in one chained expression that returns a new frame, rather
# than sorting a column-subset frame in place.
mydata = (
    mydata[final_columns]
    .sort_values(by=['innings', 'over', 'ball'], ignore_index=True)
)

In [25]:
#mydata.to_csv('D:\\Datasets\\IPL_2024\\Data Files\\Clean_Data\\RCB_VS_CSK.csv',index=False)