In [109]:
# Libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import re 

In [110]:
# Load the IPL 2024 schedule; its Score_Card column supplies the scorecard URLs scraped further down
df = pd.read_csv('data/ipl_2024_schedule.csv')
df.head()

Unnamed: 0,Match,Team_1,Team_2,Player_of_the_Match,Venue,City,Match_info,Score_Card
0,CHENNAI SUPER KINGS VS ROYAL CHALLENGERS BENGA...,CSK,RCB,Mustafizur Rahman,MA Chidambaram Stadium,Chennai,https://www.cricbuzz.com/live-cricket-full-com...,https://www.cricbuzz.com/live-cricket-scorecar...
1,PUNJAB KINGS VS DELHI CAPITALS,PK,DC,Sam Curran,Maharaja Yadavindra Singh International Cricke...,Mullanpur,https://www.cricbuzz.com/live-cricket-full-com...,https://www.cricbuzz.com/live-cricket-scorecar...
2,KOLKATA KNIGHT RIDERS VS SUNRISERS HYDERABAD,KKR,SH,Andre Russell,Eden Gardens,Kolkata,https://www.cricbuzz.com/live-cricket-full-com...,https://www.cricbuzz.com/live-cricket-scorecar...
3,RAJASTHAN ROYALS VS LUCKNOW SUPER GIANTS,RR,LSG,Sanju Samson,Sawai Mansingh Stadium,Jaipur,https://www.cricbuzz.com/live-cricket-full-com...,https://www.cricbuzz.com/live-cricket-scorecar...
4,GUJARAT TITANS VS MUMBAI INDIANS,GT,MI,Sai Sudharsan,Narendra Modi Stadium,Ahmedabad,https://www.cricbuzz.com/live-cricket-full-com...,https://www.cricbuzz.com/live-cricket-scorecar...


In [111]:
df.shape

(74, 8)

### First Innings Batting

In [112]:
bat1 = []
def first_innings_batting(first_innings_bat,match_number,opposite_team):
    '''
    Parse the first-innings batting block of one scorecard and append one
    record per batter row to the module-level list ``bat1``.

    Input :
        first_innings_bat : text of the first-innings batting block. Expected
                            layout: "<Team name> Innings ...", a column header
                            ending in "SR", the batter rows, then "Extras"
        match_number      : value stored as Match_id on every record
        opposite_team     : text of the other side's batting block; only the
                            part before the word "Innings" is used
    Output : None. Each appended record is a dict with the keys
             Match_id, Score, Team, Opposite_Team and Innings ('1')
    '''
    # The team name is everything before the word "Innings". Cutting at the
    # first capital 'I' truncated names such as "Mumbai Indians" to "Mumbai".
    # If "Innings" is absent, partition() yields the whole (stripped) text.
    team = first_innings_bat.partition('Innings')[0].strip()
    opp_team = opposite_team.partition('Innings')[0].strip()
    # Batter rows sit between the "SR" column header and the "Extras" line
    start = first_innings_bat.find('SR')
    end = first_innings_bat.find('Extras')
    actual = first_innings_bat[start+2:end].strip()
    # Rows are separated by a run of eight spaces in the scraped text
    for row in actual.split('        '):
        # A blank fragment carries no batter and would break the later
        # stat extraction, which indexes the last five tokens of the row
        if not row.strip():
            continue
        # The row text is stored untouched: downstream parsing relies on its
        # exact spacing
        bat1.append({'Match_id':match_number,
                     'Score':row,
                     'Team':team,
                     'Opposite_Team':opp_team,
                    'Innings':'1'})

### First Innings Bowling

In [113]:
bowl1 = []
def first_innings_bowling(first_innings_bowl,sample,match_number,opposite_team):
    '''
    Parse the first-innings bowling block of one scorecard and append one
    record per bowler row to the module-level list ``bowl1``.

    Input :
        first_innings_bowl : text of the bowling block; the bowler rows start
                             right after the "ECO" column header
        sample             : batting block of the bowling side; only the part
                             before the word "Innings" (the team name) is used
        match_number       : value stored as Match_id on every record
        opposite_team      : batting block of the side that faced the bowling;
                             again only the leading team name is used
    Output : None. Each appended record is a dict with the keys
             Match_id, Bowling, Team, Opposite_Team and Innings ('1')
    '''
    # The team name is everything before the word "Innings". Cutting at the
    # first capital 'I' truncated names such as "Mumbai Indians" to "Mumbai".
    # If "Innings" is absent, partition() yields the whole (stripped) text.
    team = sample.partition('Innings')[0].strip()
    opp_team = opposite_team.partition('Innings')[0].strip()
    # Bowler rows begin after the three-character "ECO" header
    start = first_innings_bowl.find('ECO')
    actual = first_innings_bowl[start+3:].strip()
    # Rows are separated by a run of eight spaces in the scraped text
    for row in actual.split('        '):
        # A blank fragment carries no bowler and would break the later
        # figure extraction, which indexes the last seven tokens of the row
        if not row.strip():
            continue
        bowl1.append({'Match_id':match_number,
                      'Bowling':row,
                      'Team':team,
                      'Opposite_Team':opp_team,
                     'Innings':'1'})

### Second Innings Batting

In [114]:
bat2 = []
def second_innings_batting(second_innings_bat,match_number,opposite_team):
    '''
    Parse the second-innings batting block of one scorecard and append one
    record per batter row to the module-level list ``bat2``.

    Input :
        second_innings_bat : text of the second-innings batting block. Expected
                             layout: "<Team name> Innings ...", a column header
                             ending in "SR", the batter rows, then "Extras"
        match_number       : value stored as Match_id on every record
        opposite_team      : text of the other side's batting block; only the
                             part before the word "Innings" is used
    Output : None. Each appended record is a dict with the keys
             Match_id, Score, Team, Opposite_Team and Innings ('2')
    '''
    # The team name is everything before the word "Innings". Cutting at the
    # first capital 'I' truncated names such as "Mumbai Indians" to "Mumbai".
    # If "Innings" is absent, partition() yields the whole (stripped) text.
    team = second_innings_bat.partition('Innings')[0].strip()
    opp_team = opposite_team.partition('Innings')[0].strip()
    # Batter rows sit between the "SR" column header and the "Extras" line
    start = second_innings_bat.find('SR')
    end = second_innings_bat.find('Extras')
    actual = second_innings_bat[start+2:end].strip()
    # Rows are separated by a run of eight spaces in the scraped text
    for row in actual.split('        '):
        # A blank fragment carries no batter and would break the later
        # stat extraction, which indexes the last five tokens of the row
        if not row.strip():
            continue
        # The row text is stored untouched: downstream parsing relies on its
        # exact spacing
        bat2.append({'Match_id':match_number,
                     'Score':row,
                     'Team':team,
                     'Opposite_Team':opp_team,
                    'Innings':'2'})

### Second Innings Bowling

In [115]:
bowl2 = []
def second_innings_bowling(second_innings_bowl,sample,match_number,opposite_team):
    '''
    Parse the second-innings bowling block of one scorecard and append one
    record per bowler row to the module-level list ``bowl2``.

    Input :
        second_innings_bowl : text of the bowling block; the bowler rows start
                              right after the "ECO" column header
        sample              : batting block of the bowling side; only the part
                              before the word "Innings" (the team name) is used
        match_number        : value stored as Match_id on every record
        opposite_team       : batting block of the side that faced the bowling;
                              again only the leading team name is used
    Output : None. Each appended record is a dict with the keys
             Match_id, Bowling, Team, Opposite_Team and Innings ('2')
    '''
    # The team name is everything before the word "Innings". Cutting at the
    # first capital 'I' truncated names such as "Mumbai Indians" to "Mumbai".
    # If "Innings" is absent, partition() yields the whole (stripped) text,
    # the same way for both names.
    team = sample.partition('Innings')[0].strip()
    opp_team = opposite_team.partition('Innings')[0].strip()
    # Bowler rows begin after the three-character "ECO" header
    start = second_innings_bowl.find('ECO')
    actual = second_innings_bowl[start+3:].strip()
    # Rows are separated by a run of eight spaces in the scraped text
    for row in actual.split('        '):
        # A blank fragment carries no bowler and would break the later
        # figure extraction, which indexes the last seven tokens of the row
        if not row.strip():
            continue
        bowl2.append({'Match_id':match_number,
                      'Bowling':row,
                      'Team':team,
                      'Opposite_Team':opp_team,
                     'Innings':'2'})

In [116]:
toss = []
def give_me_soup(match_urls, skip_matches=(63, 66, 70), timeout=30):
    '''
    Main driver: download every scorecard page, cut it into its innings
    blocks and hand them to the four innings parsers; also record the toss
    winner of every processed match in the module-level list ``toss``.

    Input :
        match_urls   : iterable of scorecard URLs, in schedule order
        skip_matches : 1-based match numbers that are not scraped
                       (NOTE(review): 63, 66 and 70 are presumably matches
                       without a usable scorecard - confirm)
        timeout      : seconds to wait for each HTTP response
    Output : None. Rows are appended to bat1 / bowl1 / bat2 / bowl2 by the
             innings parsers and the toss winner is appended to ``toss``
    '''
    # Match_id is the position of the URL in the schedule, so a failed request
    # no longer shifts the ids (or the skip list) of every later match.
    for cnt, sample in enumerate(match_urls, start=1):
        if cnt in skip_matches:
            continue

        # One bad request must not abort the remaining matches, so the error
        # is handled per URL instead of around the whole loop.
        try:
            response = requests.get(sample, timeout=timeout)
        except requests.exceptions.RequestException as e:
            print("Request error:", e)
            continue

        if response.status_code != 200: # Status Code check for each match
            # Last path segment of the URL identifies the match in the log
            match = str(sample).rstrip('/').rsplit('/', 1)[-1]
            print(f"Unable to access for this {match} Match")
            continue

        match_soup = BeautifulSoup(response.content,'html.parser')  # Html parsing
        match_score = match_soup.find_all('div',attrs={'class':'cb-col cb-col-100 cb-ltst-wgt-hdr'}) # scorecard class
        toss_txt = match_soup.find_all('div',attrs={'class':'cb-col cb-col-73'})[2].text # Toss Class
        cleaned = [i.text.strip() for i in match_score]  # cleaned data for each match

        # Block order as consumed below: 0 = first-innings batting,
        # 1 = first-innings bowling, 3 = second-innings batting,
        # 4 = second-innings bowling (index 2 is not used here).
        first_innings_batting(cleaned[0],cnt,cleaned[3])
        first_innings_bowling(cleaned[1],cleaned[3],cnt,cleaned[0])
        second_innings_batting(cleaned[3],cnt,cleaned[0])
        second_innings_bowling(cleaned[4],cleaned[0],cnt,cleaned[3])

        # Toss winner is the text in front of the word "won"
        win_index = toss_txt.index('won')
        toss.append(toss_txt[:win_index].strip())
        print(f'{cnt} Match Data Uploaded!')


In [117]:
# One scorecard URL per scheduled match, in schedule order
urls = df['Score_Card']

In [118]:
# Scrape every scorecard; fills bat1 / bowl1 / bat2 / bowl2 and toss as a side effect
give_me_soup(urls)

1 Match Data Uploaded!
2 Match Data Uploaded!
3 Match Data Uploaded!
4 Match Data Uploaded!
5 Match Data Uploaded!
6 Match Data Uploaded!
7 Match Data Uploaded!
8 Match Data Uploaded!
9 Match Data Uploaded!
10 Match Data Uploaded!
11 Match Data Uploaded!
12 Match Data Uploaded!
13 Match Data Uploaded!
14 Match Data Uploaded!
15 Match Data Uploaded!
16 Match Data Uploaded!
17 Match Data Uploaded!
18 Match Data Uploaded!
19 Match Data Uploaded!
20 Match Data Uploaded!
21 Match Data Uploaded!
22 Match Data Uploaded!
23 Match Data Uploaded!
24 Match Data Uploaded!
25 Match Data Uploaded!
26 Match Data Uploaded!
27 Match Data Uploaded!
28 Match Data Uploaded!
29 Match Data Uploaded!
30 Match Data Uploaded!
31 Match Data Uploaded!
32 Match Data Uploaded!
33 Match Data Uploaded!
34 Match Data Uploaded!
35 Match Data Uploaded!
36 Match Data Uploaded!
37 Match Data Uploaded!
38 Match Data Uploaded!
39 Match Data Uploaded!
40 Match Data Uploaded!
41 Match Data Uploaded!
42 Match Data Uploaded!
4

In [119]:
# Turn each per-innings record list into its own DataFrame
bat_1_df, bat_2_df, bowl_1_df, bowl_2_df = (
    pd.DataFrame(records) for records in (bat1, bat2, bowl1, bowl2)
)

In [120]:
bat_1_df.head()

Unnamed: 0,Match_id,Score,Team,Opposite_Team,Innings
0,1,Kohli c Rachin Ravindra b Mustafizur 21 20...,Royal Challengers Bengaluru,Chennai Super Kings,1
1,1,du Plessis (c) c Rachin Ravindra b Mustafiz...,Royal Challengers Bengaluru,Chennai Super Kings,1
2,1,Rajat Patidar c Dhoni b Mustafizur 0 3 0 0...,Royal Challengers Bengaluru,Chennai Super Kings,1
3,1,Maxwell c Dhoni b D Chahar 0 1 0 0 0.00,Royal Challengers Bengaluru,Chennai Super Kings,1
4,1,Green b Mustafizur 18 22 1 0 81.82,Royal Challengers Bengaluru,Chennai Super Kings,1


In [121]:
bat_2_df.head()

Unnamed: 0,Match_id,Score,Team,Opposite_Team,Innings
0,1,Ruturaj Gaikwad (c) c Green b Yash Dayal 1...,Chennai Super Kings,Royal Challengers Bengaluru,2
1,1,Rachin Ravindra c Rajat Patidar b Karn Shar...,Chennai Super Kings,Royal Challengers Bengaluru,2
2,1,Ajinkya Rahane c Maxwell b Green 27 19 0 2...,Chennai Super Kings,Royal Challengers Bengaluru,2
3,1,Daryl Mitchell c Rajat Patidar b Green 22 ...,Chennai Super Kings,Royal Challengers Bengaluru,2
4,1,Shivam Dube not out 34 28 4 1 121.43,Chennai Super Kings,Royal Challengers Bengaluru,2


In [122]:
bowl_1_df.head()

Unnamed: 0,Match_id,Bowling,Team,Opposite_Team,Innings
0,1,D Chahar 4 0 37 1 0 2 9.20,Chennai Super Kings,Royal Challengers Bengaluru,1
1,1,Tushar Deshpande 4 0 47 0 0 6 11.80,Chennai Super Kings,Royal Challengers Bengaluru,1
2,1,M Theekshana 4 0 36 0 0 0 9.00,Chennai Super Kings,Royal Challengers Bengaluru,1
3,1,Mustafizur 4 0 29 4 0 2 7.20,Chennai Super Kings,Royal Challengers Bengaluru,1
4,1,Ravindra Jadeja 4 0 21 0 0 0 5.20,Chennai Super Kings,Royal Challengers Bengaluru,1


In [123]:
# Stack both innings. ignore_index=True renumbers the rows 0..n-1; without it
# the second innings repeats the labels of the first, so label-based lookups
# and joins on the combined frame become ambiguous.
batting_df = pd.concat([bat_1_df,bat_2_df],axis=0,ignore_index=True)
bowling_df = pd.concat([bowl_1_df,bowl_2_df],axis=0,ignore_index=True)

In [124]:
batting_df.head()

Unnamed: 0,Match_id,Score,Team,Opposite_Team,Innings
0,1,Kohli c Rachin Ravindra b Mustafizur 21 20...,Royal Challengers Bengaluru,Chennai Super Kings,1
1,1,du Plessis (c) c Rachin Ravindra b Mustafiz...,Royal Challengers Bengaluru,Chennai Super Kings,1
2,1,Rajat Patidar c Dhoni b Mustafizur 0 3 0 0...,Royal Challengers Bengaluru,Chennai Super Kings,1
3,1,Maxwell c Dhoni b D Chahar 0 1 0 0 0.00,Royal Challengers Bengaluru,Chennai Super Kings,1
4,1,Green b Mustafizur 18 22 1 0 81.82,Royal Challengers Bengaluru,Chennai Super Kings,1


In [125]:
# Persist the raw (unparsed) scorecard rows so later steps can restart without re-scraping
batting_df.to_csv('data/batting.csv',index=False)
bowling_df.to_csv('data/bowling.csv',index=False)

### Batting Statistics

In [126]:
def extract_scores_to_df(each_batsman_sample):
    '''
    Pull the five trailing figures out of one batter's scorecard row.

    Input : one scorecard row as a string
    Output : pd.Series indexed by Runs, Balls, Boundaries, Sixes, StrikeRate;
             the values are the raw string tokens, not numbers

    Sample Input : 'Bairstow    c Daryl Mitchell b Santner  33 35 4 1 94.29'
    '''
    labels = ('Runs', 'Balls', 'Boundaries', 'Sixes', 'StrikeRate')
    # The figures are the last five space-separated tokens of the row
    tail = each_batsman_sample.split(' ')[-5:]
    # Explicit indexing keeps the IndexError for rows with fewer than five tokens
    return pd.Series({label: tail[pos] for pos, label in enumerate(labels)})

In [127]:
# One row of figures (Runs, Balls, Boundaries, Sixes, StrikeRate) per raw batting row
batting_stats = batting_df['Score'].apply(extract_scores_to_df)

In [128]:
batting_stats.head()

Unnamed: 0,Runs,Balls,Boundaries,Sixes,StrikeRate
0,21,20,0,1,105.0
1,35,23,8,0,152.17
2,0,3,0,0,0.0
3,0,1,0,0,0.0
4,18,22,1,0,81.82


In [129]:
batting_df.shape,batting_stats.shape

((1129, 5), (1129, 5))

In [130]:
# Concatenating 2 dfs row_wise to get exact score card (Dismissal+Stats)
updated_batting_df = pd.concat([batting_df,batting_stats],axis=1)  
updated_batting_df.head()

Unnamed: 0,Match_id,Score,Team,Opposite_Team,Innings,Runs,Balls,Boundaries,Sixes,StrikeRate
0,1,Kohli c Rachin Ravindra b Mustafizur 21 20...,Royal Challengers Bengaluru,Chennai Super Kings,1,21,20,0,1,105.0
1,1,du Plessis (c) c Rachin Ravindra b Mustafiz...,Royal Challengers Bengaluru,Chennai Super Kings,1,35,23,8,0,152.17
2,1,Rajat Patidar c Dhoni b Mustafizur 0 3 0 0...,Royal Challengers Bengaluru,Chennai Super Kings,1,0,3,0,0,0.0
3,1,Maxwell c Dhoni b D Chahar 0 1 0 0 0.00,Royal Challengers Bengaluru,Chennai Super Kings,1,0,1,0,0,0.0
4,1,Green b Mustafizur 18 22 1 0 81.82,Royal Challengers Bengaluru,Chennai Super Kings,1,18,22,1,0,81.82


In [131]:
def _parse_dismissal(tokens):
    '''
    Classify one batter row from the space-separated tokens that precede its
    figures.

    Input : list of tokens, e.g. ['Kohli', 'c', 'Rachin', 'Ravindra', 'b', 'Mustafizur']
    Output : dict with the keys Batsman, Dismissal Type, Fielder and Bowler,
             or an empty dict when no known dismissal marker is present
    '''
    def text(part):
        # Tokens may include empty strings left by repeated spaces
        return " ".join(part).strip()

    def record(batsman, kind, fielder='-', bowler='-'):
        return {'Batsman': batsman,
                'Dismissal Type': kind,
                'Fielder': fielder,
                'Bowler': bowler}

    has_bowler = 'b' in tokens

    # "c and b <bowler>": the bowler took the catch, so there is no fielder
    if 'and' in tokens:
        return record(text(tokens[:tokens.index('c')]), 'Caught and Bowled',
                      bowler=text(tokens[tokens.index('b')+1:]))

    # "c <fielder> b <bowler>"
    if 'c' in tokens and has_bowler:
        c_at, b_at = tokens.index('c'), tokens.index('b')
        return record(text(tokens[:c_at]), 'Catch',
                      fielder=text(tokens[c_at+1:b_at]),
                      bowler=text(tokens[b_at+1:]))

    # "st <keeper> b <bowler>"
    if 'st' in tokens and has_bowler:
        st_at, b_at = tokens.index('st'), tokens.index('b')
        return record(text(tokens[:st_at]), 'Stump',
                      fielder=text(tokens[st_at+1:b_at]),
                      bowler=text(tokens[b_at+1:]))

    # "lbw b <bowler>"
    if 'lbw' in tokens and has_bowler:
        return record(text(tokens[:tokens.index('lbw')]), 'lbw',
                      bowler=text(tokens[tokens.index('b')+1:]))

    # "hit wicket b <bowler>": must be tested before the plain "b" case,
    # otherwise the row is labelled Bowled and the batter name ends up as
    # "<name> hit wicket"
    if 'hit' in tokens and has_bowler:
        return record(text(tokens[:tokens.index('hit')]), 'Hit wicket',
                      bowler=text(tokens[tokens.index('b')+1:]))

    # "b <bowler>"
    if has_bowler:
        b_at = tokens.index('b')
        return record(text(tokens[:b_at]), 'Bowled',
                      bowler=text(tokens[b_at+1:]))

    # "not out"
    if 'not' in tokens:
        return record(text(tokens[:tokens.index('not')]), 'Not out')

    # "run out (<fielder>)": [1:-1] strips the surrounding parentheses
    if 'run' in tokens:
        return record(text(tokens[:tokens.index('run')]), 'Run out',
                      fielder=text(tokens[tokens.index('out')+1:])[1:-1])

    # Unknown wording: leave the row empty (NaN in every column)
    return {}


sample = updated_batting_df['Score']
# [:-6] drops the five figures plus the empty token left by the double space
# in front of them, keeping only the name / dismissal part of the row
data_list = [_parse_dismissal(itr.split(' ')[:-6]) for itr in sample]

scores_df = pd.DataFrame(data_list)

In [132]:
scores_df.head()

Unnamed: 0,Batsman,Dismissal Type,Fielder,Bowler
0,Kohli,Catch,Rachin Ravindra,Mustafizur
1,du Plessis (c),Catch,Rachin Ravindra,Mustafizur
2,Rajat Patidar,Catch,Dhoni,Mustafizur
3,Maxwell,Catch,Dhoni,D Chahar
4,Green,Bowled,-,Mustafizur


In [133]:
batting_df.head()

Unnamed: 0,Match_id,Score,Team,Opposite_Team,Innings
0,1,Kohli c Rachin Ravindra b Mustafizur 21 20...,Royal Challengers Bengaluru,Chennai Super Kings,1
1,1,du Plessis (c) c Rachin Ravindra b Mustafiz...,Royal Challengers Bengaluru,Chennai Super Kings,1
2,1,Rajat Patidar c Dhoni b Mustafizur 0 3 0 0...,Royal Challengers Bengaluru,Chennai Super Kings,1
3,1,Maxwell c Dhoni b D Chahar 0 1 0 0 0.00,Royal Challengers Bengaluru,Chennai Super Kings,1
4,1,Green b Mustafizur 18 22 1 0 81.82,Royal Challengers Bengaluru,Chennai Super Kings,1


In [134]:
batting_stats.head()

Unnamed: 0,Runs,Balls,Boundaries,Sixes,StrikeRate
0,21,20,0,1,105.0
1,35,23,8,0,152.17
2,0,3,0,0,0.0
3,0,1,0,0,0.0
4,18,22,1,0,81.82


In [135]:
batting_df.shape,scores_df.shape,batting_stats.shape

((1129, 5), (1129, 4), (1129, 5))

In [136]:
# Renumber all three frames 0..n-1 so they line up positionally, then join them column-wise
batting_parts = [
    frame.reset_index(drop=True)
    for frame in (batting_df, scores_df, batting_stats)
]
final_batting_df = pd.concat(batting_parts, axis=1)
final_batting_df.head()

Unnamed: 0,Match_id,Score,Team,Opposite_Team,Innings,Batsman,Dismissal Type,Fielder,Bowler,Runs,Balls,Boundaries,Sixes,StrikeRate
0,1,Kohli c Rachin Ravindra b Mustafizur 21 20...,Royal Challengers Bengaluru,Chennai Super Kings,1,Kohli,Catch,Rachin Ravindra,Mustafizur,21,20,0,1,105.0
1,1,du Plessis (c) c Rachin Ravindra b Mustafiz...,Royal Challengers Bengaluru,Chennai Super Kings,1,du Plessis (c),Catch,Rachin Ravindra,Mustafizur,35,23,8,0,152.17
2,1,Rajat Patidar c Dhoni b Mustafizur 0 3 0 0...,Royal Challengers Bengaluru,Chennai Super Kings,1,Rajat Patidar,Catch,Dhoni,Mustafizur,0,3,0,0,0.0
3,1,Maxwell c Dhoni b D Chahar 0 1 0 0 0.00,Royal Challengers Bengaluru,Chennai Super Kings,1,Maxwell,Catch,Dhoni,D Chahar,0,1,0,0,0.0
4,1,Green b Mustafizur 18 22 1 0 81.82,Royal Challengers Bengaluru,Chennai Super Kings,1,Green,Bowled,-,Mustafizur,18,22,1,0,81.82


In [137]:
# The raw row text is no longer needed once it has been parsed. Rebinding
# (instead of inplace=True) and tolerating an already-missing column keep the
# cell safe to re-run; the in-place drop raised KeyError the second time.
final_batting_df = final_batting_df.drop(columns=['Score'], errors='ignore')

In [138]:
# Collapse full team names to their initials ("Chennai Super Kings" -> "CSK").
# Single-word values are left untouched, so re-running this cell no longer
# shrinks an existing abbreviation such as "CSK" down to "C".
for col in ('Team', 'Opposite_Team'):
    final_batting_df[col] = final_batting_df[col].apply(
        lambda x: ''.join(re.findall(r'\b\w', x)) if len(x.split()) > 1 else x)

In [139]:
final_batting_df.head()

Unnamed: 0,Match_id,Team,Opposite_Team,Innings,Batsman,Dismissal Type,Fielder,Bowler,Runs,Balls,Boundaries,Sixes,StrikeRate
0,1,RCB,CSK,1,Kohli,Catch,Rachin Ravindra,Mustafizur,21,20,0,1,105.0
1,1,RCB,CSK,1,du Plessis (c),Catch,Rachin Ravindra,Mustafizur,35,23,8,0,152.17
2,1,RCB,CSK,1,Rajat Patidar,Catch,Dhoni,Mustafizur,0,3,0,0,0.0
3,1,RCB,CSK,1,Maxwell,Catch,Dhoni,D Chahar,0,1,0,0,0.0
4,1,RCB,CSK,1,Green,Bowled,-,Mustafizur,18,22,1,0,81.82


### Bowling Statistics

In [140]:
bowl_1_df.head()

Unnamed: 0,Match_id,Bowling,Team,Opposite_Team,Innings
0,1,D Chahar 4 0 37 1 0 2 9.20,Chennai Super Kings,Royal Challengers Bengaluru,1
1,1,Tushar Deshpande 4 0 47 0 0 6 11.80,Chennai Super Kings,Royal Challengers Bengaluru,1
2,1,M Theekshana 4 0 36 0 0 0 9.00,Chennai Super Kings,Royal Challengers Bengaluru,1
3,1,Mustafizur 4 0 29 4 0 2 7.20,Chennai Super Kings,Royal Challengers Bengaluru,1
4,1,Ravindra Jadeja 4 0 21 0 0 0 5.20,Chennai Super Kings,Royal Challengers Bengaluru,1


In [141]:
bowl_2_df.head()

Unnamed: 0,Match_id,Bowling,Team,Opposite_Team,Innings
0,1,Mohammed Siraj 4 0 38 0 0 1 9.50,Royal Challengers Bengaluru,Chennai Super Kings,2
1,1,Yash Dayal 3 0 28 1 0 0 9.30,Royal Challengers Bengaluru,Chennai Super Kings,2
2,1,Alzarri Joseph 3.4 0 38 0 0 4 10.40,Royal Challengers Bengaluru,Chennai Super Kings,2
3,1,Karn Sharma 2 0 24 1 0 0 12.00,Royal Challengers Bengaluru,Chennai Super Kings,2
4,1,Mayank Dagar 2 0 6 0 0 0 3.00,Royal Challengers Bengaluru,Chennai Super Kings,2


In [142]:
# Stack both innings. ignore_index=True renumbers the rows 0..n-1; without it
# the second innings repeats the row labels of the first.
bowling_df = pd.concat([bowl_1_df,bowl_2_df],axis=0,ignore_index=True)
bowling_df.head()

Unnamed: 0,Match_id,Bowling,Team,Opposite_Team,Innings
0,1,D Chahar 4 0 37 1 0 2 9.20,Chennai Super Kings,Royal Challengers Bengaluru,1
1,1,Tushar Deshpande 4 0 47 0 0 6 11.80,Chennai Super Kings,Royal Challengers Bengaluru,1
2,1,M Theekshana 4 0 36 0 0 0 9.00,Chennai Super Kings,Royal Challengers Bengaluru,1
3,1,Mustafizur 4 0 29 4 0 2 7.20,Chennai Super Kings,Royal Challengers Bengaluru,1
4,1,Ravindra Jadeja 4 0 21 0 0 0 5.20,Chennai Super Kings,Royal Challengers Bengaluru,1


In [143]:
def bowler_stats_extraction(samples):
    '''
    Split one bowler's scorecard row into the bowler's name and seven figures.

    Input : one bowling row as a string
    Output : pd.Series indexed by Bowler, Overs, Maindens, Runs, Wickets,
             No_Balls, Wides, Economy; the values are raw string tokens

    Sample Input : 'Boult   10 1 48 1 0 1 4.80'
    '''
    tokens = samples.split(' ')
    # The last seven tokens are the figures; everything before them is the name
    figures = tokens[-7:]
    columns = ('Overs', 'Maindens', 'Runs', 'Wickets', 'No_Balls', 'Wides', 'Economy')
    record = {'Bowler': " ".join(tokens[:-7]).strip()}
    # Explicit indexing keeps the IndexError for rows with fewer than seven tokens
    record.update((column, figures[pos]) for pos, column in enumerate(columns))
    return pd.Series(record)

In [144]:
# Split every raw bowling row into the bowler's name and the seven figure columns
bowl_stats = bowling_df['Bowling'].apply(bowler_stats_extraction)
bowl_stats.head()

Unnamed: 0,Bowler,Overs,Maindens,Runs,Wickets,No_Balls,Wides,Economy
0,D Chahar,4,0,37,1,0,2,9.2
1,Tushar Deshpande,4,0,47,0,0,6,11.8
2,M Theekshana,4,0,36,0,0,0,9.0
3,Mustafizur,4,0,29,4,0,2,7.2
4,Ravindra Jadeja,4,0,21,0,0,0,5.2


In [145]:
# Concatenating 2 dfs row_wise to get exact score card (Dismissal+Stats)
final_bowling_df = pd.concat([bowling_df,bowl_stats],axis=1)
final_bowling_df.head()

Unnamed: 0,Match_id,Bowling,Team,Opposite_Team,Innings,Bowler,Overs,Maindens,Runs,Wickets,No_Balls,Wides,Economy
0,1,D Chahar 4 0 37 1 0 2 9.20,Chennai Super Kings,Royal Challengers Bengaluru,1,D Chahar,4,0,37,1,0,2,9.2
1,1,Tushar Deshpande 4 0 47 0 0 6 11.80,Chennai Super Kings,Royal Challengers Bengaluru,1,Tushar Deshpande,4,0,47,0,0,6,11.8
2,1,M Theekshana 4 0 36 0 0 0 9.00,Chennai Super Kings,Royal Challengers Bengaluru,1,M Theekshana,4,0,36,0,0,0,9.0
3,1,Mustafizur 4 0 29 4 0 2 7.20,Chennai Super Kings,Royal Challengers Bengaluru,1,Mustafizur,4,0,29,4,0,2,7.2
4,1,Ravindra Jadeja 4 0 21 0 0 0 5.20,Chennai Super Kings,Royal Challengers Bengaluru,1,Ravindra Jadeja,4,0,21,0,0,0,5.2


In [146]:
# The raw row text is no longer needed once it has been parsed. Rebinding
# (instead of inplace=True) and tolerating an already-missing column keep the
# cell safe to re-run; the in-place drop raised KeyError the second time.
final_bowling_df = final_bowling_df.drop(columns='Bowling', errors='ignore')

In [147]:
final_bowling_df.head()

Unnamed: 0,Match_id,Team,Opposite_Team,Innings,Bowler,Overs,Maindens,Runs,Wickets,No_Balls,Wides,Economy
0,1,Chennai Super Kings,Royal Challengers Bengaluru,1,D Chahar,4,0,37,1,0,2,9.2
1,1,Chennai Super Kings,Royal Challengers Bengaluru,1,Tushar Deshpande,4,0,47,0,0,6,11.8
2,1,Chennai Super Kings,Royal Challengers Bengaluru,1,M Theekshana,4,0,36,0,0,0,9.0
3,1,Chennai Super Kings,Royal Challengers Bengaluru,1,Mustafizur,4,0,29,4,0,2,7.2
4,1,Chennai Super Kings,Royal Challengers Bengaluru,1,Ravindra Jadeja,4,0,21,0,0,0,5.2


In [148]:
# Collapse full team names to their initials ("Chennai Super Kings" -> "CSK").
# Single-word values are left untouched, so re-running this cell no longer
# shrinks an existing abbreviation such as "CSK" down to "C".
for col in ('Team', 'Opposite_Team'):
    final_bowling_df[col] = final_bowling_df[col].apply(
        lambda x: ''.join(re.findall(r'\b\w', x)) if len(x.split()) > 1 else x)

In [149]:
final_bowling_df.head()

Unnamed: 0,Match_id,Team,Opposite_Team,Innings,Bowler,Overs,Maindens,Runs,Wickets,No_Balls,Wides,Economy
0,1,CSK,RCB,1,D Chahar,4,0,37,1,0,2,9.2
1,1,CSK,RCB,1,Tushar Deshpande,4,0,47,0,0,6,11.8
2,1,CSK,RCB,1,M Theekshana,4,0,36,0,0,0,9.0
3,1,CSK,RCB,1,Mustafizur,4,0,29,4,0,2,7.2
4,1,CSK,RCB,1,Ravindra Jadeja,4,0,21,0,0,0,5.2


In [150]:
# Write the fully parsed batting and bowling tables (row index omitted)
final_batting_df.to_csv('data/Batting_df2.csv',index=False)
final_bowling_df.to_csv('data/Bowling_df2.csv',index=False)