In [165]:
import pandas as pd
import numpy as np
import plotly
import plotly.graph_objects as go

In [166]:
df_raw = pd.read_csv('dataset/individual_match/1254060.csv')

In [167]:
# df = df_raw[['match_id', 'venue', 'innings', 'batting_team', 'bowling_team', 'ball', 'striker', 'non_striker', 
#          'bowler', 'runs_off_bat', 'extras']].copy()
df = df_raw.copy()
df.columns

Index(['match_id', 'season', 'start_date', 'venue', 'innings', 'ball',
       'batting_team', 'bowling_team', 'striker', 'non_striker', 'bowler',
       'runs_off_bat', 'extras', 'wides', 'noballs', 'byes', 'legbyes',
       'penalty', 'wicket_type', 'player_dismissed', 'other_wicket_type',
       'other_player_dismissed'],
      dtype='object')

#### Batting Metrics

Hard Hitting Ability = (4Fours + 6Sixes) / Balls Played by Batsman

Fast Scoring Ability = Total Runs / Balls Played by Batsman

Running Between Wickets = (Total Runs – (4Fours + 6Sixes))/(Total Balls Played – Boundary Balls)

is_century

is_half_century

##### Season or Cummulative

Finisher = Not Out innings / Total Innings played

Consistency = Total Runs/Number of Times Out

Batting average (Ave): The total number of runs divided by the total number of innings in which the batsman was out. Ave = Runs/[I – NO] (also Avge or Avg.)

The number of innings in which the batsman scored one hundred runs or more.

Highest score is a season / or untill now in IPL

In [168]:
# get_all_batsman = 
df['striker'].unique()

array(['N Rana', 'Shubman Gill', 'RA Tripathi', 'AD Russell',
       'EJG Morgan', 'KD Karthik', 'Shakib Al Hasan', 'WP Saha',
       'DA Warner', 'MK Pandey', 'JM Bairstow', 'Mohammad Nabi',
       'V Shankar', 'Abdul Samad'], dtype=object)

In [169]:
# Batting metrics calculation
def count_4s(c):
    return ((c == 4) | (c==5)).sum()

def count_6s(c):
    return (c == 6).sum()

def get_balls_faced(c):
    return c.shape[0]

def get_wide_balls(c):
    return int(c.notnull().sum())

def get_total_runs(c):
    return c.sum()

def get_if_century(c):
    return int(c.sum() >= 100)

def get_if_half_century(c):
    return int(50 <= c.sum() < 100)

In [170]:
batsman_stats = df.groupby(['match_id', 'season', 'venue', 'innings','striker']).agg(
#     is_batsman = ('striker', lambda _ : True),
    no_of_4s = ('runs_off_bat', count_4s),
    no_of_6s = ('runs_off_bat', count_6s),
    balls_faced = ('ball', get_balls_faced), # faced = played + wides
    total_wides = ('wides', get_wide_balls), # faced = played + wides
    total_runs = ('runs_off_bat', get_total_runs),
    is_century = ('runs_off_bat', get_if_century),
    is_half_century = ('runs_off_bat', get_if_half_century)
).reset_index()
batsman_stats['balls_faced'] = batsman_stats['balls_faced'].astype('int')
batsman_stats['total_wides'] = batsman_stats['total_wides'].astype('int')
# batsman_stats.rename(columns = {'striker':'name'}, inplace=True)
batsman_stats

Unnamed: 0,match_id,season,venue,innings,striker,no_of_4s,no_of_6s,balls_faced,total_wides,total_runs,is_century,is_half_century
0,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,AD Russell,1,0,6,1,5,0,0
1,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,EJG Morgan,0,0,3,0,2,0,0
2,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,KD Karthik,2,1,10,1,22,0,0
3,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,N Rana,9,4,56,0,80,0,1
4,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,RA Tripathi,5,2,29,0,53,0,1
5,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,Shakib Al Hasan,0,0,6,1,3,0,0
6,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,Shubman Gill,1,1,13,0,15,0,0
7,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,Abdul Samad,0,2,8,0,19,0,0
8,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,DA Warner,0,0,4,0,3,0,0
9,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,JM Bairstow,5,3,41,1,55,0,1


In [171]:
# Hard Hitting Ability = (4Fours + 6Sixes) / Balls Played by Batsman

# Fast Scoring Ability = Total Runs / Balls Played by Batsman

# Running Between Wickets = (Total Runs – (4Fours + 6Sixes))/(Total Balls Played – Boundary Balls)

batsman_stats['balls_played'] = batsman_stats.balls_faced - batsman_stats.total_wides

batsman_stats['hard_hit_rate'] = (batsman_stats.no_of_4s + batsman_stats.no_of_6s) / batsman_stats.balls_played

batsman_stats['fast_score_rate'] = (batsman_stats.total_runs / batsman_stats.balls_played)
                                    
batsman_stats['run_bw_wickets'] = (batsman_stats.total_runs - (batsman_stats.no_of_4s*4 + batsman_stats.no_of_6s*6)) / (batsman_stats.balls_played - (batsman_stats.no_of_4s + batsman_stats.no_of_6s))

batsman_stats

Unnamed: 0,match_id,season,venue,innings,striker,no_of_4s,no_of_6s,balls_faced,total_wides,total_runs,is_century,is_half_century,balls_played,hard_hit_rate,fast_score_rate,run_bw_wickets
0,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,AD Russell,1,0,6,1,5,0,0,5,0.2,1.0,0.25
1,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,EJG Morgan,0,0,3,0,2,0,0,3,0.0,0.666667,0.666667
2,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,KD Karthik,2,1,10,1,22,0,0,9,0.333333,2.444444,1.333333
3,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,N Rana,9,4,56,0,80,0,1,56,0.232143,1.428571,0.465116
4,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,RA Tripathi,5,2,29,0,53,0,1,29,0.241379,1.827586,0.954545
5,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,Shakib Al Hasan,0,0,6,1,3,0,0,5,0.0,0.6,0.6
6,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,Shubman Gill,1,1,13,0,15,0,0,13,0.153846,1.153846,0.454545
7,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,Abdul Samad,0,2,8,0,19,0,0,8,0.25,2.375,1.166667
8,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,DA Warner,0,0,4,0,3,0,0,4,0.0,0.75,0.75
9,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,JM Bairstow,5,3,41,1,55,0,1,40,0.2,1.375,0.53125


In [172]:
def bowler_wickets(s):
    return (s.notnull() & s.isin(['caught', 'bowled', 'lbw', 'stumped', 'caught and bowled', 'hit wicket'])).sum()
df.columns

def get_if_3knocked(s):
    w = bowler_wickets(s)
    return int(w == 3)
    
def get_if_4knocked(s):
    w = bowler_wickets(s)
    return int(3 < w)

def get_if_shortwickets(s):
    w = bowler_wickets(s)
    return int(w < 3)


In [173]:
bowler_stats = df.groupby(['match_id', 'season', 'venue', 'innings','bowler']).agg(
#     is_bowler = ('bowler', lambda _ : True),
    runs_scored_bat = ('runs_off_bat', 'sum'),
    runs_scored_extra = ('extras', 'sum'),
    total_wides = ('wides', 'count'),
    total_noballs = ('noballs', 'count'),
    total_balls = ('ball', 'count'),
    wickets_taken = ('wicket_type', bowler_wickets),
    wiskets_3plus = ('wicket_type', get_if_3knocked),
    wickets_4plus = ('wicket_type', get_if_4knocked),
    wickets_shortIndex = ('wicket_type', get_if_shortwickets)
).reset_index()

# bowler_stats.rename(columns = {'bowler':'name'}, inplace=True)
bowler_stats

Unnamed: 0,match_id,season,venue,innings,bowler,runs_scored_bat,runs_scored_extra,total_wides,total_noballs,total_balls,wickets_taken,wiskets_3plus,wickets_4plus,wickets_shortIndex
0,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,B Kumar,44,1,1,0,25,1,0,0,1
1,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,Mohammad Nabi,32,0,0,0,24,2,0,0,1
2,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,Rashid Khan,19,5,1,0,25,2,0,0,1
3,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,Sandeep Sharma,35,0,0,0,18,0,0,0,1
4,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,T Natarajan,36,1,1,0,25,1,0,0,1
5,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,V Shankar,14,0,0,0,6,0,0,0,1
6,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,AD Russell,31,1,1,0,19,1,0,0,1
7,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,CV Varun,36,2,0,0,24,0,0,0,1
8,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,Harbhajan Singh,8,0,0,0,6,0,0,0,1
9,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,M Prasidh Krishna,31,4,4,0,28,2,0,0,1


#### Bowling Metrics

Economy = Runs Scored / (Number of balls bowled by bowler/6)  {Runs / overs bowled}

bowling_average = Number of balls bowled / Wickets Taken

Consistency = Runs Conceded / Wickets Taken

Crucial Wicket Taking Ability = Number of times Four or Five Wickets Taken / Number of Innings Played

Short Performance Index = (Wickets Taken – 4* Number of Times Four Wickets Taken – 5* Number of Times Five Wickets Taken) / (Innings Played – Number of Times Four Wickets or Five Wickets Taken)

In [174]:
bowler_stats['economy'] = (bowler_stats.runs_scored_bat + bowler_stats.runs_scored_extra) / (bowler_stats.total_balls // 6)
bowler_stats['strike_rate'] = bowler_stats.total_balls / bowler_stats.wickets_taken
bowler_stats['bowling_average'] = (bowler_stats.runs_scored_bat + bowler_stats.runs_scored_extra) / bowler_stats.wickets_taken
bowler_stats['illegal_bowl_rate'] = (bowler_stats.total_wides + bowler_stats.total_noballs) / bowler_stats.total_balls

bowler_stats

Unnamed: 0,match_id,season,venue,innings,bowler,runs_scored_bat,runs_scored_extra,total_wides,total_noballs,total_balls,wickets_taken,wiskets_3plus,wickets_4plus,wickets_shortIndex,economy,strike_rate,bowling_average,illegal_bowl_rate
0,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,B Kumar,44,1,1,0,25,1,0,0,1,11.25,25.0,45.0,0.04
1,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,Mohammad Nabi,32,0,0,0,24,2,0,0,1,8.0,12.0,16.0,0.0
2,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,Rashid Khan,19,5,1,0,25,2,0,0,1,6.0,12.5,12.0,0.04
3,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,Sandeep Sharma,35,0,0,0,18,0,0,0,1,11.666667,inf,inf,0.0
4,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,T Natarajan,36,1,1,0,25,1,0,0,1,9.25,25.0,37.0,0.04
5,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",1,V Shankar,14,0,0,0,6,0,0,0,1,14.0,inf,inf,0.0
6,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,AD Russell,31,1,1,0,19,1,0,0,1,10.666667,19.0,32.0,0.052632
7,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,CV Varun,36,2,0,0,24,0,0,0,1,9.5,inf,inf,0.0
8,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,Harbhajan Singh,8,0,0,0,6,0,0,0,1,8.0,inf,inf,0.0
9,1254060,2021,"MA Chidambaram Stadium, Chepauk, Chennai",2,M Prasidh Krishna,31,4,4,0,28,2,0,0,1,8.75,14.0,17.5,0.142857


In [175]:
# print(bowler_stats.name.unique())
# print(batsman_stats.name.unique())
# s = set(bowler_stats.name.unique())
# s.update(batsman_stats.name.unique())
# print(len(s))

In [176]:
batsman_stats_to_keep = ['match_id', 'season', 'venue', 'innings', 'striker','is_century','is_half_century','run_bw_wickets', 'hard_hit_rate','fast_score_rate', 'balls_played']
batsman_stats = batsman_stats.loc[:, batsman_stats_to_keep]
bowler_stats_to_keep =  ['match_id', 'season', 'venue', 'innings', 'bowler','illegal_bowl_rate','wiskets_3plus','wickets_4plus','wickets_shortIndex','economy','strike_rate', 'bowling_average']
bowler_stats = bowler_stats.loc[:, bowler_stats_to_keep]
# batsman_stats
print(batsman_stats.shape)
print(bowler_stats.shape)
print(bowler_stats.shape[0] + batsman_stats.shape[0])

# players_stats_per_match = batsman_stats.merge(bowler_stats, on=['venue','season','match_id','innings'])
# print(players_stats_per_match.shape)

(14, 12)
(12, 13)
26


In [177]:
# players_stats_per_match