In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
all_match = pd.read_csv('all_matches.csv')

In [3]:
all_match.head()

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,striker,non_striker,...,extras,wides,noballs,byes,legbyes,penalty,wicket_type,player_dismissed,other_wicket_type,other_player_dismissed
0,335982,2007/08,2008-04-18,M Chinnaswamy Stadium,1,0.1,Kolkata Knight Riders,Royal Challengers Bangalore,SC Ganguly,BB McCullum,...,1,,,,1.0,,,,,
1,335982,2007/08,2008-04-18,M Chinnaswamy Stadium,1,0.2,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,...,0,,,,,,,,,
2,335982,2007/08,2008-04-18,M Chinnaswamy Stadium,1,0.3,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,...,1,1.0,,,,,,,,
3,335982,2007/08,2008-04-18,M Chinnaswamy Stadium,1,0.4,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,...,0,,,,,,,,,
4,335982,2007/08,2008-04-18,M Chinnaswamy Stadium,1,0.5,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,...,0,,,,,,,,,


In [4]:
def clean_data(x):
    return str(x).replace('/', '')

all_match['season'] = all_match['season'].apply(clean_data)

In [5]:
def remove_suffixes(x):
    date = str(x)
    
    for suffix in ['08', '10', '21']:
        if date.endswith(suffix):
            date = date[:-2]
            
    return date

all_match['season'] = all_match['season'].apply(remove_suffixes)

In [6]:
all_match['season'] = all_match['season'].astype('int64')

In [7]:
all_match = all_match[all_match.season >= 2022]

In [8]:
def balls_per_dismissal(balls, dismissals):
    if dismissals > 0:
        return balls/dismissals
    
    else:
        return balls/1
    
def balls_per_boundary(balls, boundary):
    if boundary > 0:
        return balls/boundary
    
    else:
        return balls/1

In [9]:
def phase(over):
    if over <= 6:
        return 'PowerPlay'
    
    elif over <= 15:
        return 'Middle Over'
    
    else:
        return 'Death Over'

In [10]:
all_match['Phase'] = all_match['ball'].apply(lambda x: phase(x))

In [11]:
def ByCustom(df, current_phase, current_opposition):
    df = df[df.Phase == current_phase]
    df = df[df.bowling_team == current_opposition]
    current_venue = 'MA Chidambaram Stadium, Chepauk', 'MA Chidambaram Stadium', 'MA Chidambaram Stadium, Chepauk, Chennai'
    df.reset_index(inplace= True, drop= True)
    
    df['isDot'] = df['runs_off_bat'].apply(lambda x: 1 if x == 0 else 0)
    df['isOne'] = df['runs_off_bat'].apply(lambda x: 1 if x == 1 else 0)
    df['isTwo'] = df['runs_off_bat'].apply(lambda x: 1 if x == 2 else 0)
    df['isThree'] = df['runs_off_bat'].apply(lambda x: 1 if x == 3 else 0)
    df['isFour'] = df['runs_off_bat'].apply(lambda x: 1 if x == 4 else 0)
    df['isSix'] = df['runs_off_bat'].apply(lambda x: 1 if x == 6 else 0)
    
    runs = pd.DataFrame(df.groupby(['striker', 'match_id'])['runs_off_bat'].sum().reset_index()).groupby(['striker'])['runs_off_bat'].sum().reset_index().rename(columns={'runs_off_bat':'runs'})
    dismissals = pd.DataFrame(df.groupby(['striker'])['player_dismissed'].count()).reset_index().rename(columns= {'player_dismissed':'dismissals'})
    balls = pd.DataFrame(df.groupby(['striker'])['match_id'].count()).reset_index().rename(columns= {'match_id':'balls'})
    innings = pd.DataFrame(df.groupby(['striker'])['match_id'].apply(lambda x: len(list(np.unique(x)))).reset_index()).rename(columns= {'match_id':'innings'})
    
    dots = pd.DataFrame(df.groupby(['striker'])['isDot'].sum()).reset_index().rename(columns= {'isDot':'dots'})
    ones = pd.DataFrame(df.groupby(['striker'])['isOne'].sum()).reset_index().rename(columns= {'isOne':'ones'})
    twos = pd.DataFrame(df.groupby(['striker'])['isTwo'].sum()).reset_index().rename(columns= {'isTwo':'twos'})
    threes = pd.DataFrame(df.groupby(['striker'])['isThree'].sum()).reset_index().rename(columns= {'isThree':'threes'})
    fours = pd.DataFrame(df.groupby(['striker'])['isFour'].sum()).reset_index().rename(columns= {'isFour':'fours'})
    sixes = pd.DataFrame(df.groupby(['striker'])['isSix'].sum()).reset_index().rename(columns= {'isSix':'sixes'})
    
    df = pd.merge(runs, balls, on= 'striker').merge(innings, on= 'striker').merge(dismissals, on= 'striker').merge(dots, on= 'striker').merge(ones, on= 'striker').merge(twos, on= 'striker').merge(threes, on= 'striker').merge(fours, on= 'striker').merge(sixes, on= 'striker')
    
    df['SR'] = df.apply(lambda x: 100*(x['runs']/x['balls']), axis= 1)
    df['RPI'] = df.apply(lambda x: x['runs']/x['innings'], axis= 1)
    df['BPD'] = df.apply(lambda x: balls_per_dismissal(x['balls'], x['dismissals']), axis= 1)
    df['BPB'] = df.apply(lambda x: balls_per_boundary(x['balls'], x['fours']+x['sixes']), axis= 1)
    df['dot_percentage'] = df.apply(lambda x: 100*(x['dots']/x['balls']), axis= 1)
    
    return df

In [12]:
df = ByCustom(all_match, 'Middle Over', 'Chennai Super Kings')

In [13]:
df.head(2)

Unnamed: 0,striker,runs,balls,innings,dismissals,dots,ones,twos,threes,fours,sixes,SR,RPI,BPD,BPB,dot_percentage
0,A Badoni,23,22,2,0,7,11,3,0,0,1,104.545455,11.5,22.0,22.0,31.818182
1,A Raghuvanshi,0,1,1,1,1,0,0,0,0,0,0.0,0.0,1.0,1.0,100.0


In [14]:
wt_sr, wt_rpi, wt_bpd, wt_dot_percentage = 0.14, 0.27, 0.16, 0.45

In [15]:
df = df[df.innings >= 2]
df = df[df.balls >= 30]

In [16]:
df['calc_SR'] = df['SR'].apply(lambda x: x*x)
df['calc_RPI'] = df['RPI'].apply(lambda x: x*x)
df['calc_BPD'] = df['BPD'].apply(lambda x: x*x)
df['calc_dot_percentage'] = df['dot_percentage'].apply(lambda x: x*x)

sq_sr,sq_rpi,sq_bpd,sq_dot_percentage = np.sqrt(df[['calc_SR', 'calc_RPI', 'calc_BPD', 'calc_dot_percentage']].sum(axis=0))

df['calc_SR'] = df['calc_SR'].apply(lambda x: x/sq_sr)
df['calc_RPI'] = df['calc_RPI'].apply(lambda x: x/sq_rpi)
df['calc_BPD'] = df['calc_BPD'].apply(lambda x: x/sq_bpd)
df['calc_dot_percentage'] = df['calc_dot_percentage'].apply(lambda x: x/sq_dot_percentage)

df['calc_SR'] = df['calc_SR'].apply(lambda x: x*wt_sr)
df['calc_RPI'] = df['calc_RPI'].apply(lambda x: x*wt_rpi)
df['calc_BPD'] = df['calc_BPD'].apply(lambda x: x*wt_bpd)
df['calc_dot_percentage'] = df['calc_dot_percentage'].apply(lambda x: x*wt_dot_percentage)

best_sr, worst_sr = max(df['calc_SR']), min(df['calc_SR'])
best_rpi, worst_rpi = max(df['calc_RPI']), min(df['calc_RPI'])
best_BPD, worst_BPD = max(df['calc_BPD']), min(df['calc_BPD'])
best_dot_percentage, worst_dot_percentage = max(df['calc_dot_percentage']), min(df['calc_dot_percentage'])

In [17]:
df['dev_best_SR'] = df['calc_SR'].apply(lambda x: (x-best_sr)*(x-best_sr))
df['dev_best_RPI'] = df['calc_RPI'].apply(lambda x: (x-best_rpi)*(x-best_rpi))
df['dev_best_BPD'] = df['calc_BPD'].apply(lambda x: (x-best_BPD)*(x-best_BPD))
df['dev_best_dot_percentage'] = df['calc_dot_percentage'].apply(lambda x: (x-best_dot_percentage)*(x-best_dot_percentage))

df['dev_best_sqrt'] = df.apply(lambda x: x['dev_best_SR'] + x['dev_best_RPI'] + x['dev_best_BPD'] + x['dev_best_dot_percentage'], axis= 1)

df['dev_worst_SR'] = df['calc_SR'].apply(lambda x: (x-worst_sr)*(x-worst_sr))
df['dev_worst_RPI'] = df['calc_RPI'].apply(lambda x: (x-worst_rpi)*(x-worst_rpi))
df['dev_worst_BPD'] = df['calc_BPD'].apply(lambda x: (x-worst_BPD)*(x-worst_BPD))
df['dev_worst_dot_percentage'] = df['calc_dot_percentage'].apply(lambda x: (x-worst_dot_percentage)*(x-worst_dot_percentage))

df['dev_worst_sqrt'] = df.apply(lambda x: x['dev_worst_SR'] + x['dev_worst_RPI'] + x['dev_worst_BPD'] + x['dev_worst_dot_percentage'], axis= 1)

In [18]:
df['score'] = df.apply(lambda x: x['dev_worst_sqrt']/(x['dev_worst_sqrt']+x['dev_best_sqrt']), axis= 1)

In [19]:
df[['striker', 'score']].head()

Unnamed: 0,striker,score
3,AK Markram,0.105685
7,Abhishek Sharma,0.247957
14,B Sai Sudharsan,0.603412
15,C Green,0.10918
16,D Padikkal,0.27583


In [20]:
df[['striker', 'innings', 'runs', 'balls', 'dismissals', 'SR', 'RPI', 'dot_percentage', 'score']].sort_values(['score'], ascending= False).reset_index(drop= True).head(25)

Unnamed: 0,striker,innings,runs,balls,dismissals,SR,RPI,dot_percentage,score
0,B Sai Sudharsan,4,158,92,2,171.73913,39.5,21.73913,0.603412
1,GJ Maxwell,3,64,33,2,193.939394,21.333333,21.212121,0.5477
2,DA Warner,2,65,44,1,147.727273,32.5,29.545455,0.443541
3,HR Shokeen,2,42,43,2,97.674419,21.0,48.837209,0.372852
4,N Pooran,4,83,63,0,131.746032,20.75,28.571429,0.35996
5,RK Singh,3,60,55,0,109.090909,20.0,40.0,0.346692
6,DA Miller,4,86,64,2,134.375,21.5,37.5,0.333798
7,LS Livingstone,2,44,37,1,118.918919,22.0,40.540541,0.321783
8,Shubman Gill,5,130,86,4,151.162791,26.0,27.906977,0.321124
9,Tilak Varma,4,103,89,2,115.730337,25.75,37.078652,0.292356


#### From the above analysis, I conclude that B Sai Sudarshan is having a good SR, RPI and comparatively less dot percentage against CSK in M Chidambaram Stadium in Middle Over against 3 spinners
