In [1]:
import pandas as pd
import numpy as np

In [2]:
df=pd.read_csv('FINAL.csv')

In [3]:
df['wicket_type']=df['wicket_type'].fillna(0)

In [4]:
for index,row in enumerate(df['wicket_type']):
    if row=='obstructing the field' or row=='retired hurt' or row=='run out' or row==0:
        df.loc[index,'wicket']=0
    else:
        df.loc[index,'wicket']=1

In [5]:
def balls_per_dismissal(balls, dismissals):
    if dismissals > 0:
        return balls/dismissals
    else:
#         return math.inf
        return balls/1
    
def runs_per_ball(runs_conceeded, balls):
    if balls > 0:
        return runs_conceeded/balls
    else:
        return 1000
    
def runs_per_dismissal(runs_conceeded, dismissals):
    if dismissals > 0:
        return runs_conceeded/dismissals
    elif (runs_conceeded == 0):
        return 1000
    else:
        return runs_conceeded/1

In [6]:
def ByCustom(df, current_phase, current_venue):
    
    df = df[df.over_type == current_phase]
    df = df[df.venue == current_venue]
    df.reset_index(inplace = True, drop = True)

    df['isDot'] = df['Total_runs'].apply(lambda x: 1 if x == 0 else 0)
    df['isOne'] = df['Total_runs'].apply(lambda x: 1 if x == 1 else 0)
    df['isTwo'] = df['Total_runs'].apply(lambda x: 1 if x == 2 else 0)
    df['isThree'] = df['Total_runs'].apply(lambda x: 1 if x == 3 else 0)

    df['isFour'] = df['runs_off_bat'].apply(lambda x: 1 if x == 4 else 0)
    df['isSix'] = df['runs_off_bat'].apply(lambda x: 1 if x == 6 else 0)

    runs = pd.DataFrame(df.groupby(['bowler'])['Total_runs'].sum()).reset_index().rename(columns = {'Total_runs':'runs'})
    innings = pd.DataFrame(df.groupby(['bowler'])['match_id'].apply(lambda x: len(list(np.unique(x))))).reset_index().rename(columns = {'match_id':'innings'})
    balls = pd.DataFrame(df.groupby(['bowler'])['match_id'].count()).reset_index().rename(columns = {'match_id':'balls'})
    dismissals = pd.DataFrame(df.groupby(['bowler'])['wicket'].sum()).reset_index().rename(columns = {'wicket':'dismissals'})

    dots = pd.DataFrame(df.groupby(['bowler'])['isDot'].sum()).reset_index().rename(columns = {'isDot':'dots'})
    ones = pd.DataFrame(df.groupby(['bowler'])['isOne'].sum()).reset_index().rename(columns = {'isOne':'ones'})
    twos = pd.DataFrame(df.groupby(['bowler'])['isTwo'].sum()).reset_index().rename(columns = {'isTwo':'twos'})
    threes = pd.DataFrame(df.groupby(['bowler'])['isThree'].sum()).reset_index().rename(columns = {'isThree':'threes'})
    fours = pd.DataFrame(df.groupby(['bowler'])['isFour'].sum()).reset_index().rename(columns = {'isFour':'fours'})
    sixes = pd.DataFrame(df.groupby(['bowler'])['isSix'].sum()).reset_index().rename(columns = {'isSix':'sixes'})

    df = pd.merge(innings, balls, on = 'bowler').merge(runs, on = 'bowler').merge(dismissals, on = 'bowler').merge(dots, on = 'bowler')
    df['dot%'] = df.apply(lambda x: 100*(x['dots']/x['balls']), axis = 1)
    
    # balls_per_dismissal, runs_per_ball, runs_per_dismissal
    df['SR'] = df.apply(lambda x: balls_per_dismissal(x['balls'], x['dismissals']), axis = 1)
    df['Eco'] = df.apply(lambda x: 6*runs_per_ball(x['runs'], x['balls']), axis = 1)
    df['Avg'] = df.apply(lambda x: runs_per_dismissal(int(x['runs']), x['dismissals']), axis = 1)
    
    return df

In [7]:
def normalization(df1):
    max_val = max(df1['dot%'])
    min_val = min(df1['dot%'])

    df1['mx_dot%'] = df1['dot%'].apply(lambda x: (x-min_val)/(max_val - min_val))

    max_val = max(df1['Eco'])
    min_val = min(df1['Eco'])

    df1['mx_Eco'] = df1['Eco'].apply(lambda x: (x-min_val)/(max_val - min_val))


    max_val = max(df1['SR'])
    min_val = min(df1['SR'])

    df1['mx_SR'] = df1['SR'].apply(lambda x: (x-min_val)/(max_val - min_val))

    max_val = max(df1['Avg'])
    min_val = min(df1['Avg'])

    df1['mx_Avg'] = df1['Avg'].apply(lambda x: (x-min_val)/(max_val - min_val))

    df1['minmax_score'] = df1.apply(lambda x: x['mx_dot%']*0.13 + x['mx_Eco']*0.16 + x['mx_SR']*0.35 + x['mx_Avg']*0.35, axis = 1)
    display(df1.sort_values('minmax_score', ascending = True).head(10))

In [8]:
def standardization(df2):
    mean_val = np.mean(df2['Eco'])
    std_val = np.std(df2['Eco'])

    df2['z_Eco'] = df2['Eco'].apply(lambda x: (x - mean_val)/std_val)

    mean_val = np.mean(df2['dot%'])
    std_val = np.std(df2['dot%'])

    df2['z_dot%'] = df2['dot%'].apply(lambda x: (x - mean_val)/std_val)

    mean_val = np.mean(df2['SR'])
    std_val = np.std(df2['SR'])

    df2['z_SR'] = df2['SR'].apply(lambda x: (x - mean_val)/std_val)

    mean_val = np.mean(df2['Avg'])
    std_val = np.std(df2['Avg'])

    df2['z_Avg'] = df2['Avg'].apply(lambda x: (x - mean_val)/std_val)

    df2['Z_score'] = df2.apply(lambda x: x['z_dot%']*0.13 + x['z_Eco']*0.16 + x['z_SR']*0.35 + x['z_Avg']*0.35, axis = 1)
    display(df2.sort_values('Z_score', ascending = True).head(10))

In [9]:
wt_sr, wt_dot, wt_eco, wt_avg = 0.35, 0.13, 0.16, 0.35
def topis_method(df3):    
    df3['calc_SR'] = df3['SR'].apply(lambda x: x*x) 
    df3['calc_dot%'] = df3['dot%'].apply(lambda x: x*x) 
    df3['calc_Eco'] = df3['Eco'].apply(lambda x: x*x) 
    df3['calc_Avg'] = df3['Avg'].apply(lambda x: x*x)

    sq_sr, sq_dot, sq_Eco, sq_Avg = np.sqrt(df3[['calc_SR','calc_dot%', 'calc_Eco', 'calc_Avg']].sum(axis = 0))

    df3['calc_SR'] = df3['calc_SR'].apply(lambda x: x/sq_sr) 
    df3['calc_dot%'] = df3['calc_dot%'].apply(lambda x: x/sq_dot) 
    df3['calc_Eco'] = df3['calc_Eco'].apply(lambda x: x/sq_Eco) 
    df3['calc_Avg'] = df3['calc_Avg'].apply(lambda x: x/sq_Avg)

    df3['calc_SR'] = df3['calc_SR'].apply(lambda x: x*wt_sr) 
    df3['calc_dot%'] = df3['calc_dot%'].apply(lambda x: x*wt_dot) 
    df3['calc_Eco'] = df3['calc_Eco'].apply(lambda x: x*wt_eco) 
    df3['calc_Avg'] = df3['calc_Avg'].apply(lambda x: x*wt_avg)

    best_sr, worst_sr = min(df3['calc_SR']), max(df3['calc_SR'])
    best_dot, worst_dot = min(df3['calc_dot%']), max(df3['calc_dot%'])
    best_Eco, worst_Eco = min(df3['calc_Eco']), max(df3['calc_Eco'])
    best_Avg, worst_Avg = min(df3['calc_Avg']), max(df3['calc_Avg'])

    df3['dev_best_SR'] = df3['calc_SR'].apply(lambda x: (x-best_sr)*(x-best_sr)) 
    df3['dev_best_dot%'] = df3['calc_dot%'].apply(lambda x: (x-best_dot)*(x-best_dot)) 
    df3['dev_best_Eco'] = df3['calc_Eco'].apply(lambda x: (x-best_Eco)*(x-best_Eco)) 
    df3['dev_best_Avg'] = df3['calc_Avg'].apply(lambda x: (x-best_Avg)*(x-best_Avg))

    df3['dev_best_sqrt'] = df3.apply(lambda x: x['dev_best_SR'] + x['dev_best_dot%'] + x['dev_best_Eco'] + x['dev_best_Avg'], axis = 1) 

    df3['dev_worst_SR'] = df3['calc_SR'].apply(lambda x: (x-worst_sr)*(x-worst_sr)) 
    df3['dev_worst_dot%'] = df3['calc_dot%'].apply(lambda x: (x-worst_dot)*(x-worst_dot)) 
    df3['dev_worst_Eco'] = df3['calc_Eco'].apply(lambda x: (x-worst_Eco)*(x-worst_Eco)) 
    df3['dev_worst_Avg'] = df3['calc_Avg'].apply(lambda x: (x-worst_Avg)*(x-worst_Avg))

    df3['dev_worst_sqrt'] = df3.apply(lambda x: x['dev_worst_SR'] + x['dev_worst_dot%'] + x['dev_worst_Eco'] + x['dev_worst_Avg'], axis = 1) 
    df3['topsis_score'] = df3.apply(lambda x: x['dev_worst_sqrt']/(x['dev_worst_sqrt'] + x['dev_best_sqrt']), axis = 1)
    display(df3[['bowler','innings','balls','runs','dismissals','dots','dot%','SR','Eco','Avg','topsis_score']].sort_values(['topsis_score'], ascending = False).reset_index(drop = True).head(10))

In [10]:
#######################################################################################################

                                ##  Names of the stadium

#Chepauk Stadium(Chennai) 
#Chinnaswamy Stadium(Banglore)
#Dr DY Patil Sports Academy
#Dr. Y.S. Rajasekhara Reddy Cricket Stadium(vizag)
#Dubai International Cricket Stadium  
#Eden Gardens
#Feroz shah kotla(Delhi) 
#Kingsmead',
#Motera Stadium(Ahemdabad) 
#PCA Stadium Mohali
#Pune stadium
#Rajiv Gandhi stadium(Hyderbad)
#Sawai Mansingh Stadium
#Sharjah Cricket Stadium 
#Sheikh Zayed Stadium
#SuperSport Park
#Wankhede Stadium(Mumbai)
######################################################################################################

############################################Phase###############################################
#PowerPlay 
#Middle overs 
#Death overs'

#################################################################################################

phase=input('Enter The phase: ')
stadium=input('Enter the Stadium')
df1 = ByCustom(df, phase, stadium)
df1 = df1[df1.balls >= 25]
print('\t\t\t\t\t Using Normalization ')
normalization(df1)
print('\t\t\t\t\t Using Standardization ')
standardization(df1)
print('\t\t\t\t\tUsing Topis Method')
topis_method(df1)


Enter The phase: Death overs
Enter the StadiumWankhede Stadium(Mumbai)
					 Using Normalization 


Unnamed: 0,bowler,innings,balls,runs,dismissals,dots,dot%,SR,Eco,Avg,mx_dot%,mx_Eco,mx_SR,mx_Avg,minmax_score
22,B Kumar,4,26,29,5.0,11,42.307692,5.2,6.692308,5.8,0.995589,0.136795,0.0,0.0,0.151314
128,SL Malinga,37,339,402,41.0,116,34.218289,8.268293,7.115044,9.804878,0.689454,0.198823,0.061612,0.040372,0.157135
100,P Kumar,5,30,48,4.0,7,23.333333,7.5,9.6,12.0,0.277523,0.563435,0.046185,0.0625,0.164267
38,DE Bollinger,2,25,24,2.0,9,36.0,12.5,5.76,12.0,0.756881,0.0,0.146586,0.0625,0.171575
106,R Ashwin,4,25,45,3.0,4,16.0,8.333333,10.8,15.0,0.0,0.739508,0.062918,0.092742,0.172802
49,Harbhajan Singh,7,42,68,5.0,11,26.190476,8.4,9.714286,13.6,0.385649,0.580204,0.064257,0.078629,0.192977
32,CH Morris,4,45,73,7.0,15,33.333333,6.428571,9.733333,10.428571,0.655963,0.582998,0.02467,0.046659,0.20352
105,PP Ojha,5,33,35,3.0,14,42.424242,11.0,6.363636,11.666667,1.0,0.08857,0.116466,0.05914,0.205633
12,AJ Tye,2,26,33,2.0,9,34.615385,13.0,7.615385,16.5,0.704481,0.272236,0.156627,0.107863,0.227712
35,CR Woakes,5,38,73,4.0,8,21.052632,9.5,11.526316,18.25,0.191212,0.846079,0.086345,0.125504,0.234377


					 Using Standardization 


Unnamed: 0,bowler,innings,balls,runs,dismissals,dots,dot%,SR,Eco,Avg,mx_dot%,mx_Eco,mx_SR,mx_Avg,minmax_score,z_Eco,z_dot%,z_SR,z_Avg,Z_score
22,B Kumar,4,26,29,5.0,11,42.307692,5.2,6.692308,5.8,0.995589,0.136795,0.0,0.0,0.151314,-1.922709,2.259113,-1.124565,-1.15632,-0.812259
128,SL Malinga,37,339,402,41.0,116,34.218289,8.268293,7.115044,9.804878,0.689454,0.198823,0.061612,0.040372,0.157135,-1.650933,0.999793,-0.81072,-0.931794,-0.744056
100,P Kumar,5,30,48,4.0,7,23.333333,7.5,9.6,12.0,0.277523,0.563435,0.046185,0.0625,0.164267,-0.053363,-0.694725,-0.889306,-0.808728,-0.693164
38,DE Bollinger,2,25,24,2.0,9,36.0,12.5,5.76,12.0,0.756881,0.0,0.146586,0.0625,0.171575,-2.522086,1.277161,-0.377874,-0.808728,-0.652814
106,R Ashwin,4,25,45,3.0,4,16.0,8.333333,10.8,15.0,0.0,0.739508,0.062918,0.092742,0.172802,0.718113,-1.836344,-0.804068,-0.640538,-0.629439
49,Harbhajan Singh,7,42,68,5.0,11,26.190476,8.4,9.714286,13.6,0.385649,0.580204,0.064257,0.078629,0.192977,0.020111,-0.249939,-0.797248,-0.719027,-0.559971
32,CH Morris,4,45,73,7.0,15,33.333333,6.428571,9.733333,10.428571,0.655963,0.582998,0.02467,0.046659,0.20352,0.032357,0.862027,-0.998899,-0.896828,-0.546264
105,PP Ojha,5,33,35,3.0,14,42.424242,11.0,6.363636,11.666667,1.0,0.08857,0.116466,0.05914,0.205633,-2.13401,2.277257,-0.531304,-0.827416,-0.52095
12,AJ Tye,2,26,33,2.0,9,34.615385,13.0,7.615385,16.5,0.704481,0.272236,0.156627,0.107863,0.227712,-1.329266,1.061611,-0.326731,-0.556443,-0.383784
35,CR Woakes,5,38,73,4.0,8,21.052632,9.5,11.526316,18.25,0.191212,0.846079,0.086345,0.125504,0.234377,1.185059,-1.049774,-0.684733,-0.458333,-0.346934


					Using Topis Method


Unnamed: 0,bowler,innings,balls,runs,dismissals,dots,dot%,SR,Eco,Avg,topsis_score
0,P Kumar,5,30,48,4.0,7,23.333333,7.5,9.6,12.0,0.999745
1,R Ashwin,4,25,45,3.0,4,16.0,8.333333,10.8,15.0,0.999598
2,Harbhajan Singh,7,42,68,5.0,11,26.190476,8.4,9.714286,13.6,0.99952
3,A Nehra,6,43,84,5.0,10,23.255814,8.6,11.72093,16.8,0.999246
4,CR Woakes,5,38,73,4.0,8,21.052632,9.5,11.526316,18.25,0.999074
5,HV Patel,3,34,68,4.0,9,26.470588,8.5,12.0,17.0,0.999058
6,CH Morris,4,45,73,7.0,15,33.333333,6.428571,9.733333,10.428571,0.999028
7,KA Pollard,14,82,144,8.0,21,25.609756,10.25,10.536585,18.0,0.998975
8,SL Malinga,37,339,402,41.0,116,34.218289,8.268293,7.115044,9.804878,0.998937
9,SP Narine,6,52,84,5.0,16,30.769231,10.4,9.692308,16.8,0.998779
