In [1]:
import os
import pandas as pd
import numpy as np
from scipy.stats import zscore
import warnings
warnings.filterwarnings("ignore")

In [2]:
def load_data(file_path , filename):
    csv_path = os.path.join(file_path, filename)
    return pd.read_csv(csv_path)

In [3]:
def write_csv_data(file_path, filename, df):
    isExist = os.path.exists(file_path)
    if not isExist:
        os.makedirs(file_path)
        print("The new directory is created!")
    csv_path = os.path.join(file_path, filename)
    df.to_csv(csv_path)
    
    if os.path.exists(csv_path) and os.path.getsize(csv_path) > 0:
        print(filename + " was written to successfully!")

In [4]:
print(os.getcwd())
batting_file_path = "Cleaned/Batting"


# All batting dataframes
odi_data = load_data(batting_file_path, "odi_data.csv")
t20_data = load_data(batting_file_path, "t20_data.csv")

C:\Users\sheru\Documents\GitHub\t20-blocks


In [5]:
def remove_unnamed_cols(df_data):
    unnamed_cols = df_data.filter(regex='Unnamed').columns
    df_data.drop(columns=unnamed_cols, inplace=True)
    return df_data

In [6]:
t20_data = remove_unnamed_cols(t20_data)
t20_data


Unnamed: 0,Player,Span,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0,4s,6s,Final Region
0,V Kohli,2010,75,70,20,2633,94,52,1907,138,0,24,2,247,71,INDIA
1,RG Sharma,2007,104,96,14,2633,118,32,1905,138,4,19,6,234,120,INDIA
2,MJ Guptill,2009,83,80,7,2436,105,33,1810,134,2,15,2,215,113,NZ
3,Shoaib Malik,2006,111,104,30,2263,75,30,1824,124,0,7,1,186,61,PAK
4,BB McCullum,2005,71,70,10,2140,123,35,1571,136,2,13,3,199,91,NZ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1730,SR Welsh,2008,1,0,0,0,0,0,0,0,0,0,0,0,0,CAN
1731,SG Whittingham,2018,3,0,0,0,0,0,0,0,0,0,0,0,0,SCOT
1732,LJ Woodcock,2010,3,0,0,0,0,0,0,0,0,0,0,0,0,NZ
1733,Zamir Khan,2012,1,0,0,0,0,0,0,0,0,0,0,0,0,AFG


In [7]:
odi_data = remove_unnamed_cols(odi_data)
odi_data

Unnamed: 0,Player,Span,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0,Final Region
0,SR Tendulkar,1989,463,452,41,18426,200,44,21367,86,49,96,20,INDIA
1,KC Sangakkara,2000,404,380,41,14234,169,41,18048,78,25,93,15,SL
2,RT Ponting,1995,375,365,39,13704,164,42,17046,80,30,82,20,AUS
3,ST Jayasuriya,1989,445,433,18,13430,189,32,14725,91,28,68,34,SL
4,DPMD Jayawardene,1998,448,418,39,12650,144,33,16020,78,19,77,28,SL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2480,ZS Ansari,2015,1,0,0,0,0,0,0,0,0,0,0,ENG
2481,Ariful Haque,2018,1,0,0,0,0,0,0,0,0,0,0,BDESH
2482,Ashfaq Ahmed,1994,3,0,0,0,0,0,0,0,0,0,0,PAK
2483,MD Bailey,1998,1,0,0,0,0,0,0,0,0,0,0,NZ


In [8]:
def calc_Z(data_col):
    data_col_z = zscore(data_col)
    data_col_z = data_col_z + abs(min(data_col_z))
    data_col = data_col_z
    return data_col

def calc_priority_Z(priority_Z):
    min_p = min(priority_Z)
    range_p = max(priority_Z) - min_p
    priority_Z = ((priority_Z - min_p) / range_p) * 10
    return priority_Z

def rate_player_t20(t20_df):
    priority_list = []
    #Priority 1 - Average
    avg_ave = t20_df['Ave'].mean()
    std_ave = t20_df['Ave'].std()
    ave_Z = zscore(t20_df['Ave'])
    #Shifting z score
    ave_Z = ave_Z + abs(min(ave_Z))
    bf_Z = zscore(t20_df['BF'])
    bf_Z = bf_Z + abs(min(bf_Z))
    mat_Z = zscore(t20_df['Mat'])
    mat_Z = mat_Z + abs(min(mat_Z))
    priority1_Z = (2*ave_Z) * bf_Z * mat_Z
    min_p1 = min(priority1_Z)
    range_p1 = max(priority1_Z) - min_p1
    priority1_Z = ((priority1_Z - min_p1) / range_p1) * 10
    t20_df['p1_z'] = priority1_Z
    priority_list.append(priority1_Z)
    
    #Priority 2 - Strike Rate
    sr_Z = calc_Z(t20_df['SR'])
    runs_Z = calc_Z(t20_df['Runs'])
    priority2_Z = (2*sr_Z) * runs_Z
    priority2_Z = calc_priority_Z(priority2_Z)
    t20_df['p2_z'] = priority2_Z
    priority_list.append(priority2_Z)
    
    #Priority 3 - 100
    hund_Z = calc_Z(t20_df['100'])
    priority3_Z = calc_priority_Z(hund_Z)
    t20_df['p3_z'] = priority3_Z
    priority_list.append(priority3_Z)
    
    #Priority 4 - 50, 4 and 6
    fifty_Z = calc_Z(t20_df['50'])
    four_Z = calc_Z(t20_df['4s'])
    six_Z = calc_Z(t20_df['6s'])
    priority4_Z = fifty_Z * four_Z * six_Z
    priority4_Z = calc_priority_Z(priority4_Z)
    t20_df['p4_z'] = priority4_Z
    priority_list.append(priority4_Z)
    
    rating = 1
    
    #Final Rating calculation
    for i in range(len(priority_list)):
        rating+=((len(priority_list)-i+1) * priority_list[i])
    
    #Normalising rating
    min_r = min(rating)
    range_rating = max(rating) - min_r
    rating = ((rating - min_r) / range_rating) * 10
    t20_df['rating'] = rating
    print(rating)

In [9]:
rate_player_t20(t20_data)

0        7.966242
1       10.000000
2        7.378105
3        5.639427
4        6.182289
          ...    
1730     0.000000
1731     0.000000
1732     0.000000
1733     0.000000
1734     0.000000
Length: 1735, dtype: float64


In [10]:
zero_vals = (t20_data['rating']==0).sum()
zero_vals

212

In [11]:
t20_data

Unnamed: 0,Player,Span,Mat,Inns,NO,Runs,HS,Ave,BF,SR,...,50,0,4s,6s,Final Region,p1_z,p2_z,p3_z,p4_z,rating
0,V Kohli,2010,75,70,20,2633,94,52,1907,138,...,24,2,247,71,INDIA,10.000000,10.000000,0.0,7.888889,7.966242
1,RG Sharma,2007,104,96,14,2633,118,32,1905,138,...,19,6,234,120,INDIA,8.556505,10.000000,10.0,10.000000,10.000000
2,MJ Guptill,2009,83,80,7,2436,105,33,1810,134,...,15,2,215,113,NZ,6.674529,8.983636,5.0,6.830578,7.378105
3,Shoaib Malik,2006,111,104,30,2263,75,30,1824,124,...,7,1,186,61,PAK,8.202628,7.722827,0.0,1.488641,5.639427
4,BB McCullum,2005,71,70,10,2140,123,35,1571,136,...,13,3,199,91,NZ,5.245133,8.009820,5.0,4.412524,6.182289
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1730,SR Welsh,2008,1,0,0,0,0,0,0,0,...,0,0,0,0,CAN,0.000000,0.000000,0.0,0.000000,0.000000
1731,SG Whittingham,2018,3,0,0,0,0,0,0,0,...,0,0,0,0,SCOT,0.000000,0.000000,0.0,0.000000,0.000000
1732,LJ Woodcock,2010,3,0,0,0,0,0,0,0,...,0,0,0,0,NZ,0.000000,0.000000,0.0,0.000000,0.000000
1733,Zamir Khan,2012,1,0,0,0,0,0,0,0,...,0,0,0,0,AFG,0.000000,0.000000,0.0,0.000000,0.000000


In [12]:
t20_data_sorted = t20_data.sort_values('rating', ascending=False)
t20_data_sorted

Unnamed: 0,Player,Span,Mat,Inns,NO,Runs,HS,Ave,BF,SR,...,50,0,4s,6s,Final Region,p1_z,p2_z,p3_z,p4_z,rating
1,RG Sharma,2007,104,96,14,2633,118,32,1905,138,...,19,6,234,120,INDIA,8.556505,10.000000,10.0,10.000000,10.000000
0,V Kohli,2010,75,70,20,2633,94,52,1907,138,...,24,2,247,71,INDIA,10.000000,10.000000,0.0,7.888889,7.966242
2,MJ Guptill,2009,83,80,7,2436,105,33,1810,134,...,15,2,215,113,NZ,6.674529,8.983636,5.0,6.830578,7.378105
4,BB McCullum,2005,71,70,10,2140,123,35,1571,136,...,13,3,199,91,NZ,5.245133,8.009820,5.0,4.412524,6.182289
3,Shoaib Malik,2006,111,104,30,2263,75,30,1824,124,...,7,1,186,61,PAK,8.202628,7.722827,0.0,1.488641,5.639427
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1596,H Ssenyondo,2019,4,1,1,0,0,0,0,0,...,0,0,0,0,UGA,0.000000,0.000000,0.0,0.000000,0.000000
1597,Taijul Islam,2019,2,1,1,0,0,0,1,0,...,0,0,0,0,BDESH,0.000000,0.000000,0.0,0.000000,0.000000
1598,M Zondeki,2006,1,1,0,0,0,0,1,0,...,0,1,0,0,SA,0.000000,0.000000,0.0,0.000000,0.000000
1599,Zulqarnain Haider,2019,5,1,0,0,0,0,0,0,...,0,1,0,0,ESP,0.000000,0.000000,0.0,0.000000,0.000000


In [13]:
write_csv_data("Cleaned/Batting", "t20_data_rating.csv", t20_data_sorted)

t20_data_rating.csv was written to successfully!


In [22]:
def rate_player_odi(odi_df):
    priority_list = []
    #Priority 1 - Average
    ave_Z = calc_Z(odi_df['Ave'])
    bf_Z = calc_Z(odi_df['BF'])
    mat_Z = calc_Z(odi_df['Mat'])
    priority1_Z = (2*ave_Z) * bf_Z * mat_Z
    priority1_Z = calc_priority_Z(priority1_Z)
    odi_df['p1_z'] = priority1_Z
    priority_list.append(priority1_Z)
    
    #Priority 2 - Strike Rate
    hund_Z = calc_Z(odi_df['100'])
    fifty_Z = calc_Z(odi_df['50'])
    priority2_Z = (2*hund_Z) * fifty_Z
    priority2_Z = calc_priority_Z(priority2_Z)
    odi_df['p2_z'] = priority2_Z
    priority_list.append(priority2_Z)
    
    #Priority 3 - 100
    sr_Z = calc_Z(odi_df['SR'])
    runs_Z = calc_Z(odi_df['Runs'])
    priority3_Z = (2*sr_Z) * runs_Z
    odi_df['p3_z'] = priority3_Z
    priority_list.append(priority3_Z)
    
    # #Priority 4 - 50, 4 and 6
    # four_Z = calc_Z(odi_df['4s'])
    # six_Z = calc_Z(odi_df['6s'])
    # priority4_Z = four_Z * six_Z
    # priority4_Z = calc_priority_Z(priority4_Z)
    # odi_df['p4_z'] = priority4_Z
    # priority_list.append(priority4_Z)
    
    rating = 1
    
    #Final Rating calculation
    for i in range(len(priority_list)):
        rating+=((len(priority_list)-i+1) * priority_list[i])
    
    #Normalising rating
    min_r = min(rating)
    range_rating = max(rating) - min_r
    rating = ((rating - min_r) / range_rating) * 10
    odi_df['rating'] = rating
    print(rating)

In [23]:
rate_player_odi(odi_data)

0       10.000000
1        6.690866
2        6.540932
3        6.657806
4        5.643075
          ...    
2480     0.000000
2481     0.000000
2482     0.000000
2483     0.000000
2484     0.000000
Length: 2485, dtype: float64


In [24]:
zero_vals = (odi_data['rating']==0).sum()
zero_vals

76

In [25]:
odi_data_sorted = odi_data.sort_values('rating', ascending=False)
odi_data_sorted

Unnamed: 0,Player,Span,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0,Final Region,p1_z,p2_z,p3_z,rating
0,SR Tendulkar,1989,463,452,41,18426,200,44,21367,86,49,96,20,INDIA,10.000000,10.000000,72.043792,10.000000
1,KC Sangakkara,2000,404,380,41,14234,169,41,18048,78,25,93,15,SL,6.865620,4.942602,50.476428,6.690866
3,ST Jayasuriya,1989,445,433,18,13430,189,32,14725,91,28,68,34,SL,4.816705,4.047619,55.562842,6.657806
2,RT Ponting,1995,375,365,39,13704,164,42,17046,80,30,82,20,AUS,6.164604,5.229592,49.843024,6.540932
6,V Kohli,2008,242,233,39,11609,183,59,12445,93,43,55,13,INDIA,4.074044,5.027636,49.084549,6.051177
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2432,Ishtiaq Muhammad,2016,1,1,1,0,0,0,0,0,0,0,0,HKG,0.000000,0.000000,0.000000,0.000000
2431,SM Harwood,2009,1,1,0,0,0,0,4,0,0,0,1,AUS,0.000000,0.000000,0.000000,0.000000
2430,Harunur Rashid,1988,2,2,0,0,0,0,17,0,0,0,2,BDESH,0.000000,0.000000,0.000000,0.000000
2429,RG Hart,2002,2,1,0,0,0,0,9,0,0,0,1,NZ,0.000000,0.000000,0.000000,0.000000


In [26]:
write_csv_data("Cleaned/Batting", "odi_data_rating.csv", odi_data_sorted)

odi_data_rating.csv was written to successfully!
