In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current sessionP

/kaggle/input/bowlers-data-2016to2022/final_aggregated_bowler_data.csv
/kaggle/input/batsman-data-2016to2022/final_aggregated_batsman_data.csv


# Obtaining data from csv below which has been cleaned up


In [2]:
import pandas as pd
ipl_batsman_stats_data = pd.read_csv('/kaggle/input/batsman-data-2016to2022/final_aggregated_batsman_data.csv')
ipl_bowler_stats_data =  pd.read_csv('/kaggle/input/bowlers-data-2016to2022/final_aggregated_bowler_data.csv')
advanced_features_batting = ['Player','Mat','Inns','Avg', 'SR', 'BF','4s','6s'] 
advanced_features_bowling = ['Player','Mat','Inns','Avg', 'Econ', 'SR'] 
data_for_clustering_batsman = ipl_batsman_stats_data[advanced_features_batting]
data_for_clustering_bowler = ipl_bowler_stats_data[advanced_features_bowling]
print(data_for_clustering_batsman.head())



           Player  Mat  Inns        Avg          SR    BF   4s   6s
0  Shikhar Dhawan  110   110  38.561429  129.822857  2825  404   82
1     Virat Kohli  100   100  41.030000  130.228571  2618  302  108
2    David Warner   79    79  49.925000  138.330000  2368  327  113
3        KL Rahul   69    67  54.270000  141.684000  2094  258  126
4    Rishabh Pant   98    97  34.004286  146.625714  1918  260  129


# Here, we normalize the data using min-max scaling and then accordingly rank the players based on a Weighted Perfomance Score 


In [3]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the data
ipl_batsman_stats_data = pd.read_csv('/kaggle/input/batsman-data-2016to2022/final_aggregated_batsman_data.csv')
ipl_bowler_stats_data = pd.read_csv('/kaggle/input/bowlers-data-2016to2022/final_aggregated_bowler_data.csv')

advanced_features_batting = ['Player', 'Mat', 'Inns', 'Avg', 'SR', 'BF', '4s', '6s', 'Runs']
advanced_features_bowling = ['Player', 'Mat', 'Inns', 'Avg', 'Econ', 'SR', 'Wkts']

data_for_clustering_batsman = ipl_batsman_stats_data[advanced_features_batting].copy()
data_for_clustering_bowler = ipl_bowler_stats_data[advanced_features_bowling].copy()

scaler = MinMaxScaler()

data_for_clustering_batsman.loc[:, advanced_features_batting[2:]] = scaler.fit_transform(
    data_for_clustering_batsman.loc[:, advanced_features_batting[2:]]
)

data_for_clustering_bowler.loc[:, advanced_features_bowling[2:]] = scaler.fit_transform(
    data_for_clustering_bowler.loc[:, advanced_features_bowling[2:]]
)

# Define a weighted performance metric for batsmen
data_for_clustering_batsman['Weighted_Performance_Score'] = (
    data_for_clustering_batsman['Mat'] * (
        data_for_clustering_batsman['Avg'] + 
        data_for_clustering_batsman['SR'] + 
        data_for_clustering_batsman['BF'] + 
        data_for_clustering_batsman['4s'] + 
        data_for_clustering_batsman['6s'] +
        data_for_clustering_batsman['Runs']
    )
)

data_for_clustering_bowler['Weighted_Performance_Score'] = (
    data_for_clustering_bowler['Mat'] * (
        data_for_clustering_bowler['Avg'] + 
        data_for_clustering_bowler['Econ'] + 
        data_for_clustering_bowler['SR'] +
        data_for_clustering_bowler['Wkts']
    )
)

sorted_batsmen = data_for_clustering_batsman.sort_values(by='Weighted_Performance_Score', ascending=False).reset_index(drop=True)
sorted_bowlers = data_for_clustering_bowler.sort_values(by='Weighted_Performance_Score', ascending=False).reset_index(drop=True)

# Assign tiers (10 players per tier)
def assign_tiers(df, tier_size=10):
    df['Tier'] = (df.index // tier_size) + 1
    return df

sorted_batsmen = assign_tiers(sorted_batsmen)
sorted_bowlers = assign_tiers(sorted_bowlers)

# Reset the index to start from 1
sorted_batsmen.index = sorted_batsmen.index + 1
sorted_bowlers.index = sorted_bowlers.index + 1

# Display the top tiers
print("Top 3 Tiers for Batsmen:")
print(sorted_batsmen.head(30))

print("\nTop 3 Tiers for Bowlers:")
print(sorted_bowlers.head(30))

sorted_batsmen.to_csv('/kaggle/working/batsmen_tiers.csv', index=True)
sorted_bowlers.to_csv('/kaggle/working/bowlers_tiers.csv', index=True)


Top 3 Tiers for Batsmen:
              Player  Mat      Inns       Avg        SR        BF        4s  \
1     Shikhar Dhawan  110  1.000000  0.710548  0.324557  1.000000  1.000000   
2        Virat Kohli  100  0.908257  0.756035  0.325571  0.926700  0.747525   
3       Rishabh Pant   98  0.880734  0.626576  0.366564  0.678824  0.643564   
4       Sanju Samson  100  0.908257  0.572587  0.350943  0.702195  0.542079   
5       David Warner   79  0.715596  0.919937  0.345825  0.838173  0.809406   
6       Rohit Sharma   99  0.889908  0.518782  0.318471  0.691218  0.606436   
7        Jos Buttler   82  0.733945  0.712469  0.373479  0.669263  0.685644   
8           MS Dhoni  105  0.816514  0.790703  0.315257  0.544263  0.316832   
9     Dinesh Karthik  107  0.926606  0.617705  0.357118  0.537535  0.514851   
10    AB de Villiers   80  0.697248  0.780265  0.390383  0.576487  0.482673   
11  Suryakumar Yadav   93  0.761468  0.575456  0.337457  0.601275  0.633663   
12          KL Rahul   69  

 0.90825688 0.69724771 0.88990826 0.78899083 0.76146789 0.7706422
 0.7706422  0.9266055  0.69724771 0.66055046 0.81651376 0.57798165
 0.74311927 0.64220183 0.66055046 0.63302752 0.48623853 0.83486239
 0.65137615 0.66972477 0.5412844  0.77981651 0.58715596 0.56880734
 0.66972477 0.58715596 0.47706422 0.58715596 0.55963303 0.7706422
 0.34862385 0.34862385 0.41284404 0.32110092 0.4587156  0.48623853
 0.69724771 0.41284404 0.59633028 0.32110092 0.53211009 0.65137615
 0.3853211  0.37614679 0.39449541 0.37614679 0.6146789  0.39449541
 0.40366972 0.29357798 0.41284404 0.35779817 0.3853211  0.40366972
 0.29357798 0.30275229 0.2293578  0.27522936 0.12844037 0.34862385
 0.13761468 0.21100917 0.17431193 0.19266055 0.20183486 0.20183486
 0.33944954 0.1559633  0.33027523 0.12844037 0.22018349 0.23853211
 0.26605505 0.13761468 0.39449541 0.14678899 0.11926606 0.22018349
 0.24770642 0.09174312 0.11926606 0.3853211  0.26605505 0.14678899
 0.20183486 0.11009174 0.29357798 0.3853211  0.18348624 0.119266