## **Imports**

In [None]:
import warnings
# Silence every warning category for the rest of the notebook to keep cell output clean.
# NOTE(review): this also hides deprecation/FutureWarning messages from the libraries
# used below - consider narrowing the filter while developing.
warnings.filterwarnings('ignore')

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense

## **Batsmen in the playing 11**

First, we will predict the batsmen that will be included in the playing 11 of the finalist.

In [None]:
# Load the batting statistics table from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='Batters2023.csv')
df.head(n=5)

Unnamed: 0,Player,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0,4s,6s
0,GJ Maxwell (AUS),7,7,2,397,201,79.4,260,152.69,2,0,1,40,22
1,RG Sharma (IND),8,8,0,442,131,55.25,360,122.77,1,2,1,50,22
2,Q de Kock (SA),9,9,0,591,174,65.66,541,109.24,4,0,0,57,21
3,DA Warner (AUS),9,8,0,446,163,55.75,412,108.25,2,1,0,42,20
4,Fakhar Zaman (PAK),4,3,1,219,126,109.5,170,128.82,1,1,0,14,18


The `Player` column contains both the player's name and the team code in parentheses. We will extract the team code into a new `Team` column and keep only the name in `Player`.

In [None]:
# Player values look like "GJ Maxwell (AUS)": copy the code between the parentheses
# into its own Team column (expand=False returns a Series for the single capture group).
df['Team'] = df['Player'].str.extract(r'\((.*?)\)', expand=False)
# Strip the " (TEAM)" suffix from the name. regex=True must be explicit: since
# pandas 2.0 str.replace treats the pattern as a literal string by default, which
# would silently leave the suffix in place.
df['Player'] = df['Player'].str.replace(r' \((.*?)\)', '', regex=True)
df.head()

Unnamed: 0,Player,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0,4s,6s,Team
0,GJ Maxwell,7,7,2,397,201,79.4,260,152.69,2,0,1,40,22,AUS
1,RG Sharma,8,8,0,442,131,55.25,360,122.77,1,2,1,50,22,IND
2,Q de Kock,9,9,0,591,174,65.66,541,109.24,4,0,0,57,21,SA
3,DA Warner,9,8,0,446,163,55.75,412,108.25,2,1,0,42,20,AUS
4,Fakhar Zaman,4,3,1,219,126,109.5,170,128.82,1,1,0,14,18,PAK


In [None]:
# Check the column dtypes to spot statistics that were not parsed as numbers.
df.dtypes

Player     object
Mat         int64
Inns        int64
NO          int64
Runs        int64
HS          int64
Ave        object
BF          int64
SR        float64
100         int64
50          int64
0           int64
4s          int64
6s          int64
Team       object
dtype: object

Since the batting average should be numeric, we will replace the `-` placeholder in the `Ave` column with 0 and convert the column to float.

In [None]:
# Swap the '-' placeholder for '0' and cast the whole column to float in one chain.
df['Ave'] = df['Ave'].replace('-', '0').astype(float)

We will perform label encoding on Player and Team columns.

In [None]:
# One encoder per categorical column; both are kept so the integer codes can be
# mapped back to names / looked up by team code later on.
le1_player, le1_team = LabelEncoder(), LabelEncoder()

# assign() works on a copy of df, so the original frame keeps its string columns.
df_encoded = df.assign(
    Player=le1_player.fit_transform(df['Player']),
    Team=le1_team.fit_transform(df['Team']),
)
df_encoded.head()

Unnamed: 0,Player,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0,4s,6s,Team
0,24,7,7,2,397,201,79.4,260,152.69,2,0,1,40,22,1
1,67,8,8,0,442,131,55.25,360,122.77,1,2,1,50,22,4
2,64,9,9,0,591,174,65.66,541,109.24,4,0,0,57,21,8
3,15,9,8,0,446,163,55.75,412,108.25,2,1,0,42,20,1
4,21,4,3,1,219,126,109.5,170,128.82,1,1,0,14,18,7


Batsmen who have played the most matches are the most likely to play the final, so we use the number of matches (`Mat`) as the target variable for our model. Because this target is numeric rather than categorical, we treat the task as a regression problem.

In [None]:
# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling - and therefore the player ranking derived from this model - are repeatable.
keras.utils.set_random_seed(42)

# Features: every column except the target. 'PredictedMatches' is excluded as well so
# that re-running this cell after predictions were stored on df_encoded cannot leak
# the model's own output back in as an input feature.
feature_frame = df_encoded.drop(columns=['Mat', 'PredictedMatches'], errors='ignore')
scaler = StandardScaler()
X = scaler.fit_transform(feature_frame)

# Regression target: the raw number of matches. One-hot encoding it (pd.get_dummies)
# yields a multi-column class matrix that does not match the single linear output
# unit below, so the loss and MAE would not measure an error in matches.
y = df_encoded['Mat'].to_numpy(dtype='float32')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Small fully-connected regressor: two hidden ReLU layers and one linear output.
model = Sequential()
model.add(Dense(64, input_dim=X.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))

model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

model.fit(X_train, y_train, verbose=False, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# MAE is expressed in matches because the target is the unscaled match count.
loss, mae = model.evaluate(X_test, y_test)
print(f"Model Mean Absolute Error: {mae}")

Model Mean Absolute Error: 0.2731572091579437


### The input teams, i.e. the finalists, have been predicted in task 3.

In [None]:
# Team code whose playing 11 is predicted below; it is passed to the fitted team
# label encoder, so it must be one of the codes present in the Team column.
input_team = 'IND'

We will predict the batsmen who are most likely to play the final by predicting the number of matches for every player and ranking the selected team's players by that predicted value.

In [None]:
# Translate the team code into the integer label used in df_encoded['Team'].
encoded_input_team = le1_team.transform([input_team])
team_mask = df_encoded['Team'] == encoded_input_team[0]

# model.predict returns an (n, 1) array; flatten it to one value per player.
predicted_matches = model.predict(X).ravel()
# Attach the predictions to a new frame instead of mutating df_encoded, so the
# prediction column can never end up among the features if the model cell is re-run.
scored_df = df_encoded.assign(PredictedMatches=predicted_matches)
team_df = scored_df[team_mask]

# .copy() makes top_batters an independent frame, so decoding the Player column below
# does not write into a slice of team_df (SettingWithCopy).
top_batters = team_df.sort_values(by='PredictedMatches', ascending=False).head(7).copy()
top_batters['Player'] = le1_player.inverse_transform(top_batters['Player'])
top_batters_list = top_batters['Player'].tolist()

print("Top 7 Batters for", input_team)
for batter in top_batters_list:
  print(batter)

Top 7 Batters for IND
V Kohli
KL Rahul
RA Jadeja
SA Yadav
RG Sharma
Shubman Gill
SS Iyer


## **Bowlers in the playing 11**

In [None]:
# Load the bowling statistics table from disk and preview its first five rows.
# Note: this rebinds `df`, replacing the batting table loaded earlier.
df = pd.read_csv(filepath_or_buffer='Bowlers2023.csv')
df.head(n=5)

Unnamed: 0,Player,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,BBI,Ave,Econ,SR,4,5
0,A Zampa (AUS),9,9,432,72.0,1,389,21,04-Aug,18.52,5.4,20.57,3,0
1,D Madushanka (SL),9,9,470,78.2,4,525,21,May-80,25.0,6.7,22.38,1,1
2,G Coetzee (SA),7,7,327,54.3,1,349,18,Apr-44,19.38,6.4,18.16,1,0
3,M Jansen (SA),8,8,388,64.4,3,415,17,Mar-31,24.41,6.41,22.82,0,0
4,Mohammed Shami (IND),4,4,156,26.0,3,112,16,May-18,7.0,4.3,9.75,1,2


As with the batters, we will extract the team code from the `Player` column into a new `Team` column, and drop the non-numeric `BBI` column.

In [None]:
# Player values look like "A Zampa (AUS)": copy the code between the parentheses
# into its own Team column (expand=False returns a Series for the single capture group).
df['Team'] = df['Player'].str.extract(r'\((.*?)\)', expand=False)
# Strip the " (TEAM)" suffix from the name. regex=True must be explicit: since
# pandas 2.0 str.replace treats the pattern as a literal string by default, which
# would silently leave the suffix in place.
df['Player'] = df['Player'].str.replace(r' \((.*?)\)', '', regex=True)
# BBI is a text column (values such as '04-Aug' in the raw file), so it is dropped
# rather than encoded. Rebinding instead of inplace=True keeps the step explicit.
df = df.drop(columns=['BBI'])
df.head()

Unnamed: 0,Player,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,Ave,Econ,SR,4,5,Team
0,A Zampa,9,9,432,72.0,1,389,21,18.52,5.4,20.57,3,0,AUS
1,D Madushanka,9,9,470,78.2,4,525,21,25.0,6.7,22.38,1,1,SL
2,G Coetzee,7,7,327,54.3,1,349,18,19.38,6.4,18.16,1,0,SA
3,M Jansen,8,8,388,64.4,3,415,17,24.41,6.41,22.82,0,0,SA
4,Mohammed Shami,4,4,156,26.0,3,112,16,7.0,4.3,9.75,1,2,IND


In [None]:
# Check the column dtypes to confirm the statistics columns were parsed as numbers.
df.dtypes

Player     object
Mat         int64
Inns        int64
Balls       int64
Overs     float64
Mdns        int64
Runs        int64
Wkts        int64
Ave       float64
Econ      float64
SR        float64
4           int64
5           int64
Team       object
dtype: object

We will perform label encoding on Player and Team columns.

In [None]:
# Fresh encoders fitted on the bowling table; they replace the batting encoders
# held under the same names.
le1_player, le1_team = LabelEncoder(), LabelEncoder()

# assign() works on a copy of df, so the original frame keeps its string columns.
df_encoded = df.assign(
    Player=le1_player.fit_transform(df['Player']),
    Team=le1_team.fit_transform(df['Team']),
)
df_encoded.head()

Unnamed: 0,Player,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,Ave,Econ,SR,4,5,Team
0,1,9,9,432,72.0,1,389,21,18.52,5.4,20.57,3,0,1
1,11,9,9,470,78.2,4,525,21,25.0,6.7,22.38,1,1,9
2,13,7,7,327,54.3,1,349,18,19.38,6.4,18.16,1,0,8
3,29,8,8,388,64.4,3,415,17,24.41,6.41,22.82,0,0,8
4,41,4,4,156,26.0,3,112,16,7.0,4.3,9.75,1,2,4


Bowlers who have played the most matches are the most likely to play the final, so we use the number of matches (`Mat`) as the target variable for our model. Because this target is numeric rather than categorical, we treat the task as a regression problem.

In [None]:
# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling - and therefore the player ranking derived from this model - are repeatable.
keras.utils.set_random_seed(42)

# Features: every column except the target. 'PredictedMatches' is excluded as well so
# that re-running this cell after predictions were stored on df_encoded cannot leak
# the model's own output back in as an input feature.
feature_frame = df_encoded.drop(columns=['Mat', 'PredictedMatches'], errors='ignore')
scaler = StandardScaler()
X = scaler.fit_transform(feature_frame)

# Regression target: the raw number of matches. One-hot encoding it (pd.get_dummies)
# yields a multi-column class matrix that does not match the single linear output
# unit below, so the loss and MAE would not measure an error in matches.
y = df_encoded['Mat'].to_numpy(dtype='float32')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Small fully-connected regressor: two hidden ReLU layers and one linear output.
model = Sequential()
model.add(Dense(64, input_dim=X.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))

model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

model.fit(X_train, y_train, verbose=False, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# MAE is expressed in matches because the target is the unscaled match count.
loss, mae = model.evaluate(X_test, y_test)
print(f"Model Mean Absolute Error: {mae}")

Model Mean Absolute Error: 0.19764822721481323


We will predict the bowlers who are most likely to play the final by predicting the number of matches for every player and ranking the selected team's players by that predicted value.

In [None]:
# Translate the team code into the integer label used in df_encoded['Team'].
encoded_input_team = le1_team.transform([input_team])
team_mask = df_encoded['Team'] == encoded_input_team[0]

# model.predict returns an (n, 1) array; flatten it to one value per player.
predicted_matches = model.predict(X).ravel()
# Attach the predictions to a new frame instead of mutating df_encoded, so the
# prediction column can never end up among the features if the model cell is re-run.
scored_df = df_encoded.assign(PredictedMatches=predicted_matches)
team_df = scored_df[team_mask]

# head(6) is an upper bound: fewer rows come back when the team has fewer bowlers.
# .copy() makes top_bowlers an independent frame, so decoding the Player column below
# does not write into a slice of team_df (SettingWithCopy).
top_bowlers = team_df.sort_values(by='PredictedMatches', ascending=False).head(6).copy()
top_bowlers['Player'] = le1_player.inverse_transform(top_bowlers['Player'])
top_bowlers_list = top_bowlers['Player'].tolist()

print("Top 6 Bowlers for", input_team)
for bowler in top_bowlers_list:
  print(bowler)

Top 6 Bowlers for IND
JJ Bumrah
Kuldeep Yadav
Mohammed Shami
RA Jadeja
Mohammed Siraj


## **Final playing 11**

We will combine the batsmen and the bowlers to predict the final playing 11.

In [None]:
# Players that appear in both rankings (e.g. all-rounders); kept for inspection.
common_players = set(top_batters_list) & set(top_bowlers_list)

# Start from a copy of the batters so top_batters_list itself is never modified;
# aliasing it would make every re-run of this cell append more players to it.
merged_list = list(top_batters_list)
for bowler in top_bowlers_list:
  # Stop as soon as the side is complete.
  if len(merged_list) >= 11:
    break
  # Skip anyone already selected, whether as a batter or as an earlier bowler.
  if bowler not in merged_list:
    merged_list.append(bowler)

print(f"Predicted 11 Players for {input_team}:")
for player in merged_list:
  print(player)

Predicted 11 Players for IND:
V Kohli
KL Rahul
RA Jadeja
SA Yadav
RG Sharma
Shubman Gill
SS Iyer
JJ Bumrah
Kuldeep Yadav
Mohammed Shami
Mohammed Siraj
