Dataset Used : https://www.kaggle.com/datasets/patrickb1912/ipl-complete-dataset-20082020?select=matches.csv

### **Loading the Data**

In [1]:
import pandas as pd

In [50]:
data = pd.read_csv('matches.csv')
data.head()

Unnamed: 0,id,season,city,date,match_type,player_of_match,venue,team1,team2,toss_winner,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
0,335982,2007/08,Bangalore,18-04-2008,League,BB McCullum,M Chinnaswamy Stadium,Royal Challengers Bengaluru,Kolkata Knight Riders,Royal Challengers Bengaluru,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
1,335983,2007/08,Chandigarh,19-04-2008,League,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",Punjab Kings,Chennai Super Kings,Chennai Super Kings,bat,Chennai Super Kings,runs,33.0,241.0,20.0,N,,MR Benson,SL Shastri
2,335984,2007/08,Delhi,19-04-2008,League,MF Maharoof,Feroz Shah Kotla,Delhi Capitals,Rajasthan Royals,Rajasthan Royals,bat,Delhi Capitals,wickets,9.0,130.0,20.0,N,,Aleem Dar,GA Pratapkumar
3,335985,2007/08,Mumbai,20-04-2008,League,MV Boucher,Wankhede Stadium,Mumbai Indians,Royal Challengers Bengaluru,Mumbai Indians,bat,Royal Challengers Bengaluru,wickets,5.0,166.0,20.0,N,,SJ Davis,DJ Harper
4,335986,2007/08,Kolkata,20-04-2008,League,DJ Hussey,Eden Gardens,Kolkata Knight Riders,Sunrisers Hyderabad,Sunrisers Hyderabad,bat,Kolkata Knight Riders,wickets,5.0,111.0,20.0,N,,BF Bowden,K Hariharan


### **Data Pre Processing**

In [40]:
for col in data.columns:
    if col not in ["id", "season", "city", "date", "player_of_match", "venue", "result_margin", "target_runs", "target_overs", "umpire1", "umpire2"]:
        print("Unique Values in :",col)
        print(len(data[col].unique()),":" ,data[col].unique())

Unique Values in : match_type
5 : ['League' 'Qualifier 1' 'Qualifier 2' 'Final' 'Eliminator']
Unique Values in : team1
12 : ['Royal Challengers Bengaluru' 'Punjab Kings' 'Delhi Capitals'
 'Mumbai Indians' 'Kolkata Knight Riders' 'Rajasthan Royals'
 'Sunrisers Hyderabad' 'Chennai Super Kings' 'Kochi Tuskers Kerala'
 'Pune Warriors' 'Gujarat Titans' 'Lucknow Super Giants']
Unique Values in : team2
12 : ['Kolkata Knight Riders' 'Chennai Super Kings' 'Rajasthan Royals'
 'Royal Challengers Bengaluru' 'Sunrisers Hyderabad' 'Punjab Kings'
 'Delhi Capitals' 'Mumbai Indians' 'Kochi Tuskers Kerala' 'Pune Warriors'
 'Gujarat Titans' 'Lucknow Super Giants']
Unique Values in : toss_winner
12 : ['Royal Challengers Bengaluru' 'Chennai Super Kings' 'Rajasthan Royals'
 'Mumbai Indians' 'Sunrisers Hyderabad' 'Punjab Kings'
 'Kolkata Knight Riders' 'Delhi Capitals' 'Kochi Tuskers Kerala'
 'Pune Warriors' 'Gujarat Titans' 'Lucknow Super Giants']
Unique Values in : toss_decision
2 : ['field' 'bat']
Unique 

In [51]:
data = data.drop(["id", "date", "player_of_match", "result_margin","target_runs", "target_overs", "method", "result","umpire1", "umpire2"], axis=1)
data.head()

Unnamed: 0,season,city,match_type,venue,team1,team2,toss_winner,toss_decision,winner,super_over
0,2007/08,Bangalore,League,M Chinnaswamy Stadium,Royal Challengers Bengaluru,Kolkata Knight Riders,Royal Challengers Bengaluru,field,Kolkata Knight Riders,N
1,2007/08,Chandigarh,League,"Punjab Cricket Association Stadium, Mohali",Punjab Kings,Chennai Super Kings,Chennai Super Kings,bat,Chennai Super Kings,N
2,2007/08,Delhi,League,Feroz Shah Kotla,Delhi Capitals,Rajasthan Royals,Rajasthan Royals,bat,Delhi Capitals,N
3,2007/08,Mumbai,League,Wankhede Stadium,Mumbai Indians,Royal Challengers Bengaluru,Mumbai Indians,bat,Royal Challengers Bengaluru,N
4,2007/08,Kolkata,League,Eden Gardens,Kolkata Knight Riders,Sunrisers Hyderabad,Sunrisers Hyderabad,bat,Kolkata Knight Riders,N


In [52]:
len(data)

1095

In [53]:
data.to_csv("dataset.csv")

In [105]:
data = pd.read_csv('dataset.csv')

In [106]:
from sklearn.preprocessing import LabelEncoder

le_team = LabelEncoder()
le_city = LabelEncoder()
le_venue = LabelEncoder()
le_match_type = LabelEncoder()

In [107]:
data['team1'] = le_team.fit_transform(data['team1'])
data['team2'] = le_team.transform(data['team2'])
data['city'] = le_city.fit_transform(data['city'])
data['venue'] = le_venue.fit_transform(data['venue'])
data['match_type'] = le_match_type.fit_transform(data['match_type'])

In [108]:
data['toss_winner'] = (data['toss_winner'] == data['team2']).astype(int)  # 0 for team1, 1 for team2
data['toss_decision'] = (data['toss_decision'] == 'field').astype(int)    # 0 for bat, 1 for field
data['winner'] = (data['winner'] == data['team2']).astype(int)            # 0 for team1, 1 for team2
data['super_over'] = (data['super_over'] == 'Y').astype(int)              # 0 for N, 1 for Y

In [109]:
import tensorflow as tf

In [110]:
X = data[['team1', 'team2', 'city', 'venue', 'match_type']]
y_toss_winner = tf.keras.utils.to_categorical(data['toss_winner'])
y_toss_decision = data['toss_decision']
y_winner = tf.keras.utils.to_categorical(data['winner'])
y_super_over = data['super_over']

In [111]:
y_toss_winner

array([[1.],
       [1.],
       [1.],
       ...,
       [1.],
       [1.],
       [1.]])

In [112]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_tw_train, y_tw_test, y_td_train, y_td_test, y_w_train, y_w_test, y_so_train, y_so_test = train_test_split(
    X, y_toss_winner, y_toss_decision, y_winner, y_super_over,
    test_size=0.1,
    shuffle=False
)

In [113]:
inputs = tf.keras.Input(shape=(5,))
layer1 = tf.keras.layers.Dense(128, activation='relu')(inputs)
dropout1 = tf.keras.layers.Dropout(0.2)(layer1)
batchnorm1 = tf.keras.layers.BatchNormalization()(dropout1)
layer2 = tf.keras.layers.Dense(64, activation='relu')(batchnorm1)

In [114]:
toss_winner = tf.keras.layers.Dense(2, activation='softmax', name='toss_winner')(layer2)
toss_decision = tf.keras.layers.Dense(1, activation='sigmoid', name='toss_decision')(layer2)
match_winner = tf.keras.layers.Dense(2, activation='softmax', name='match_winner')(layer2)
super_over_prob = tf.keras.layers.Dense(1, activation='sigmoid', name='super_over_prob')(layer2)

In [115]:
model = tf.keras.Model(inputs=inputs, outputs=[toss_winner, toss_decision, match_winner, super_over_prob])

In [116]:
model.compile(
    optimizer='adam',
    loss = {
        'toss_winner': 'categorical_crossentropy',
        'toss_decision': 'binary_crossentropy',
        'match_winner': 'categorical_crossentropy',
        'super_over_prob': 'binary_crossentropy'
    },
    loss_weights={'toss_winner': 0.25, 'toss_decision': 0.25, 'match_winner': 0.35, 'super_over_prob': 0.15},
    metrics={
        'toss_winner': 'accuracy',
        'toss_decision': ['accuracy', tf.keras.metrics.AUC()],
        'match_winner': 'accuracy',
        'super_over_prob': ['accuracy', tf.keras.metrics.AUC()]
    }
)

In [117]:
model.summary()

In [118]:
from keras.utils import plot_model


plot_model(model)

You must install graphviz (see instructions at https://graphviz.gitlab.io/download/) for `plot_model` to work.


In [119]:
history = model.fit(
    X_train, 
    {
        'toss_winner': y_tw_train, 
        'toss_decision': y_td_train,  # Corrected to y_td_train
        'match_winner': y_w_train, 
        'super_over_prob': y_so_train
    },
    validation_split=0.1,
    epochs=20,
    batch_size=32,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=5)]
)

Epoch 1/20


ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 1), output.shape=(None, 2)