# IPL Score Predictive Model

In [134]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, mean_absolute_error, mean_squared_error

In [135]:
# Load the IPL dataset (ipl.csv) directly from its GitHub raw URL; this cell needs network access.
df=pd.read_csv("https://raw.githubusercontent.com/shsarv/Machine-Learning-Projects/refs/heads/main/IPL%20Score%20Prediction/dataset/ipl.csv")

In [136]:
df.head()

Unnamed: 0,mid,date,venue,bat_team,bowl_team,batsman,bowler,runs,wickets,overs,runs_last_5,wickets_last_5,striker,non-striker,total
0,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,SC Ganguly,P Kumar,1,0,0.1,1,0,0,0,222
1,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,1,0,0.2,1,0,0,0,222
2,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,2,0,0.2,2,0,0,0,222
3,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,2,0,0.3,2,0,0,0,222
4,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,2,0,0.4,2,0,0,0,222


In [137]:
df.isnull().sum()

mid               0
date              0
venue             0
bat_team          0
bowl_team         0
batsman           0
bowler            0
runs              0
wickets           0
overs             0
runs_last_5       0
wickets_last_5    0
striker           0
non-striker       0
total             0
dtype: int64

In [138]:
# Drop the columns that are not used as model inputs.
df = df.drop(
    columns=['mid', 'venue', 'batsman', 'bowler', 'striker', 'non-striker']
)

In [139]:
df.shape

(76014, 9)

In [140]:
# Franchises kept for modelling: the teams the author treats as having
# played consistently across IPL seasons.
consistent_teams = [
    'Kolkata Knight Riders',
    'Chennai Super Kings',
    'Rajasthan Royals',
    'Mumbai Indians',
    'Kings XI Punjab',
    'Royal Challengers Bangalore',
    'Delhi Daredevils',
    'Sunrisers Hyderabad',
]

In [141]:
# Keep a row only when both the batting and the bowling side are in consistent_teams.
df = df.loc[
    df['bat_team'].isin(consistent_teams)
    & df['bowl_team'].isin(consistent_teams)
]

In [142]:
df.shape

(53811, 9)

In [143]:
df.head()

Unnamed: 0,date,bat_team,bowl_team,runs,wickets,overs,runs_last_5,wickets_last_5,total
0,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,1,0,0.1,1,0,222
1,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,1,0,0.2,1,0,222
2,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,2,0,0.2,2,0,222
3,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,2,0,0.3,2,0,222
4,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,2,0,0.4,2,0,222


In [144]:
# Keep only rows from over 5.0 onward; the author treats scoring in the
# first five overs as too unstable to learn from.
df = df.loc[df['overs'].ge(5.0)]

In [145]:
df.shape

(40108, 9)

In [146]:
# Parse the ISO-formatted date strings into datetime64 values.
# assign() returns a new frame instead of writing a column into the filtered
# slice, which avoids pandas' SettingWithCopyWarning and keeps the cell
# safe to re-run.
df = df.assign(date=pd.to_datetime(df['date'], format='%Y-%m-%d'))

In [147]:
# One-hot encode the two team columns; each team becomes its own
# bat_team_<name> / bowl_team_<name> indicator column.
df = pd.get_dummies(data=df, columns=['bat_team', 'bowl_team'])

In [115]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 40108 entries, 32 to 75888
Data columns (total 23 columns):
 #   Column                                 Non-Null Count  Dtype         
---  ------                                 --------------  -----         
 0   date                                   40108 non-null  datetime64[ns]
 1   runs                                   40108 non-null  int64         
 2   wickets                                40108 non-null  int64         
 3   overs                                  40108 non-null  float64       
 4   runs_last_5                            40108 non-null  int64         
 5   wickets_last_5                         40108 non-null  int64         
 6   total                                  40108 non-null  int64         
 7   bat_team_Chennai Super Kings           40108 non-null  bool          
 8   bat_team_Delhi Daredevils              40108 non-null  bool          
 9   bat_team_Kings XI Punjab               40108 non-null  bool      

In [148]:
# Rearrange the columns into a fixed order: date, batting-team indicators,
# bowling-team indicators, match-state features, and finally the target.
df = df.loc[:, [
    'date',
    # batting-side indicators
    'bat_team_Chennai Super Kings',
    'bat_team_Delhi Daredevils',
    'bat_team_Kings XI Punjab',
    'bat_team_Kolkata Knight Riders',
    'bat_team_Mumbai Indians',
    'bat_team_Rajasthan Royals',
    'bat_team_Royal Challengers Bangalore',
    'bat_team_Sunrisers Hyderabad',
    # bowling-side indicators
    'bowl_team_Chennai Super Kings',
    'bowl_team_Delhi Daredevils',
    'bowl_team_Kings XI Punjab',
    'bowl_team_Kolkata Knight Riders',
    'bowl_team_Mumbai Indians',
    'bowl_team_Rajasthan Royals',
    'bowl_team_Royal Challengers Bangalore',
    'bowl_team_Sunrisers Hyderabad',
    # match state and target
    'overs',
    'runs',
    'wickets',
    'runs_last_5',
    'wickets_last_5',
    'total',
]]

In [149]:
# Chronological hold-out: matches dated 2016 or earlier are used for fitting,
# matches dated 2017 or later for evaluation.
train_df = df.loc[df['date'].dt.year.le(2016)]
test_df = df.loc[df['date'].dt.year.ge(2017)]

# 'date' is only needed for the split and 'total' is the target, so neither is a feature.
X_train, y_train = train_df.drop(columns=['date', 'total']), train_df['total']
X_test, y_test = test_df.drop(columns=['date', 'total']), test_df['total']

In [118]:
X_train.shape

(37330, 21)

In [119]:
X_test.shape

(2778, 21)

In [120]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor

In [150]:
# Testing all the models
# The tree-based estimators are randomised, so each gets a fixed seed; without
# it the printed MAE/RMSE change on every Restart & Run All.
model = {
    "LR": LinearRegression(),
    "DT": DecisionTreeRegressor(random_state=42),
    "RF": RandomForestRegressor(random_state=42),
    "AB": AdaBoostRegressor(random_state=42),
}
for name, estimator in model.items():
    # Fit on the training rows, then score on the held-out test rows.
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_test)
    print(f"{name} MAE: ", mean_absolute_error(y_test, y_pred))
    print(f"{name} RMSE: ", np.sqrt(mean_squared_error(y_test, y_pred)))

LR MAE:  12.118617546193288
LR RMSE:  15.843229566732099
DT MAE:  17.21418286537077
DT RMSE:  23.13500770938845
RF MAE:  13.707950623950087
RF RMSE:  18.16518688014133
AB MAE:  17.227279924118783
AB RMSE:  21.59008756860185


#### Linear regression has the smallest MAE and RMSE of all the models, so we'll use the linear regression model.

In [151]:
# Prediction Function
def predict_score(model, batting_team, bowling_team, overs, runs, wickets, runs_in_prev_5, wickets_in_prev_5):
    """Predict the final innings total from the current match state.

    The feature row is built positionally: eight batting-team indicator
    flags, eight bowling-team indicator flags (both in the order of the
    ``teams`` tuple below), followed by ``overs``, ``runs``, ``wickets``,
    ``runs_in_prev_5`` and ``wickets_in_prev_5``. The model must have been
    fitted on columns in that same order.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    batting_team, bowling_team : str
        Team names; each must be one of the eight supported teams.
    overs : float
        Overs completed so far, in the same notation as the training data.
    runs, wickets : int
        Current score and wickets fallen.
    runs_in_prev_5, wickets_in_prev_5 : int
        Runs scored and wickets lost in the previous five overs.

    Returns
    -------
    int
        The model's prediction, truncated toward zero by ``int()``.

    Raises
    ------
    ValueError
        If ``batting_team`` or ``bowling_team`` is not a supported team.
    """
    teams = (
        'Chennai Super Kings', 'Delhi Daredevils', 'Kings XI Punjab',
        'Kolkata Knight Riders', 'Mumbai Indians', 'Rajasthan Royals',
        'Royal Challengers Bangalore', 'Sunrisers Hyderabad',
    )

    # Fail with a message that names the bad argument instead of the bare
    # "'X' is not in list" raised by index().
    for label, team in (("batting_team", batting_team), ("bowling_team", bowling_team)):
        if team not in teams:
            raise ValueError(
                f"{label}={team!r} is not a supported team; expected one of {list(teams)}"
            )

    row = [0] * (2 * len(teams))
    row[teams.index(batting_team)] = 1
    row[len(teams) + teams.index(bowling_team)] = 1
    row += [overs, runs, wickets, runs_in_prev_5, wickets_in_prev_5]

    features = [row]
    # Estimators fitted on a DataFrame record their column names; handing back
    # a frame with those names avoids sklearn's "X does not have valid feature
    # names" warning. The values are still matched by position, as before.
    fitted_names = getattr(model, "feature_names_in_", None)
    if fitted_names is not None and len(fitted_names) == len(row):
        features = pd.DataFrame(features, columns=list(fitted_names))

    return int(model.predict(features)[0])

### Prediction 1
##### Match: Sunrisers Hyderabad 🆚 Delhi Daredevils
##### Over: 12.4
##### Current score: 98/3
##### Last 5 overs: 40 runs, 2 wickets

In [124]:
final_score = predict_score(
    model["LR"],
    batting_team='Sunrisers Hyderabad',
    bowling_team='Delhi Daredevils',
    overs=12.4,
    runs=98,
    wickets=3,
    runs_in_prev_5=40,
    wickets_in_prev_5=2,
)
# The reported band is asymmetric: 10 below to 5 above the point estimate.
print(f"Predicted final score: {final_score-10} to {final_score+5}")

Predicted final score: 154 to 169




### Prediction 2
##### Match: Rajasthan Royals 🆚 Kings XI Punjab
##### Over: 18.2
##### Current score: 189/4
##### Last 5 overs: 36 runs, 1 wicket

In [126]:
final_score = predict_score(
    model["LR"],
    batting_team='Rajasthan Royals',
    bowling_team='Kings XI Punjab',
    overs=18.2,
    runs=189,
    wickets=4,
    runs_in_prev_5=36,
    wickets_in_prev_5=1,
)
# The reported band is asymmetric: 10 below to 5 above the point estimate.
print(f"Predicted final score: {final_score-10} to {final_score+5}")

Predicted final score: 198 to 213




### Prediction 3
##### Match: Mumbai Indians 🆚 Sunrisers Hyderabad
##### Over: 12.3
##### Current score: 147/2
##### Last 5 overs: 29 runs, 0 wickets

In [128]:
final_score = predict_score(
    model["LR"],
    batting_team='Mumbai Indians',
    bowling_team='Sunrisers Hyderabad',
    overs=12.3,
    runs=147,
    wickets=2,
    runs_in_prev_5=29,
    wickets_in_prev_5=0,
)
# The reported band is asymmetric: 10 below to 5 above the point estimate.
print(f"Predicted final score: {final_score-10} to {final_score+5}")

Predicted final score: 211 to 226


