In [184]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

In [185]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from xgboost import XGBRegressor

In [186]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [187]:
df = pd.read_csv('Featured_Matches_with_Deliveries.csv')

In [188]:
df.head(10)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,score,team1,Neutral,is_home_team,final_innings_score,batter_code,bowler_code,non_striker_code,batting_team_code,bowling_team_code
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,...,1,Royal Challengers Bangalore,False,False,222,1,14,2,1,2
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,...,1,Royal Challengers Bangalore,False,False,222,2,14,1,1,2
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,...,2,Royal Challengers Bangalore,False,False,222,2,14,1,1,2
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,...,2,Royal Challengers Bangalore,False,False,222,2,14,1,1,2
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,...,2,Royal Challengers Bangalore,False,False,222,2,14,1,1,2
5,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,6,BB McCullum,P Kumar,SC Ganguly,0,...,2,Royal Challengers Bangalore,False,False,222,2,14,1,1,2
6,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,7,BB McCullum,P Kumar,SC Ganguly,0,...,3,Royal Challengers Bangalore,False,False,222,2,14,1,1,2
7,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,1,BB McCullum,Z Khan,SC Ganguly,0,...,3,Royal Challengers Bangalore,False,False,222,2,15,1,1,2
8,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,2,BB McCullum,Z Khan,SC Ganguly,4,...,7,Royal Challengers Bangalore,False,False,222,2,15,1,1,2
9,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,3,BB McCullum,Z Khan,SC Ganguly,4,...,11,Royal Challengers Bangalore,False,False,222,2,15,1,1,2


In [189]:
print(df.columns)

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
       'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
       'dismissal_kind', 'score', 'team1', 'Neutral', 'is_home_team',
       'final_innings_score', 'batter_code', 'bowler_code', 'non_striker_code',
       'batting_team_code', 'bowling_team_code'],
      dtype='object')


In [190]:
df.drop(['match_id','team1','total_runs','batsman_runs', 'extra_runs',
         'extras_type', 'is_wicket', 'player_dismissed','dismissal_kind',
         'batter', 'bowler', 'non_striker','batting_team', 'bowling_team'],axis=1, inplace=True)

In [191]:
df.head(5)

Unnamed: 0,inning,over,ball,score,Neutral,is_home_team,final_innings_score,batter_code,bowler_code,non_striker_code,batting_team_code,bowling_team_code
0,1,0,1,1,False,False,222,1,14,2,1,2
1,1,0,2,1,False,False,222,2,14,1,1,2
2,1,0,3,2,False,False,222,2,14,1,1,2
3,1,0,4,2,False,False,222,2,14,1,1,2
4,1,0,5,2,False,False,222,2,14,1,1,2


In [192]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(['final_innings_score'], axis=1), df['final_innings_score'], test_size=0.2, random_state=42)

In [193]:
def evaluate(model, X_test, y_test, name="Model"):
    y_pred = model.predict(X_test)
    print(f"--- {name} ---")
    print("MAE:",mean_absolute_error(y_test, y_pred))
    print("RMSE:",np.sqrt(mean_squared_error(y_test, y_pred)))
    print("R2 Score:",r2_score(y_test, y_pred))
    print()

In [None]:
modelLR = LinearRegression()
modelLR.fit(X_train, y_train)
evaluate(modelLR, X_test, y_test, name="Linear Regression")

--- Linear Regression ---
MAE: 17.224598580740366
RMSE: 23.25698731362935
R2 Score: 0.44797165502669256



In [196]:
modelDTR = DecisionTreeRegressor(random_state=42)
modelDTR.fit(X_train, y_train)
evaluate(modelDTR, X_test, y_test, name="Decision Tree Regressor")

--- Decision Tree Regressor ---
MAE: 5.2242241199750055
RMSE: 14.027496282213406
R2 Score: 0.7991763803258802



In [197]:
modelRFR = RandomForestRegressor()
modelRFR.fit(X_train, y_train)
evaluate(modelRFR, X_test, y_test, name="Random Forest Regressor") 

--- Random Forest Regressor ---
MAE: 6.290171123079518
RMSE: 9.853738200289364
R2 Score: 0.9009039131813164



In [198]:
modelETR = ExtraTreesRegressor(n_estimators=100, random_state=42)
modelETR.fit(X_train, y_train)
evaluate(modelETR, X_test, y_test, name="Extra Tree Regressor")

--- Extra Tree Regressor ---
MAE: 4.462831007429008
RMSE: 7.83681816054724
R2 Score: 0.9373192757731084



In [199]:
modelXGB= XGBRegressor(n_estimators=100, random_state=42)
modelXGB.fit(X_train, y_train)
evaluate(modelXGB, X_test, y_test, name="XGB Regressor")

--- XGB Regressor ---
MAE: 13.836425934223254
RMSE: 18.650234832996816
R2 Score: 0.6450043141988795



In [200]:
custom_input = pd.DataFrame([{
    'inning': 2,
    'over': 9,
    'ball': 3,
    'score': 100,
    'Neutral': False,
    'is_home_team': True,
    'batter_code': 134,
    'bowler_code': 229,
    'non_striker_code': 99,
    'batting_team_code': 3,
    'bowling_team_code': 5
}])

In [201]:
y_pred = modelETR.predict(custom_input)

In [202]:
print("Predicted Final Score for this innings will be",int(y_pred))

Predicted Final Score for this innings will be 168
