In [1]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from xgboost import XGBClassifier

In [2]:
warnings.filterwarnings('ignore')

In [3]:
# Load the cached, pre-processed ball-by-ball frame and preview its first row.
# Unpickling executes code embedded in the file, so only load a cache you trust.
df = pd.read_pickle(filepath_or_buffer='./T20WinProb_Data_cached_df')
df.iloc[:1]

Unnamed: 0,team_batting,team_bowling,city,runs_left,ball_left,wicket_left,target,crr,rrr,Result
125,Bangladesh,New Zealand,Mount Maunganui,193,119,10,194,6.0,9.73,0


## Note: `Result` = 0 means the bowling team wins; `Result` = 1 means the batting team wins

In [4]:
# Features are every column except the last; the target is the last column
# (`Result` in the preview of `df`).
x, y = df.iloc[:, :-1], df.iloc[:, -1]

In [5]:
# Two-stage hold-out split. The first call reserves 20% of all rows as the
# test set; the second reserves 20% of the remainder as the validation set,
# i.e. roughly 64% train / 16% validation / 20% test. Both calls are seeded,
# so the partition is reproducible.
x1, x_test, y1, y_test = train_test_split(
    x, y,
    test_size=0.2,
    random_state=1,
)
x_train, x_val, y_train, y_val = train_test_split(
    x1, y1,
    test_size=0.2,
    random_state=2,
)

In [6]:
# Report the size of each split: the three feature frames first, then the
# three matching target series.
for split in (x_train, x_val, x_test, y_train, y_val, y_test):
    print(split.shape)

(86444, 9)
(21612, 9)
(27015, 9)
(86444,)
(21612,)
(27015,)


In [None]:
# One-hot encode the three categorical columns (dropping the first level of
# each to avoid redundant dummy columns); every other column passes through
# unchanged.
#
# Dense output is requested on the ColumnTransformer (`sparse_threshold=0`)
# rather than on the encoder: the encoder's `sparse=False` keyword was
# deprecated in scikit-learn 1.2 and removed in 1.4 (renamed `sparse_output`),
# so passing it raises a TypeError on current releases. `sparse_threshold=0`
# always yields a dense array and is accepted by old and new versions alike.
trf = ColumnTransformer(
    transformers=[
        ('trf', OneHotEncoder(drop='first'), ['team_batting', 'team_bowling', 'city']),
    ],
    remainder='passthrough',
    sparse_threshold=0,
)

In [None]:
# Three candidate classifiers, each behind the same categorical encoding.
#
# Every pipeline receives its own clone of the encoder: Pipeline.fit fits its
# steps in place, so sharing the single `trf` object would let a later fit of
# one pipeline silently overwrite the encoder state used by the others.

# Logistic regression on standardised features.
lr_pipe = Pipeline(steps=[
    ('step1', clone(trf)),
    ('scl', StandardScaler()),
    ('step2', LogisticRegression(solver='liblinear'))
])

# Random forest; seeded so repeated runs build the same forest and report the
# same scores.
rf_pipe = Pipeline(steps=[
    ('step1', clone(trf)),
    ('rf', RandomForestClassifier(random_state=10))
])

# Gradient-boosted trees on standardised features.
xgb_pipe = Pipeline(steps=[
    ('step1', clone(trf)),
    ('scl', StandardScaler()),
    ('xgb', XGBClassifier(n_estimators=1000, max_depth=12, learning_rate=0.2, random_state=10))
])

In [None]:
# Train all three pipelines on the training split, in the order LR -> RF -> XGB.
# The XGB fit stays as the cell's last expression so its fitted pipeline is
# what the cell displays.
for pipeline in (lr_pipe, rf_pipe):
    pipeline.fit(x_train, y_train)
xgb_pipe.fit(x_train, y_train)

In [None]:
# Accuracy of the logistic-regression pipeline on the held-out test split.
# accuracy_score's signature is (y_true, y_pred); the ground truth goes first
# so the call stays correct if it is ever swapped for an asymmetric metric.
lry_pred_test = lr_pipe.predict(x_test)
print(f"Linear Model: {accuracy_score(y_test, lry_pred_test)}")

In [None]:
# Accuracy of the random-forest pipeline on the held-out test split.
# accuracy_score's signature is (y_true, y_pred); the ground truth goes first
# so the call stays correct if it is ever swapped for an asymmetric metric.
rfy_pred_test = rf_pipe.predict(x_test)
print(f"Random Forest Model: {accuracy_score(y_test, rfy_pred_test)}")

In [None]:
# Accuracy of the XGBoost pipeline on the held-out test split.
# accuracy_score's signature is (y_true, y_pred); the ground truth goes first
# so the call stays correct if it is ever swapped for an asymmetric metric.
# The label says "Classifier" because the estimator is an XGBClassifier.
xgby_pred_test = xgb_pipe.predict(x_test)
print(f"XGB Classifier Model: {accuracy_score(y_test, xgby_pred_test)}")

In [None]:
# 5-fold cross-validated accuracy of each pipeline on the validation split.
# cross_val_score clones the pipeline and refits it on each fold, so these
# scores come from models trained on parts of x_val only, not from the
# pipelines fitted on x_train.
# NOTE(review): if the goal is to validate the already-fitted models, score
# them directly on (x_val, y_val) instead - confirm the intent.
cv_scores = {}
for label, pipeline in (
    ("Linear Model", lr_pipe),
    ("Random Forest Model", rf_pipe),
    ("XGB Classifier Model", xgb_pipe),
):
    cv_scores[label] = cross_val_score(pipeline, x_val, y_val, cv=5)
    print(f"{label}: {cv_scores[label]}")

# Keep the per-model score arrays available under their original names.
lrscore, rfscore, xgbscore = cv_scores.values()

In [None]:
# Take the first test row (kept as a one-row frame) as a sample input for the
# single-prediction checks in the following cells, and display it.
x1 = x_test.iloc[:1]
x1

## Note: `Result` = 0 means the bowling team wins; `Result` = 1 means the batting team wins

In [None]:
# True outcome for the first test row, for comparison with the predictions below.
y_test.iloc[:1]

In [None]:
# XGBoost pipeline on the sample row: predicted label, then the per-class
# probabilities (presumably ordered as classes 0, 1 - confirm via classes_).
print(xgb_pipe.predict(x1), xgb_pipe.predict_proba(x1), sep="\n")

In [None]:
# Random-forest pipeline on the sample row: predicted label, then the per-class
# probabilities (presumably ordered as classes 0, 1 - confirm via classes_).
print(rf_pipe.predict(x1), rf_pipe.predict_proba(x1), sep="\n")

In [None]:
# Logistic-regression pipeline on the sample row: predicted label, then the
# per-class probabilities (presumably ordered as classes 0, 1 - confirm via
# classes_).
print(lr_pipe.predict(x1), lr_pipe.predict_proba(x1), sep="\n")

In [None]:
import pickle

In [None]:
# Persist the fitted XGBoost pipeline (encoder + scaler + classifier) to disk.
# The target directory is created first so the cell also works on a fresh
# checkout where ./Models does not exist yet.
model_path = Path("./Models/IPL_WinProb_ModelPred.pkl")
model_path.parent.mkdir(parents=True, exist_ok=True)
with model_path.open("wb") as f:
    pickle.dump(xgb_pipe, f)

In [None]:
# Reload the pickled pipeline to check that it round-trips. The file is opened
# in a `with` block so the handle is closed as soon as loading finishes.
# Unpickling executes code embedded in the file; only load pickles you trust.
with open('./Models/IPL_WinProb_ModelPred.pkl', 'rb') as f:
    model = pickle.load(f)

In [None]:
# Reloaded pipeline on the sample row: predicted label, then the per-class
# probabilities, for comparison with the in-memory XGBoost pipeline's output.
print(model.predict(x1), model.predict_proba(x1), sep="\n")