## First Innings Score Prediction

In [6]:
# Importing essential libraries
import pandas as pd
import pickle

# Loading the dataset
# 'ipl.csv' is a relative path, so it is resolved against the kernel's current working directory.
df = pd.read_csv('ipl.csv')

In [7]:
df.head()

Unnamed: 0,mid,date,venue,bat_team,bowl_team,batsman,bowler,runs,wickets,overs,runs_last_5,wickets_last_5,striker,non-striker,total
0,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,SC Ganguly,P Kumar,1,0,0.1,1,0,0,0,222
1,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,1,0,0.2,1,0,0,0,222
2,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,2,0,0.2,2,0,0,0,222
3,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,2,0,0.3,2,0,0,0,222
4,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,2,0,0.4,2,0,0,0,222


In [8]:
# --- Data Cleaning ---
# Columns listed here are dropped from df in place
columns_to_remove = [
    'mid',
    'venue',
    'batsman',
    'bowler',
    'striker',
    'non-striker',
]
df.drop(columns=columns_to_remove, inplace=True)

In [9]:
df.head()

Unnamed: 0,date,bat_team,bowl_team,runs,wickets,overs,runs_last_5,wickets_last_5,total
0,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,1,0,0.1,1,0,222
1,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,1,0,0.2,1,0,222
2,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,2,0,0.2,2,0,222
3,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,2,0,0.3,2,0,222
4,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,2,0,0.4,2,0,222


In [10]:
df['bat_team'].unique()

array(['Kolkata Knight Riders', 'Chennai Super Kings', 'Rajasthan Royals',
       'Mumbai Indians', 'Deccan Chargers', 'Kings XI Punjab',
       'Royal Challengers Bangalore', 'Delhi Daredevils',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Sunrisers Hyderabad',
       'Rising Pune Supergiants', 'Gujarat Lions',
       'Rising Pune Supergiant'], dtype=object)

In [11]:
# Teams to keep; rows involving any other franchise are filtered out in the next cell
consistent_teams = [
    'Kolkata Knight Riders',
    'Chennai Super Kings',
    'Rajasthan Royals',
    'Mumbai Indians',
    'Kings XI Punjab',
    'Royal Challengers Bangalore',
    'Delhi Daredevils',
    'Sunrisers Hyderabad',
]

In [12]:
# Keep a row only when BOTH the batting and the bowling side are consistent teams
batting_is_consistent = df['bat_team'].isin(consistent_teams)
bowling_is_consistent = df['bowl_team'].isin(consistent_teams)
df = df[batting_is_consistent & bowling_is_consistent]

In [13]:
# Drop the first 5 overs of every match: only rows with overs >= 5.0 are kept
past_fifth_over = df['overs'] >= 5.0
df = df.loc[past_fifth_over]

In [14]:
df.head()

Unnamed: 0,date,bat_team,bowl_team,runs,wickets,overs,runs_last_5,wickets_last_5,total
32,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,61,0,5.1,59,0,222
33,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,61,1,5.2,59,1,222
34,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,61,1,5.3,59,1,222
35,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,61,1,5.4,59,1,222
36,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,61,1,5.5,58,1,222


In [15]:
# Show the distinct team names left on each side after filtering
for team_column in ('bat_team', 'bowl_team'):
    print(df[team_column].unique())

['Kolkata Knight Riders' 'Chennai Super Kings' 'Rajasthan Royals'
 'Mumbai Indians' 'Kings XI Punjab' 'Royal Challengers Bangalore'
 'Delhi Daredevils' 'Sunrisers Hyderabad']
['Royal Challengers Bangalore' 'Kings XI Punjab' 'Delhi Daredevils'
 'Rajasthan Royals' 'Mumbai Indians' 'Chennai Super Kings'
 'Kolkata Knight Riders' 'Sunrisers Hyderabad']


In [16]:
# Converting the column 'date' from string into datetime values.
# pd.to_datetime parses the whole column in one vectorized call instead of calling
# strptime once per row, and it leaves an already-converted column untouched, so
# re-running this cell no longer raises.
# .assign returns a new frame, so the write does not land on a filtered slice of the
# original data (the in-place column assignment can trigger SettingWithCopyWarning).
from datetime import datetime  # no longer needed by this cell; kept in case later cells use the name
df = df.assign(date=pd.to_datetime(df['date'], format='%Y-%m-%d'))

In [17]:
# --- Data Preprocessing ---
# One-hot encode the two team columns; all other columns are carried over as they are
team_columns = ['bat_team', 'bowl_team']
encoded_df = pd.get_dummies(df, columns=team_columns)

In [18]:
encoded_df.head()

Unnamed: 0,date,runs,wickets,overs,runs_last_5,wickets_last_5,total,bat_team_Chennai Super Kings,bat_team_Delhi Daredevils,bat_team_Kings XI Punjab,...,bat_team_Royal Challengers Bangalore,bat_team_Sunrisers Hyderabad,bowl_team_Chennai Super Kings,bowl_team_Delhi Daredevils,bowl_team_Kings XI Punjab,bowl_team_Kolkata Knight Riders,bowl_team_Mumbai Indians,bowl_team_Rajasthan Royals,bowl_team_Royal Challengers Bangalore,bowl_team_Sunrisers Hyderabad
32,2008-04-18,61,0,5.1,59,0,222,0,0,0,...,0,0,0,0,0,0,0,0,1,0
33,2008-04-18,61,1,5.2,59,1,222,0,0,0,...,0,0,0,0,0,0,0,0,1,0
34,2008-04-18,61,1,5.3,59,1,222,0,0,0,...,0,0,0,0,0,0,0,0,1,0
35,2008-04-18,61,1,5.4,59,1,222,0,0,0,...,0,0,0,0,0,0,0,0,1,0
36,2008-04-18,61,1,5.5,58,1,222,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [19]:
encoded_df.head()

Unnamed: 0,date,runs,wickets,overs,runs_last_5,wickets_last_5,total,bat_team_Chennai Super Kings,bat_team_Delhi Daredevils,bat_team_Kings XI Punjab,...,bat_team_Royal Challengers Bangalore,bat_team_Sunrisers Hyderabad,bowl_team_Chennai Super Kings,bowl_team_Delhi Daredevils,bowl_team_Kings XI Punjab,bowl_team_Kolkata Knight Riders,bowl_team_Mumbai Indians,bowl_team_Rajasthan Royals,bowl_team_Royal Challengers Bangalore,bowl_team_Sunrisers Hyderabad
32,2008-04-18,61,0,5.1,59,0,222,0,0,0,...,0,0,0,0,0,0,0,0,1,0
33,2008-04-18,61,1,5.2,59,1,222,0,0,0,...,0,0,0,0,0,0,0,0,1,0
34,2008-04-18,61,1,5.3,59,1,222,0,0,0,...,0,0,0,0,0,0,0,0,1,0
35,2008-04-18,61,1,5.4,59,1,222,0,0,0,...,0,0,0,0,0,0,0,0,1,0
36,2008-04-18,61,1,5.5,58,1,222,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [20]:
encoded_df.columns

Index(['date', 'runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5',
       'total', 'bat_team_Chennai Super Kings', 'bat_team_Delhi Daredevils',
       'bat_team_Kings XI Punjab', 'bat_team_Kolkata Knight Riders',
       'bat_team_Mumbai Indians', 'bat_team_Rajasthan Royals',
       'bat_team_Royal Challengers Bangalore', 'bat_team_Sunrisers Hyderabad',
       'bowl_team_Chennai Super Kings', 'bowl_team_Delhi Daredevils',
       'bowl_team_Kings XI Punjab', 'bowl_team_Kolkata Knight Riders',
       'bowl_team_Mumbai Indians', 'bowl_team_Rajasthan Royals',
       'bowl_team_Royal Challengers Bangalore',
       'bowl_team_Sunrisers Hyderabad'],
      dtype='object')

In [21]:
# Rearranging the columns: date, batting-team dummies, bowling-team dummies,
# numeric match-state features, and finally the target 'total'
bat_team_columns = [
    'bat_team_Chennai Super Kings',
    'bat_team_Delhi Daredevils',
    'bat_team_Kings XI Punjab',
    'bat_team_Kolkata Knight Riders',
    'bat_team_Mumbai Indians',
    'bat_team_Rajasthan Royals',
    'bat_team_Royal Challengers Bangalore',
    'bat_team_Sunrisers Hyderabad',
]
bowl_team_columns = [
    'bowl_team_Chennai Super Kings',
    'bowl_team_Delhi Daredevils',
    'bowl_team_Kings XI Punjab',
    'bowl_team_Kolkata Knight Riders',
    'bowl_team_Mumbai Indians',
    'bowl_team_Rajasthan Royals',
    'bowl_team_Royal Challengers Bangalore',
    'bowl_team_Sunrisers Hyderabad',
]
match_state_columns = ['overs', 'runs', 'wickets', 'runs_last_5', 'wickets_last_5']

ordered_columns = ['date'] + bat_team_columns + bowl_team_columns + match_state_columns + ['total']
encoded_df = encoded_df[ordered_columns]

In [22]:
# Splitting the data into train and test set by season:
# matches up to and including 2016 train the model, 2017 onwards are held out
feature_frame = encoded_df.drop(columns='total')
match_year = encoded_df['date'].dt.year
X_train = feature_frame[match_year <= 2016]
X_test = feature_frame[match_year >= 2017]

In [23]:
# Target arrays ('total') selected with the same year cut-offs as the feature split
in_train_years = encoded_df['date'].dt.year <= 2016
in_test_years = encoded_df['date'].dt.year >= 2017
y_train = encoded_df.loc[in_train_years, 'total'].values
y_test = encoded_df.loc[in_test_years, 'total'].values

In [24]:
# Removing the 'date' column (it was only needed to split by season).
# The column axis is named explicitly: the previous `axis=True` only worked because
# True == 1 in Python. Rebinding to the returned frame, rather than mutating in place,
# avoids modifying a boolean-indexed slice of encoded_df.
X_train = X_train.drop(columns='date')
X_test = X_test.drop(columns='date')

In [25]:
# --- Model Building ---
# Baseline: ordinary least-squares linear regression fitted on the training split
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and fitting can be chained
regressor = LinearRegression().fit(X_train, y_train)
regressor

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [26]:
# Creating a pickle file for the fitted linear-regression model.
# The file is opened in a `with` block so the handle is flushed and closed even if
# pickling raises; the bare open(...) used before never closed it.
filename = 'first-innings-score-lr-model.pkl'
with open(filename, 'wb') as model_file:
    pickle.dump(regressor, model_file)

In [39]:
# Importing the candidate regression algorithms
from sklearn.linear_model  import Ridge,Lasso,RidgeCV, LassoCV, ElasticNet, ElasticNetCV, LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.linear_model import BayesianRidge
from lightgbm import LGBMRegressor

In [42]:
# Candidate regressors, each with its default hyperparameters, compared in the next cell
Reg = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    RidgeCV(),
    LassoCV(),
    ElasticNet(),
    ElasticNetCV(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    XGBRegressor(),
    LGBMRegressor(),
    KNeighborsRegressor(),
    SVR(),
    BayesianRidge(),
]

In [43]:
# Fit every candidate on the training split and print its score() on both the
# training and the held-out test data (score() is R^2 for scikit-learn regressors)
for model in Reg:
    model.fit(X_train, y_train)
    print('Training Score for *****************', model, 'model is      =   ', end="")
    print(model.score(X_train, y_train))
    print('Test Score for ************************', model, 'model is      =  ', end="")
    print(model.score(X_test, y_test))
    # three explicit newlines plus print's own terminator = four blank lines between models
    print("\n" * 3)

Training Score for ***************** LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False) model is      =   0.6502132284850832
Test Score for ************************ LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False) model is      =  0.7522274447953539




Training Score for ***************** Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001) model is      =   0.6502133807548146
Test Score for ************************ Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001) model is      =  0.7522627797893786




Training Score for ***************** Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False) model is      =   0.638547675966

                    weights='uniform') model is      =   0.8843083153618684
Test Score for ************************ KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
                    metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                    weights='uniform') model is      =  0.6047516212009467




Training Score for ***************** SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False) model is      =   0.5811585311962435
Test Score for ************************ SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False) model is      =  0.643444370462407




Training Score for ***************** BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, alpha_init=None,
              compute_score=False, copy_X=True, fit_intercept=True,
              lambda_1=1e-06, lambda_2=1e-0

In [60]:
# Hyperparameter tuning of a gradient-boosting model with an exhaustive grid search.
# NOTE(review): the original note here said XGBoost scored best in the comparison above,
# but the estimator tuned below is LGBMRegressor -- confirm which model was intended.
from sklearn.model_selection import GridSearchCV
# 3 learning rates x 1 n_estimators x 4 depths x 20 min_child_samples values = 240 candidates
param_grid8={
   
    'learning_rate':[0.01,0.1,1],
    'n_estimators':[300],
    'max_depth': [1,2,3,4],
    'min_child_samples':range(10,30)
    }

# cv is left at the GridSearchCV default; verbose=3 logs every individual fit
grid= GridSearchCV(LGBMRegressor(),param_grid8, verbose=3)

grid.fit(X_train,y_train)

Fitting 5 folds for each of 240 candidates, totalling 1200 fits
[CV] learning_rate=0.01, max_depth=1, min_child_samples=10, n_estimators=300 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  learning_rate=0.01, max_depth=1, min_child_samples=10, n_estimators=300, score=0.402, total=   0.4s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=10, n_estimators=300 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s


[CV]  learning_rate=0.01, max_depth=1, min_child_samples=10, n_estimators=300, score=0.379, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=10, n_estimators=300 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.6s remaining:    0.0s


[CV]  learning_rate=0.01, max_depth=1, min_child_samples=10, n_estimators=300, score=0.415, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=10, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=10, n_estimators=300, score=0.351, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=10, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=10, n_estimators=300, score=0.386, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=11, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=11, n_estimators=300, score=0.402, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=11, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=11, n_estimators=300, score=0.379, total=   0.4s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=11, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=11, n_estimators=300, s

[CV]  learning_rate=0.01, max_depth=1, min_child_samples=19, n_estimators=300, score=0.415, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=19, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=19, n_estimators=300, score=0.351, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=19, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=19, n_estimators=300, score=0.386, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=20, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=20, n_estimators=300, score=0.402, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=20, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=20, n_estimators=300, score=0.379, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=20, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=20, n_estimators=300, s

[CV]  learning_rate=0.01, max_depth=1, min_child_samples=28, n_estimators=300, score=0.415, total=   0.4s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=28, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=28, n_estimators=300, score=0.351, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=28, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=28, n_estimators=300, score=0.386, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=29, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=29, n_estimators=300, score=0.402, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=29, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=29, n_estimators=300, score=0.379, total=   0.3s
[CV] learning_rate=0.01, max_depth=1, min_child_samples=29, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=1, min_child_samples=29, n_estimators=300, s

[CV]  learning_rate=0.01, max_depth=2, min_child_samples=17, n_estimators=300, score=0.534, total=   0.3s
[CV] learning_rate=0.01, max_depth=2, min_child_samples=17, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=2, min_child_samples=17, n_estimators=300, score=0.429, total=   0.3s
[CV] learning_rate=0.01, max_depth=2, min_child_samples=17, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=2, min_child_samples=17, n_estimators=300, score=0.521, total=   0.3s
[CV] learning_rate=0.01, max_depth=2, min_child_samples=18, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=2, min_child_samples=18, n_estimators=300, score=0.542, total=   0.3s
[CV] learning_rate=0.01, max_depth=2, min_child_samples=18, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=2, min_child_samples=18, n_estimators=300, score=0.493, total=   0.5s
[CV] learning_rate=0.01, max_depth=2, min_child_samples=18, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=2, min_child_samples=18, n_estimators=300, s

[CV]  learning_rate=0.01, max_depth=2, min_child_samples=26, n_estimators=300, score=0.534, total=   0.4s
[CV] learning_rate=0.01, max_depth=2, min_child_samples=26, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=2, min_child_samples=26, n_estimators=300, score=0.429, total=   0.4s
[CV] learning_rate=0.01, max_depth=2, min_child_samples=26, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=2, min_child_samples=26, n_estimators=300, score=0.521, total=   0.3s
[CV] learning_rate=0.01, max_depth=2, min_child_samples=27, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=2, min_child_samples=27, n_estimators=300, score=0.542, total=   0.3s
[CV] learning_rate=0.01, max_depth=2, min_child_samples=27, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=2, min_child_samples=27, n_estimators=300, score=0.493, total=   0.4s
[CV] learning_rate=0.01, max_depth=2, min_child_samples=27, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=2, min_child_samples=27, n_estimators=300, s

[CV]  learning_rate=0.01, max_depth=3, min_child_samples=15, n_estimators=300, score=0.588, total=   0.4s
[CV] learning_rate=0.01, max_depth=3, min_child_samples=15, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=3, min_child_samples=15, n_estimators=300, score=0.471, total=   0.4s
[CV] learning_rate=0.01, max_depth=3, min_child_samples=15, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=3, min_child_samples=15, n_estimators=300, score=0.582, total=   0.4s
[CV] learning_rate=0.01, max_depth=3, min_child_samples=16, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=3, min_child_samples=16, n_estimators=300, score=0.609, total=   0.4s
[CV] learning_rate=0.01, max_depth=3, min_child_samples=16, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=3, min_child_samples=16, n_estimators=300, score=0.559, total=   0.4s
[CV] learning_rate=0.01, max_depth=3, min_child_samples=16, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=3, min_child_samples=16, n_estimators=300, s

[CV]  learning_rate=0.01, max_depth=3, min_child_samples=24, n_estimators=300, score=0.589, total=   0.4s
[CV] learning_rate=0.01, max_depth=3, min_child_samples=24, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=3, min_child_samples=24, n_estimators=300, score=0.471, total=   0.5s
[CV] learning_rate=0.01, max_depth=3, min_child_samples=24, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=3, min_child_samples=24, n_estimators=300, score=0.582, total=   0.5s
[CV] learning_rate=0.01, max_depth=3, min_child_samples=25, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=3, min_child_samples=25, n_estimators=300, score=0.609, total=   0.5s
[CV] learning_rate=0.01, max_depth=3, min_child_samples=25, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=3, min_child_samples=25, n_estimators=300, score=0.559, total=   0.5s
[CV] learning_rate=0.01, max_depth=3, min_child_samples=25, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=3, min_child_samples=25, n_estimators=300, s

[CV]  learning_rate=0.01, max_depth=4, min_child_samples=13, n_estimators=300, score=0.621, total=   0.5s
[CV] learning_rate=0.01, max_depth=4, min_child_samples=13, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=4, min_child_samples=13, n_estimators=300, score=0.502, total=   0.5s
[CV] learning_rate=0.01, max_depth=4, min_child_samples=13, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=4, min_child_samples=13, n_estimators=300, score=0.606, total=   0.5s
[CV] learning_rate=0.01, max_depth=4, min_child_samples=14, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=4, min_child_samples=14, n_estimators=300, score=0.639, total=   0.6s
[CV] learning_rate=0.01, max_depth=4, min_child_samples=14, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=4, min_child_samples=14, n_estimators=300, score=0.608, total=   0.6s
[CV] learning_rate=0.01, max_depth=4, min_child_samples=14, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=4, min_child_samples=14, n_estimators=300, s

[CV]  learning_rate=0.01, max_depth=4, min_child_samples=22, n_estimators=300, score=0.620, total=   0.5s
[CV] learning_rate=0.01, max_depth=4, min_child_samples=22, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=4, min_child_samples=22, n_estimators=300, score=0.502, total=   0.6s
[CV] learning_rate=0.01, max_depth=4, min_child_samples=22, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=4, min_child_samples=22, n_estimators=300, score=0.606, total=   0.5s
[CV] learning_rate=0.01, max_depth=4, min_child_samples=23, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=4, min_child_samples=23, n_estimators=300, score=0.640, total=   0.5s
[CV] learning_rate=0.01, max_depth=4, min_child_samples=23, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=4, min_child_samples=23, n_estimators=300, score=0.609, total=   0.5s
[CV] learning_rate=0.01, max_depth=4, min_child_samples=23, n_estimators=300 
[CV]  learning_rate=0.01, max_depth=4, min_child_samples=23, n_estimators=300, s

[CV]  learning_rate=0.1, max_depth=1, min_child_samples=11, n_estimators=300, score=0.591, total=   0.4s
[CV] learning_rate=0.1, max_depth=1, min_child_samples=11, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=1, min_child_samples=11, n_estimators=300, score=0.460, total=   0.3s
[CV] learning_rate=0.1, max_depth=1, min_child_samples=11, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=1, min_child_samples=11, n_estimators=300, score=0.584, total=   0.3s
[CV] learning_rate=0.1, max_depth=1, min_child_samples=12, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=1, min_child_samples=12, n_estimators=300, score=0.625, total=   0.3s
[CV] learning_rate=0.1, max_depth=1, min_child_samples=12, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=1, min_child_samples=12, n_estimators=300, score=0.585, total=   0.3s
[CV] learning_rate=0.1, max_depth=1, min_child_samples=12, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=1, min_child_samples=12, n_estimators=300, score=0.591,

[CV]  learning_rate=0.1, max_depth=1, min_child_samples=20, n_estimators=300, score=0.460, total=   0.4s
[CV] learning_rate=0.1, max_depth=1, min_child_samples=20, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=1, min_child_samples=20, n_estimators=300, score=0.584, total=   0.3s
[CV] learning_rate=0.1, max_depth=1, min_child_samples=21, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=1, min_child_samples=21, n_estimators=300, score=0.625, total=   0.3s
[CV] learning_rate=0.1, max_depth=1, min_child_samples=21, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=1, min_child_samples=21, n_estimators=300, score=0.585, total=   0.3s
[CV] learning_rate=0.1, max_depth=1, min_child_samples=21, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=1, min_child_samples=21, n_estimators=300, score=0.591, total=   0.5s
[CV] learning_rate=0.1, max_depth=1, min_child_samples=21, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=1, min_child_samples=21, n_estimators=300, score=0.460,

[CV]  learning_rate=0.1, max_depth=1, min_child_samples=29, n_estimators=300, score=0.584, total=   0.3s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=10, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=10, n_estimators=300, score=0.672, total=   0.4s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=10, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=10, n_estimators=300, score=0.644, total=   0.5s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=10, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=10, n_estimators=300, score=0.643, total=   0.4s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=10, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=10, n_estimators=300, score=0.480, total=   0.4s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=10, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=10, n_estimators=300, score=0.633,

[CV]  learning_rate=0.1, max_depth=2, min_child_samples=19, n_estimators=300, score=0.672, total=   0.4s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=19, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=19, n_estimators=300, score=0.646, total=   0.4s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=19, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=19, n_estimators=300, score=0.646, total=   0.4s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=19, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=19, n_estimators=300, score=0.479, total=   0.3s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=19, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=19, n_estimators=300, score=0.634, total=   0.4s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=20, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=20, n_estimators=300, score=0.672,

[CV]  learning_rate=0.1, max_depth=2, min_child_samples=28, n_estimators=300, score=0.645, total=   0.3s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=28, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=28, n_estimators=300, score=0.640, total=   0.4s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=28, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=28, n_estimators=300, score=0.483, total=   0.3s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=28, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=28, n_estimators=300, score=0.633, total=   0.4s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=29, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=29, n_estimators=300, score=0.673, total=   0.4s
[CV] learning_rate=0.1, max_depth=2, min_child_samples=29, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=2, min_child_samples=29, n_estimators=300, score=0.646,

[CV]  learning_rate=0.1, max_depth=3, min_child_samples=17, n_estimators=300, score=0.629, total=   0.4s
[CV] learning_rate=0.1, max_depth=3, min_child_samples=17, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=3, min_child_samples=17, n_estimators=300, score=0.479, total=   0.4s
[CV] learning_rate=0.1, max_depth=3, min_child_samples=17, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=3, min_child_samples=17, n_estimators=300, score=0.623, total=   0.4s
[CV] learning_rate=0.1, max_depth=3, min_child_samples=18, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=3, min_child_samples=18, n_estimators=300, score=0.664, total=   0.4s
[CV] learning_rate=0.1, max_depth=3, min_child_samples=18, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=3, min_child_samples=18, n_estimators=300, score=0.641, total=   0.4s
[CV] learning_rate=0.1, max_depth=3, min_child_samples=18, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=3, min_child_samples=18, n_estimators=300, score=0.633,

[CV]  learning_rate=0.1, max_depth=3, min_child_samples=26, n_estimators=300, score=0.479, total=   0.4s
[CV] learning_rate=0.1, max_depth=3, min_child_samples=26, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=3, min_child_samples=26, n_estimators=300, score=0.617, total=   0.4s
[CV] learning_rate=0.1, max_depth=3, min_child_samples=27, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=3, min_child_samples=27, n_estimators=300, score=0.660, total=   0.4s
[CV] learning_rate=0.1, max_depth=3, min_child_samples=27, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=3, min_child_samples=27, n_estimators=300, score=0.642, total=   0.4s
[CV] learning_rate=0.1, max_depth=3, min_child_samples=27, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=3, min_child_samples=27, n_estimators=300, score=0.633, total=   0.4s
[CV] learning_rate=0.1, max_depth=3, min_child_samples=27, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=3, min_child_samples=27, n_estimators=300, score=0.483,

[CV]  learning_rate=0.1, max_depth=4, min_child_samples=15, n_estimators=300, score=0.602, total=   0.6s
[CV] learning_rate=0.1, max_depth=4, min_child_samples=16, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=4, min_child_samples=16, n_estimators=300, score=0.642, total=   0.5s
[CV] learning_rate=0.1, max_depth=4, min_child_samples=16, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=4, min_child_samples=16, n_estimators=300, score=0.635, total=   0.5s
[CV] learning_rate=0.1, max_depth=4, min_child_samples=16, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=4, min_child_samples=16, n_estimators=300, score=0.629, total=   0.5s
[CV] learning_rate=0.1, max_depth=4, min_child_samples=16, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=4, min_child_samples=16, n_estimators=300, score=0.464, total=   0.5s
[CV] learning_rate=0.1, max_depth=4, min_child_samples=16, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=4, min_child_samples=16, n_estimators=300, score=0.592,

[CV]  learning_rate=0.1, max_depth=4, min_child_samples=25, n_estimators=300, score=0.644, total=   0.7s
[CV] learning_rate=0.1, max_depth=4, min_child_samples=25, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=4, min_child_samples=25, n_estimators=300, score=0.636, total=   0.7s
[CV] learning_rate=0.1, max_depth=4, min_child_samples=25, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=4, min_child_samples=25, n_estimators=300, score=0.626, total=   0.9s
[CV] learning_rate=0.1, max_depth=4, min_child_samples=25, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=4, min_child_samples=25, n_estimators=300, score=0.468, total=   0.7s
[CV] learning_rate=0.1, max_depth=4, min_child_samples=25, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=4, min_child_samples=25, n_estimators=300, score=0.595, total=   0.5s
[CV] learning_rate=0.1, max_depth=4, min_child_samples=26, n_estimators=300 
[CV]  learning_rate=0.1, max_depth=4, min_child_samples=26, n_estimators=300, score=0.643,

[CV]  learning_rate=1, max_depth=1, min_child_samples=14, n_estimators=300, score=0.662, total=   0.3s
[CV] learning_rate=1, max_depth=1, min_child_samples=14, n_estimators=300 
[CV]  learning_rate=1, max_depth=1, min_child_samples=14, n_estimators=300, score=0.643, total=   0.3s
[CV] learning_rate=1, max_depth=1, min_child_samples=14, n_estimators=300 
[CV]  learning_rate=1, max_depth=1, min_child_samples=14, n_estimators=300, score=0.477, total=   0.3s
[CV] learning_rate=1, max_depth=1, min_child_samples=14, n_estimators=300 
[CV]  learning_rate=1, max_depth=1, min_child_samples=14, n_estimators=300, score=0.626, total=   0.3s
[CV] learning_rate=1, max_depth=1, min_child_samples=15, n_estimators=300 
[CV]  learning_rate=1, max_depth=1, min_child_samples=15, n_estimators=300, score=0.685, total=   0.3s
[CV] learning_rate=1, max_depth=1, min_child_samples=15, n_estimators=300 
[CV]  learning_rate=1, max_depth=1, min_child_samples=15, n_estimators=300, score=0.662, total=   0.3s
[CV] le

[CV]  learning_rate=1, max_depth=1, min_child_samples=23, n_estimators=300, score=0.475, total=   0.3s
[CV] learning_rate=1, max_depth=1, min_child_samples=23, n_estimators=300 
[CV]  learning_rate=1, max_depth=1, min_child_samples=23, n_estimators=300, score=0.625, total=   0.3s
[CV] learning_rate=1, max_depth=1, min_child_samples=24, n_estimators=300 
[CV]  learning_rate=1, max_depth=1, min_child_samples=24, n_estimators=300, score=0.685, total=   0.3s
[CV] learning_rate=1, max_depth=1, min_child_samples=24, n_estimators=300 
[CV]  learning_rate=1, max_depth=1, min_child_samples=24, n_estimators=300, score=0.662, total=   0.4s
[CV] learning_rate=1, max_depth=1, min_child_samples=24, n_estimators=300 
[CV]  learning_rate=1, max_depth=1, min_child_samples=24, n_estimators=300, score=0.643, total=   0.3s
[CV] learning_rate=1, max_depth=1, min_child_samples=24, n_estimators=300 
[CV]  learning_rate=1, max_depth=1, min_child_samples=24, n_estimators=300, score=0.475, total=   0.3s
[CV] le

[CV]  learning_rate=1, max_depth=2, min_child_samples=13, n_estimators=300, score=0.591, total=   0.3s
[CV] learning_rate=1, max_depth=2, min_child_samples=13, n_estimators=300 
[CV]  learning_rate=1, max_depth=2, min_child_samples=13, n_estimators=300, score=0.591, total=   0.4s
[CV] learning_rate=1, max_depth=2, min_child_samples=13, n_estimators=300 
[CV]  learning_rate=1, max_depth=2, min_child_samples=13, n_estimators=300, score=0.571, total=   0.4s
[CV] learning_rate=1, max_depth=2, min_child_samples=13, n_estimators=300 
[CV]  learning_rate=1, max_depth=2, min_child_samples=13, n_estimators=300, score=0.380, total=   0.4s
[CV] learning_rate=1, max_depth=2, min_child_samples=13, n_estimators=300 
[CV]  learning_rate=1, max_depth=2, min_child_samples=13, n_estimators=300, score=0.534, total=   0.4s
[CV] learning_rate=1, max_depth=2, min_child_samples=14, n_estimators=300 
[CV]  learning_rate=1, max_depth=2, min_child_samples=14, n_estimators=300, score=0.593, total=   0.4s
[CV] le

[CV]  learning_rate=1, max_depth=2, min_child_samples=22, n_estimators=300, score=0.575, total=   0.4s
[CV] learning_rate=1, max_depth=2, min_child_samples=22, n_estimators=300 
[CV]  learning_rate=1, max_depth=2, min_child_samples=22, n_estimators=300, score=0.413, total=   0.3s
[CV] learning_rate=1, max_depth=2, min_child_samples=22, n_estimators=300 
[CV]  learning_rate=1, max_depth=2, min_child_samples=22, n_estimators=300, score=0.558, total=   0.4s
[CV] learning_rate=1, max_depth=2, min_child_samples=23, n_estimators=300 
[CV]  learning_rate=1, max_depth=2, min_child_samples=23, n_estimators=300, score=0.578, total=   0.3s
[CV] learning_rate=1, max_depth=2, min_child_samples=23, n_estimators=300 
[CV]  learning_rate=1, max_depth=2, min_child_samples=23, n_estimators=300, score=0.583, total=   0.4s
[CV] learning_rate=1, max_depth=2, min_child_samples=23, n_estimators=300 
[CV]  learning_rate=1, max_depth=2, min_child_samples=23, n_estimators=300, score=0.574, total=   0.4s
[CV] le

[CV]  learning_rate=1, max_depth=3, min_child_samples=11, n_estimators=300, score=0.383, total=   0.4s
[CV] learning_rate=1, max_depth=3, min_child_samples=12, n_estimators=300 
[CV]  learning_rate=1, max_depth=3, min_child_samples=12, n_estimators=300, score=0.550, total=   0.4s
[CV] learning_rate=1, max_depth=3, min_child_samples=12, n_estimators=300 
[CV]  learning_rate=1, max_depth=3, min_child_samples=12, n_estimators=300, score=0.507, total=   0.4s
[CV] learning_rate=1, max_depth=3, min_child_samples=12, n_estimators=300 
[CV]  learning_rate=1, max_depth=3, min_child_samples=12, n_estimators=300, score=0.513, total=   0.4s
[CV] learning_rate=1, max_depth=3, min_child_samples=12, n_estimators=300 
[CV]  learning_rate=1, max_depth=3, min_child_samples=12, n_estimators=300, score=0.336, total=   0.4s
[CV] learning_rate=1, max_depth=3, min_child_samples=12, n_estimators=300 
[CV]  learning_rate=1, max_depth=3, min_child_samples=12, n_estimators=300, score=0.417, total=   0.4s
[CV] le

[CV]  learning_rate=1, max_depth=3, min_child_samples=21, n_estimators=300, score=0.502, total=   0.6s
[CV] learning_rate=1, max_depth=3, min_child_samples=21, n_estimators=300 
[CV]  learning_rate=1, max_depth=3, min_child_samples=21, n_estimators=300, score=0.488, total=   0.5s
[CV] learning_rate=1, max_depth=3, min_child_samples=21, n_estimators=300 
[CV]  learning_rate=1, max_depth=3, min_child_samples=21, n_estimators=300, score=0.404, total=   0.6s
[CV] learning_rate=1, max_depth=3, min_child_samples=21, n_estimators=300 
[CV]  learning_rate=1, max_depth=3, min_child_samples=21, n_estimators=300, score=0.370, total=   0.6s
[CV] learning_rate=1, max_depth=3, min_child_samples=22, n_estimators=300 
[CV]  learning_rate=1, max_depth=3, min_child_samples=22, n_estimators=300, score=0.531, total=   0.5s
[CV] learning_rate=1, max_depth=3, min_child_samples=22, n_estimators=300 
[CV]  learning_rate=1, max_depth=3, min_child_samples=22, n_estimators=300, score=0.521, total=   0.6s
[CV] le

[CV]  learning_rate=1, max_depth=4, min_child_samples=10, n_estimators=300, score=0.264, total=   0.5s
[CV] learning_rate=1, max_depth=4, min_child_samples=10, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=10, n_estimators=300, score=0.348, total=   0.5s
[CV] learning_rate=1, max_depth=4, min_child_samples=11, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=11, n_estimators=300, score=0.435, total=   0.5s
[CV] learning_rate=1, max_depth=4, min_child_samples=11, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=11, n_estimators=300, score=0.463, total=   0.5s
[CV] learning_rate=1, max_depth=4, min_child_samples=11, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=11, n_estimators=300, score=0.408, total=   0.5s
[CV] learning_rate=1, max_depth=4, min_child_samples=11, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=11, n_estimators=300, score=0.218, total=   0.5s
[CV] le

[CV]  learning_rate=1, max_depth=4, min_child_samples=20, n_estimators=300, score=0.447, total=   0.5s
[CV] learning_rate=1, max_depth=4, min_child_samples=20, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=20, n_estimators=300, score=0.463, total=   0.5s
[CV] learning_rate=1, max_depth=4, min_child_samples=20, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=20, n_estimators=300, score=0.464, total=   0.5s
[CV] learning_rate=1, max_depth=4, min_child_samples=20, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=20, n_estimators=300, score=0.309, total=   0.5s
[CV] learning_rate=1, max_depth=4, min_child_samples=20, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=20, n_estimators=300, score=0.323, total=   0.6s
[CV] learning_rate=1, max_depth=4, min_child_samples=21, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=21, n_estimators=300, score=0.444, total=   0.5s
[CV] le

[CV]  learning_rate=1, max_depth=4, min_child_samples=29, n_estimators=300, score=0.368, total=   0.6s
[CV] learning_rate=1, max_depth=4, min_child_samples=29, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=29, n_estimators=300, score=0.301, total=   0.5s
[CV] learning_rate=1, max_depth=4, min_child_samples=29, n_estimators=300 
[CV]  learning_rate=1, max_depth=4, min_child_samples=29, n_estimators=300, score=0.356, total=   0.6s


[Parallel(n_jobs=1)]: Done 1200 out of 1200 | elapsed:  8.5min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=LGBMRegressor(boosting_type='gbdt', class_weight=None,
                                     colsample_bytree=1.0,
                                     importance_type='split', learning_rate=0.1,
                                     max_depth=-1, min_child_samples=20,
                                     min_child_weight=0.001, min_split_gain=0.0,
                                     n_estimators=100, n_jobs=-1, num_leaves=31,
                                     objective=None, random_state=None,
                                     reg_alpha=0.0, reg_lambda=0.0, silent=True,
                                     subsample=1.0, subsample_for_bin=200000,
                                     subsample_freq=0),
             iid='deprecated', n_jobs=None,
             param_grid={'learning_rate': [0.01, 0.1, 1],
                         'max_depth': [1, 2, 3, 4],
                         'min_child_samples': range(10, 30),
      

In [61]:
# Show the hyper-parameter combination the grid search selected.
print(grid.best_params_)

# Keep the best estimator found by the search under the name `model`.
model = grid.best_estimator_

# Report the estimator's score on both splits. The searched estimator is an
# LGBMRegressor, so `.score` here is presumably the scikit-learn regressor
# default (R^2, coefficient of determination) rather than a classification
# accuracy -- confirm against the installed LightGBM/scikit-learn versions.
print(
    "train score is   ----------->    ",
    model.score(X_train, y_train),
)
print(
    "test score is   ----------->    ",
    model.score(X_test, y_test),
)

{'learning_rate': 1, 'max_depth': 1, 'min_child_samples': 13, 'n_estimators': 300}
train score is   ----------->     0.6663448129216347
test score is   ----------->     0.755541772519361
