# ML REGRESSION - {"BIGMART SALES" DATASET}

## 1. Importing Modules and Setting Configurations

In [10]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sb

from pickle import dump, load


from sklearn.compose import ColumnTransformer

from sklearn.preprocessing import PowerTransformer, FunctionTransformer
from sklearn.preprocessing import StandardScaler

from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

from sklearn.feature_selection import SelectKBest, mutual_info_regression

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from xgboost import XGBRegressor

from xgboost import XGBClassifier

from sklearn.pipeline import Pipeline

from sklearn.metrics import r2_score

from sklearn.model_selection import KFold, cross_val_score, cross_validate

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import r2_score


import warnings
warnings.filterwarnings('ignore')

from sklearn import set_config
set_config(display='diagram')

In [11]:
# PD Options

pd.set_option('display.min_rows', 5)
pd.set_option('display.max_rows', 25)
pd.set_option('display.precision', 4)

# SB Options

sb.set_theme(context='notebook', style='whitegrid', palette='pastel', font='times new roman', font_scale=1.25)

## 2. Importing Train Dataset

In [12]:
tr = pd.read_pickle('/content/bms_FE_train_final.pkl')

print(f'Shape of the train dataset : {tr.shape}')
tr.head(5)

Shape of the train dataset : (8323, 11)


Unnamed: 0,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Category,Outlet_Age,Item_Outlet_Sales
2171,13.65,Regular,0.0808,Frozen Foods,262.8936,Medium,Tier 1,Supermarket Type1,Foods,14.0,4958.8784
5657,6.98,Low Fat,0.0412,Canned,82.8934,Small,Tier 2,Supermarket Type1,Foods,11.0,818.934
2156,20.1,Low Fat,0.0746,Dairy Foods,110.3228,Small,Tier 1,Supermarket Type1,Foods,16.0,1768.3648
110,13.3,Low Fat,0.0798,Dairy Foods,232.53,Small,Tier 1,Supermarket Type1,Foods,16.0,699.09
6709,10.1,Non Edible,0.0301,Health and Hygiene,154.3656,Medium,Tier 3,Supermarket Type3,Non Consumables,28.0,2471.4496


In [13]:
Xtr = tr.drop(columns='Item_Outlet_Sales')
ytr = tr['Item_Outlet_Sales']

## 3. Hyper Parameter Tuning

In [14]:
# Pre Processors ---------------------------------------------------------------------------------------------

def ft_exp(x):
    y = x**(1/1.2)
    return y


def ft_sqrt(x):
    y = x**(1/2)
    return y

ft_mrp_exp = FunctionTransformer(func=ft_exp)
ft_age_sqrt = FunctionTransformer(func=ft_sqrt)

pre_proc_num = ColumnTransformer(transformers=[
                ('pt_it_vis',PowerTransformer(method='yeo-johnson', standardize=False),[2]),
                ('ft_it_mrp',ft_mrp_exp,[4]),
                ('ft_ol_age',ft_age_sqrt,[9])
                ],
                remainder='passthrough')


pre_proc_ss = ColumnTransformer(transformers=[
                ('ss',StandardScaler(),[0,1,2,3])
                ],
                remainder='passthrough')


pre_proc_cat = ColumnTransformer(transformers=[
                ('oe',OrdinalEncoder(categories=[['Small','Medium','High'], ['Tier 3','Tier 2','Tier 1'],
                        ['Grocery Store','Supermarket Type3','Supermarket Type2','Supermarket Type1']], dtype='object'),[6,7,8]),
                ('ohe',OneHotEncoder(drop='first', sparse_output=False, dtype='int8'),[4,5,9]),
                ],
                remainder='passthrough')

In [15]:
# Feature Selection ---------------------------------------------------------------------------------------------

skb = SelectKBest(mutual_info_regression, k='all')

### 3.1 LinearRegression

In [None]:
# ML Pipeline ------------------------------------------------------------------------------------------------
steps = [('pp_num',pre_proc_num),
         ('pp_ss',pre_proc_ss),
         ('pp_cat',pre_proc_cat),
         ('skb',skb),
         ('mdl',LinearRegression())]

pipe = Pipeline(steps)


# Model Param Grid -------------------------------------------------------------------------------------------
param_grid = {
    'skb__k':['all',10,15,20,25]
    }
print(f'Param Grid : \n {param_grid} \n')


# GridSearchCV Configuration ---------------------------------------------------------------------------------
kfold = KFold(n_splits=10, shuffle=True, random_state=46)
print(f'KFold Setting : \n {kfold} \n')
gscv = GridSearchCV(pipe, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)
gscv.fit(Xtr, ytr)


# GridSearch Results -----------------------------------------------------------------------------------------
print(f'Best Params : \n {gscv.best_params_} \n')
print(f'Best Scores : \n {round(gscv.best_score_,4)}')

Param Grid : 
 {'skb__k': ['all', 10, 15, 20, 25]} 

KFold Setting : 
 KFold(n_splits=10, random_state=46, shuffle=True) 

Fitting 10 folds for each of 5 candidates, totalling 50 fits
Best Params : 
 {'skb__k': 10} 

Best Scores : 
 0.4124


### 3.2 Lasso

In [None]:
# ML Pipeline ------------------------------------------------------------------------------------------------
steps = [('pp_num',pre_proc_num),
         ('pp_ss',pre_proc_ss),
         ('pp_cat',pre_proc_cat),
         ('skb',skb),
         ('mdl',Lasso(random_state=46))]

pipe = Pipeline(steps)


# Model Param Grid -------------------------------------------------------------------------------------------
param_grid = {
    'skb__k':['all',15,20,25],
    'mdl__alpha':[0,0.01,0.05,0.1,0.5,1],
    'mdl__max_iter':[500,1000,1500]
    }
print(f'Param Grid : \n {param_grid} \n')


# GridSearchCV Configuration ---------------------------------------------------------------------------------
kfold = KFold(n_splits=10, shuffle=True, random_state=46)
print(f'KFold Setting : \n {kfold} \n')
gscv = GridSearchCV(pipe, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)
gscv.fit(Xtr, ytr)


# GridSearch Results -----------------------------------------------------------------------------------------
print(f'Best Params : \n {gscv.best_params_} \n')
print(f'Best Scores : \n {round(gscv.best_score_,4)}')

Param Grid : 
 {'skb__k': ['all', 15, 20, 25], 'mdl__alpha': [0, 0.01, 0.05, 0.1, 0.5, 1], 'mdl__max_iter': [500, 1000, 1500]} 

KFold Setting : 
 KFold(n_splits=10, random_state=46, shuffle=True) 

Fitting 10 folds for each of 72 candidates, totalling 720 fits
Best Params : 
 {'mdl__alpha': 0.5, 'mdl__max_iter': 1000, 'skb__k': 15} 

Best Scores : 
 0.4123


### 3.3 Ridge

In [None]:
# ML Pipeline ------------------------------------------------------------------------------------------------
steps = [('pp_num',pre_proc_num),
         ('pp_ss',pre_proc_ss),
         ('pp_cat',pre_proc_cat),
         ('skb',skb),
         ('mdl',Ridge(random_state=46, solver='auto'))]

pipe = Pipeline(steps)


# Model Param Grid -------------------------------------------------------------------------------------------
param_grid = {
    'skb__k':['all',15,20,25],
    'mdl__alpha':[0,0.01,0.05,0.1,0.5,1],
    'mdl__max_iter':[500,1000,1500]
    }
print(f'Param Grid : \n {param_grid} \n')


# GridSearchCV Configuration ---------------------------------------------------------------------------------
kfold = KFold(n_splits=10, shuffle=True, random_state=46)
print(f'KFold Setting : \n {kfold} \n')
gscv = GridSearchCV(pipe, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)
gscv.fit(Xtr, ytr)


# GridSearch Results -----------------------------------------------------------------------------------------
print(f'Best Params : \n {gscv.best_params_} \n')
print(f'Best Scores : \n {round(gscv.best_score_,4)}')

Param Grid : 
 {'skb__k': ['all', 15, 20, 25], 'mdl__alpha': [0, 0.01, 0.05, 0.1, 0.5, 1], 'mdl__max_iter': [500, 1000, 1500]} 

KFold Setting : 
 KFold(n_splits=10, random_state=46, shuffle=True) 

Fitting 10 folds for each of 72 candidates, totalling 720 fits
Best Params : 
 {'mdl__alpha': 0.05, 'mdl__max_iter': 1500, 'skb__k': 15} 

Best Scores : 
 0.4123


### 3.4 KNeighborsRegressor

In [None]:
# ML Pipeline ------------------------------------------------------------------------------------------------
steps = [('pp_num',pre_proc_num),
         ('pp_ss',pre_proc_ss),
         ('pp_cat',pre_proc_cat),
         ('skb',skb),
         ('mdl',KNeighborsRegressor())]

pipe = Pipeline(steps)

# Model Param Grid -------------------------------------------------------------------------------------------
param_grid = {
    'mdl__n_neighbors': [3,5,7,9,13,17],
    'mdl__weights':['uniform', 'distance'],
    'mdl__metric':['euclidean', 'manhattan', 'minkowski'],
    'mdl__algorithm':['brute','auto']
    }
print(f'Param Grid : \n {param_grid} \n')


# GridSearchCV Configuration ---------------------------------------------------------------------------------
kfold = KFold(n_splits=10, shuffle=True, random_state=46)
print(f'KFold Setting : \n {kfold} \n')
gscv = GridSearchCV(pipe, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)
gscv.fit(Xtr, ytr)


# GridSearch Results -----------------------------------------------------------------------------------------
print(f'Best Params : \n {gscv.best_params_} \n')
print(f'Best Scores : \n {round(gscv.best_score_,4)}')

Param Grid : 
 {'mdl__n_neighbors': [3, 5, 7, 9, 13, 17], 'mdl__weights': ['uniform', 'distance'], 'mdl__metric': ['euclidean', 'manhattan', 'minkowski'], 'mdl__algorithm': ['brute', 'auto']} 

KFold Setting : 
 KFold(n_splits=10, random_state=46, shuffle=True) 

Fitting 10 folds for each of 72 candidates, totalling 720 fits
Best Params : 
 {'mdl__algorithm': 'brute', 'mdl__metric': 'euclidean', 'mdl__n_neighbors': 17, 'mdl__weights': 'uniform'} 

Best Scores : 
 0.5616


### 3.5 SVR

In [None]:
# ML Pipeline ------------------------------------------------------------------------------------------------
steps = [('pp_num',pre_proc_num),
         ('pp_ss',pre_proc_ss),
         ('pp_cat',pre_proc_cat),
         ('skb',skb),
         ('mdl',SVR())]

pipe = Pipeline(steps)

# Model Param Grid -------------------------------------------------------------------------------------------
param_grid = {
    'mdl__C':[1.0, 0.1, 0.01],
    'mdl__kernel':['linear','poly', 'rbf', 'sigmoid'],
    'mdl__gamma':['scale','auto'],
    'mdl__degree':[2,3,4,5]
    }
print(f'Param Grid : \n {param_grid} \n')


# GridSearchCV Configuration ---------------------------------------------------------------------------------
kfold = KFold(n_splits=10, shuffle=True, random_state=46)
print(f'KFold Setting : \n {kfold} \n')
gscv = GridSearchCV(pipe, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)
gscv.fit(Xtr, ytr)


# GridSearch Results -----------------------------------------------------------------------------------------
print(f'Best Params : \n {gscv.best_params_} \n')
print(f'Best Scores : \n {round(gscv.best_score_,4)}')

Param Grid : 
 {'mdl__C': [1.0, 0.1, 0.01], 'mdl__kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'mdl__gamma': ['scale', 'auto'], 'mdl__degree': [2, 3, 4, 5]} 

KFold Setting : 
 KFold(n_splits=10, random_state=46, shuffle=True) 

Fitting 10 folds for each of 96 candidates, totalling 960 fits
Best Params : 
 {'mdl__C': 1.0, 'mdl__degree': 2, 'mdl__gamma': 'scale', 'mdl__kernel': 'linear'} 

Best Scores : 
 0.3576


### 3.6 DecisionTreeRegressor

In [None]:
# ML Pipeline ------------------------------------------------------------------------------------------------
steps = [('pp_num',pre_proc_num),
         ('pp_ss',pre_proc_ss),
         ('pp_cat',pre_proc_cat),
         ('skb',skb),
         ('mdl',DecisionTreeRegressor(random_state=46))]

pipe = Pipeline(steps)


# Model Param Grid -------------------------------------------------------------------------------------------
param_grid = {
    'mdl__criterion':['squared_error','absolute_error'],
    'mdl__max_depth':[1,3,5],
    'mdl__splitter':['best','random'],
    'mdl__min_samples_split':[0.3,0.5,0.8],
    'mdl__min_impurity_decrease':[0.0,0.2,0.5]
    }
print(f'Param Grid : \n {param_grid} \n')


# GridSearchCV Configuration ---------------------------------------------------------------------------------
kfold = KFold(n_splits=10, shuffle=True, random_state=46)
print(f'KFold Setting : \n {kfold} \n')
gscv = GridSearchCV(pipe, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)
gscv.fit(Xtr, ytr)


# GridSearch Results -----------------------------------------------------------------------------------------
print(f'Best Params : \n {gscv.best_params_} \n')
print(f'Best Scores : \n {round(gscv.best_score_,4)}')

Param Grid : 
 {'mdl__criterion': ['squared_error', 'absolute_error'], 'mdl__max_depth': [1, 3, 5], 'mdl__splitter': ['best', 'random'], 'mdl__min_samples_split': [0.3, 0.5, 0.8], 'mdl__min_impurity_decrease': [0.0, 0.2, 0.5]} 

KFold Setting : 
 KFold(n_splits=10, random_state=46, shuffle=True) 

Fitting 10 folds for each of 108 candidates, totalling 1080 fits
Best Params : 
 {'mdl__criterion': 'squared_error', 'mdl__max_depth': 5, 'mdl__min_impurity_decrease': 0.0, 'mdl__min_samples_split': 0.3, 'mdl__splitter': 'best'} 

Best Scores : 
 0.5553


### 3.7 BaggingRegressor

In [None]:
# ML Pipeline ------------------------------------------------------------------------------------------------
steps = [('pp_num',pre_proc_num),
         ('pp_ss',pre_proc_ss),
         ('pp_cat',pre_proc_cat),
         ('skb',skb),
         ('mdl',BaggingRegressor(random_state=46))]

pipe = Pipeline(steps)


# Model Param Grid -------------------------------------------------------------------------------------------
param_grid = {
    'mdl__n_estimators':[50,100,200],
    'mdl__estimator':[KNeighborsRegressor()],
    'mdl__max_samples':[0.25,0.5],
    'mdl__bootstrap':[True,False],
    'mdl__oob_score':[True]
    }
print(f'Param Grid : \n {param_grid} \n')


# GridSearchCV Configuration ---------------------------------------------------------------------------------
kfold = KFold(n_splits=5, shuffle=True, random_state=46)
print(f'KFold Setting : \n {kfold} \n')
gscv = GridSearchCV(pipe, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)
gscv.fit(Xtr, ytr)


# GridSearch Results -----------------------------------------------------------------------------------------
print(f'Best Params : \n {gscv.best_params_} \n')
print(f'Best Scores : \n {round(gscv.best_score_,4)}')

Param Grid : 
 {'mdl__n_estimators': [50, 100, 200], 'mdl__estimator': [KNeighborsRegressor()], 'mdl__max_samples': [0.25, 0.5], 'mdl__bootstrap': [True, False], 'mdl__oob_score': [True]} 

KFold Setting : 
 KFold(n_splits=5, random_state=46, shuffle=True) 

Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best Params : 
 {'mdl__bootstrap': True, 'mdl__estimator': KNeighborsRegressor(), 'mdl__max_samples': 0.25, 'mdl__n_estimators': 200, 'mdl__oob_score': True} 

Best Scores : 
 0.5702


### 3.8 RandomForestRegressor

In [None]:
# ML Pipeline ------------------------------------------------------------------------------------------------
steps = [('pp_num',pre_proc_num),
         ('pp_ss',pre_proc_ss),
         ('pp_cat',pre_proc_cat),
         ('skb',skb),
         ('mdl',RandomForestRegressor(random_state=46))]

pipe = Pipeline(steps)


# Model Param Grid -------------------------------------------------------------------------------------------
param_grid = {
    'mdl__n_estimators':[100, 200, 300],
    'mdl__criterion':['squared_error','absolute_error'],
    'mdl__max_depth': [1,3,5],
    'mdl__max_samples': [0.25,0.50],
    'mdl__bootstrap':[True,False],
    'mdl__oob_score':[True]
    # 'mdl__min_samples_split':[0.3,0.5,0.8],
    # 'mdl__min_impurity_decrease':[0.0,0.2,0.5]
    }
print(f'Param Grid : \n {param_grid} \n')


# GridSearchCV Configuration ---------------------------------------------------------------------------------
kfold = KFold(n_splits=5, shuffle=True, random_state=46)
print(f'KFold Setting : \n {kfold} \n')
gscv = GridSearchCV(pipe, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)
gscv.fit(Xtr, ytr)


# GridSearch Results -----------------------------------------------------------------------------------------
print(f'Best Params : \n {gscv.best_params_} \n')
print(f'Best Scores : \n {round(gscv.best_score_,4)}')

Param Grid : 
 {'mdl__n_estimators': [100, 200, 300], 'mdl__criterion': ['squared_error', 'absolute_error'], 'mdl__max_depth': [1, 3, 5], 'mdl__max_samples': [0.25, 0.5], 'mdl__bootstrap': [True, False], 'mdl__oob_score': [True]} 

KFold Setting : 
 KFold(n_splits=5, random_state=46, shuffle=True) 

Fitting 5 folds for each of 72 candidates, totalling 360 fits
Best Params : 
 {'mdl__bootstrap': True, 'mdl__criterion': 'squared_error', 'mdl__max_depth': 5, 'mdl__max_samples': 0.25, 'mdl__n_estimators': 100, 'mdl__oob_score': True} 

Best Scores : 
 0.5982


### 3.9 GradientBoostingRegressor

In [8]:
# ML Pipeline ------------------------------------------------------------------------------------------------
steps = [('pp_num',pre_proc_num),
         ('pp_ss',pre_proc_ss),
         ('pp_cat',pre_proc_cat),
         ('skb',skb),
         ('mdl',GradientBoostingRegressor(random_state=46))]

pipe = Pipeline(steps)


# Model Param Grid -------------------------------------------------------------------------------------------
param_grid = {
    'mdl__learning_rate':[0.001,0.01,0.1],
    'mdl__criterion':['friedman_mse', 'squared_error'],
    'mdl__n_estimators':[50,100,200],
    'mdl__max_depth':[1,3,5],
    'mdl__subsample':[0.5,0.75]
    # 'mdl__min_samples_split':[0.3,0.5,0.8],
    # 'mdl__min_impurity_decrease':[0.0,0.2,0.5]
    }
print(f'Param Grid : \n {param_grid} \n')


# GridSearchCV Configuration ---------------------------------------------------------------------------------
kfold = KFold(n_splits=5, shuffle=True, random_state=46)
print(f'KFold Setting : \n {kfold} \n')
gscv = GridSearchCV(pipe, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)
gscv.fit(Xtr, ytr)


# GridSearch Results -----------------------------------------------------------------------------------------
print(f'Best Params : \n {gscv.best_params_} \n')
print(f'Best Scores : \n {round(gscv.best_score_,4)}')

Param Grid : 
 {'mdl__learning_rate': [0.001, 0.01, 0.1], 'mdl__criterion': ['friedman_mse', 'squared_error'], 'mdl__n_estimators': [50, 100, 200], 'mdl__max_depth': [1, 3, 5], 'mdl__subsample': [0.5, 0.75]} 

KFold Setting : 
 KFold(n_splits=5, random_state=46, shuffle=True) 

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best Params : 
 {'mdl__criterion': 'squared_error', 'mdl__learning_rate': 0.1, 'mdl__max_depth': 3, 'mdl__n_estimators': 50, 'mdl__subsample': 0.75} 

Best Scores : 
 0.5986


### 3.10 HistGradientBoostingRegressor

In [7]:
# ML Pipeline ------------------------------------------------------------------------------------------------
steps = [('pp_num',pre_proc_num),
         ('pp_ss',pre_proc_ss),
         ('pp_cat',pre_proc_cat),
         ('skb',skb),
         ('mdl',HistGradientBoostingRegressor(random_state=46))]

pipe = Pipeline(steps)


# Model Param Grid -------------------------------------------------------------------------------------------
param_grid = {
    'mdl__learning_rate':[0.01,0.1,0.5],
    'mdl__max_depth':[1,3,5],
    'mdl__max_iter':[50,100,200],
    'mdl__max_leaf_nodes':[20,25],
    'mdl__l2_regularization':[0.1,0.5]
    }
print(f'Param Grid : \n {param_grid} \n')


# GridSearchCV Configuration ---------------------------------------------------------------------------------
kfold = KFold(n_splits=5, shuffle=True, random_state=46)
print(f'KFold Setting : \n {kfold} \n')
gscv = GridSearchCV(pipe, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)
gscv.fit(Xtr, ytr)


# GridSearch Results -----------------------------------------------------------------------------------------
print(f'Best Params : \n {gscv.best_params_} \n')
print(f'Best Scores : \n {round(gscv.best_score_,4)}')

Param Grid : 
 {'mdl__learning_rate': [0.01, 0.1, 0.5], 'mdl__max_depth': [1, 3, 5], 'mdl__max_iter': [50, 100, 200], 'mdl__max_leaf_nodes': [20, 25], 'mdl__l2_regularization': [0.1, 0.5]} 

KFold Setting : 
 KFold(n_splits=5, random_state=46, shuffle=True) 

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best Params : 
 {'mdl__l2_regularization': 0.1, 'mdl__learning_rate': 0.1, 'mdl__max_depth': 3, 'mdl__max_iter': 50, 'mdl__max_leaf_nodes': 20} 

Best Scores : 
 0.5998


### 3.11 XGBRegressor

In [None]:
#! pip install xgboost

In [16]:
# ML Pipeline ------------------------------------------------------------------------------------------------
steps = [('pp_num',pre_proc_num),
         ('pp_ss',pre_proc_ss),
         ('pp_cat',pre_proc_cat),
         ('skb',skb),
         ('mdl',XGBRegressor(objective='reg:squarederror', eval_metric='rmse', seed=46))]

pipe = Pipeline(steps)


# Model Param Grid -------------------------------------------------------------------------------------------
param_grid = {
    'mdl__eta':[0.01,0.1,0.5],
    'mdl__n_estimators':[50,100,200],
    'mdl__max_depth':[3,5,7],
    'mdl__gamma':[0.01,0.05,0.1],
    'mdl__subsample':[0.5,0.75]
    # 'mdl__lambda':[0.1,0.25,0.5],
    # 'mdl__alpha':[0.1,0.25,0.5],
    #'mdl__colsample_bytree':[0.25,0.5,0.75]
    }
print(f'Param Grid : \n {param_grid} \n')


# GridSearchCV Configuration ---------------------------------------------------------------------------------
kfold = KFold(n_splits=5, shuffle=True, random_state=46)
print(f'KFold Setting : \n {kfold} \n')
gscv = GridSearchCV(pipe, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)
gscv.fit(Xtr, ytr)


# GridSearch Results -----------------------------------------------------------------------------------------
print(f'Best Params : \n {gscv.best_params_} \n')
print(f'Best Scores : \n {round(gscv.best_score_,4)}')

Param Grid : 
 {'mdl__eta': [0.01, 0.1, 0.5], 'mdl__n_estimators': [50, 100, 200], 'mdl__max_depth': [3, 5, 7], 'mdl__gamma': [0.01, 0.05, 0.1], 'mdl__subsample': [0.5, 0.75]} 

KFold Setting : 
 KFold(n_splits=5, random_state=46, shuffle=True) 

Fitting 5 folds for each of 162 candidates, totalling 810 fits
Best Params : 
 {'mdl__eta': 0.1, 'mdl__gamma': 0.01, 'mdl__max_depth': 3, 'mdl__n_estimators': 50, 'mdl__subsample': 0.75} 

Best Scores : 
 0.5999
