Using `GridSearchCV` with `Pipeline`

In [22]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression,ElasticNet
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.impute import SimpleImputer
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV

In [23]:
chem = pd.read_csv("ChemicalProcess.csv")
X=chem.drop('Yield', axis=1)
y= chem['Yield']

# using Pipeline

In [24]:
imp_mean= SimpleImputer(strategy='mean').set_output(transform='pandas')
lr= LinearRegression()
pipe_lr = Pipeline([('IMP', imp_mean), ('LR', lr)])
res_lr = cross_val_score(pipe_lr, X, y)
res_lr.mean()

-94.57748595795053

In [25]:
imp_med= SimpleImputer(strategy='median').set_output(transform='pandas')
lr=LinearRegression()
pipe_lr = Pipeline([('IMP', imp_med),('LR', lr)])
res_lr = cross_val_score(pipe_lr, X,y)
res_lr.mean()

-86.99645845793464

In [26]:
imp= SimpleImputer()
pipe_lr = Pipeline([('IMP', imp),('LR', lr)])
pipe_lr.get_params()


#By this we can see the significance of IMP

{'memory': None,
 'steps': [('IMP', SimpleImputer()), ('LR', LinearRegression())],
 'verbose': False,
 'IMP': SimpleImputer(),
 'LR': LinearRegression(),
 'IMP__add_indicator': False,
 'IMP__copy': True,
 'IMP__fill_value': None,
 'IMP__keep_empty_features': False,
 'IMP__missing_values': nan,
 'IMP__strategy': 'mean',
 'LR__copy_X': True,
 'LR__fit_intercept': True,
 'LR__n_jobs': None,
 'LR__positive': False}

# using GridSearchCV

In [30]:
imp= SimpleImputer()
pipe_lr = Pipeline([('IMP', imp),('LR', lr)])
params= {'IMP__strategy':['mean', 'median']}     
gcv_lr = GridSearchCV(pipe_lr, param_grid= params)          #pipe_lr
gcv_lr.fit(X,y)
print("Best Params:", gcv_lr.best_params_)
print("Best Score:", gcv_lr.best_score_)


Best Params: {'IMP__strategy': 'median'}
Best Score: -86.99645845793464


# using ElasticNet

In [39]:
elastic= ElasticNet()
imp= SimpleImputer()
pipe_el = Pipeline([('IMP', imp), ('EL', elastic)])
pipe_el.get_params()


{'memory': None,
 'steps': [('IMP', SimpleImputer()), ('EL', ElasticNet())],
 'verbose': False,
 'IMP': SimpleImputer(),
 'EL': ElasticNet(),
 'IMP__add_indicator': False,
 'IMP__copy': True,
 'IMP__fill_value': None,
 'IMP__keep_empty_features': False,
 'IMP__missing_values': nan,
 'IMP__strategy': 'mean',
 'EL__alpha': 1.0,
 'EL__copy_X': True,
 'EL__fit_intercept': True,
 'EL__l1_ratio': 0.5,
 'EL__max_iter': 1000,
 'EL__positive': False,
 'EL__precompute': False,
 'EL__random_state': None,
 'EL__selection': 'cyclic',
 'EL__tol': 0.0001,
 'EL__warm_start': False}

In [50]:
params= {'IMP__strategy':['mean', 'median'], 'EL__alpha': np.linspace(0.0001,10,20),
          'EL__l1_ratio' : np.linspace(0.0001,1,10) }     
gcv_el = GridSearchCV(pipe_el, param_grid= params)          #pipe_el
gcv_el.fit(X,y)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

In [41]:
print("Best Params:", gcv_el.best_params_)
print("Best Score:", gcv_el.best_score_)


Best Params: {'EL__alpha': 10.0, 'EL__l1_ratio': 1.0, 'IMP__strategy': 'median'}
Best Score: -1.55172620877767


# Grid View:

In [53]:
df_results = pd.DataFrame(gcv_el.cv_results_)
df_results.shape

(400, 16)

Exporting the Grid to `.csv`

In [57]:
df_results.to_csv("ParameterCombination.csv")

PermissionError: [Errno 13] Permission denied: 'ParameterCombination.csv'

PermissionError: [Errno 13] Permission denied: 'ParameterCombination.csv'