<h1> Grid-Searching Preprocessing Steps and Model Parameters </h1>

In [1]:
# This example builds a pipeline with 3 steps:
# 1. Scaling the data
# 2. Generating polynomial features of a chosen degree
# 3. Applying Ridge regression

In [2]:
# Import modules

# import module for scaling the data
from sklearn.preprocessing import StandardScaler

# import module for polynomial feature
from sklearn.preprocessing import PolynomialFeatures

# import module for Ridge regression
from sklearn.linear_model import Ridge

In [3]:
# Load the Boston housing data and hold out 30% of the rows for testing.
# NOTE(review): load_boston is no longer shipped in recent scikit-learn
# releases (removed in 1.2) — confirm the environment pins an older version.
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

boston = load_boston()

# A fixed random_state keeps the train/test partition the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    boston.data,
    boston.target,
    test_size=0.3,
    random_state=0,
)

In [4]:
# import pipeline module
from sklearn.pipeline import make_pipeline

# Chain the three steps in order; make_pipeline names each step after its
# lowercased class name ('standardscaler', 'polynomialfeatures', 'ridge').
pipeline_steps = (StandardScaler(), PolynomialFeatures(), Ridge())
pipe = make_pipeline(*pipeline_steps)

In [6]:
# import grid search module
from sklearn.model_selection import GridSearchCV

# Candidate values for the grid search. Keys follow the
# '<pipeline step name>__<parameter name>' convention so each list is
# routed to the matching step of the pipeline.
polynomial_degrees = [1, 2, 3]
ridge_alphas = [0.001, 0.01, 0.1, 1, 10, 100]

param_grid = {
    'polynomialfeatures__degree': polynomial_degrees,
    'ridge__alpha': ridge_alphas,
}

In [7]:
# Wrap the pipeline in a 5-fold cross-validated search over param_grid

grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5,
)

In [8]:
# Run the grid search on the training split only; the test split is not
# passed in here and is used later for the final evaluation.

grid.fit(X=x_train, y=y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=Pipeline(memory=None,
     steps=[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('polynomialfeatures', PolynomialFeatures(degree=2, include_bias=True, interaction_only=False)), ('ridge', Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001))]),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'polynomialfeatures__degree': [1, 2, 3], 'ridge__alpha': [0.001, 0.01, 0.1, 1, 10, 100]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [9]:
# Show the pipeline that the search selected as its best estimator
# (label typo "Extimator" corrected to "Estimator").

print('Best Estimator: \n {}'.format(grid.best_estimator_))

Best Extimator: 
 Pipeline(memory=None,
     steps=[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('polynomialfeatures', PolynomialFeatures(degree=2, include_bias=True, interaction_only=False)), ('ridge', Ridge(alpha=10, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001))])


In [12]:
# Look up the polynomial-features step of the winning pipeline and report
# the degree it was configured with.

best_poly_step = grid.best_estimator_.named_steps['polynomialfeatures']
print('Best polynomial order: \n {}'.format(best_poly_step.degree))

Best polynomial order: 
 2


In [15]:
# Report the score of the selected pipeline on the held-out test data.
# This is not the best cross-validation score (that is grid.best_score_),
# so the label says "Test-set" rather than "Best".

print('Test-set score: \n {:0.2f}'.format(grid.score(x_test, y_test)))

Best Score: 
 0.79
