<a href="https://colab.research.google.com/github/sayanarajasekhar/sklearn/blob/main/TrainAndPredictWithGridSearchCV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install scikit-learn



In [2]:
import matplotlib.pylab as plt
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [3]:
# Load the California housing dataset directly as a (features, target) pair.
X, y = fetch_california_housing(return_X_y=True)

In [4]:
# Two-stage pipeline: standardise the features, then fit a k-nearest-neighbours
# regressor. The step names ("scale", "model") become the prefixes of the
# parameter names reported by get_params(), e.g. "model__n_neighbors".
pipe = Pipeline(
    steps=[
        ("scale", StandardScaler()),
        ("model", KNeighborsRegressor(n_neighbors=1)),
    ]
)
pipe.get_params()

{'memory': None,
 'steps': [('scale', StandardScaler()),
  ('model', KNeighborsRegressor(n_neighbors=1))],
 'transform_input': None,
 'verbose': False,
 'scale': StandardScaler(),
 'model': KNeighborsRegressor(n_neighbors=1),
 'scale__copy': True,
 'scale__with_mean': True,
 'scale__with_std': True,
 'model__algorithm': 'auto',
 'model__leaf_size': 30,
 'model__metric': 'minkowski',
 'model__metric_params': None,
 'model__n_jobs': None,
 'model__n_neighbors': 1,
 'model__p': 2,
 'model__weights': 'uniform'}

In [17]:
# Tune the number of neighbours with an exhaustive grid search:
#   * estimator  - the scaling + KNN pipeline built earlier in the notebook
#   * param_grid - try n_neighbors = 1, 2, ..., 10 on the "model" step
#   * cv         - score every candidate with 3-fold cross-validation
model = GridSearchCV(
    pipe,
    {"model__n_neighbors": range(1, 11)},
    cv=3,
)

In [18]:
# Run the search: each candidate is cross-validated and, with the default
# refit=True, the best one is then refit on the full X, y.
model.fit(X, y)

In [19]:
# Raw cross-validation results: a dict of per-candidate arrays (timings, parameters, scores).
model.cv_results_

{'mean_fit_time': array([0.02832071, 0.02682169, 0.02705908, 0.03585513, 0.0256687 ,
        0.02560345, 0.02641606, 0.02580134, 0.02601687, 0.03024801]),
 'std_fit_time': array([1.98399385e-03, 1.09170956e-03, 1.75245006e-03, 6.06451774e-04,
        5.53437035e-05, 8.75480756e-05, 4.23613170e-04, 3.12060949e-04,
        5.34373582e-04, 5.78468587e-03]),
 'mean_score_time': array([0.393471  , 0.43727374, 0.56860995, 0.76672538, 0.51460656,
        0.52597626, 0.55583024, 0.56403605, 0.56962323, 0.78887852]),
 'std_score_time': array([0.03077159, 0.0355912 , 0.17392914, 0.07387146, 0.04250325,
        0.03413605, 0.0430242 , 0.04536956, 0.03696087, 0.23831788]),
 'param_model__n_neighbors': masked_array(data=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value=999999),
 'params': [{'model__n_neighbors': 1},
  {'model__n_neighbors': 2},
  {'model__n_neighbors': 3},
  {'model__n_n

In [20]:
# Show the cross-validation results as a table, one row per n_neighbors candidate.
# pandas is imported together with the other dependencies at the top of the notebook.
pd.DataFrame(model.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__n_neighbors,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.028321,0.001984,0.393471,0.030772,1,{'model__n_neighbors': 1},0.324068,0.33483,0.323371,0.327423,0.005245,10
1,0.026822,0.001092,0.437274,0.035591,2,{'model__n_neighbors': 2},0.468788,0.503457,0.424388,0.465544,0.032361,9
2,0.027059,0.001752,0.56861,0.173929,3,{'model__n_neighbors': 3},0.518547,0.54334,0.473595,0.511827,0.028867,8
3,0.035855,0.000606,0.766725,0.073871,4,{'model__n_neighbors': 4},0.540323,0.564974,0.499827,0.535041,0.026857,7
4,0.025669,5.5e-05,0.514607,0.042503,5,{'model__n_neighbors': 5},0.551149,0.579313,0.511781,0.547414,0.027696,6
5,0.025603,8.8e-05,0.525976,0.034136,6,{'model__n_neighbors': 6},0.558435,0.586185,0.521134,0.555251,0.026652,5
6,0.026416,0.000424,0.55583,0.043024,7,{'model__n_neighbors': 7},0.564207,0.590509,0.525223,0.559979,0.02682,4
7,0.025801,0.000312,0.564036,0.04537,8,{'model__n_neighbors': 8},0.568637,0.593942,0.529337,0.563972,0.02658,3
8,0.026017,0.000534,0.569623,0.036961,9,{'model__n_neighbors': 9},0.571972,0.594863,0.529928,0.565588,0.026891,2
9,0.030248,0.005785,0.788879,0.238318,10,{'model__n_neighbors': 10},0.57376,0.595831,0.532973,0.567522,0.026038,1
