In [3]:
import pandas as pd
import numpy as np
import os
import tarfile
from six.moves import urllib
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

**Loading file**

In [None]:
# Load the raw dataset once; the target and the feature frame are both derived from it.
housing_raw = pd.read_csv('housing.csv')

# Target column the model is trained to predict.
median_house_value = housing_raw['median_house_value']

# DataFrame.drop returns a new frame rather than modifying in place, so the result
# must be assigned; otherwise the target stays among the features and leaks into
# everything built from `housing`.
housing = housing_raw.drop('median_house_value', axis=1)

# Preview the feature frame (kept as the last expression so the notebook renders it).
housing.head()


**Separating numerical values**

In [70]:
# Numeric feature columns: everything except the categorical column and the target.
# The target is removed explicitly so it can never be passed to the model as a
# feature; errors="ignore" keeps this working when `housing` no longer contains it.
housing_num = (
    housing
    .drop("ocean_proximity", axis=1)
    .drop("median_house_value", axis=1, errors="ignore")
)
# Names of the numeric columns, used to select them for the numerical pipeline.
housing_num_name = list(housing_num)


**Separating categorical values**

In [71]:
# Name(s) of the categorical column(s), kept as a list so it can be used for column selection.
housing_cat_name = ['ocean_proximity']
# The categorical column on its own.
housing_cat = housing[housing_cat_name[0]]


**Creating pipelines that impute missing values, one-hot encode the categorical column and scale the numerical columns.**

In [72]:
# Categorical preprocessing: fill missing categories, then one-hot encode.
# Imputation must run before encoding so that missing values are replaced by the
# most frequent category instead of reaching the encoder; an imputer placed after
# the encoder would only ever see the already-encoded indicator columns.
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    # handle_unknown='ignore': categories not seen during fit encode as all zeros
    # instead of raising at transform time.
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

In [73]:
# Numerical preprocessing: replace missing values with the column mean, then standardize.
numerical_steps = [
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
]
numerical_pipeline = Pipeline(steps=numerical_steps)

**Combining both column types by applying each pipeline through a `ColumnTransformer`**

In [74]:
# Route each group of columns through its own pipeline; the transformed outputs are
# concatenated in the order listed here (categorical first, then numerical).
column_pipelines = [
    ('cat', categorical_pipeline, housing_cat_name),
    ('num', numerical_pipeline, housing_num_name),
]
preprocessor = ColumnTransformer(transformers=column_pipelines)

# Fit the preprocessor on the full frame and build the transformed feature matrix in one step.
X_transform = preprocessor.fit_transform(housing)

**Grid Search**

In [75]:
# Two grids are searched: one with the forest's default bootstrapping, and a smaller
# one with bootstrapping switched off.
grid_param = [
    {'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},
    {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},
]

# Fix the seed so repeated runs build identical forests and therefore select the
# same best parameters; without it the search result varies from run to run.
model = RandomForestRegressor(random_state=42)

grd_search = GridSearchCV(
    model,
    param_grid=grid_param,
    # Grid search maximizes the score, so mean absolute error is used in negated form.
    scoring='neg_mean_absolute_error',
    cv=5,
    return_train_score=True,
)

# NOTE(review): X_transform comes from a preprocessor fitted on the whole dataset,
# so imputation/scaling statistics are shared across the CV folds. Wrapping the
# preprocessor and the model in a single Pipeline would keep each fold independent.
grd_search.fit(X_transform, median_house_value)


In [77]:
# Hyper-parameter combination that achieved the best mean cross-validated score.
grd_search.best_params_

{'max_features': 8, 'n_estimators': 30}

In [79]:
# cv_results_ holds the per-candidate results of the search; its 'params' entry
# lists every hyper-parameter combination that was evaluated.
g = grd_search.cv_results_
g['params']

[{'max_features': 2, 'n_estimators': 3},
 {'max_features': 2, 'n_estimators': 10},
 {'max_features': 2, 'n_estimators': 30},
 {'max_features': 4, 'n_estimators': 3},
 {'max_features': 4, 'n_estimators': 10},
 {'max_features': 4, 'n_estimators': 30},
 {'max_features': 6, 'n_estimators': 3},
 {'max_features': 6, 'n_estimators': 10},
 {'max_features': 6, 'n_estimators': 30},
 {'max_features': 8, 'n_estimators': 3},
 {'max_features': 8, 'n_estimators': 10},
 {'max_features': 8, 'n_estimators': 30},
 {'bootstrap': False, 'max_features': 2, 'n_estimators': 3},
 {'bootstrap': False, 'max_features': 2, 'n_estimators': 10},
 {'bootstrap': False, 'max_features': 3, 'n_estimators': 3},
 {'bootstrap': False, 'max_features': 3, 'n_estimators': 10},
 {'bootstrap': False, 'max_features': 4, 'n_estimators': 3},
 {'bootstrap': False, 'max_features': 4, 'n_estimators': 10}]