In [71]:
%run main.ipynb

# Fine-Tune Your Model
### (hyperparameter optimization)

## Grid Search

All you need to do is tell it which hyperparameters you want it to experiment with <br> 
and what values to try out, <br>
and it will use cross-validation to evaluate all the possible combinations of hyperparameter values.

In [72]:
from sklearn.model_selection import GridSearchCV

**Hyperparameter Optimization for Linear Regression model**

In [73]:
lin_reg.get_params()

{'copy_X': True,
 'fit_intercept': True,
 'n_jobs': None,
 'normalize': False,
 'positive': False}

In [74]:
lin_reg_param_grid = [{"fit_intercept": [True, False], "normalize": [True, False]}]

In [75]:
lin_reg_grid_search = GridSearchCV(lin_reg, lin_reg_param_grid, cv=5, scoring="neg_mean_squared_error")

In [76]:
lin_reg_grid_search.fit(titanic_clean, titanic_train_labels)

GridSearchCV(cv=5, estimator=LinearRegression(),
             param_grid=[{'fit_intercept': [True, False],
                          'normalize': [True, False]}],
             scoring='neg_mean_squared_error')

In [77]:
# best combination of paramaters
lin_reg_grid_search.best_params_

{'fit_intercept': True, 'normalize': True}

In [78]:
# best estimator directly
lin_reg_grid_search.best_estimator_

LinearRegression(normalize=True)

In [79]:
# Evaluation scores are available
cvres = lin_reg_grid_search.cv_results_
print(cvres)

{'mean_fit_time': array([0.0023355 , 0.00140057, 0.00186887, 0.00109472]), 'std_fit_time': array([0.00069266, 0.00034059, 0.00101927, 0.00058523]), 'mean_score_time': array([0.00082269, 0.00088925, 0.00077634, 0.00041146]), 'std_score_time': array([0.00025598, 0.00090588, 0.00039735, 0.00016943]), 'param_fit_intercept': masked_array(data=[True, True, False, False],
             mask=[False, False, False, False],
       fill_value='?',
            dtype=object), 'param_normalize': masked_array(data=[True, False, True, False],
             mask=[False, False, False, False],
       fill_value='?',
            dtype=object), 'params': [{'fit_intercept': True, 'normalize': True}, {'fit_intercept': True, 'normalize': False}, {'fit_intercept': False, 'normalize': True}, {'fit_intercept': False, 'normalize': False}], 'split0_test_score': array([-0.15375341, -0.15375341, -0.15375341, -0.15375341]), 'split1_test_score': array([-0.1597815, -0.1597815, -0.1597815, -0.1597815]), 'split2_test_score'

In [80]:
for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
    print(np.sqrt(-mean_score), params)

0.402083548341115 {'fit_intercept': True, 'normalize': True}
0.402083548341115 {'fit_intercept': True, 'normalize': False}
0.402083548341115 {'fit_intercept': False, 'normalize': True}
0.402083548341115 {'fit_intercept': False, 'normalize': False}


**Hyperparameter Optimization for RandomForestRegressor model**

In [81]:
forest_reg.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [82]:
forest_reg_param_grid = [
{'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},
{'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},
]

In [83]:
forest_reg_grid_search = GridSearchCV(lin_reg, param_grid, cv=5, scoring="neg_mean_squared_error")

In [84]:
forest_reg_grid_search.fit(titanic_clean, titanic_train_labels)

GridSearchCV(cv=5, estimator=LinearRegression(),
             param_grid=[{'fit_intercept': [True, False],
                          'normalize': [True, False]}],
             scoring='neg_mean_squared_error')

In [85]:
print(f"Best parameters: {forest_reg_grid_search.best_params_}\n",
     f"Best estimator: {forest_reg_grid_search.best_estimator_}")

Best parameters: {'fit_intercept': True, 'normalize': True}
 Best estimator: LinearRegression(normalize=True)


In [86]:
forest_reg_cvres = forest_reg_grid_search.cv_results_
#print(forest_reg_cvres)

for mean_score, params in zip(forest_reg_cvres["mean_test_score"], forest_reg_cvres["params"]):
    print(np.sqrt(-mean_score), params)

0.402083548341115 {'fit_intercept': True, 'normalize': True}
0.402083548341115 {'fit_intercept': True, 'normalize': False}
0.402083548341115 {'fit_intercept': False, 'normalize': True}
0.402083548341115 {'fit_intercept': False, 'normalize': False}


## Randomized Search