# Grid Search

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn import cross_validation, grid_search
from sklearn.ensemble import ExtraTreesClassifier

from exampleDatasets.utilities import visualize_classifier
import warnings
warnings.filterwarnings(action = 'ignore', category = DeprecationWarning)

  from numpy.core.umath_tests import inner1d


In [2]:
input_file = './exampleDatasets/data_random_forests.txt'

In [3]:
data = np.loadtxt(input_file, delimiter = ',')
X, y = data[:, :-1], data[:, -1]

In [4]:
class_0 = np.array(X[y==0])
class_1 = np.array(X[y==1])
class_2 = np.array(X[y==2])

In [5]:
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
               X, y, test_size=0.25, random_state=5)

In [6]:
parameter_grid = [{
    'n_estimators' : [100],
    'max_depth' : [2, 4, 7 , 12, 16]
}, 
{
    'max_depth' : [4],
    'n_estimators' : [25, 50, 100, 250]
}]

Here, we want the best combination of 2 parameters, first dictionary we keep 1 constant and vary the other.

In [7]:
metrics = ['precision_weighted', 'recall_weighted']

In [11]:
for metric in metrics:
    print('\n Searching optimal parameters for {}'.format(metric))
    classifier = grid_search.GridSearchCV(
                            ExtraTreesClassifier(random_state = 123),
                parameter_grid ,
                cv = 5,
                scoring = metric)
    classifier.fit(X_train, y_train)


 Searching optimal parameters for precision_weighted

 Searching optimal parameters for recall_weighted


In [18]:
help(grid_search.GridSearchCV)

Help on class GridSearchCV in module sklearn.grid_search:

class GridSearchCV(BaseSearchCV)
 |  Exhaustive search over specified parameter values for an estimator.
 |  
 |  .. deprecated:: 0.18
 |      This module will be removed in 0.20.
 |      Use :class:`sklearn.model_selection.GridSearchCV` instead.
 |  
 |  Important members are fit, predict.
 |  
 |  GridSearchCV implements a "fit" and a "score" method.
 |  It also implements "predict", "predict_proba", "decision_function",
 |  "transform" and "inverse_transform" if they are implemented in the
 |  estimator used.
 |  
 |  The parameters of the estimator used to apply these methods are optimized
 |  by cross-validated grid-search over a parameter grid.
 |  
 |  Read more in the :ref:`User Guide <grid_search>`.
 |  
 |  Parameters
 |  ----------
 |  estimator : estimator object.
 |      A object of that type is instantiated for each grid point.
 |      This is assumed to implement the scikit-learn estimator interface.
 |      Eith

In [16]:
print("\nGrid scores for the parameter grid:")
for params, avg_score, _ in classifier.grid_scores_:
    print(params, '-->', round(avg_score, 3))

print("\nBest parameters:", classifier.best_params_)


Grid scores for the parameter grid:
{'max_depth': 2, 'n_estimators': 100} --> 0.839
{'max_depth': 4, 'n_estimators': 100} --> 0.837
{'max_depth': 7, 'n_estimators': 100} --> 0.837
{'max_depth': 12, 'n_estimators': 100} --> 0.831
{'max_depth': 16, 'n_estimators': 100} --> 0.812
{'max_depth': 4, 'n_estimators': 25} --> 0.841
{'max_depth': 4, 'n_estimators': 50} --> 0.84
{'max_depth': 4, 'n_estimators': 100} --> 0.837
{'max_depth': 4, 'n_estimators': 250} --> 0.847

Best parameters: {'max_depth': 4, 'n_estimators': 250}
