In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from utilities import visualize_classifier

In [None]:
# Each row of the comma-separated file is read as numbers; every column but
# the last becomes a feature, and the last column becomes the label.
input_file = 'data_random_forests.txt'
data = np.loadtxt(input_file, delimiter=',')
X = data[:, :-1]
y = data[:, -1]

In [None]:
# Split the feature rows into one array per class label (0, 1 and 2)
class_0, class_1, class_2 = (np.array(X[y == label]) for label in (0, 1, 2))

In [None]:
from sklearn.model_selection import train_test_split
# Hold out a quarter of the samples for the final evaluation; the fixed seed
# keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=5,
)

In [None]:
from sklearn.model_selection import GridSearchCV 

# Candidate hyper-parameters for the grid search: number of trees in the
# forest and maximum depth of each tree (8 x 6 = 48 combinations).
param_grid = dict(
    n_estimators=[10, 100, 200, 300, 400, 500, 750, 1000],
    max_depth=list(range(1, 7)),
)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Cross-validated search over every combination in param_grid.
# The forest is seeded so that the selected hyper-parameters and the scores
# reported further down are reproducible after a kernel restart (the
# train/test split is already seeded; an unseeded forest was the remaining
# source of run-to-run variation).
# refit=True retrains the best candidate so `grid` can be used to predict
# directly; verbose=3 logs the score of every individual fit.
grid = GridSearchCV(
    RandomForestClassifier(random_state=5),
    param_grid,
    refit=True,
    verbose=3,
)

In [None]:
# Run the search on the training split only; the test split is not used here.
grid.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV] max_depth=1, n_estimators=10 ....................................
[CV] ........ max_depth=1, n_estimators=10, score=0.800, total=   0.0s
[CV] max_depth=1, n_estimators=10 ....................................
[CV] ........ max_depth=1, n_estimators=10, score=0.830, total=   0.0s
[CV] max_depth=1, n_estimators=10 ....................................
[CV] ........ max_depth=1, n_estimators=10, score=0.807, total=   0.0s
[CV] max_depth=1, n_estimators=10 ....................................
[CV] ........ max_depth=1, n_estimators=10, score=0.770, total=   0.0s
[CV] max_depth=1, n_estimators=10 ....................................
[CV] ........ max_depth=1, n_estimators=10, score=0.815, total=   0.0s
[CV] max_depth=1, n_estimators=100 ...................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] ....... max_depth=1, n_estimators=100, score=0.822, total=   0.2s
[CV] max_depth=1, n_estimators=100 ...................................
[CV] ....... max_depth=1, n_estimators=100, score=0.830, total=   0.1s
[CV] max_depth=1, n_estimators=100 ...................................
[CV] ....... max_depth=1, n_estimators=100, score=0.748, total=   0.2s
[CV] max_depth=1, n_estimators=100 ...................................
[CV] ....... max_depth=1, n_estimators=100, score=0.778, total=   0.1s
[CV] max_depth=1, n_estimators=100 ...................................
[CV] ....... max_depth=1, n_estimators=100, score=0.822, total=   0.2s
[CV] max_depth=1, n_estimators=200 ...................................
[CV] ....... max_depth=1, n_estimators=200, score=0.815, total=   0.3s
[CV] max_depth=1, n_estimators=200 ...................................
[CV] ....... max_depth=1, n_estimators=200, score=0.852, total=   0.3s
[CV] max_depth=1, n_estimators=200 ...................................
[CV] .

[Parallel(n_jobs=1)]: Done 240 out of 240 | elapsed:  2.6min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              ra

In [None]:
# Report the best hyper-parameter combination found by the search, followed
# by the estimator that was built with it.
print(grid.best_params_, grid.best_estimator_, sep='\n')

{'max_depth': 3, 'n_estimators': 100}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=3, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)


In [None]:
# Predict labels for the held-out test split with the tuned model
grid_predictions = grid.predict(X_test)

# Print the confusion matrix first, then the per-class classification report
print(confusion_matrix(y_true=y_test, y_pred=grid_predictions))
print(classification_report(y_true=y_test, y_pred=grid_predictions))

[[69  5  5]
 [ 5 59  6]
 [ 1  7 68]]
              precision    recall  f1-score   support

         0.0       0.92      0.87      0.90        79
         1.0       0.83      0.84      0.84        70
         2.0       0.86      0.89      0.88        76

    accuracy                           0.87       225
   macro avg       0.87      0.87      0.87       225
weighted avg       0.87      0.87      0.87       225

