# < Random Forest >
- n_estimators
  - the number of trees in the forest
  - 기본값은 100
- criterion
  - the function to measure the quality of a split
  - default = “gini”

In [22]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn import tree

from sklearn.model_selection import train_test_split, RandomizedSearchCV
import numpy as np

In [13]:
iris = load_iris()
X, y = iris.data, iris.target

# clf = RandomForestClassifier()
clf = RandomForestClassifier(n_estimators=10, 
                             max_depth=3, 
                             random_state=0)
clf = clf.fit(X, y)

In [14]:
len(clf.estimators_)

10

In [15]:
clf.estimators_

[DecisionTreeClassifier(max_depth=3, max_features='auto', random_state=209652396),
 DecisionTreeClassifier(max_depth=3, max_features='auto', random_state=398764591),
 DecisionTreeClassifier(max_depth=3, max_features='auto', random_state=924231285),
 DecisionTreeClassifier(max_depth=3, max_features='auto',
                        random_state=1478610112),
 DecisionTreeClassifier(max_depth=3, max_features='auto', random_state=441365315),
 DecisionTreeClassifier(max_depth=3, max_features='auto',
                        random_state=1537364731),
 DecisionTreeClassifier(max_depth=3, max_features='auto', random_state=192771779),
 DecisionTreeClassifier(max_depth=3, max_features='auto',
                        random_state=1491434855),
 DecisionTreeClassifier(max_depth=3, max_features='auto',
                        random_state=1819583497),
 DecisionTreeClassifier(max_depth=3, max_features='auto', random_state=530702035)]

In [5]:
clf.estimators_[0]

DecisionTreeClassifier(max_depth=3, max_features='auto', random_state=209652396)

In [6]:
result = clf.predict([[4.6, 3.1, 1.5, 0.2], 
                      [7.0, 3.2, 4.7, 1.4],
                      [6.3, 3.3, 6.0, 2.5]])
# 학습할 때도 [[]] 형태로 넣어줬기 때문에 예측할 때도 X set가 들어오길 기다리고 있음.
# 그래서 [[]] 2차원으로 넣어주는 거.

result

array([0, 1, 2])

# RandomizedSearchCV

In [17]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start=200, stop=2000, num=10)]

# Number of features to consider at every split
max_features = ['auto', 'sqrt']
 
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)

# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]

# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]

# Method of selecting samples for training each tree
bootstrap = [True, False]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

random_grid

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000],
 'max_features': ['auto', 'sqrt'],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
 'min_samples_split': [2, 5, 10],
 'min_samples_leaf': [1, 2, 4],
 'bootstrap': [True, False]}

In [20]:
rf = RandomForestClassifier(random_state=42)
rf_random = RandomizedSearchCV(estimator = rf, 
                               param_distributions = random_grid, 
                               n_iter = 100, 
                               cv = 3, 
                               verbose=2, 
                               random_state=42, 
                               n_jobs = -1)

rf_result = rf_random.fit(X, y)
rf_result.best_params_, rf_result.best_score_

Fitting 3 folds for each of 100 candidates, totalling 300 fits


({'n_estimators': 400,
  'min_samples_split': 5,
  'min_samples_leaf': 1,
  'max_features': 'sqrt',
  'max_depth': 30,
  'bootstrap': True},
 0.9666666666666667)

In [21]:
clf = RandomForestClassifier(n_estimators=400, 
                             max_depth=30, 
                             min_samples_split=5,
                             min_samples_leaf=1,
                             max_features='sqrt',
                             bootstrap=True,
                             random_state=0)