# Chapter 7
# Ensemble Learning and Random Forests

In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import mean_squared_error
from sklearn.svm import LinearSVC

In [2]:
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Voting Classifiers
**_Create and train a voting classifier_**

In [3]:
log_clf = LogisticRegression(solver="liblinear", random_state=42)
rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)
svm_clf = SVC(gamma="auto", probability=True, random_state=42)

voting_clf = VotingClassifier(
        estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
        voting='soft'
    )

voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=42, solver='liblinear',
          tol=0.0001, verbose=0, warm_start=False)), ('rf', Rando...bf',
  max_iter=-1, probability=True, random_state=42, shrinking=True,
  tol=0.001, verbose=False))],
         flatten_transform=None, n_jobs=None, voting='soft', weights=None)

**_Look at the accuracy of each classifier on the test set_**

In [4]:
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.872
SVC 0.888
VotingClassifier 0.912


## Bagging and Pasting
### Bagging and Pasting in Scikit-Learn

In [5]:
bag_clf = BaggingClassifier(
        DecisionTreeClassifier(), n_estimators=500,
        max_samples=100, bootstrap=True, n_jobs=-1
    )

bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)
print('BaggingClassifier', accuracy_score(y_test, y_pred))

BaggingClassifier 0.928


In [6]:
tree_clf = DecisionTreeClassifier(random_state=42)
tree_clf.fit(X_train, y_train)
y_pred_tree = tree_clf.predict(X_test)
print('DecisionTreeClassifier', accuracy_score(y_test, y_pred_tree))

DecisionTreeClassifier 0.856


### Out-of-Bag Evaluation

In [7]:
bag_clf = BaggingClassifier(
        DecisionTreeClassifier(), n_estimators=500,
        bootstrap=True, n_jobs=-1, oob_score=True
    )

bag_clf.fit(X_train, y_train)
bag_clf.oob_score_

0.8986666666666666

In [8]:
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.904

In [9]:
bag_clf.oob_decision_function_

array([[0.38857143, 0.61142857],
       [0.3258427 , 0.6741573 ],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [0.        , 1.        ],
       [0.08235294, 0.91764706],
       [0.34969325, 0.65030675],
       [0.00558659, 0.99441341],
       [0.98351648, 0.01648352],
       [0.97777778, 0.02222222],
       [0.835     , 0.165     ],
       [0.00534759, 0.99465241],
       [0.72251309, 0.27748691],
       [0.84916201, 0.15083799],
       [0.95767196, 0.04232804],
       [0.04278075, 0.95721925],
       [0.        , 1.        ],
       [0.9893617 , 0.0106383 ],
       [0.96610169, 0.03389831],
       [1.        , 0.        ],
       [0.01657459, 0.98342541],
       [0.34659091, 0.65340909],
       [0.88125   , 0.11875   ],
       [1.        , 0.        ],
       [0.96531792, 0.03468208],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [0.61616162, 0.38383838],
       [0.

## Random Forests
**_Creating a Random Forest by using the RandomForestClassifier_**

In [10]:
rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)
rnd_clf.fit(X_train, y_train)

y_pred_rf = rnd_clf.predict(X_test)

**_Creating an equivalent Random Forest by using the BaggingClassifier_**

In [11]:
bag_clf = BaggingClassifier(
        DecisionTreeClassifier(splitter="random", max_leaf_nodes=16),
        n_estimators=500, max_samples=1.0, bootstrap=True, n_jobs=-1
    )

### Feature Importance

In [12]:
iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)
rnd_clf.fit(iris["data"], iris["target"])

for name, score in zip(iris["feature_names"], rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.10452605915909884
sepal width (cm) 0.021224302660570047
petal length (cm) 0.42005793474462105
petal width (cm) 0.4541917034357104


## Boosting
### AdaBoost

In [13]:
ada_clf = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=1), n_estimators=200,
        algorithm="SAMME.R", learning_rate=0.5
    )

ada_clf.fit(X_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
          learning_rate=0.5, n_estimators=200, random_state=None)

### Gradient Boosting

In [14]:
np.random.seed(42)
X = np.random.rand(100, 1) - 0.5
y = 3*X[:, 0]**2 + 0.05 * np.random.randn(100)

In [15]:
tree_reg1 = DecisionTreeRegressor(max_depth=2)
tree_reg1.fit(X, y)

DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

In [16]:
y2 = y - tree_reg1.predict(X)
tree_reg2 = DecisionTreeRegressor(max_depth=2)
tree_reg2.fit(X, y2)

DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

In [17]:
y3 = y2 - tree_reg2.predict(X)
tree_reg3 = DecisionTreeRegressor(max_depth=2)
tree_reg3.fit(X, y3)

DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

In [18]:
X_new = np.array([[0.9]])
y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))
y_pred

array([0.75026781])

**_Train a GBRT ensemble using GradientBoostingRegressor_**

In [19]:
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0)
gbrt.fit(X, y)

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=1.0, loss='ls', max_depth=2, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=3, n_iter_no_change=None, presort='auto',
             random_state=None, subsample=1.0, tol=0.0001,
             validation_fraction=0.1, verbose=0, warm_start=False)

**_Finding optimal number of trees in GBRT_**

In [20]:
X_train, X_val, y_train, y_val = train_test_split(X, y)

gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120)
gbrt.fit(X_train, y_train)

errors = [mean_squared_error(y_val, y_pred)
          for y_pred in gbrt.staged_predict(X_val)]
bst_n_estimators = np.argmin(errors)

gbrt_best = GradientBoostingRegressor(max_depth=2,n_estimators=bst_n_estimators)
gbrt_best.fit(X_train, y_train)

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=2, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=84, n_iter_no_change=None, presort='auto',
             random_state=None, subsample=1.0, tol=0.0001,
             validation_fraction=0.1, verbose=0, warm_start=False)

**_Early stopping_**

In [21]:
gbrt = GradientBoostingRegressor(max_depth=2, warm_start=True)

min_val_error = float("inf")
error_going_up = 0
for n_estimators in range(1, 120):
    gbrt.n_estimators = n_estimators
    gbrt.fit(X_train, y_train)
    y_pred = gbrt.predict(X_val)
    val_error = mean_squared_error(y_val, y_pred)
    if val_error < min_val_error:
        min_val_error = val_error
        error_going_up = 0
    else:
        error_going_up += 1
        if error_going_up == 5:
            break # early stopping

# Chapter 7 Exercises

**_Exercise 8_**

In [22]:
try:
    from sklearn.datasets import fetch_openml
    mnist = fetch_openml('mnist_784', version=1, cache=True)
    mnist.target = mnist.target.astype(np.int8)
except ImportError:
    from sklearn.datasets import fetch_mldata
    mnist = fetch_mldata('MNIST original')

In [23]:
X_train_val, X_test, y_train_val, y_test = train_test_split(
    mnist.data, mnist.target, test_size=10000, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=10000, random_state=42)

In [24]:
rand_frst_clf = RandomForestClassifier()
extra_tree_clf = ExtraTreesClassifier()
svc = LinearSVC()

rand_frst_clf.fit(X_train, y_train)
extra_tree_clf.fit(X_train, y_train)
svc.fit(X_train, y_train)



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [25]:
print(rand_frst_clf.score(X_val, y_val))
print(extra_tree_clf.score(X_val, y_val))
print(svc.score(X_val, y_val))

0.947
0.9482
0.8441


In [26]:
voting_clf = VotingClassifier(
        estimators=[('rand_frst_clf', rand_frst_clf), ('extra_tree_clf', extra_tree_clf)],
        voting='soft'
    )

voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('rand_frst_clf', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
       ...ators=10, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False))],
         flatten_transform=None, n_jobs=None, voting='soft', weights=None)

In [27]:
voting_clf.score(X_val, y_val)

0.9616

In [28]:
print(rand_frst_clf.score(X_test, y_test))
print(extra_tree_clf.score(X_test, y_test))
print(svc.score(X_test, y_test))

0.9417
0.9466
0.8437


In [29]:
voting_clf.score(X_test, y_test)

0.9569

**_Exercise 9_**

In [30]:
y_pred_1 = rand_frst_clf.predict(X_val).reshape((-1, 1))
y_pred_2 = extra_tree_clf.predict(X_val).reshape((-1, 1))
y_pred_3 = svc.predict(X_val).reshape((-1, 1))

X_train_new = np.concatenate((y_pred_1, y_pred_2, y_pred_3), axis=1)
X_train_new

array([[5, 5, 5],
       [8, 2, 8],
       [2, 2, 2],
       ...,
       [7, 7, 7],
       [6, 6, 6],
       [7, 7, 7]], dtype=int8)

In [31]:
rand_frst_blender = RandomForestClassifier()
rand_frst_blender.fit(X_train_new, y_val)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [32]:
rand_frst_blender.score(X_train_new, y_val)

0.9672

In [33]:
y_pred_1 = rand_frst_clf.predict(X_test).reshape((-1, 1))
y_pred_2 = extra_tree_clf.predict(X_test).reshape((-1, 1))
y_pred_3 = svc.predict(X_test).reshape((-1, 1))

X_test_new = np.concatenate((y_pred_1, y_pred_2, y_pred_3), axis=1)
X_test_new

array([[8, 8, 8],
       [4, 4, 4],
       [5, 5, 8],
       ...,
       [3, 3, 3],
       [3, 8, 3],
       [3, 3, 3]], dtype=int8)

In [34]:
rand_frst_blender.score(X_test_new, y_test)

0.9469