# Ensemble Learning and Random Forests
---

### Voting Classifiers

In [1]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

# Seed the generator as well as the split: make_moons shuffles its samples, so
# without a fixed random_state the train/test partition (and every score
# computed from it) changes on each run even though train_test_split is seeded.
# NOTE: outputs recorded from earlier, unseeded runs will differ; re-run to refresh.
moons = make_moons(random_state=42)
X_moons, y_moons = moons  # feature matrix and class labels
X_train, X_test, y_train, y_test = train_test_split(X_moons, y_moons, test_size=0.33, random_state=42)

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Base learners that the ensemble will combine.
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
# Soft voting averages class probabilities, so SVC must be built with
# probability=True to expose them.
svm_clf = SVC(probability=True)

voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('rf', rnd_clf),
        ('svc', svm_clf),
    ],
    voting='soft',
)
# Kept as the cell's last expression so the fitted ensemble's repr is displayed.
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)), ('rf', RandomF...',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False))],
         n_jobs=1, voting='soft', weights=None)

In [3]:
from sklearn.metrics import accuracy_score

# Train every individual model and the ensemble on the same split, then print
# each one's accuracy on the held-out test set. voting_clf is refit here even
# though the previous cell already fitted it.
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_true=y_test, y_pred=y_pred))

LogisticRegression 0.878787878788
RandomForestClassifier 0.939393939394
SVC 0.969696969697
VotingClassifier 0.939393939394


### Bagging and Pasting

In [5]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging (bootstrap=True): 500 decision trees, each trained on a sample of
# half the training set drawn with replacement.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=True,
    n_jobs=-1,
)
y_pred = bag_clf.fit(X_train, y_train).predict(X_test)

In [11]:
# Element-wise comparison of predictions with the true test labels (True = correct).
# NOTE(review): this cell's execution count (11) is higher than the pasting
# cells below (8 and 10), and its output is identical to theirs, so it may be
# showing the pasting model's y_pred rather than the bagging model's.
# Restart the kernel and run all cells in order to confirm.
y_pred == y_test

array([ True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True,  True,  True,  True,  True], dtype=bool)

In [8]:
# Pasting (bootstrap=False): same ensemble as the bagging cell, but each tree's
# half-sized training sample is drawn without replacement.
# This rebinds bag_clf and y_pred, replacing the bagging results.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=False,
    n_jobs=-1,
)
y_pred = bag_clf.fit(X_train, y_train).predict(X_test)

In [10]:
# Element-wise comparison of the pasting model's predictions with the true
# test labels (True = correct).
y_pred == y_test

array([ True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True,  True,  True,  True,  True], dtype=bool)

### Out-of-Bag Evaluation

In [12]:
# Bootstrap sampling leaves some training instances out of each tree's sample;
# oob_score=True asks the ensemble to score itself on those held-out instances.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    bootstrap=True,
    n_jobs=-1,
    oob_score=True,
)
bag_clf.fit(X_train, y_train)
# Out-of-bag accuracy estimate, displayed as the cell output.
bag_clf.oob_score_

0.92537313432835822

In [13]:
from sklearn.metrics import accuracy_score

# Accuracy on the held-out test set, for comparison with the out-of-bag estimate.
y_pred = bag_clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.93939393939393945

In [14]:
# Out-of-bag decision function of the fitted ensemble: each displayed row is a
# pair of class probabilities that sums to 1. Per the scikit-learn docs there
# is one row per training instance; the output is truncated here, so the row
# count cannot be confirmed from this view.
bag_clf.oob_decision_function_

array([[ 0.20430108,  0.79569892],
       [ 0.19879518,  0.80120482],
       [ 1.        ,  0.        ],
       [ 1.        ,  0.        ],
       [ 1.        ,  0.        ],
       [ 0.01075269,  0.98924731],
       [ 0.        ,  1.        ],
       [ 1.        ,  0.        ],
       [ 0.8680203 ,  0.1319797 ],
       [ 0.        ,  1.        ],
       [ 0.        ,  1.        ],
       [ 0.        ,  1.        ],
       [ 0.4076087 ,  0.5923913 ],
       [ 0.15340909,  0.84659091],
       [ 0.        ,  1.        ],
       [ 1.        ,  0.        ],
       [ 0.49740933,  0.50259067],
       [ 1.        ,  0.        ],
       [ 0.67484663,  0.32515337],
       [ 0.        ,  1.        ],
       [ 0.66666667,  0.33333333],
       [ 0.94219653,  0.05780347],
       [ 0.85882353,  0.14117647],
       [ 0.80412371,  0.19587629],
       [ 0.91847826,  0.08152174],
       [ 0.76439791,  0.23560209],
       [ 1.        ,  0.        ],
       [ 0.21164021,  0.78835979],
       [ 0.93478261,