# Chapter 7 - Ensemble Learning And Random Forests

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Voting Classifiers

In [2]:
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [3]:
# Noisy two-moons toy dataset, split into train and test sets
# (no split size is passed, so train_test_split's default applies).
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Three different model families, each seeded so the run is reproducible.
base_estimators = [
    ('lr', LogisticRegression(random_state=42)),
    ('rf', RandomForestClassifier(random_state=42)),
    ('svc', SVC(random_state=42)),
]

# Combine them into one ensemble and train it on the training split.
voting_clf = VotingClassifier(estimators=base_estimators)
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(random_state=42)),
                             ('rf', RandomForestClassifier(random_state=42)),
                             ('svc', SVC(random_state=42))])

In [5]:
# Test-set accuracy of every fitted sub-estimator, one line per model,
# for comparison against the ensemble's own score.
fitted_models = voting_clf.named_estimators_
for name in fitted_models:
    clf = fitted_models[name]
    print(f"{name} accuracy = {clf.score(X_test, y_test)}")

lr accuracy = 0.864
rf accuracy = 0.896
svc accuracy = 0.896


Calling the voting classifier's `predict()` method performs hard voting: it returns the class predicted by the majority of the individual estimators.

In [6]:
# Ensemble prediction for the first test sample; slicing with [:1] keeps a
# batch of one sample instead of extracting a single row.
first_test_sample = X_test[:1]
voting_clf.predict(first_test_sample)

array([1])

In [7]:
# Ask each fitted sub-estimator for its own prediction on the first test
# sample, so the individual votes can be compared with the ensemble's answer.
[estimator.predict(X_test[:1]) for estimator in voting_clf.estimators_]

[array([1]), array([1]), array([0])]

In [9]:
# Accuracy of the voting ensemble on the held-out test set
voting_clf.score(X=X_test, y=y_test)

0.912

Using **soft voting**:

In [10]:
# Switch the ensemble to soft voting. Soft voting needs predict_proba() from
# every estimator; SVC only provides it when probability=True, in which case
# the probabilities are obtained using cross validation.
# Both settings are applied to the unfitted estimator specification, so the
# ensemble has to be refit before it is scored again.
# NOTE: this mutates `voting_clf` in place; cells above will show soft-voting
# results if they are re-run after this one.
voting_clf.set_params(voting="soft", svc__probability=True)
voting_clf.fit(X_train, y_train).score(X_test, y_test)

0.92