In [0]:
import numpy as np
import matplotlib.pyplot as plt

In [0]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# 1. voting classifier

In [0]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

log_clf = LogisticRegression(solver='liblinear', random_state=42)
rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)
svm_clf = SVC(gamma='auto', random_state=42)

voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='hard')
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='auto',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=42,
                                                 solver='liblinear', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('rf',
                              RandomForestClassifier(bootstrap=True,
                                                     ccp_alpha=0.0,
                                                     class_weight=None,...
                                        

In [0]:
from sklearn.metrics import accuracy_score

for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.872
SVC 0.888
VotingClassifier 0.896


# 2. Bagging & Pasting

In [0]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42), n_estimators=500,
    max_samples=100, bootstrap=True, n_jobs=-1, oob_score=True, random_state=42)
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

In [0]:
bag_clf.oob_score_

0.9253333333333333

In [0]:
bag_clf.oob_decision_function_[:10] # oob sample 의 각 class 에 대한 score

array([[0.35849057, 0.64150943],
       [0.43513514, 0.56486486],
       [1.        , 0.        ],
       [0.0128866 , 0.9871134 ],
       [0.03174603, 0.96825397],
       [0.07928389, 0.92071611],
       [0.4027027 , 0.5972973 ],
       [0.06703911, 0.93296089],
       [0.92950392, 0.07049608],
       [0.88461538, 0.11538462]])

In [0]:
accuracy_score(y_test, y_pred)

0.904

In [0]:
# Bagging
bagging_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42),n_estimators=10,
    max_samples=0.1,max_features=1.0, bootstrap=True, bootstrap_features=True)

In [0]:
# Pasting
pasting_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42),n_estimators=10,
    max_samples=0.1,max_features=1.0, bootstrap=False,bootstrap_features=True)

In [0]:
# Random Subspaces Method
random_subspace_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42), n_estimators=10,
    max_samples=1.0,max_features=0.5, bootstrap=False,bootstrap_features=True)

In [0]:
# Random Patches Method
random_patches_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42), n_estimators=10,
    max_samples=0.1,max_features=0.5, bootstrap=True,,bootstrap_features=True)

# 3. RandomForest Classifier

In [0]:
bag_clf_rf=BaggingClassifier(
    DecisionTreeClassifier(splitter="random",max_leaf_nodes=16,random_state=42),
    n_estimators=500,max_samples=1.0,bootstrap=True,n_jobs=-1,random_state=42
)

bag_clf_rf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.904

In [0]:
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)

rnd_clf.fit(X_train, y_train)
y_pred_rf = rnd_clf.predict(X_test)
accuracy_score(y_test, y_pred_rf)

0.912

In [0]:
rnd_clf.feature_importances_

array([0.42253629, 0.57746371])

In [0]:
from sklearn.ensemble import ExtraTreesClassifier

ext_clf = ExtraTreesClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)

ext_clf.fit(X_train, y_train)
y_pred_ext = rnd_clf.predict(X_test)
accuracy_score(y_test, y_pred_ext)

0.912

In [0]:
rnd_clf.feature_importances_

array([0.42253629, 0.57746371])