In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "ensembles"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure into IMAGES_PATH.

    Parameters
    ----------
    fig_id : str
        Base name of the output file (without extension); also echoed to stdout.
    tight_layout : bool, default True
        When true, call ``plt.tight_layout()`` before saving.
    fig_extension : str, default "png"
        File extension, also passed to ``plt.savefig`` as the ``format``.
    resolution : int, default 300
        Passed to ``plt.savefig`` as ``dpi``.
    """
    file_name = "".join((fig_id, ".", fig_extension))
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

In [2]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Fetch the OpenML dataset 'mnist_784' (version 1) and cast its labels
# to unsigned 8-bit integers.
mnist = fetch_openml('mnist_784', version=1)
mnist.target = mnist.target.astype(np.uint8)

# First hold out 10,000 samples as the test set ...
X_train_val, X_test, y_train_val, y_test = train_test_split(
    mnist.data,
    mnist.target,
    test_size=10000,
    random_state=42,
)
# ... then hold out another 10,000 of the remainder as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=10000,
    random_state=42,
)

In [6]:
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC

# One classifier of each kind, all seeded with the same random_state.
random_forest_clf = RandomForestClassifier(random_state=42, n_estimators=100)  # random forest
extra_trees_clf = ExtraTreesClassifier(random_state=42, n_estimators=100)  # extra-trees
svm_clf = LinearSVC(random_state=42)  # linear SVM
mlp_clf = MLPClassifier(random_state=42)  # multi-layer perceptron

# 2017250045 Jeong Tae-hwan
# Fit every classifier on the training split, announcing each one first.
estimators = [
    random_forest_clf,
    extra_trees_clf,
    svm_clf,
    mlp_clf,
]
for estimator in estimators:
    print("Training the", estimator)
    estimator.fit(X_train, y_train)

Training the RandomForestClassifier(random_state=42)
Training the ExtraTreesClassifier(random_state=42)
Training the LinearSVC(random_state=42)




Training the MLPClassifier(random_state=42)


In [7]:
# 2017250045 Jeong Tae-hwan
# Score of each individually trained classifier on the validation split,
# in the same order as `estimators`.
val_scores = [clf.score(X_val, y_val) for clf in estimators]
print("[estimator.score(X_val, y_val) for estimator in estimators]:", val_scores)

[estimator.score(X_val, y_val) for estimator in estimators]: [0.9692, 0.9715, 0.8695, 0.9606]


In [8]:
from sklearn.ensemble import VotingClassifier

# (name, estimator) pairs; the names are the keys later used with set_params.
named_estimators = [
    ("random_forest_clf", random_forest_clf),
    ("extra_trees_clf", extra_trees_clf),
    ("svm_clf", svm_clf),
    ("mlp_clf", mlp_clf),
]
voting_clf = VotingClassifier(estimators=named_estimators)
# 2017250045 Jeong Tae-hwan
voting_clf.fit(X_train, y_train)

# Validation score of the ensemble as a whole ...
ensemble_val_score = voting_clf.score(X_val, y_val)
print("voting_clf.score(X_val, y_val):", ensemble_val_score)

# ... followed by the score of each fitted sub-estimator held in `estimators_`.
member_val_scores = [member.score(X_val, y_val) for member in voting_clf.estimators_]
print("[estimator.score(X_val, y_val) for estimator in voting_clf.estimators_]: ",
      member_val_scores)



voting_clf.score(X_val, y_val): 0.97
[estimator.score(X_val, y_val) for estimator in voting_clf.estimators_]:  [0.9692, 0.9715, 0.8695, 0.9606]


In [10]:
# Set the "svm_clf" slot of the ensemble to None. set_params returns the
# ensemble, so its repr is what gets printed.
updated_clf = voting_clf.set_params(svm_clf=None)
print("voting_clf.set_params(svm_clf=None):", updated_clf)

# Show the declared (name, estimator) pairs next to the fitted estimators.
print("\nvoting_clf.estimators: ", voting_clf.estimators)
print("\nvoting_clf.estimators_:", voting_clf.estimators_)
# 2017250045 Jeong Tae-hwan

voting_clf.set_params(svm_clf=None): VotingClassifier(estimators=[('random_forest_clf',
                              RandomForestClassifier(random_state=42)),
                             ('extra_trees_clf',
                              ExtraTreesClassifier(random_state=42)),
                             ('svm_clf', None),
                             ('mlp_clf', MLPClassifier(random_state=42))])

voting_clf.estimators:  [('random_forest_clf', RandomForestClassifier(random_state=42)), ('extra_trees_clf', ExtraTreesClassifier(random_state=42)), ('svm_clf', None), ('mlp_clf', MLPClassifier(random_state=42))]

voting_clf.estimators_: [RandomForestClassifier(random_state=42), ExtraTreesClassifier(random_state=42), LinearSVC(random_state=42), MLPClassifier(random_state=42)]


In [11]:
# 2017250045 Jeong Tae-hwan
# set_params(svm_clf=None) only updated `estimators`; the already-fitted
# LinearSVC is still present in `estimators_`, so remove it by hand.
# Filtering by type (instead of `del voting_clf.estimators_[2]`) keeps this
# cell idempotent: re-running it no longer deletes whichever estimator has
# moved into index 2, nor raises IndexError once the list is too short.
# Slice assignment mutates the existing list in place, as `del` did.
voting_clf.estimators_[:] = [
    fitted for fitted in voting_clf.estimators_
    if not isinstance(fitted, LinearSVC)
]
print("voting_clf.score(X_val, y_val): ",voting_clf.score(X_val, y_val))

voting_clf.score(X_val, y_val):  0.9737


In [12]:
# Switch the ensemble to soft voting and score it on the validation split.
voting_clf.voting = "soft"
soft_val_score = voting_clf.score(X_val, y_val)
print("voting_clf.score(X_val, y_val):", soft_val_score)


voting_clf.score(X_val, y_val): 0.97


In [13]:
# Switch the ensemble back to hard voting and score it on the test split.
voting_clf.voting = "hard"
hard_test_score = voting_clf.score(X_test, y_test)
print("\nvoting_clf.score(X_test, y_test):", hard_test_score)


voting_clf.score(X_test, y_test): 0.9711


In [15]:
# Test-set score of each fitted estimator still held by the ensemble.
member_test_scores = [member.score(X_test, y_test) for member in voting_clf.estimators_]
print("[estimator.score(X_test, y_test) for estimator in voting_clf.estimators_]:",
      member_test_scores)
print("\nThe voting classifier only very slightly reduced the error rate of the best model in this case.")

[estimator.score(X_test, y_test) for estimator in voting_clf.estimators_]: [0.9645, 0.9691, 0.9586]

The voting classifier only very slightly reduced the error rate of the best model in this case.
