<a href="https://colab.research.google.com/github/nfilipas/handson-ml3/blob/main/exercises/chapter7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Exercise 8

In [55]:
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, ExtraTreesClassifier, StackingClassifier
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from sklearn.calibration import CalibratedClassifierCV
from sklearn.neural_network import MLPClassifier

import numpy as np

In [44]:
mnist = fetch_openml("mnist_784", as_frame=False)

  warn(


In [45]:
X = mnist.data
y = mnist.target

X_train, y_train = X[:50_000], y[:50_000]
X_val, y_val = X[50_000:60_000], y[50_000:60_000]
X_test, y_test = X[60_000:], y[60_000:]

In [46]:
forest_clf = RandomForestClassifier(n_estimators=100, random_state=42,
                                    n_jobs=-1)
ext_tree_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
svm_clf = LinearSVC(max_iter=100, tol=20, dual=True, random_state=42)
mlp_clf = MLPClassifier(random_state=42)

clfs = [forest_clf, ext_tree_clf, svm_clf, mlp_clf]
pipelines = []

for clf in clfs:
    clf_pipeline = make_pipeline(StandardScaler(), clf)
    clf_pipeline.fit(X_train, y_train)
    pipelines.append(clf_pipeline)

In [47]:
for pipeline in pipelines:
    print(f"{pipeline[1]}: {accuracy_score(y_val, pipeline.predict(X_val))}")

RandomForestClassifier(n_jobs=-1, random_state=42): 0.9735
ExtraTreesClassifier(random_state=42): 0.9743
LinearSVC(max_iter=100, random_state=42, tol=20): 0.8691
MLPClassifier(random_state=42): 0.9749


In [48]:
hard_vot_clf = VotingClassifier(estimators=[("forest_clf", forest_clf),
                                            ("ext_tree_clf", ext_tree_clf),
                                            ("mlp_clf", mlp_clf)])
soft_vot_clf = VotingClassifier(estimators=[("forest_clf", forest_clf),
                                            ("ext_tree_clf", ext_tree_clf),
                                            ("mlp_clf", mlp_clf)],
                                voting="soft")

hard_vot_clf.fit(X_train, y_train)
soft_vot_clf.fit(X_train, y_train)

In [49]:
print(f"Hard voting classifier: accuracy = {accuracy_score(y_val, hard_vot_clf.predict(X_val))}")
print(f"Soft voting classifier: accuracy = {accuracy_score(y_val, soft_vot_clf.predict(X_val))}")

Hard voting classifier: accuracy = 0.9761
Soft voting classifier: accuracy = 0.9703


In [50]:
for pipeline in pipelines:
    print(f"{pipeline[1]}: {accuracy_score(y_test, pipeline.predict(X_test))}")
print(f"Hard voting classifier: accuracy = {accuracy_score(y_test, hard_vot_clf.predict(X_test))}")

RandomForestClassifier(n_jobs=-1, random_state=42): 0.9682
ExtraTreesClassifier(random_state=42): 0.9703
LinearSVC(max_iter=100, random_state=42, tol=20): 0.8715
MLPClassifier(random_state=42): 0.9756
Hard voting classifier: accuracy = 0.9733


# Exercise 9

In [51]:
X_train_new = np.empty((X_val.shape[0], len(pipelines)), dtype='object')
for i in (range(len(pipelines))):
    X_train_new[:, i] = pipeline.predict(X_val)

blender_clf = RandomForestClassifier(n_estimators=100, random_state=42,
                                     n_jobs=-1)

blender_clf.fit(X_train_new, y_val)

In [52]:
X_test_new = np.empty((X_test.shape[0], len(pipelines)), dtype='object')
for i in range(len(pipelines)):
    X_test_new[:, i] = pipeline.predict(X_test)

print(accuracy_score(y_test, blender_clf.predict(X_test_new)))

0.9756


In [53]:
# Let's try to remove the svm and see if we can improve the results

X_train_new2 = np.delete(X_train_new, 2, axis=1)
blender_clf.fit(X_train_new2, y_val)

X_test_new2 = np.delete(X_test_new, 2, axis=1)
print(accuracy_score(y_test, blender_clf.predict(X_test_new2)))

0.9756


In [57]:
stacking_clf = StackingClassifier(
    estimators=[
        ("rf_clf", RandomForestClassifier(n_estimators=100, random_state=42,
                                          n_jobs=-1)),
        ("ext_trees_clf", ExtraTreesClassifier(n_estimators=100,
                                               random_state=42)),
        ("mlp_clf", MLPClassifier(random_state=42))],
    final_estimator=RandomForestClassifier(n_estimators=100, random_state=42,
                                           n_jobs=-1),
    cv=5)

stacking_clf.fit(np.concatenate((X_train, X_val), axis=0),
                 np.concatenate((y_train, y_val), axis=0))

In [58]:
print(accuracy_score(y_test, stacking_clf.predict(X_test)))

0.9787
