# Chapter 7 Exercises

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

def save_fig(fig_id, tight_layout=True):
    path = os.path.join("images", fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

Load the MNIST data (introduced in Chapter 3), and split it into a training set, a validation set, and a test set (e.g., use the first 40,000 instances for training, the next 10,000 for validation, and the last 10,000 for testing). Then train various classifiers, such as a Random Forest classifier, an Extra-Trees classifier, and an SVM. Next, try to combine them into an ensemble that outperforms them all on the validation set, using a soft or hard voting classifier. Once you have found one, try it on the test set. How much better does it perform compared to the individual classifiers?

In [24]:
from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')

In [25]:
X, y = mnist["data"], mnist["target"]

In [33]:
X_train, X_val, X_test, y_train, y_val, y_test = X[:60000], X[60000:65000], X[65000:70000], y[:60000], y[60000:65000], y[65000:70000]

In [34]:
from sklearn.neighbors import KNeighborsClassifier

knn_clf = KNeighborsClassifier(n_jobs=-1, weights='distance', n_neighbors=3)
knn_clf.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=3, p=2,
           weights='distance')

In [37]:
from sklearn.ensemble import RandomForestClassifier
forest_clf = RandomForestClassifier(random_state=42)
forest_clf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=42,
            verbose=0, warm_start=False)

In [38]:
from sklearn.svm import LinearSVC
lin_clf = LinearSVC(random_state=42)
lin_clf.fit(X_train, y_train)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
     verbose=0)

In [39]:
from sklearn.metrics import accuracy_score

y_pred_knn = knn_clf.predict(X_test)
y_pred_lin = lin_clf.predict(X_test)
y_pred_forest = forest_clf.predict(X_test)
print(accuracy_score(y_test, y_pred_knn))
print(accuracy_score(y_test, y_pred_lin))
print(accuracy_score(y_test, y_pred_forest))

0.9652
0.852
0.9298


In [40]:
from sklearn.model_selection import cross_val_score
cross_val_score(knn_clf, X_val, y_val, cv=3, scoring="accuracy")

array([ 0.97182254,  0.98020396,  0.97957958])

In [41]:
cross_val_score(forest_clf, X_val, y_val, cv=3, scoring="accuracy")

array([ 0.96223022,  0.96760648,  0.95975976])

In [42]:
cross_val_score(lin_clf, X_val, y_val, cv=3, scoring="accuracy")

array([ 0.93645084,  0.95140972,  0.94714715])

In [43]:
from sklearn.ensemble import VotingClassifier
voting_clf = VotingClassifier(
    estimators=[('lin', lin_clf), ('forest', forest_clf), ('knn', knn_clf)],
    voting='hard')
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lin', LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
     verbose=0)), ('forest', RandomForestClassifier(bootstrap=True, class_weight=No...ski',
           metric_params=None, n_jobs=-1, n_neighbors=3, p=2,
           weights='distance'))],
         n_jobs=1, voting='hard', weights=None)

In [70]:
y_pred_voting = voting_clf.predict(X_test)
accuracy_score(y_test, y_pred_voting)

0.9476

In [45]:
cross_val_score(voting_clf, X_val, y_val, cv=3, scoring="accuracy")

array([ 0.97961631,  0.9760048 ,  0.97657658])

Run the individual classifiers from the previous exercise to make predictions on the validation set, and create a new training set with the resulting predictions: each training instance is a vector containing the set of predictions from all your classifiers for an image, and the target is the image’s class. Congratulations, you have just trained a blender, and together with the classifiers they form a stacking ensemble! Now let’s evaluate the ensemble on the test set. For each image in the test set, make predictions with all your classifiers, then feed the predictions to the blender to get the ensemble’s predictions. How does it compare to the voting classifier you trained earlier?

In [48]:
y_pred_val_knn = knn_clf.predict(X_val)
y_pred_val_lin = lin_clf.predict(X_val)
y_pred_val_forest = forest_clf.predict(X_val)

In [63]:
X_val.shape

(5000, 784)

In [53]:
X_new = np.zeros((5000, 3))

In [58]:
tmp_new = X_new.tolist()
tmp_knn = y_pred_val_knn.tolist()
tmp_lin = y_pred_val_lin.tolist()
tmp_forest = y_pred_val_forest.tolist()

In [68]:
for i in range(len(tmp_new)):
    tmp_new[i][0] = tmp_knn[i]
    tmp_new[i][1] = tmp_lin[i]
    tmp_new[i][2] = tmp_forest[i]
    
#tmp_new

In [66]:
X_train_new = np.array(tmp_new)

In [69]:
knn_blender = KNeighborsClassifier(n_jobs=-1, weights='distance', n_neighbors=3)
knn_blender.fit(X_train_new, y_val)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=3, p=2,
           weights='distance')

In [107]:
blender_preds = []

for image in X_test:
    pred_knn = knn_clf.predict(image)
    pred_lin = lin_clf.predict(image)
    pred_forest = forest_clf.predict(image)
    tmp_preds = np.array([pred_knn.item(0), pred_lin.item(0), pred_forest.item(0)])
    
    pred_blender = knn_blender.predict(tmp_preds.reshape(-1,1))
    blender_preds.append(pred_blender)
    
print('Finished')



JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/usr/lib/python3.5/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    179         sys.exit(msg)
    180     main_globals = sys.modules["__main__"].__dict__
    181     if alter_argv:
    182         sys.argv[0] = mod_spec.origin
    183     return _run_code(code, main_globals, None,
--> 184                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.5/site-packages/ipykernel_launcher.py')
    185 
    186 def run_module(mod_name, init_globals=None,
    187                run_name=None, alter_sys=False):
    188     """Execute a module's code without importing it

...........................................................................
/usr/lib/python3.5/runpy.py in _run_code(code=<code object <module> at 0x7fb47ccd9b70, file "/...3.5/site-packages/ipykernel_launcher.py", line 5>, run_globals={'__builtins__': <module 'builtins' (built-in)>, '__cached__': '/home/niall/.local/lib/python3.5/site-packages/__pycache__/ipykernel_launcher.cpython-35.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/home/niall/.local/lib/python3.5/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.5/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/home/niall/.../python3.5/site-packages/ipykernel/kernelapp.py'>, 'sys': <module 'sys' (built-in)>}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.5/site-packages/ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x7fb47ccd9b70, file "/...3.5/site-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__builtins__': <module 'builtins' (built-in)>, '__cached__': '/home/niall/.local/lib/python3.5/site-packages/__pycache__/ipykernel_launcher.cpython-35.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/home/niall/.local/lib/python3.5/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.5/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/home/niall/.../python3.5/site-packages/ipykernel/kernelapp.py'>, 'sys': <module 'sys' (built-in)>}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()
     17 
     18 
     19 
     20 

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    472             return self.subapp.start()
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         try:
--> 477             ioloop.IOLoop.instance().start()
    478         except KeyboardInterrupt:
    479             pass
    480 
    481 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 5
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 5), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 5)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=5)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': "blender_preds = []\n\nfor image in X_test:\n    pre...preds.append(pred_blender)\n    \nprint('Finished')", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 6, 23, 22, 55, 47, 874887, tzinfo=datetime.timezone.utc), 'msg_id': '3F30CD8144924A0FBE24E1E4BEDDB351', 'msg_type': 'execute_request', 'session': 'F33913D5F3644A09B607D743A2A03E4B', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '3F30CD8144924A0FBE24E1E4BEDDB351', 'msg_type': 'execute_request', 'parent_header': {}})
    230             self.log.warn("Unknown message type: %r", msg_type)
    231         else:
    232             self.log.debug("%s: %s", msg_type, msg)
    233             self.pre_handler_hook()
    234             try:
--> 235                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'F33913D5F3644A09B607D743A2A03E4B']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': "blender_preds = []\n\nfor image in X_test:\n    pre...preds.append(pred_blender)\n    \nprint('Finished')", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 6, 23, 22, 55, 47, 874887, tzinfo=datetime.timezone.utc), 'msg_id': '3F30CD8144924A0FBE24E1E4BEDDB351', 'msg_type': 'execute_request', 'session': 'F33913D5F3644A09B607D743A2A03E4B', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '3F30CD8144924A0FBE24E1E4BEDDB351', 'msg_type': 'execute_request', 'parent_header': {}}
    236             except Exception:
    237                 self.log.error("Exception in message handler:", exc_info=True)
    238             finally:
    239                 self.post_handler_hook()

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'F33913D5F3644A09B607D743A2A03E4B'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': "blender_preds = []\n\nfor image in X_test:\n    pre...preds.append(pred_blender)\n    \nprint('Finished')", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 6, 23, 22, 55, 47, 874887, tzinfo=datetime.timezone.utc), 'msg_id': '3F30CD8144924A0FBE24E1E4BEDDB351', 'msg_type': 'execute_request', 'session': 'F33913D5F3644A09B607D743A2A03E4B', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '3F30CD8144924A0FBE24E1E4BEDDB351', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code="blender_preds = []\n\nfor image in X_test:\n    pre...preds.append(pred_blender)\n    \nprint('Finished')", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    191 
    192         self._forward_input(allow_stdin)
    193 
    194         reply_content = {}
    195         try:
--> 196             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = "blender_preds = []\n\nfor image in X_test:\n    pre...preds.append(pred_blender)\n    \nprint('Finished')"
        store_history = True
        silent = False
    197         finally:
    198             self._restore_input()
    199 
    200         if res.error_before_exec is not None:

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=("blender_preds = []\n\nfor image in X_test:\n    pre...preds.append(pred_blender)\n    \nprint('Finished')",), **kwargs={'silent': False, 'store_history': True})
    528             )
    529         self.payload_manager.write_payload(payload)
    530 
    531     def run_cell(self, *args, **kwargs):
    532         self._last_traceback = None
--> 533         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ("blender_preds = []\n\nfor image in X_test:\n    pre...preds.append(pred_blender)\n    \nprint('Finished')",)
        kwargs = {'silent': False, 'store_history': True}
    534 
    535     def _showtraceback(self, etype, evalue, stb):
    536         # try to preserve ordering of tracebacks and print statements
    537         sys.stdout.flush()

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell="blender_preds = []\n\nfor image in X_test:\n    pre...preds.append(pred_blender)\n    \nprint('Finished')", store_history=True, silent=False, shell_futures=True)
   2693                 self.displayhook.exec_result = result
   2694 
   2695                 # Execute the user code
   2696                 interactivity = "none" if silent else self.ast_node_interactivity
   2697                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2698                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2699                 
   2700                 self.last_execution_succeeded = not has_raised
   2701 
   2702                 # Reset this so later displayed values do not modify the

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.For object>, <_ast.Expr object>], cell_name='<ipython-input-107-50d79ef78099>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 7fb44188c2b0, executi..._before_exec=None error_in_exec=None result=None>)
   2797 
   2798         try:
   2799             for i, node in enumerate(to_run_exec):
   2800                 mod = ast.Module([node])
   2801                 code = compiler(mod, cell_name, "exec")
-> 2802                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7fb428de68a0, file "<ipython-input-107-50d79ef78099>", line 3>
        result = <ExecutionResult object at 7fb44188c2b0, executi..._before_exec=None error_in_exec=None result=None>
   2803                     return True
   2804 
   2805             for i, node in enumerate(to_run_interactive):
   2806                 mod = ast.Interactive([node])

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7fb428de68a0, file "<ipython-input-107-50d79ef78099>", line 3>, result=<ExecutionResult object at 7fb44188c2b0, executi..._before_exec=None error_in_exec=None result=None>)
   2857         outflag = True  # happens in more places, so it's easier as default
   2858         try:
   2859             try:
   2860                 self.hooks.pre_run_code_hook()
   2861                 #rprint('Running code', repr(code_obj)) # dbg
-> 2862                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7fb428de68a0, file "<ipython-input-107-50d79ef78099>", line 3>
        self.user_global_ns = {'In': ['', "# To support both python 2 and python 3\nfrom __f...ut()\n    plt.savefig(path, format='png', dpi=300)", "from sklearn.datasets import fetch_mldata\nmnist = fetch_mldata('MNIST original')", 'X, y = mnist["data"], mnist["target"]', 'X_train, X_val, X_test, y_train, y_val, y_test =...60000], y[:40000], y[40000:50000], y[50000:60000]', 'import numpy as np\n\nshuffle_index = np.random.pe... = X_train[shuffle_index], y_train[shuffle_index]', "from sklearn.neighbors import KNeighborsClassifi...ce', n_neighbors=3)\nknn_clf.fit(X_train, y_train)", 'from sklearn.ensemble import RandomForestClassif...random_state=42)\nforest_clf.fit(X_train, y_train)', 'from sklearn.svm import LinearSVC\nlin_clf = LinearSVC(random_state=42)\nlin_clf.fit(X_train, y_train)', 'cross_val_score(forest_clf, X_val, y_val, cv=3, scoring="accuracy")', 'from sklearn.model_selection import cross_val_sc...(knn_clf, X_val, y_val, cv=3, scoring="accuracy")', 'cross_val_score(forest_clf, X_val, y_val, cv=3, scoring="accuracy")', 'cross_val_score(lin_clf, X_val, y_val, cv=3, scoring="accuracy")', 'from sklearn.metrics import accuracy_score\n\ny_pr...lf.predict(X_test)\naccuracy_score(y_test, y_pred)', 'from sklearn.metrics import accuracy_score\n\ny_pr..._clf.predict(X_val)\naccuracy_score(y_val, y_pred)', 'from sklearn.metrics import accuracy_score\n\ny_pr..._clf.predict(X_val)\naccuracy_score(y_val, y_pred)', 'from sklearn.metrics import accuracy_score\n\ny_pr..._clf.predict(X_val)\naccuracy_score(y_val, y_pred)', "from sklearn.ensemble import VotingClassifier\nvo...  voting='soft')\nvoting_clf.fit(X_train, y_train)", 'y_pred_voting = voting_clf.predict(X_val)\naccuracy_score(y_val, y_pred)', "from sklearn.ensemble import VotingClassifier\nvo...  voting='hard')\nvoting_clf.fit(X_train, y_train)", ...], 'KNeighborsClassifier': <class 'sklearn.neighbors.classification.KNeighborsClassifier'>, 'LinearSVC': <class 'sklearn.svm.classes.LinearSVC'>, 'Out': {6: KNeighborsClassifier(algorithm='auto', leaf_size..._neighbors=3, p=2,
           weights='distance'), 7: RandomForestClassifier(bootstrap=True, class_wei...tate=42,
            verbose=0, warm_start=False), 8: LinearSVC(C=1.0, class_weight=None, dual=True, f...l2', random_state=42, tol=0.0001,
     verbose=0), 10: array([ 0.99460108,  0.99129913,  0.99279928]), 11: array([ 0.98470306,  0.98259826,  0.98229823]), 12: array([ 0.97960408,  0.9729973 ,  0.98109811]), 13: 0.0, 14: 0.1915, 15: 0.18379999999999999, 16: 0.1804, ...}, 'RandomForestClassifier': <class 'sklearn.ensemble.forest.RandomForestClassifier'>, 'VotingClassifier': <class 'sklearn.ensemble.voting_classifier.VotingClassifier'>, 'X': array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0,...0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8), 'X_new': array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
...
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]]), 'X_test': array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0,...0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8), 'X_train': array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0,...0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8), ...}
        self.user_ns = {'In': ['', "# To support both python 2 and python 3\nfrom __f...ut()\n    plt.savefig(path, format='png', dpi=300)", "from sklearn.datasets import fetch_mldata\nmnist = fetch_mldata('MNIST original')", 'X, y = mnist["data"], mnist["target"]', 'X_train, X_val, X_test, y_train, y_val, y_test =...60000], y[:40000], y[40000:50000], y[50000:60000]', 'import numpy as np\n\nshuffle_index = np.random.pe... = X_train[shuffle_index], y_train[shuffle_index]', "from sklearn.neighbors import KNeighborsClassifi...ce', n_neighbors=3)\nknn_clf.fit(X_train, y_train)", 'from sklearn.ensemble import RandomForestClassif...random_state=42)\nforest_clf.fit(X_train, y_train)', 'from sklearn.svm import LinearSVC\nlin_clf = LinearSVC(random_state=42)\nlin_clf.fit(X_train, y_train)', 'cross_val_score(forest_clf, X_val, y_val, cv=3, scoring="accuracy")', 'from sklearn.model_selection import cross_val_sc...(knn_clf, X_val, y_val, cv=3, scoring="accuracy")', 'cross_val_score(forest_clf, X_val, y_val, cv=3, scoring="accuracy")', 'cross_val_score(lin_clf, X_val, y_val, cv=3, scoring="accuracy")', 'from sklearn.metrics import accuracy_score\n\ny_pr...lf.predict(X_test)\naccuracy_score(y_test, y_pred)', 'from sklearn.metrics import accuracy_score\n\ny_pr..._clf.predict(X_val)\naccuracy_score(y_val, y_pred)', 'from sklearn.metrics import accuracy_score\n\ny_pr..._clf.predict(X_val)\naccuracy_score(y_val, y_pred)', 'from sklearn.metrics import accuracy_score\n\ny_pr..._clf.predict(X_val)\naccuracy_score(y_val, y_pred)', "from sklearn.ensemble import VotingClassifier\nvo...  voting='soft')\nvoting_clf.fit(X_train, y_train)", 'y_pred_voting = voting_clf.predict(X_val)\naccuracy_score(y_val, y_pred)', "from sklearn.ensemble import VotingClassifier\nvo...  voting='hard')\nvoting_clf.fit(X_train, y_train)", ...], 'KNeighborsClassifier': <class 'sklearn.neighbors.classification.KNeighborsClassifier'>, 'LinearSVC': <class 'sklearn.svm.classes.LinearSVC'>, 'Out': {6: KNeighborsClassifier(algorithm='auto', leaf_size..._neighbors=3, p=2,
           weights='distance'), 7: RandomForestClassifier(bootstrap=True, class_wei...tate=42,
            verbose=0, warm_start=False), 8: LinearSVC(C=1.0, class_weight=None, dual=True, f...l2', random_state=42, tol=0.0001,
     verbose=0), 10: array([ 0.99460108,  0.99129913,  0.99279928]), 11: array([ 0.98470306,  0.98259826,  0.98229823]), 12: array([ 0.97960408,  0.9729973 ,  0.98109811]), 13: 0.0, 14: 0.1915, 15: 0.18379999999999999, 16: 0.1804, ...}, 'RandomForestClassifier': <class 'sklearn.ensemble.forest.RandomForestClassifier'>, 'VotingClassifier': <class 'sklearn.ensemble.voting_classifier.VotingClassifier'>, 'X': array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0,...0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8), 'X_new': array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
...
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]]), 'X_test': array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0,...0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8), 'X_train': array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0,...0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8), ...}
   2863             finally:
   2864                 # Reset our crash handler in place
   2865                 sys.excepthook = old_excepthook
   2866         except SystemExit as e:

...........................................................................
/home/niall/Python-Learning/ML/Chapter7_Exercises/<ipython-input-107-50d79ef78099> in <module>()
      4     pred_knn = knn_clf.predict(image)
      5     pred_lin = lin_clf.predict(image)
      6     pred_forest = forest_clf.predict(image)
      7     tmp_preds = np.array([pred_knn.item(0), pred_lin.item(0), pred_forest.item(0)])
      8     
----> 9     pred_blender = knn_blender.predict(tmp_preds.reshape(-1,1))
     10     blender_preds.append(pred_blender)
     11     
     12 print('Finished')
     13 

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/sklearn/neighbors/classification.py in predict(self=KNeighborsClassifier(algorithm='auto', leaf_size..._neighbors=3, p=2,
           weights='distance'), X=array([[ 9.],
       [ 4.],
       [ 8.]]))
    140         y : array of shape [n_samples] or [n_samples, n_outputs]
    141             Class labels for each data sample.
    142         """
    143         X = check_array(X, accept_sparse='csr')
    144 
--> 145         neigh_dist, neigh_ind = self.kneighbors(X)
        neigh_dist = undefined
        neigh_ind = undefined
        self.kneighbors = <bound method KNeighborsMixin.kneighbors of KNei...neighbors=3, p=2,
           weights='distance')>
        X = array([[ 9.],
       [ 4.],
       [ 8.]])
    146 
    147         classes_ = self.classes_
    148         _y = self._y
    149         if not self.outputs_2d_:

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/sklearn/neighbors/base.py in kneighbors(self=KNeighborsClassifier(algorithm='auto', leaf_size..._neighbors=3, p=2,
           weights='distance'), X=array([[ 9.],
       [ 4.],
       [ 8.]]), n_neighbors=3, return_distance=True)
    376                     "%s does not work with sparse matrices. Densify the data, "
    377                     "or set algorithm='brute'" % self._fit_method)
    378             result = Parallel(n_jobs, backend='threading')(
    379                 delayed(self._tree.query, check_pickle=False)(
    380                     X[s], n_neighbors, return_distance)
--> 381                 for s in gen_even_slices(X.shape[0], n_jobs)
        X.shape = (3, 1)
        n_jobs = 4
    382             )
    383             if return_distance:
    384                 dist, neigh_ind = tuple(zip(*result))
    385                 result = np.vstack(dist), np.vstack(neigh_ind)

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=4), iterable=<generator object KNeighborsMixin.kneighbors.<locals>.<genexpr>>)
    763             if pre_dispatch == "all" or n_jobs == 1:
    764                 # The iterable was consumed all at once by the above for loop.
    765                 # No need to wait for async callbacks to trigger to
    766                 # consumption.
    767                 self._iterating = False
--> 768             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=4)>
    769             # Make sure that we get a last message telling us we are done
    770             elapsed_time = time.time() - self._start_time
    771             self._print('Done %3i out of %3i | elapsed: %s finished',
    772                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Fri Jun 23 18:55:47 2017
PID: 5427                                    Python 3.5.2: /usr/bin/python3
...........................................................................
/home/niall/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<built-in method query of sklearn.neighbors.kd_tree.KDTree object>, (array([[ 9.]]), 3, True), {})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <built-in method query of sklearn.neighbors.kd_tree.KDTree object>
        args = (array([[ 9.]]), 3, True)
        kwargs = {}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/home/niall/.local/lib/python3.5/site-packages/sklearn/neighbors/kd_tree.cpython-35m-x86_64-linux-gnu.so in sklearn.neighbors.kd_tree.BinaryTree.query (sklearn/neighbors/kd_tree.c:11328)()
   1289 
   1290 
   1291 
   1292 
   1293 
-> 1294 
   1295 
   1296 
   1297 
   1298 

ValueError: query data dimension must match training data dimension
___________________________________________________________________________

In [105]:
y_pred_blender = np.array(blender_preds)
accuracy_score(y_test, y_pred_blender)

AttributeError: 'float' object has no attribute 'item'

In [104]:
y_pred_blender

array([ 3.,  4.,  4., ...,  4.,  4.,  4.])

In [100]:
y_test

array([ 4.,  4.,  4., ...,  9.,  9.,  9.])