In [1]:
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

In [2]:
import pandas as pd
import numpy as np
import imp
import matplotlib.pyplot as plt
import xgboost
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import *
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
import seaborn as sns
%matplotlib inline
from HelperClass.DataProcessing import *

In [3]:
trainDataFile = 'Data/train.csv'
testDataFile = 'Data/test.csv'
dataTypes = {
    'PassengerId' : 'int64',
    'Survived' : 'int64',
    'Pclass' : 'int64',
    'Name' : 'object',
    'Sex' : 'object',
    'Age' : 'float64',
    'SibSp' : 'int64',
    'Parch' : 'int64',
    'Ticket' : 'object',
    'Fare' : 'float64',
    'Cabin' : 'object',
    'Embarked' : 'object'
}
sep = ','
target = 'Survived'
randomSeed = 83213
testRatio = 0.20
numCores = 10

In [4]:
dataProc = DataProcessing(trainDataFile, dataTypes, sep)
dataProc.ReadFile()
dataProc.AllData = dataProc.AllData[~dataProc.AllData.Embarked.isnull()]

submitDataProc = DataProcessing(testDataFile, dataTypes, sep)
submitDataProc.ReadFile()
submitDataProc.AllData = submitDataProc.AllData[~submitDataProc.AllData.Embarked.isnull()]

Findings
1. Age makes difference, F_onewayResult(statistic=4.271194933815904, pvalue=0.03912465401348333)
2. Fare makes difference, F_onewayResult(statistic=63.03076422804448, pvalue=6.120189341921873e-15)
3. PassengerId doesn't make difference, F_onewayResult(statistic=0.022284812266068058, pvalue=0.8813657768798144)
4. Pclass makes difference, F_onewayResult(statistic=115.03127218827665, pvalue=2.5370473879805644e-25)
5. SibSp doesn't make difference, F_onewayResult(statistic=1.110572204113227, pvalue=0.29224392869817906)
6. Parch makes difference, F_onewayResult(statistic=5.963463836603541, pvalue=0.0147992453747224)

In [5]:
def PopulateFeatures(data):
    data.Cabin.fillna('NA', inplace=True)
    data['CabinType'] = data.Cabin.fillna('NA').str.get(0)
    return data

In [6]:
dataProc.AllData = PopulateFeatures(dataProc.AllData)
submitDataProc.AllData = PopulateFeatures(submitDataProc.AllData)

In [7]:
dataProc.AllData['CabinType_B_D_E'] = np.where(dataProc.AllData.CabinType.isin(['B', 'D', 'E']), 2, 1)
submitDataProc.AllData['CabinType_B_D_E'] = np.where(submitDataProc.AllData.CabinType.isin(['B', 'D', 'E']), 2, 1)

In [8]:
dataProc.AllData.Age.fillna(-999, inplace=True)
submitDataProc.AllData.Age.fillna(-999, inplace=True)

In [9]:
bins = [-10000, 0, 20, 60, 1000]
dataProc.AllData['AgeGroup'] = pd.cut(dataProc.AllData.Age, bins).astype(str)
submitDataProc.AllData['AgeGroup'] = pd.cut(submitDataProc.AllData.Age, bins).astype(str)

In [10]:
dataProc.AllData['GenderAgeGroup'] = dataProc.AllData.Sex + '_' + dataProc.AllData.AgeGroup
submitDataProc.AllData['GenderAgeGroup'] = submitDataProc.AllData.Sex + '_' + submitDataProc.AllData.AgeGroup

In [11]:
dataProc.AllData.groupby(['GenderAgeGroup']).agg({'Survived' : ['mean', 'count']}).sort_values(by=('Survived', 'mean'), 
                                                                                               ascending=False)

Unnamed: 0_level_0,Survived,Survived
Unnamed: 0_level_1,mean,count
GenderAgeGroup,Unnamed: 1_level_2,Unnamed: 2_level_2
"female_(60, 1000]",1.0,2
"female_(20, 60]",0.777778,180
"female_(0, 20]",0.688312,77
"female_(-10000, 0]",0.679245,53
"male_(0, 20]",0.284314,102
"male_(20, 60]",0.186747,332
"male_(-10000, 0]",0.129032,124
"male_(60, 1000]",0.105263,19


In [12]:
genderAgeGroups = {
    'GenderAgeGroup' : {
        'female_(20, 60]' : 7, 
        'female_(60, 1000]' : 8,
        'female_(0, 20]' : 6, 
        'female_(-10000, 0]' : 5,
        'male_(0, 20]' : 4, 
        'male_(20, 60]' : 3, 
        'male_(-10000, 0]' : 2, 
        'male_(60, 1000]' : 1
    }
}

In [13]:
dataProc.AllData.replace(genderAgeGroups, inplace=True)
submitDataProc.AllData.replace(genderAgeGroups, inplace=True)

In [14]:
dataProc.AllData['Sex'] = np.where(dataProc.AllData['Sex'] == 'male', 1, 2)
submitDataProc.AllData['Sex'] = np.where(submitDataProc.AllData['Sex'] == 'male', 1, 2)

In [15]:
embarkedEncoding = {
    'Embarked' : {
       'S' : 1,
       'Q' : 1,
       'C' : 2
    }
}

In [16]:
dataProc.AllData.replace(embarkedEncoding, inplace=True)
submitDataProc.AllData.replace(embarkedEncoding, inplace=True)

In [17]:
numCols = ['Fare', 'Pclass', 'Parch', 'CabinType_B_D_E', 'GenderAgeGroup', 'Embarked', 'Sex']

ageFilledInData

dataProc.AllData['Ticket_Num_Ind'] = dataProc.AllData['Ticket'].str.isnumeric()

In [18]:
dataProc.PopulateFeatureColumns(numCols)
submitDataProc.PopulateFeatureColumns(numCols)

In [19]:
dataProc.PopulateFeatureAndLabel(target)
submitDataProc.X = submitDataProc.AllData[numCols]

In [20]:
dataProc.RandomSplitTrainTestData(testRatio, randomSeed, False)

In [21]:
averageFareThridClass = submitDataProc.AllData[(submitDataProc.AllData.Pclass == 3) & 
                                               (submitDataProc.AllData.CabinType == 'N')].Fare.mean()
submitDataProc.X.Fare.fillna(averageFareThridClass, inplace=True)

In [22]:
sortedCols = ['CabinType_B_D_E', 'Embarked', 'Fare', 'GenderAgeGroup', 'Parch', 'Pclass', 'Sex']

## Random forest model

In [23]:
preProc = ColumnTransformer([
    #('StdScaler', StandardScaler(), numCols),
    #('OneHot', OneHotEncoder(handle_unknown='ignore'), catCols)
])
rf = RandomForestClassifier(random_state = randomSeed)
rfpipeline = Pipeline([#('preProcessing', preProc),
                       #('ReduceDim', SelectKBest(f_classif)),
                       ('rf', rf)])
params = {
   # 'ReduceDim__k' : range(5, 11),
    'rf__n_estimators' : range(3, 20),
    'rf__max_depth' : range(1, 5),
    'rf__max_features' : ['sqrt', 'log2', None]
}
rfcv = RandomizedSearchCV(rfpipeline, n_iter= 1000, scoring = 'accuracy', 
                          param_distributions=params, verbose=1, cv = 10, n_jobs = numCores, random_state=randomSeed)

## XGB model

In [24]:
param_grid = {
    'xgb__max_depth' : [3, 4, 5, 6],
    'xgb__learning_rate' : [0.05, 0.1, 0.2],
    'xgb__n_estimators' : range(5, 21),
    'xgb__subsample' : [0.8, 0.9, 0.95],
    #'xgb__colsample_bytree' : [0.8, 0.9, 0.95],
    'xgb__reg_alpha' : [0.05, 0.1, 0.2, 0.4],
    'xgb__reg_lambda' : [0.05, 0.1, 0.2, 0.4],
    'xgb__gamma' : [0.0001, 0.001, 0.01, 0.1]
}

xgb = xgboost.XGBClassifier(seed=randomSeed)
xgbpipeline = Pipeline([#('preProcessing', preProc),
                        ('xgb', xgb)])
xgbcv = RandomizedSearchCV(xgbpipeline, n_iter = 500, cv=10, param_distributions =param_grid,
                        scoring='accuracy', verbose = 1, n_jobs = numCores, random_state=randomSeed)

## Model stacking

In [28]:
from mlens.ensemble import BlendEnsemble
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

estimators = [rfcv, xgbcv]
ensemble = BlendEnsemble()
ensemble.add(estimators, proba=True)   # Specify 'proba' here
ensemble.add_meta(LogisticRegression())

[MLENS] backend: threading


BlendEnsemble(array_check=None, backend=None,
       layers=[Layer(backend='threading', dtype=<class 'numpy.float32'>, n_jobs=-1,
   name='layer-1', propagate_features=None, raise_on_exception=True,
   random_state=None, shuffle=False,
   stack=[Group(backend='threading', dtype=<class 'numpy.float32'>,
   indexer=BlendIndex(X=None, raise_on_exception=...rer=None)],
   n_jobs=-1, name='group-1', raise_on_exception=True, transformers=[])],
   verbose=0)],
       model_selection=False, n_jobs=None, raise_on_exception=True,
       random_state=None, sample_size=20, scorer=None, shuffle=False,
       test_size=0.5, verbose=False)

In [30]:
ensemble.fit(dataProc.X_train.reindex(columns=sortedCols), dataProc.y_train)

Fitting 10 folds for each of 204 candidates, totalling 2040 fits
Fitting 10 folds for each of 204 candidates, totalling 2040 fits
Fitting 10 folds for each of 500 candidates, totalling 5000 fits
Fitting 10 folds for each of 500 candidates, totalling 5000 fits


[Parallel(n_jobs=10)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=10)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=10)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=10)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=10)]: Done 2040 out of 2040 | elapsed:  2.7min finished
[Parallel(n_jobs=10)]: Done 2040 out of 2040 | elapsed:  3.4min finished
[Parallel(n_jobs=10)]: Done 5000 out of 5000 | elapsed:  3.9min finished
[Parallel(n_jobs=10)]: Done 5000 out of 5000 | elapsed:  3.9min finished
[Parallel(n_jobs=10)]: Done 2040 out of 2040 | elapsed:  3.9min finished
[Parallel(n_jobs=10)]: Done 5000 out of 5000 | elapsed:  4.1min finished
Exception ignored in: <bound method DMatrix.__del__ of <xgboost.core.DMatrix object at 0x000002A32C5610B8>>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\xgboost\core.py", lin

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...wGPU\\lib\\site-packages\\ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\runpy.py in _run_code(code=<code object <module> at 0x000002A3233646F0, fil...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\...ges\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...wGPU\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\ProgramD...PU\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...wGPU\\lib\\site-packages\\ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x000002A3233646F0, fil...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\...ges\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...wGPU\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\ProgramD...PU\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    500         if self.poller is not None:
    501             self.poller.start()
    502         self.kernel.start()
    503         self.io_loop = ioloop.IOLoop.current()
    504         try:
--> 505             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    506         except KeyboardInterrupt:
    507             pass
    508 
    509 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\tornado\platform\asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    143         except (RuntimeError, AssertionError):
    144             old_loop = None  # type: ignore
    145         try:
    146             self._setup_logging()
    147             asyncio.set_event_loop(self.asyncio_loop)
--> 148             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Win...EventLoop running=True closed=False debug=False>>
    149         finally:
    150             asyncio.set_event_loop(old_loop)
    151 
    152     def stop(self) -> None:

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\asyncio\base_events.py in run_forever(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
    422             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    423                                    finalizer=self._asyncgen_finalizer_hook)
    424         try:
    425             events._set_running_loop(self)
    426             while True:
--> 427                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_Windo...EventLoop running=True closed=False debug=False>>
    428                 if self._stopping:
    429                     break
    430         finally:
    431             self._stopping = False

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\asyncio\base_events.py in _run_once(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
   1435                         logger.warning('Executing %s took %.3f seconds',
   1436                                        _format_handle(handle), dt)
   1437                 finally:
   1438                     self._current_handle = None
   1439             else:
-> 1440                 handle._run()
        handle._run = <bound method Handle._run of <Handle IOLoop.add_...flowGPU\lib\site-packages\tornado\ioloop.py:690>>
   1441         handle = None  # Needed to break cycles when an exception occurs.
   1442 
   1443     def _set_coroutine_wrapper(self, enabled):
   1444         try:

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\asyncio\events.py in _run(self=<Handle IOLoop.add_future.<locals>.<lambda>(<Fut...rflowGPU\lib\site-packages\tornado\ioloop.py:690>)
    140             self._callback = None
    141             self._args = None
    142 
    143     def _run(self):
    144         try:
--> 145             self._callback(*self._args)
        self._callback = <function IOLoop.add_future.<locals>.<lambda>>
        self._args = (<Future finished result=(10, 32, <bound method.....61D31B8>, <zmq.sugar.fr...002A3261D3270>, ...]))>,)
    146         except Exception as exc:
    147             cb = _format_callback_source(self._callback, self._args)
    148             msg = 'Exception in callback {}'.format(cb)
    149             context = {

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\tornado\ioloop.py in <lambda>(f=<Future finished result=(10, 32, <bound method.....61D31B8>, <zmq.sugar.fr...002A3261D3270>, ...]))>)
    685             #
    686             # Wrap the callback in self._run_callback so we control
    687             # the error logging (i.e. it goes to tornado.log.app_log
    688             # instead of asyncio's log).
    689             future.add_done_callback(
--> 690                 lambda f: self._run_callback(functools.partial(callback, future))
        f = <Future finished result=(10, 32, <bound method.....61D31B8>, <zmq.sugar.fr...002A3261D3270>, ...]))>
    691             )
    692         else:
    693             assert is_future(future)
    694             # For concurrent futures, we use self.add_callback, so

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\tornado\ioloop.py in _run_callback(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, callback=functools.partial(<function Runner.handle_yield....1D31B8>, <zmq.sugar.fr...002A3261D3270>, ...]))>))
    738         .. versionchanged:: 6.0
    739 
    740            CancelledErrors are no longer logged.
    741         """
    742         try:
--> 743             ret = callback()
        ret = undefined
        callback = functools.partial(<function Runner.handle_yield....1D31B8>, <zmq.sugar.fr...002A3261D3270>, ...]))>)
    744             if ret is not None:
    745                 from tornado import gen
    746 
    747                 # Functions that return Futures typically swallow all

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\tornado\gen.py in inner(f=None)
    776         elif not self.future.done():
    777 
    778             def inner(f: Any) -> None:
    779                 # Break a reference cycle to speed GC.
    780                 f = None  # noqa: F841
--> 781                 self.run()
    782 
    783             self.io_loop.add_future(self.future, inner)
    784             return False
    785         return True

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\tornado\gen.py in run(self=<tornado.gen.Runner object>)
    737                         finally:
    738                             # Break up a reference to itself
    739                             # for faster GC on CPython.
    740                             exc_info = None
    741                     else:
--> 742                         yielded = self.gen.send(value)
        yielded = undefined
        self.gen.send = <built-in method send of generator object>
        value = (10, 32, <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object>>, (<zmq.eventloop.zmqstream.ZMQStream object>, [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]))
    743 
    744                 except (StopIteration, Return) as e:
    745                     self.finished = True
    746                     self.future = _null_future

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\ipykernel\kernelbase.py in process_one(self=<ipykernel.ipkernel.IPythonKernel object>, wait=True)
    352         else:
    353             try:
    354                 priority, t, dispatch, args = self.msg_queue.get_nowait()
    355             except QueueEmpty:
    356                 return None
--> 357         yield gen.maybe_future(dispatch(*args))
        dispatch = <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object>>
        args = (<zmq.eventloop.zmqstream.ZMQStream object>, [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    358 
    359     @gen.coroutine
    360     def dispatch_queue(self):
    361         """Coroutine to preserve order of message handling

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\tornado\gen.py in wrapper(*args=(<ipykernel.ipkernel.IPythonKernel object>, <zmq.eventloop.zmqstream.ZMQStream object>, [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]), **kwargs={})
    204                 # avoid the cost of creating a Runner when the coroutine
    205                 # never actually yields, which in turn allows us to
    206                 # use "optional" coroutines in critical path code without
    207                 # performance penalty for the synchronous case.
    208                 try:
--> 209                     yielded = next(result)
        yielded = undefined
        result = <generator object dispatch_shell>
    210                 except (StopIteration, Return) as e:
    211                     future_set_result_unless_cancelled(
    212                         future, _value_from_stopiteration(e)
    213                     )

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': 'ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 6, 22, 3, 47, 11, 377019, tzinfo=tzutc()), 'msg_id': '701f8c05ac084fb79eae078ffde3f248', 'msg_type': 'execute_request', 'session': '19ee29c83ceb4c8c9957b181ff677507', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '701f8c05ac084fb79eae078ffde3f248', 'msg_type': 'execute_request', 'parent_header': {}})
    262             try:
    263                 self.pre_handler_hook()
    264             except Exception:
    265                 self.log.debug("Unable to signal in pre_handler_hook:", exc_info=True)
    266             try:
--> 267                 yield gen.maybe_future(handler(stream, idents, msg))
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'19ee29c83ceb4c8c9957b181ff677507']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': 'ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 6, 22, 3, 47, 11, 377019, tzinfo=tzutc()), 'msg_id': '701f8c05ac084fb79eae078ffde3f248', 'msg_type': 'execute_request', 'session': '19ee29c83ceb4c8c9957b181ff677507', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '701f8c05ac084fb79eae078ffde3f248', 'msg_type': 'execute_request', 'parent_header': {}}
    268             except Exception:
    269                 self.log.error("Exception in message handler:", exc_info=True)
    270             finally:
    271                 try:

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\tornado\gen.py in wrapper(*args=(<ipykernel.ipkernel.IPythonKernel object>, <zmq.eventloop.zmqstream.ZMQStream object>, [b'19ee29c83ceb4c8c9957b181ff677507'], {'buffers': [], 'content': {'allow_stdin': True, 'code': 'ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 6, 22, 3, 47, 11, 377019, tzinfo=tzutc()), 'msg_id': '701f8c05ac084fb79eae078ffde3f248', 'msg_type': 'execute_request', 'session': '19ee29c83ceb4c8c9957b181ff677507', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '701f8c05ac084fb79eae078ffde3f248', 'msg_type': 'execute_request', 'parent_header': {}}), **kwargs={})
    204                 # avoid the cost of creating a Runner when the coroutine
    205                 # never actually yields, which in turn allows us to
    206                 # use "optional" coroutines in critical path code without
    207                 # performance penalty for the synchronous case.
    208                 try:
--> 209                     yielded = next(result)
        yielded = undefined
        result = <generator object execute_request>
    210                 except (StopIteration, Return) as e:
    211                     future_set_result_unless_cancelled(
    212                         future, _value_from_stopiteration(e)
    213                     )

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'19ee29c83ceb4c8c9957b181ff677507'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': 'ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 6, 22, 3, 47, 11, 377019, tzinfo=tzutc()), 'msg_id': '701f8c05ac084fb79eae078ffde3f248', 'msg_type': 'execute_request', 'session': '19ee29c83ceb4c8c9957b181ff677507', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '701f8c05ac084fb79eae078ffde3f248', 'msg_type': 'execute_request', 'parent_header': {}})
    529             self._publish_execute_input(code, parent, self.execution_count)
    530 
    531         reply_content = yield gen.maybe_future(
    532             self.do_execute(
    533                 code, silent, store_history,
--> 534                 user_expressions, allow_stdin,
        user_expressions = {}
        allow_stdin = True
    535             )
    536         )
    537 
    538         # Flush output before sending the reply.

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\tornado\gen.py in wrapper(*args=(<ipykernel.ipkernel.IPythonKernel object>, 'ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))', False, True, {}, True), **kwargs={})
    204                 # avoid the cost of creating a Runner when the coroutine
    205                 # never actually yields, which in turn allows us to
    206                 # use "optional" coroutines in critical path code without
    207                 # performance penalty for the synchronous case.
    208                 try:
--> 209                     yielded = next(result)
        yielded = undefined
        result = <generator object do_execute>
    210                 except (StopIteration, Return) as e:
    211                     future_set_result_unless_cancelled(
    212                         future, _value_from_stopiteration(e)
    213                     )

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    289                     res = yield coro_future
    290             else:
    291                 # runner isn't already running,
    292                 # make synchronous call,
    293                 # letting shell dispatch to loop runners
--> 294                 res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        code = 'ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))'
        store_history = True
        silent = False
    295         finally:
    296             self._restore_input()
    297 
    298         if res.error_before_exec is not None:

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))',), **kwargs={'silent': False, 'store_history': True})
    531             )
    532         self.payload_manager.write_payload(payload)
    533 
    534     def run_cell(self, *args, **kwargs):
    535         self._last_traceback = None
--> 536         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))',)
        kwargs = {'silent': False, 'store_history': True}
    537 
    538     def _showtraceback(self, etype, evalue, stb):
    539         # try to preserve ordering of tracebacks and print statements
    540         sys.stdout.flush()

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))', store_history=True, silent=False, shell_futures=True)
   2843         result : :class:`ExecutionResult`
   2844         """
   2845         result = None
   2846         try:
   2847             result = self._run_cell(
-> 2848                 raw_cell, store_history, silent, shell_futures)
        raw_cell = 'ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))'
        store_history = True
        silent = False
        shell_futures = True
   2849         finally:
   2850             self.events.trigger('post_execute')
   2851             if not silent:
   2852                 self.events.trigger('post_run_cell', result)

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\IPython\core\interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))', store_history=True, silent=False, shell_futures=True)
   2869             runner = self.loop_runner
   2870         else:
   2871             runner = _pseudo_sync_runner
   2872 
   2873         try:
-> 2874             return runner(coro)
        runner = <function _pseudo_sync_runner>
        coro = <generator object InteractiveShell.run_cell_async>
   2875         except BaseException as e:
   2876             info = ExecutionInfo(raw_cell, store_history, silent, shell_futures)
   2877             result = ExecutionResult(info)
   2878             result.error_in_exec = e

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\IPython\core\async_helpers.py in _pseudo_sync_runner(coro=<generator object InteractiveShell.run_cell_async>)
     62 
     63     Credit to Nathaniel Smith
     64 
     65     """
     66     try:
---> 67         coro.send(None)
        coro.send = <built-in method send of generator object>
     68     except StopIteration as exc:
     69         return exc.value
     70     else:
     71         # TODO: do not raise but return an execution result with the right info.

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\IPython\core\interactiveshell.py in run_cell_async(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='ensemble.fit(dataProc.X_train.reindex(columns=so...acy_score(dataProc.y_test, y_test_stacking_pred))', store_history=True, silent=False, shell_futures=True)
   3044                 interactivity = "none" if silent else self.ast_node_interactivity
   3045                 if _run_async:
   3046                     interactivity = 'async'
   3047 
   3048                 has_raised = yield from self.run_ast_nodes(code_ast.body, cell_name,
-> 3049                        interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   3050 
   3051                 self.last_execution_succeeded = not has_raised
   3052                 self.last_execution_result = result
   3053 

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Expr object>, <_ast.Assign object>, <_ast.Expr object>], cell_name='<ipython-input-30-9587da1d2955>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 2a32b141518, executio...rue silent=False shell_futures=True> result=None>)
   3209                     return True
   3210             else:
   3211                 for i, node in enumerate(to_run_exec):
   3212                     mod = Module([node], [])
   3213                     code = compiler(mod, cell_name, "exec")
-> 3214                     if (yield from self.run_code(code, result)):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x000002A32B507D20, file "<ipython-input-30-9587da1d2955>", line 2>
        result = <ExecutionResult object at 2a32b141518, executio...rue silent=False shell_futures=True> result=None>
   3215                         return True
   3216 
   3217                 for i, node in enumerate(to_run_interactive):
   3218                     mod = ast.Interactive([node])

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x000002A32B507D20, file "<ipython-input-30-9587da1d2955>", line 2>, result=<ExecutionResult object at 2a32b141518, executio...rue silent=False shell_futures=True> result=None>, async_=False)
   3291                 if async_:
   3292                     last_expr = (yield from self._async_exec(code_obj, self.user_ns))
   3293                     code = compile('last_expr', 'fake', "single")
   3294                     exec(code, {'last_expr': last_expr})
   3295                 else:
-> 3296                     exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x000002A32B507D20, file "<ipython-input-30-9587da1d2955>", line 2>
        self.user_global_ns = {'BlendEnsemble': <class 'mlens.ensemble.blend.BlendEnsemble'>, 'ColumnTransformer': <class 'sklearn.compose._column_transformer.ColumnTransformer'>, 'DataProcessing': <class 'HelperClass.DataProcessing.DataProcessing'>, 'GenericUnivariateSelect': <class 'sklearn.feature_selection.univariate_selection.GenericUnivariateSelect'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'def warn(*args, **kwargs):\n    pass\nimport warnings\nwarnings.warn = warn', "import pandas as pd\nimport numpy as np\nimport im...inline')\nfrom HelperClass.DataProcessing import *", "trainDataFile = 'Data/train.csv'\ntestDataFile = ...randomSeed = 83213\ntestRatio = 0.20\nnumCores = 10", 'dataProc = DataProcessing(trainDataFile, dataTyp...llData[~submitDataProc.AllData.Embarked.isnull()]', "def PopulateFeatures(data):\n    data.Cabin.filln...ata.Cabin.fillna('NA').str.get(0)\n    return data", 'dataProc.AllData = PopulateFeatures(dataProc.All...llData = PopulateFeatures(submitDataProc.AllData)', "dataProc.AllData['CabinType_B_D_E'] = np.where(d...oc.AllData.CabinType.isin(['B', 'D', 'E']), 2, 1)", 'dataProc.AllData.Age.fillna(-999, inplace=True)\n...itDataProc.AllData.Age.fillna(-999, inplace=True)', 'bins = [-10000, 0, 20, 60, 1000]\ndataProc.AllDat...cut(submitDataProc.AllData.Age, bins).astype(str)', "dataProc.AllData['GenderAgeGroup'] = dataProc.Al...lData.Sex + '_' + submitDataProc.AllData.AgeGroup", "dataProc.AllData.groupby(['GenderAgeGroup']).agg...                                 ascending=False)", "genderAgeGroups = {\n    'GenderAgeGroup' : {\n   ..., 0]' : 2, \n        'male_(60, 1000]' : 1\n    }\n}", 'dataProc.AllData.replace(genderAgeGroups, inplac...oc.AllData.replace(genderAgeGroups, inplace=True)', "dataProc.AllData['Sex'] = np.where(dataProc.AllD...re(submitDataProc.AllData['Sex'] == 'male', 1, 2)", "embarkedEncoding = {\n    'Embarked' : {\n       'S' : 1,\n       'Q' : 1,\n       'C' : 2\n    }\n}", 'dataProc.AllData.replace(embarkedEncoding, inpla...c.AllData.replace(embarkedEncoding, inplace=True)', "numCols = ['Fare', 'Pclass', 'Parch', 'CabinType_B_D_E', 'GenderAgeGroup', 'Embarked', 'Sex']", 'dataProc.PopulateFeatureColumns(numCols)\nsubmitDataProc.PopulateFeatureColumns(numCols)', 'dataProc.PopulateFeatureAndLabel(target)\nsubmitDataProc.X = submitDataProc.AllData[numCols]', ...], 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'OneHotEncoder': <class 'sklearn.preprocessing._encoders.OneHotEncoder'>, 'Out': {11:                     Survived      
             ...0.129032   124
male_(60, 1000]     0.105263    19, 25:      CabinType_B_D_E  Embarked      Fare  Gender...    3      0       2    1

[711 rows x 7 columns], 26: array(['CabinType_B_D_E', 'Embarked', 'Fare', 'G..., 'Parch',
       'Pclass', 'Sex'], dtype=object), 27:      CabinType_B_D_E  Embarked      Fare  Gender...    2      1       3    1

[418 rows x 7 columns], 28: BlendEnsemble(array_check=None, backend=None,
  ...uffle=False,
       test_size=0.5, verbose=False)}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, ...}
        self.user_ns = {'BlendEnsemble': <class 'mlens.ensemble.blend.BlendEnsemble'>, 'ColumnTransformer': <class 'sklearn.compose._column_transformer.ColumnTransformer'>, 'DataProcessing': <class 'HelperClass.DataProcessing.DataProcessing'>, 'GenericUnivariateSelect': <class 'sklearn.feature_selection.univariate_selection.GenericUnivariateSelect'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'def warn(*args, **kwargs):\n    pass\nimport warnings\nwarnings.warn = warn', "import pandas as pd\nimport numpy as np\nimport im...inline')\nfrom HelperClass.DataProcessing import *", "trainDataFile = 'Data/train.csv'\ntestDataFile = ...randomSeed = 83213\ntestRatio = 0.20\nnumCores = 10", 'dataProc = DataProcessing(trainDataFile, dataTyp...llData[~submitDataProc.AllData.Embarked.isnull()]', "def PopulateFeatures(data):\n    data.Cabin.filln...ata.Cabin.fillna('NA').str.get(0)\n    return data", 'dataProc.AllData = PopulateFeatures(dataProc.All...llData = PopulateFeatures(submitDataProc.AllData)', "dataProc.AllData['CabinType_B_D_E'] = np.where(d...oc.AllData.CabinType.isin(['B', 'D', 'E']), 2, 1)", 'dataProc.AllData.Age.fillna(-999, inplace=True)\n...itDataProc.AllData.Age.fillna(-999, inplace=True)', 'bins = [-10000, 0, 20, 60, 1000]\ndataProc.AllDat...cut(submitDataProc.AllData.Age, bins).astype(str)', "dataProc.AllData['GenderAgeGroup'] = dataProc.Al...lData.Sex + '_' + submitDataProc.AllData.AgeGroup", "dataProc.AllData.groupby(['GenderAgeGroup']).agg...                                 ascending=False)", "genderAgeGroups = {\n    'GenderAgeGroup' : {\n   ..., 0]' : 2, \n        'male_(60, 1000]' : 1\n    }\n}", 'dataProc.AllData.replace(genderAgeGroups, inplac...oc.AllData.replace(genderAgeGroups, inplace=True)', "dataProc.AllData['Sex'] = np.where(dataProc.AllD...re(submitDataProc.AllData['Sex'] == 'male', 1, 2)", "embarkedEncoding = {\n    'Embarked' : {\n       'S' : 1,\n       'Q' : 1,\n       'C' : 2\n    }\n}", 'dataProc.AllData.replace(embarkedEncoding, inpla...c.AllData.replace(embarkedEncoding, inplace=True)', "numCols = ['Fare', 'Pclass', 'Parch', 'CabinType_B_D_E', 'GenderAgeGroup', 'Embarked', 'Sex']", 'dataProc.PopulateFeatureColumns(numCols)\nsubmitDataProc.PopulateFeatureColumns(numCols)', 'dataProc.PopulateFeatureAndLabel(target)\nsubmitDataProc.X = submitDataProc.AllData[numCols]', ...], 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'OneHotEncoder': <class 'sklearn.preprocessing._encoders.OneHotEncoder'>, 'Out': {11:                     Survived      
             ...0.129032   124
male_(60, 1000]     0.105263    19, 25:      CabinType_B_D_E  Embarked      Fare  Gender...    3      0       2    1

[711 rows x 7 columns], 26: array(['CabinType_B_D_E', 'Embarked', 'Fare', 'G..., 'Parch',
       'Pclass', 'Sex'], dtype=object), 27:      CabinType_B_D_E  Embarked      Fare  Gender...    2      1       3    1

[418 rows x 7 columns], 28: BlendEnsemble(array_check=None, backend=None,
  ...uffle=False,
       test_size=0.5, verbose=False)}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, ...}
   3297             finally:
   3298                 # Reset our crash handler in place
   3299                 sys.excepthook = old_excepthook
   3300         except SystemExit as e:

...........................................................................
C:\WIP\Titanic\<ipython-input-30-9587da1d2955> in <module>()
      1 ensemble.fit(dataProc.X_train.reindex(columns=sortedCols), dataProc.y_train)
----> 2 y_test_stacking_pred = ensemble.predict(dataProc.X_test)
      3 print('Stacking Random Forest and XGB %.3f ' % accuracy_score(dataProc.y_test, y_test_stacking_pred))

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\ensemble\base.py in predict(self=BlendEnsemble(array_check=None, backend=None,
  ...uffle=False,
       test_size=0.5, verbose=False), X=       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns], **kwargs={})
    605             predictions for provided input array.
    606         """
    607         if not check_ensemble_build(self._backend):
    608             # No layers instantiated, but raise_on_exception is False
    609             return
--> 610         return self._backend.predict(X, **kwargs)
        self._backend.predict = <bound method Sequential.predict of Sequential(b...rmers=[])],
   verbose=0)],
      verbose=False)>
        X =        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]
        kwargs = {}
    611 
    612     def predict_proba(self, X, **kwargs):
    613         """Predict class probabilities with fitted ensemble.
    614 

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\ensemble\base.py in predict(self=Sequential(backend='threading', dtype=<class 'nu...ormers=[])],
   verbose=0)],
      verbose=False), X=       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns], **kwargs={})
    201         if not self.__fitted__:
    202             NotFittedError("Instance not fitted.")
    203 
    204         f, t0 = print_job(self, "Predicting")
    205 
--> 206         out = self._predict(X, 'predict', **kwargs)
        out = undefined
        self._predict = <bound method Sequential._predict of Sequential(...rmers=[])],
   verbose=0)],
      verbose=False)>
        X =        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]
        kwargs = {}
    207 
    208         if self.verbose:
    209             print_time(t0, "{:<35}".format("Predict complete"),
    210                        file=f, flush=True)

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\ensemble\base.py in _predict(self=Sequential(backend='threading', dtype=<class 'nu...ormers=[])],
   verbose=0)],
      verbose=False), X=       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns], job='predict', **kwargs={})
    261             data.
    262         """
    263         r = kwargs.pop('return_preds', True)
    264         with ParallelProcessing(self.backend, self.n_jobs,
    265                                 max(self.verbose - 4, 0)) as manager:
--> 266             out = manager.stack(self, job, X, return_preds=r, **kwargs)
        out = undefined
        manager.stack = <bound method ParallelProcessing.stack of <mlens.parallel.backend.ParallelProcessing object>>
        self = Sequential(backend='threading', dtype=<class 'nu...ormers=[])],
   verbose=0)],
      verbose=False)
        job = 'predict'
        X =        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]
        r = True
        kwargs = {}
    267 
    268         if not isinstance(out, list):
    269             out = [out]
    270         out = [p.squeeze() for p in out]

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\parallel\backend.py in stack(self=<mlens.parallel.backend.ParallelProcessing object>, caller=Sequential(backend='threading', dtype=<class 'nu...ormers=[])],
   verbose=0)],
      verbose=False), job='predict', X=       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns], y=None, path=None, return_preds=True, warm_start=False, split=True, **kwargs={})
    668             Prediction array(s).
    669         """
    670         out = self.initialize(
    671             job=job, X=X, y=y, path=path, warm_start=warm_start,
    672             return_preds=return_preds, split=split, stack=True)
--> 673         return self.process(caller=caller, out=out, **kwargs)
        self.process = <bound method ParallelProcessing.process of <mlens.parallel.backend.ParallelProcessing object>>
        caller = Sequential(backend='threading', dtype=<class 'nu...ormers=[])],
   verbose=0)],
      verbose=False)
        out = {}
        kwargs = {}
    674 
    675     def process(self, caller, out, **kwargs):
    676         """Process job.
    677 

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\parallel\backend.py in process(self=<mlens.parallel.backend.ParallelProcessing object>, caller=Sequential(backend='threading', dtype=<class 'nu...ormers=[])],
   verbose=0)],
      verbose=False), out=None, **kwargs={})
    713                       backend=self.backend) as parallel:
    714 
    715             for task in caller:
    716                 self.job.clear()
    717 
--> 718                 self._partial_process(task, parallel, **kwargs)
        self._partial_process = <bound method ParallelProcessing._partial_proces...lens.parallel.backend.ParallelProcessing object>>
        task = Layer(backend='threading', dtype=<class 'numpy.f..._exception=True, transformers=[])],
   verbose=0)
        parallel = Parallel(n_jobs=-1)
        kwargs = {}
    719 
    720                 if task.name in return_names:
    721                     out.append(self.get_preds(dtype=_dtype(task)))
    722 

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\parallel\backend.py in _partial_process(self=<mlens.parallel.backend.ParallelProcessing object>, task=Layer(backend='threading', dtype=<class 'numpy.f..._exception=True, transformers=[])],
   verbose=0), parallel=Parallel(n_jobs=-1), **kwargs={})
    734         task.setup(self.job.predict_in, self.job.targets, self.job.job)
    735 
    736         if not task.__no_output__:
    737             self._gen_prediction_array(task, self.job.job, self.__threading__)
    738 
--> 739         task(self.job.args(**kwargs), parallel=parallel)
        task = Layer(backend='threading', dtype=<class 'numpy.f..._exception=True, transformers=[])],
   verbose=0)
        self.job.args = <bound method Job.args of <mlens.parallel.backend.Job object>>
        kwargs = {}
        parallel = Parallel(n_jobs=-1)
    740 
    741         if not task.__no_output__ and getattr(task, 'n_feature_prop', 0):
    742             self._propagate_features(task)
    743 

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\parallel\layer.py in __call__(self=Layer(backend='threading', dtype=<class 'numpy.f..._exception=True, transformers=[])],
   verbose=0), args={'auxiliary': {'P': None, 'X':        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]}, 'dir': [], 'job': 'predict', 'main': {'P': array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]...0., 0.],
       [0., 0., 0., 0.]], dtype=float32), 'X':        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]}}, parallel=Parallel(n_jobs=-1))
    147         if self.verbose >= 2:
    148             safe_print(msg.format('Learners ...'), file=f, end=e2)
    149             t1 = time()
    150 
    151         parallel(delayed(sublearner, not _threading)()
--> 152                  for learner in self.learners
        self.learners = [Learner(attr='predict_proba', backend='threading...a=True,
    raise_on_exception=True, scorer=None), Learner(attr='predict_proba', backend='threading...a=True,
    raise_on_exception=True, scorer=None)]
    153                  for sublearner in learner(args, 'main'))
    154 
    155         if self.verbose >= 2:
    156             print_time(t1, 'done', file=f)

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object Layer.__call__.<locals>.<genexpr>>)
    788             if pre_dispatch == "all" or n_jobs == 1:
    789                 # The iterable was consumed all at once by the above for loop.
    790                 # No need to wait for async callbacks to trigger to
    791                 # consumption.
    792                 self._iterating = False
--> 793             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    794             # Make sure that we get a last message telling us we are done
    795             elapsed_time = time.time() - self._start_time
    796             self._print('Done %3i out of %3i | elapsed: %s finished',
    797                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Fri Jun 21 22:51:20 2019
PID: 8636Python 3.6.7: C:\ProgramData\Anaconda3\envs\tensorflowGPU\python.exe
...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\externals\joblib\parallel.py in __call__(self=<mlens.externals.joblib.parallel.BatchedCalls object>)
    130     def __init__(self, iterator_slice):
    131         self.items = list(iterator_slice)
    132         self._size = len(self.items)
    133 
    134     def __call__(self):
--> 135         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<mlens.parallel.learner.SubLearner object>, (), {})]
    136 
    137     def __len__(self):
    138         return self._size
    139 

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    130     def __init__(self, iterator_slice):
    131         self.items = list(iterator_slice)
    132         self._size = len(self.items)
    133 
    134     def __call__(self):
--> 135         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <mlens.parallel.learner.SubLearner object>
        args = ()
        kwargs = {}
    136 
    137     def __len__(self):
    138         return self._size
    139 

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\parallel\learner.py in __call__(self=<mlens.parallel.learner.SubLearner object>)
    119         else:
    120             self.processing_index = ''
    121 
    122     def __call__(self):
    123         """Launch job"""
--> 124         return getattr(self, self.job)()
        self = <mlens.parallel.learner.SubLearner object>
        self.job = 'predict'
    125 
    126     def fit(self, path=None):
    127         """Fit sub-learner"""
    128         if path is None:

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\parallel\learner.py in predict(self=<mlens.parallel.learner.SubLearner object>, path=[])
    154         if path is None:
    155             path = self.path
    156         t0 = time()
    157         transformers = self._load_preprocess(path)
    158 
--> 159         self._predict(transformers, False)
        self._predict = <bound method SubLearner._predict of <mlens.parallel.learner.SubLearner object>>
        transformers = None
    160         if self.verbose:
    161             msg = "{:<30} {}".format(self.name_index, "done")
    162             f = "stdout" if self.verbose < 10 - 3 else "stderr"
    163             print_time(t0, msg, file=f)

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\mlens\parallel\learner.py in _predict(self=<mlens.parallel.learner.SubLearner object>, transformers=None, score_preds=False)
    194         xtemp, ytemp = slice_array(self.in_array, self.targets, self.out_index)
    195         t0 = time()
    196 
    197         if transformers:
    198             xtemp, ytemp = transformers.transform(xtemp, ytemp)
--> 199         predictions = getattr(self.estimator, self.attr)(xtemp)
        predictions = undefined
        self.estimator = RandomizedSearchCV(cv=10, error_score='raise-dep...rain_score='warn', scoring='accuracy', verbose=1)
        self.attr = 'predict_proba'
        xtemp =        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]
    200 
    201         self.pred_time_ = time() - t0
    202 
    203         # Assign predictions to matrix

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\sklearn\utils\metaestimators.py in <lambda>(*args=(       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns],), **kwargs={})
    113                     break
    114             else:
    115                 attrgetter(self.delegate_names[-1])(obj)
    116 
    117         # lambda, but not partial, allows help() to work with update_wrapper
--> 118         out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
        args = (       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns],)
        kwargs = {}
    119         # update the docstring of the returned function
    120         update_wrapper(out, self.fn)
    121         return out
    122 

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\sklearn\model_selection\_search.py in predict_proba(self=RandomizedSearchCV(cv=10, error_score='raise-dep...rain_score='warn', scoring='accuracy', verbose=1), X=       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns])
    503             Must fulfill the input assumptions of the
    504             underlying estimator.
    505 
    506         """
    507         self._check_is_fitted('predict_proba')
--> 508         return self.best_estimator_.predict_proba(X)
        self.best_estimator_.predict_proba = <function Pipeline.predict_proba>
        X =        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]
    509 
    510     @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))
    511     def predict_log_proba(self, X):
    512         """Call predict_log_proba on the estimator with the best found parameters.

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\sklearn\utils\metaestimators.py in <lambda>(*args=(       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns],), **kwargs={})
    113                     break
    114             else:
    115                 attrgetter(self.delegate_names[-1])(obj)
    116 
    117         # lambda, but not partial, allows help() to work with update_wrapper
--> 118         out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
        args = (       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns],)
        kwargs = {}
    119         # update the docstring of the returned function
    120         update_wrapper(out, self.fn)
    121         return out
    122 

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\sklearn\pipeline.py in predict_proba(self=Pipeline(memory=None,
     steps=[('rf', RandomF...3213, verbose=0,
            warm_start=False))]), X=       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns])
    377         """
    378         Xt = X
    379         for name, transform in self.steps[:-1]:
    380             if transform is not None:
    381                 Xt = transform.transform(Xt)
--> 382         return self.steps[-1][-1].predict_proba(Xt)
        self.steps.predict_proba = undefined
        Xt =        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]
    383 
    384     @if_delegate_has_method(delegate='_final_estimator')
    385     def decision_function(self, X):
    386         """Apply transforms, and decision_function of the final estimator

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\sklearn\ensemble\forest.py in predict_proba(self=RandomForestClassifier(bootstrap=True, class_wei...e=83213, verbose=0,
            warm_start=False), X=       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns])
    578             The class probabilities of the input samples. The order of the
    579             classes corresponds to that in the attribute `classes_`.
    580         """
    581         check_is_fitted(self, 'estimators_')
    582         # Check data
--> 583         X = self._validate_X_predict(X)
        X =        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]
        self._validate_X_predict = <bound method BaseForest._validate_X_predict of ...=83213, verbose=0,
            warm_start=False)>
    584 
    585         # Assign chunk of trees to jobs
    586         n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)
    587 

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\sklearn\ensemble\forest.py in _validate_X_predict(self=RandomForestClassifier(bootstrap=True, class_wei...e=83213, verbose=0,
            warm_start=False), X=       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns])
    357         """Validate X whenever one tries to predict, apply, predict_proba"""
    358         if self.estimators_ is None or len(self.estimators_) == 0:
    359             raise NotFittedError("Estimator not fitted, "
    360                                  "call `fit` before exploiting the model.")
    361 
--> 362         return self.estimators_[0]._validate_X_predict(X, check_input=True)
        self.estimators_._validate_X_predict = undefined
        X =        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]
    363 
    364     @property
    365     def feature_importances_(self):
    366         """Return the feature importances (the higher, the more important the

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\sklearn\tree\tree.py in _validate_X_predict(self=DecisionTreeClassifier(class_weight=None, criter...         random_state=516035225, splitter='best'), X=       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns], check_input=True)
    372         return self
    373 
    374     def _validate_X_predict(self, X, check_input):
    375         """Validate X whenever one tries to predict, apply, predict_proba"""
    376         if check_input:
--> 377             X = check_array(X, dtype=DTYPE, accept_sparse="csr")
        X =        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]
    378             if issparse(X) and (X.indices.dtype != np.intc or
    379                                 X.indptr.dtype != np.intc):
    380                 raise ValueError("No support for np.int64 index based "
    381                                  "sparse matrices")

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\sklearn\utils\validation.py in check_array(array=       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns], accept_sparse='csr', accept_large_sparse=True, dtype=<class 'numpy.float32'>, order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, ensure_min_samples=1, ensure_min_features=1, warn_on_dtype=False, estimator=None)
    522         # thereby passing the test made in the lines following the scope
    523         # of warnings context manager.
    524         with warnings.catch_warnings():
    525             try:
    526                 warnings.simplefilter('error', ComplexWarning)
--> 527                 array = np.asarray(array, dtype=dtype, order=order)
        array =        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]
        dtype = <class 'numpy.float32'>
        order = None
    528             except ComplexWarning:
    529                 raise ValueError("Complex data not supported\n"
    530                                  "{}\n".format(array))
    531 

...........................................................................
C:\ProgramData\Anaconda3\envs\tensorflowGPU\lib\site-packages\numpy\core\numeric.py in asarray(a=       Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns], dtype=<class 'numpy.float32'>, order=None)
    533     False
    534     >>> np.asanyarray(a) is a
    535     True
    536 
    537     """
--> 538     return array(a, dtype, copy=False, order=order)
        a =        Age     AgeGroup        Cabin CabinType  ... 0                4579  

[178 rows x 15 columns]
        dtype = <class 'numpy.float32'>
        order = None
    539 
    540 
    541 @set_module('numpy')
    542 def asanyarray(a, dtype=None, order=None):

ValueError: could not convert string to float: '(20, 60]'
___________________________________________________________________________

In [31]:
y_test_stacking_pred = ensemble.predict(dataProc.X_test.reindex(columns=sortedCols))
print('Stacking Random Forest and XGB %.3f ' % accuracy_score(dataProc.y_test, y_test_stacking_pred))

Stacking Random Forest and XGB 0.809 


In [32]:
submitDataProc.AllData[target] =  ensemble.predict(submitDataProc.X.reindex(columns=sortedCols))
submitDataProc.AllData[['PassengerId', 'Survived']].to_csv('Output/submit.txt', index=False)

FileNotFoundError: [Errno 2] No such file or directory: 'Output/submit.txt'