## Stacking: Implement a stacking model

Using the Titanic dataset from [this](https://www.kaggle.com/c/titanic/overview) Kaggle competition.

In this section, we will fit and evaluate a simple stacked model.

### Read in Data

In [2]:
import joblib
import pandas as pd
from sklearn.ensemble import StackingClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

In [3]:
tr_features = pd.read_csv('../Data/train_features.csv') 
tr_labels = pd.read_csv('../Data/train_labels.csv')

### Hyperparameter tuning

In [9]:
def print_results(results):
    print('BEST PARAMS: {}\n'.format(results.best_params_))

    means = results.cv_results_['mean_test_score']
    stds = results.cv_results_['std_test_score']
    
    for mean, std, params in zip(means, stds, results.cv_results_['params']):
        print('{} (+/-{}) for {}'.format(round(mean, 3), round(std * 2, 3), params))

In [10]:
estimators = [('gb', GradientBoostingClassifier()), ('rf', RandomForestClassifier())]

sc = StackingClassifier(estimators)
sc.get_params()

{'cv': None,
 'estimators': [('gb', GradientBoostingClassifier()),
  ('rf', RandomForestClassifier())],
 'final_estimator': None,
 'n_jobs': None,
 'passthrough': False,
 'stack_method': 'auto',
 'verbose': 0,
 'gb': GradientBoostingClassifier(),
 'rf': RandomForestClassifier(),
 'gb__ccp_alpha': 0.0,
 'gb__criterion': 'friedman_mse',
 'gb__init': None,
 'gb__learning_rate': 0.1,
 'gb__loss': 'deviance',
 'gb__max_depth': 3,
 'gb__max_features': None,
 'gb__max_leaf_nodes': None,
 'gb__min_impurity_decrease': 0.0,
 'gb__min_impurity_split': None,
 'gb__min_samples_leaf': 1,
 'gb__min_samples_split': 2,
 'gb__min_weight_fraction_leaf': 0.0,
 'gb__n_estimators': 100,
 'gb__n_iter_no_change': None,
 'gb__presort': 'deprecated',
 'gb__random_state': None,
 'gb__subsample': 1.0,
 'gb__tol': 0.0001,
 'gb__validation_fraction': 0.1,
 'gb__verbose': 0,
 'gb__warm_start': False,
 'rf__bootstrap': True,
 'rf__ccp_alpha': 0.0,
 'rf__class_weight': None,
 'rf__criterion': 'gini',
 'rf__max_depth':

In [11]:
parameters = {
    'gb__n_estimators': [50, 100],
    'rf__n_estimators': [50, 100],
    'final_estimator': [LogisticRegression(C=0.1),
                                LogisticRegression(C=1),
                                LogisticRegression(C=10)],
    'passthrough': [True, False]
}

cv = GridSearchCV(sc, parameters, cv=5)
cv.fit(tr_features, tr_labels.values.ravel())

print_results(cv)

Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


Traceback (most recent call last):
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 436, in fit
    return super().fit(X, self._le.transform(y), sample_weight)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_stacking.py", line 195, in fit
    _fit_single_estimator(self.final_estimator_, X_meta, y,
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\ensemble\_base.py", line 40, in _fit_single_estimator
    estimator.fit(X, y)
  File "C:\Users\Phone Thiri Yadana\.conda\envs\venv-datascience\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,


BEST PARAMS: {'final_estimator': LogisticRegression(C=10), 'gb__n_estimators': 100, 'passthrough': False, 'rf__n_estimators': 50}

nan (+/-nan) for {'final_estimator': LogisticRegression(C=0.1), 'gb__n_estimators': 50, 'passthrough': True, 'rf__n_estimators': 50}
nan (+/-nan) for {'final_estimator': LogisticRegression(C=0.1), 'gb__n_estimators': 50, 'passthrough': True, 'rf__n_estimators': 100}
0.824 (+/-0.064) for {'final_estimator': LogisticRegression(C=0.1), 'gb__n_estimators': 50, 'passthrough': False, 'rf__n_estimators': 50}
0.826 (+/-0.059) for {'final_estimator': LogisticRegression(C=0.1), 'gb__n_estimators': 50, 'passthrough': False, 'rf__n_estimators': 100}
nan (+/-nan) for {'final_estimator': LogisticRegression(C=0.1), 'gb__n_estimators': 100, 'passthrough': True, 'rf__n_estimators': 50}
nan (+/-nan) for {'final_estimator': LogisticRegression(C=0.1), 'gb__n_estimators': 100, 'passthrough': True, 'rf__n_estimators': 100}
0.82 (+/-0.065) for {'final_estimator': LogisticRegressi

In [12]:
cv.best_estimator_

StackingClassifier(estimators=[('gb', GradientBoostingClassifier()),
                               ('rf', RandomForestClassifier(n_estimators=50))],
                   final_estimator=LogisticRegression(C=10))

### Write out pickled model

In [18]:
joblib.dump(cv.best_estimator_, '../Pickled_Models/Stacked_model.pkl')

['../Pickled_Models/Stacked_model.pkl']