# Import

## Tools

In [None]:
# standard library and settings
import copy
import os
import pickle
import sys
import importlib
import itertools
from functools import reduce
import time

rundate = time.strftime("%Y%m%d")

import warnings

warnings.simplefilter("ignore")

# `display` and `HTML` are exposed by the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated
from IPython.display import display, HTML

# widen the notebook container to 95% of the browser width
display(HTML("<style>.container { width:95% !important; }</style>"))

# data extensions and settings
import numpy as np

np.set_printoptions(threshold=np.inf, suppress=True)

import pandas as pd

pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.options.display.float_format = "{:,.6f}".format

# modeling extensions
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.datasets import load_wine
from sklearn.ensemble import (
    RandomForestClassifier,
    GradientBoostingClassifier,
    AdaBoostClassifier,
    ExtraTreesClassifier,
    IsolationForest,
)
from sklearn.impute import SimpleImputer
from sklearn.linear_model import (
    Lasso,
    Ridge,
    ElasticNet,
    LinearRegression,
    LogisticRegression,
    SGDRegressor,
)
from sklearn.model_selection import (
    KFold,
    train_test_split,
    GridSearchCV,
    StratifiedKFold,
    cross_val_score,
    RandomizedSearchCV,
)
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.pipeline import make_pipeline, Pipeline, FeatureUnion
from sklearn.preprocessing import (
    StandardScaler,
    RobustScaler,
    PolynomialFeatures,
    OrdinalEncoder,
    LabelEncoder,
    OneHotEncoder,
    KBinsDiscretizer,
    QuantileTransformer,
    PowerTransformer,
    MinMaxScaler,
)
from sklearn.svm import SVC, SVR
from category_encoders import (
    WOEEncoder,
    TargetEncoder,
    CatBoostEncoder,
    BinaryEncoder,
    CountEncoder,
)

from lightgbm import LGBMClassifier, LGBMRegressor
from xgboost import XGBClassifier, XGBRegressor

from hyperopt import hp

import eif
import shap

shap.initjs()

# visualization extensions and settings
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import missingno as msno
import squarify

sys.path.append(f"{os.environ['REPOS']}/mlmachine")
sys.path.append(f"{os.environ['REPOS']}/prettierplot")

import mlmachine as mlm
import mlmachine.data as data
from mlmachine.features.preprocessing import (
    DataFrameSelector,
    PandasTransformer,
    KFoldEncoder,
    GroupbyImputer,
    PandasFeatureUnion,
    DualTransformer,
)
from prettierplot.plotter import PrettierPlot
import prettierplot.style as style

%load_ext autoreload
%autoreload 2

## Reload objects

In [None]:
# #

# experiment_path_root = "/data/t1-tpeterso/repos/wine-classification-demo/experiments/wine-classification"
# experiment = ""

# # reload objects
# machine = pickle.load(open(os.path.join(experiment_path_root, experiment, "machine", "machine.pkl"), 'rb'))
# impute_pipe = pickle.load(open(os.path.join(experiment_path_root, experiment, "transformers", "impute_pipe.pkl"), 'rb'))
# polynomial_pipe = pickle.load(open(os.path.join(experiment_path_root, experiment, "transformers", "polynomial_pipe.pkl"), 'rb'))
# encode_pipe = pickle.load(open(os.path.join(experiment_path_root, experiment, "transformers", "encode_pipe.pkl"), 'rb'))
# target_encode_pipe = pickle.load(open(os.path.join(experiment_path_root, experiment, "transformers", "target_encode_pipe.pkl"), 'rb'))
# skew_pipe = pickle.load(open(os.path.join(experiment_path_root, experiment, "transformers", "skew_pipe.pkl"), 'rb'))
# scale_pipe = pickle.load(open(os.path.join(experiment_path_root, experiment, "transformers", "scale_pipe.pkl"), 'rb'))
# fs = pickle.load(open(os.path.join(experiment_path_root, experiment, "feature_selection", "FeatureSelector.pkl"), 'rb'))


## Data

### Load & review dataset

In [None]:
# load the wine data and join the feature columns with the target column
# (named "target") on their shared row index
dataset = load_wine()
dataset = pd.DataFrame(dataset.data, columns=dataset.feature_names).merge(
    pd.Series(dataset.target, name="target"),
    left_index=True,
    right_index=True,
)


In [None]:
# print the column/dtype/non-null summary, then render the first 5 rows
dataset.info()
display(dataset.head(5))


In [None]:
# review counts of different column types
# (tallies how many columns share each pandas dtype)
dataset.dtypes.value_counts()


### Create machine object

In [None]:
# split the full dataset into training and validation DataFrames
df_train, df_valid = mlm.train_test_df_compile(data=dataset, target_col="target")

# every column other than the target is flagged as continuous; selecting by
# name (instead of slicing off the last column) keeps this correct regardless
# of where the target column sits in the DataFrame
continuous_columns = [col for col in df_train.columns if col != "target"]

# create the Machine object for this experiment
machine = mlm.Machine(
    experiment_name="wine_classification",
    training_dataset=df_train,
    validation_dataset=df_valid,
    target="target",
    identify_as_continuous=continuous_columns,
    is_classification=True,
    create_experiment_dir=True,
)


In [None]:
# review mlm dtypes
# (the feature-type groupings attached to the training features, e.g. the
# "continuous" list that later cells in this notebook read from it)
machine.training_features.mlm_dtypes


# EDA

## Category feature EDA

## Count feature EDA

## Continuous feature EDA

In [None]:
# run the per-feature EDA panel for every feature listed as "continuous",
# using the training data
for feature in machine.training_features.mlm_dtypes["continuous"]:
    machine.eda_cat_target_num_feat(
        feature=feature, training_data=True, chart_scale=12, outliers_out_of_scope=5
    )


In [None]:
# run mlmachine's `eda` routine and save the resulting plots
# NOTE(review): this cell was labelled "continuous features", but the call takes
# no feature argument — confirm which features `eda` actually covers
machine.eda(save_plots=True)


### Correlation

In [None]:
# correlation heat map of the recombined training data, with annotations
p = PrettierPlot()
ax = p.make_canvas()
p.corr_heatmap(df=machine.recombine_data(training_data=True), ax=ax, annot=True)


In [None]:
# correlation heat map of the features relative to the target (thresh=0.01),
# drawn on a tall canvas
p = PrettierPlot(chart_scale=10, plot_orientation="tall")
ax = p.make_canvas()
p.corr_heatmap_target(
    target=machine.training_target,
    df=machine.training_features,
    annot=True,
    thresh=0.01,
    ax=ax,
)


### Pair plot

In [None]:

# pair plot of all continuous features, with the training target passed along
# legend labels are left to the plotter's default: the previously hard-coded
# ["Died", "Survived"] pair does not describe the three wine classes
p = PrettierPlot(chart_scale=15)
p.pair_plot(
    df=machine.training_features,
    columns=machine.training_features.mlm_dtypes["continuous"],
    target=machine.training_target,
    diag_kind="auto",
)


In [None]:
# pair plot limited to the first five continuous features, KDE on the
# diagonal, rows with missing values dropped from the plotted frame
p = PrettierPlot(chart_scale=12)
p.pair_plot(
    df=machine.training_features.dropna(),
    columns=machine.training_features.mlm_dtypes["continuous"][:5],
    target=machine.training_target,
    diag_kind="kde",
    bbox=(2.0, 0.0),
)

## Faceting

## Target variable evaluation

In [None]:
# null score
# (relative frequency of each target class in the training data; the largest
# value is the accuracy of always predicting the most frequent class)
pd.Series(machine.training_target).value_counts(normalize=True)


# Data preparation

## Missing data

No missing data


### Training

In [None]:
# evaluate missing data
# NOTE(review): called with defaults — presumably this summarizes the training
# data; confirm against `eda_missing_summary`'s signature
machine.eda_missing_summary()


### Validation

In [None]:
# evaluate missing data in the validation set
# `training_data=False` points the summary at the validation data; with the
# default arguments this cell simply repeated the training summary
machine.eda_missing_summary(training_data=False)


### Training vs. validation

### Impute

## Feature engineering

### Handcrafted

### Polynomial features

In [None]:
# # transform pipe
# polynomial_pipe = PandasFeatureUnion([
#     ("polynomial", make_pipeline(
#         DataFrameSelector(include_mlm_dtypes=["continuous"]),
#         PandasTransformer(PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)),
#     )),
#     ("diff", make_pipeline(
#         DataFrameSelector(exclude_mlm_dtypes=["continuous"], exclude_columns=["Name","Cabin"]),
#     )),
# ])

# # fit & save objects
# polynomial_pipe.fit(machine.training_features)
# with open(os.path.join(machine.current_experiment_dir, "transformers", "polynomial_pipe.pkl"), 'wb') as handle:
#     pickle.dump(polynomial_pipe, handle, protocol=pickle.HIGHEST_PROTOCOL)

# # transform datasets
# machine.training_features = polynomial_pipe.fit_transform(machine.training_features)
# machine.validation_features = polynomial_pipe.transform(machine.validation_features)

# machine.update_dtypes()


### Encoding

#### Evaluate

#### Encode

In [None]:
# # encode pipeline
# encode_pipe = PandasFeatureUnion([
#     ("bin", make_pipeline(
#         DataFrameSelector(include_columns=machine.training_features.mlm_dtypes["continuous"]),
#         PandasTransformer(KBinsDiscretizer(encode="ordinal")),
#     )),
# #     ("diff", make_pipeline(
# #         DataFrameSelector(exclude_columns=machine.training_features.mlm_dtypes["nominal"] + list(ordinal_encodings.keys())),
# #     )),
# ])

# # fit & save objects
# encode_pipe.fit(machine.training_features)
# with open(os.path.join(machine.current_experiment_dir, "transformers", "encode_pipe.pkl"), 'wb') as handle:
#     pickle.dump(encode_pipe, handle, protocol=pickle.HIGHEST_PROTOCOL)

# # transform datasets
# machine.training_features = encode_pipe.fit_transform(machine.training_features)
# machine.validation_features = encode_pipe.transform(machine.validation_features)

# machine.update_dtypes()


## Feature transformation

### Skew correction

In [None]:
# evaluate skew of number features - training data
# NOTE(review): called with defaults — presumably the training data is the
# default target of `skew_summary`; confirm against its signature
machine.skew_summary()

In [None]:
# evaluate skew of number features - validation data
# `training_data=False` selects the validation data; with the default
# arguments this cell repeated the training-data summary
# NOTE(review): assumes `skew_summary` takes the same `training_data` switch as
# the other Machine methods used in this notebook — confirm
machine.skew_summary(training_data=False)

In [None]:
# skew correction pipeline, made of two branches:
#   "skew" - selects the "continuous" features and runs them through DualTransformer
#   "diff" - selects every feature that is not "continuous", with no further step
skew_pipe = PandasFeatureUnion(
    [
        (
            "skew",
            make_pipeline(
                DataFrameSelector(include_mlm_dtypes=["continuous"]),
                DualTransformer(),
            ),
        ),
        (
            "diff",
            make_pipeline(DataFrameSelector(exclude_mlm_dtypes=["continuous"])),
        ),
    ]
)

# # fit & save objects
# skew_pipe.fit(machine.training_features)
# with open(os.path.join(machine.current_experiment_dir, "transformers", "skew_pipe.pkl"), 'wb') as handle:
#     pickle.dump(skew_pipe, handle, protocol=pickle.HIGHEST_PROTOCOL)

# # transform datasets
# machine.training_features = skew_pipe.fit_transform(machine.training_features)
# machine.validation_features = skew_pipe.transform(machine.validation_features)

# machine.update_dtypes()


### Scaling

In [None]:
# scaling pipeline: RobustScaler applied to whatever DataFrameSelector() selects
scale_pipe = PandasFeatureUnion(
    [
        (
            "scale",
            make_pipeline(DataFrameSelector(), PandasTransformer(RobustScaler())),
        ),
    ]
)

# fit on the training features, then pickle the fitted pipeline into the
# experiment's "transformers" folder
scale_pipe.fit(machine.training_features)
with open(
    os.path.join(machine.current_experiment_dir, "transformers", "scale_pipe.pkl"),
    "wb",
) as handle:
    pickle.dump(scale_pipe, handle, protocol=pickle.HIGHEST_PROTOCOL)

# transform datasets: fit_transform on training, transform only on validation
machine.training_features = scale_pipe.fit_transform(machine.training_features)
machine.validation_features = scale_pipe.transform(machine.validation_features)

# refresh the dtype bookkeeping after replacing the feature sets
machine.update_dtypes()


## Outliers


In [None]:
# identify outliers using IQR over the continuous features; outliers are only
# recorded on the step (drop_outliers=False), not removed here
train_pipe = Pipeline(
    [
        (
            "outlier",
            machine.OutlierIQR(
                features=machine.training_features.mlm_dtypes["continuous"],
                outlier_count=5,
                iqr_step=1.5,
                drop_outliers=False,
            ),
        ),
    ]
)
machine.training_features = train_pipe.transform(machine.training_features)

# capture the recorded outliers as a sorted array
iqr_outliers = np.array(sorted(train_pipe.named_steps["outlier"].outliers))
print(iqr_outliers)

In [None]:
# identify outliers using Isolation Forest, sampling every training row per tree
clf = IsolationForest(
    contamination=0.01,
    random_state=0,
    max_samples=machine.training_features.shape[0],
    #     behaviour="new",
)
clf.fit(machine.training_features[machine.training_features.columns])
preds = clf.predict(machine.training_features[machine.training_features.columns])

# collect the index labels of the rows predicted as -1
mask = preds == -1
if_outliers = np.array(machine.training_features.index[mask])
print(if_outliers)

In [None]:
# identify outliers using extended isolation forest on the continuous features;
# each tree samples a quarter of the training rows (rounded up) and outliers
# are only recorded (drop_outliers=False)
train_pipe = Pipeline(
    [
        (
            "outlier",
            machine.ExtendedIsoForest(
                columns=machine.training_features.mlm_dtypes["continuous"],
                n_trees=100,
                sample_size=int(np.ceil(machine.training_features.shape[0] * 0.25)),
                extension_level=1,
                anomalies_ratio=0.03,
                drop_outliers=False,
            ),
        ),
    ]
)
machine.training_features = train_pipe.transform(machine.training_features)

# capture the recorded outliers as a sorted array
eif_outliers = np.array(sorted(train_pipe.named_steps["outlier"].outliers))
print(eif_outliers)

In [None]:
# keep only the observations flagged by all three detection methods
outliers = np.intersect1d(np.intersect1d(iqr_outliers, if_outliers), eif_outliers)
# outliers = reduce(np.intersect1d, (if_outliers, eif_outliers))
print(outliers)

In [None]:
# build the outlier identification summary from the three result arrays and
# show the rows whose "count" is at least 3
outlier_summary = machine.outlier_summary(
    iqr_outliers=iqr_outliers,
    if_outliers=if_outliers,
    eif_outliers=eif_outliers,
)
outlier_summary[outlier_summary["count"] >= 3]

In [None]:
# remove outliers from predictors and response
# (the index labels are hard-coded here and replace the computed `outliers`)
outliers = np.array([59, 121])
machine.training_features = machine.training_features.drop(index=outliers)
machine.training_target = machine.training_target.drop(index=outliers)
## Additional exploratory data analysis

In [None]:
# correlation heat map of the features relative to the target after data
# preparation, this time with thresh=0.7
p = PrettierPlot(chart_scale=10, plot_orientation="tall")
ax = p.make_canvas()
p.corr_heatmap_target(
    target=machine.training_target,
    df=machine.training_features,
    annot=True,
    thresh=0.7,
    ax=ax,
)

## Machine checkpoint

In [None]:
# checkpoint: pickle the Machine object into the experiment's "machine" folder
with open(
    os.path.join(machine.current_experiment_dir, "machine", "machine.pkl"),
    "wb",
) as handle:
    pickle.dump(machine, handle, protocol=pickle.HIGHEST_PROTOCOL)


# Feature selection

In [None]:
# estimator classes handed to the feature selector
estimators = [
    LGBMClassifier,
    RandomForestClassifier,
    GradientBoostingClassifier,
    XGBClassifier,
]

# build the feature selector on the prepared training/validation data
fs = machine.FeatureSelector(
    estimators=estimators,
    training_features=machine.training_features,
    training_target=machine.training_target,
    validation_features=machine.validation_features,
    validation_target=machine.validation_target,
    experiment_dir=machine.current_experiment_dir,
)

# generate the feature importance summary, scored on accuracy, saved to CSV
fs.feature_selector_suite(
    #     sequential_scoring=["accuracy","precision","recall","roc_auc"],
    sequential_scoring=["accuracy"],
    save_to_csv=True,
    verbose=True,
    n_jobs=4,
)


In [None]:
# calculate 5-fold cross-validation accuracy for each estimator, with step=1,
# and save the results to CSV
fs.run_cross_val(
    estimators=estimators,
    scoring=["accuracy"],
    step=1,
    n_folds=5,
    save_to_csv=True,
    n_jobs=2,
)


## Accuracy

In [None]:
# plot the accuracy results for the diminishing feature set and save the plots
fs.plot_results(scoring="accuracy", save_plots=True, title_scale=0.8)


In [None]:
# build the cross-validation features DataFrame for the "accuracy" scorer
# (the commented line below would display the `cross_val_features_df` attribute)
fs.create_cross_val_features_df(scoring="accuracy")
# fs.cross_val_features_df


In [None]:
# build the cross-validation features dictionary for the "accuracy" scorer and
# display it; it is later passed as `columns` to the Bayesian optimization search
fs.create_cross_val_features_dict(scoring="accuracy")
fs.cross_val_features_dict


In [None]:
# pickle the feature selector into the experiment's "feature_selection" folder
with open(
    os.path.join(
        machine.current_experiment_dir, "feature_selection", "FeatureSelector.pkl"
    ),
    "wb",
) as handle:
    pickle.dump(fs, handle, protocol=pickle.HIGHEST_PROTOCOL)


# Modeling

## Data preparation

In [None]:
#################################################################################
# import data
dataset = load_wine()
dataset = pd.merge(
    pd.DataFrame(dataset.data, columns=dataset.feature_names),
    pd.Series(dataset.target, name="target"),
    left_index=True,
    right_index=True,
)

# create training and validation datasets
df_train, df_valid = mlm.train_test_df_compile(data=dataset, target_col="target")

# rebuild the Machine object from the raw data with the same constructor
# arguments used in "Create machine object". This cell previously created
# separate `train` / `valid` objects with a different constructor signature and
# then transformed `machine` instead, so it could not run on its own.
# NOTE(review): create_experiment_dir=True is kept for parity with the first
# Machine; confirm a fresh experiment directory is wanted here.
machine = mlm.Machine(
    experiment_name="wine_classification",
    training_dataset=df_train,
    validation_dataset=df_valid,
    target="target",
    identify_as_continuous=[col for col in df_train.columns if col != "target"],
    is_classification=True,
    create_experiment_dir=True,
)

#################################################################################
# feature transformation pipeline
# polynomial pipeline: degree-2 polynomial terms (no bias column) for the
# continuous features; all other features are selected unchanged
polynomial_pipe = PandasFeatureUnion([
    ("polynomial", make_pipeline(
        DataFrameSelector(include_mlm_dtypes=["continuous"]),
        PandasTransformer(PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)),
    )),
    ("diff", make_pipeline(
        # the Titanic-specific exclude_columns=["Name", "Cabin"] was dropped:
        # those columns do not exist in the wine data
        DataFrameSelector(exclude_mlm_dtypes=["continuous"]),
    )),
])

machine.training_features = polynomial_pipe.fit_transform(machine.training_features)
machine.validation_features = polynomial_pipe.transform(machine.validation_features)

machine.update_dtypes()


# skew correction pipeline (defined but intentionally not applied below)
skew_pipe = PandasFeatureUnion([
    ("skew", make_pipeline(
        DataFrameSelector(include_mlm_dtypes=["continuous"]),
        DualTransformer(),
    )),
    ("diff", make_pipeline(
        DataFrameSelector(exclude_mlm_dtypes=["continuous"]),
    )),
])

# machine.training_features = skew_pipe.fit_transform(machine.training_features)
# machine.validation_features = skew_pipe.transform(machine.validation_features)

# machine.update_dtypes()
#

# scaling pipeline: RobustScaler applied to whatever DataFrameSelector() selects
scale_pipe = PandasFeatureUnion([
    ("scale", make_pipeline(
        DataFrameSelector(),
        PandasTransformer(RobustScaler())
    )),
])

machine.training_features = scale_pipe.fit_transform(machine.training_features)
machine.validation_features = scale_pipe.transform(machine.validation_features)

machine.update_dtypes()



#################################################################################
# remove outliers (same hard-coded index labels as in the "Outliers" section)
outliers = np.array([59, 121])
machine.training_features = machine.training_features.drop(index=outliers)
machine.training_target = machine.training_target.drop(index=outliers)

print("completed")

## Bayesian hyper-parameter optimization

In [None]:
# model/parameter space
# maps each estimator class name to the hyperopt search space for its parameters
estimator_parameter_space = {
    "LGBMClassifier": {
        "class_weight": hp.choice("class_weight", [None, "balanced"]),
        "colsample_bytree": hp.uniform("colsample_bytree", 0.5, 1.0),
        "boosting_type": hp.choice("boosting_type", ["gbdt", "dart", "goss"]),
        # "boosting_type": hp.choice("boosting_type"
        #                    ,[{"boosting_type": "gbdt", "subsample": hp.uniform("gdbt_subsample", 0.5, 1)}
        #                    ,{"boosting_type": "dart", "subsample": hp.uniform("dart_subsample", 0.5, 1)}
        #                    ,{"boosting_type": "goss", "subsample": 1.0}]),
        "learning_rate": hp.uniform("learning_rate", 0.01, 0.3),
        "max_depth": hp.choice("max_depth", np.arange(2, 20, dtype=int)),
        "min_child_samples": hp.uniform("min_child_samples", 20, 500),
        "n_estimators": hp.choice("n_estimators", np.arange(100, 10000, 1, dtype=int)),
        "num_leaves": hp.uniform("num_leaves", 8, 150),
        "reg_alpha": hp.uniform("reg_alpha", 0.0, 1.5),
        "reg_lambda": hp.uniform("reg_lambda", 0.0, 1.0),
        "subsample_for_bin": hp.uniform("subsample_for_bin", 20000, 400000),
    },
    #     "LogisticRegression": {
    #         "C": hp.loguniform("C", np.log(0.001), np.log(0.2)),
    #         "penalty": hp.choice("penalty", ["l1", "l2"]),
    #     },
    "XGBClassifier": {
        "colsample_bytree": hp.uniform("colsample_bytree", 0.5, 1.0),
        "gamma": hp.uniform("gamma", 0.0, 10),
        "learning_rate": hp.uniform("learning_rate", 0.01, 0.3),
        "max_depth": hp.choice("max_depth", np.arange(2, 20, dtype=int)),
        "min_child_weight": hp.uniform("min_child_weight", 1, 20),
        "n_estimators": hp.choice("n_estimators", np.arange(100, 10000, 1, dtype=int)),
        "subsample": hp.uniform("subsample", 0.3, 1),
    },
    "RandomForestClassifier": {
        "bootstrap": hp.choice("bootstrap", [True, False]),
        "max_depth": hp.choice("max_depth", np.arange(2, 20, dtype=int)),
        "n_estimators": hp.choice("n_estimators", np.arange(100, 10000, 1, dtype=int)),
        # "auto" was an alias of "sqrt" for this classifier and is rejected by
        # newer scikit-learn releases, so only "sqrt" is offered (this matches
        # the meta-model space later in the notebook)
        "max_features": hp.choice("max_features", ["sqrt"]),
        "min_samples_split": hp.choice(
            "min_samples_split", np.arange(2, 40, dtype=int)
        ),
        "min_samples_leaf": hp.choice("min_samples_leaf", np.arange(2, 40, dtype=int)),
    },
    "GradientBoostingClassifier": {
        "n_estimators": hp.choice("n_estimators", np.arange(100, 10000, 1, dtype=int)),
        "max_depth": hp.choice("max_depth", np.arange(2, 20, dtype=int)),
        # "auto" dropped for the same reason as in RandomForestClassifier
        "max_features": hp.choice("max_features", ["sqrt"]),
        "learning_rate": hp.uniform("learning_rate", 0.01, 0.3),
        # "loss" is deliberately not searched: "exponential" only supports
        # binary targets (the wine target has three classes), and the remaining
        # option is the estimator's default loss
        "min_samples_split": hp.choice(
            "min_samples_split", np.arange(2, 40, dtype=int)
        ),
        "min_samples_leaf": hp.choice("min_samples_leaf", np.arange(2, 40, dtype=int)),
    },
    "AdaBoostClassifier": {
        "n_estimators": hp.choice("n_estimators", np.arange(100, 10000, 1, dtype=int)),
        "learning_rate": hp.uniform("learning_rate", 0.01, 0.3),
        "algorithm": hp.choice("algorithm", ["SAMME", "SAMME.R"]),
    },
    "ExtraTreesClassifier": {
        "n_estimators": hp.choice("n_estimators", np.arange(100, 10000, 1, dtype=int)),
        "max_depth": hp.choice("max_depth", np.arange(2, 20, dtype=int)),
        "min_samples_split": hp.choice(
            "min_samples_split", np.arange(2, 40, dtype=int)
        ),
        "min_samples_leaf": hp.choice("min_samples_leaf", np.arange(2, 40, dtype=int)),
        # "auto" dropped for the same reason as in RandomForestClassifier
        "max_features": hp.choice("max_features", ["sqrt"]),
        "criterion": hp.choice("criterion", ["gini", "entropy"]),
    },
    "SVC": {
        "C": hp.uniform("C", 0.001, 15),
        "decision_function_shape": hp.choice("decision_function_shape", ["ovo", "ovr"]),
        "gamma": hp.uniform("gamma", 0.000000001, 5),
    },
    "KNeighborsClassifier": {
        "algorithm": hp.choice("algorithm", ["auto", "ball_tree", "kd_tree", "brute"]),
        "n_neighbors": hp.choice("n_neighbors", np.arange(1, 20, dtype=int)),
        "weights": hp.choice("weights", ["distance", "uniform"]),
    },
}

In [None]:
# run the Bayesian optimization search over every estimator in the parameter
# space: 125 iterations each, 5-fold CV scored on accuracy, restricted to the
# feature sets held in fs.cross_val_features_dict
machine.exec_bayes_optim_search(
    estimator_parameter_space=estimator_parameter_space,
    training_features=machine.training_features,
    training_target=machine.training_target,
    validation_features=machine.validation_features,
    validation_target=machine.validation_target,
    columns=fs.cross_val_features_dict,
    scoring="accuracy",
    iters=125,
    n_folds=5,
    n_jobs=2,
    show_progressbar=True,
)

# checkpoint the Machine object once the search has finished
with open(
    os.path.join(machine.current_experiment_dir, "machine", "machine.pkl"),
    "wb",
) as handle:
    pickle.dump(machine, handle, protocol=pickle.HIGHEST_PROTOCOL)


### Model loss by iteration

In [None]:
# one loss plot per distinct estimator in the optimization summary; plots are saved
for estimator in np.unique(machine.bayes_optim_summary["estimator"]):
    machine.model_loss_plot(
        estimator_class=estimator,
        bayes_optim_summary=machine.bayes_optim_summary,
        save_plots=True,
    )
    

### Parameter selection by iteration

In [None]:
# parameter plots for each distinct estimator in the optimization summary,
# using the search space defined above; plots are saved
for estimator in np.unique(machine.bayes_optim_summary["estimator"]):
    machine.model_param_plot(
        estimator_class=estimator,
        bayes_optim_summary=machine.bayes_optim_summary,
        estimator_parameter_space=estimator_parameter_space,
        #         chart_scale=15,
        title_scale=1.2,
        n_iter=1000,
        save_plots=True,
    )


In [None]:
# scratch cell: define a single hyperopt distribution and pass it to
# sample_plot together with the value 1000
sample_space = {
    "param": hp.uniform("param", np.log(0.4), np.log(0.6))
    #     "": 0.000001 + hp.uniform("gamma", 0.000001, 10)
    #             'param2': hp.loguniform('param2', np.log(0.001), np.log(0.01))
}

machine.sample_plot(sample_space, 1000)

## Model performance evaluation - standard models

In [None]:
# pick the top model per estimator (num_models=1) by "validation_score"
top_models = machine.top_bayes_optim_models(
    bayes_optim_summary=machine.bayes_optim_summary,
    num_models=1,
    metric="validation_score",
)
top_models


In [None]:
# classification panel, single model
estimator = "SVC"
model_iter = 66
# estimator = 'GradientBoostingClassifier'; model_iter = 590
# estimator = 'XGBClassifier'; model_iter = 380

# the optimization summary lives on the Machine object; the bare name
# `bayes_optim_summary` is never assigned in this notebook
model = machine.BayesOptimModelBuilder(
    bayes_optim_summary=machine.bayes_optim_summary,
    estimator=estimator,
    model_iter=model_iter,
)

# NOTE(review): a binary panel with labels=[0, 1] is used although the wine
# target has three classes — confirm this is the intended report
machine.binary_classification_panel(
    model=model,
    X_train=machine.training_features,
    y_train=machine.training_target,
    labels=[0, 1],
    n_folds=4,
)

In [None]:
# create classification reports for every selected top model
for estimator, model_iters in top_models.items():
    for model_iter in model_iters:
        # the optimization summary lives on the Machine object; the bare name
        # `bayes_optim_summary` is never assigned in this notebook
        model = machine.BayesOptimModelBuilder(
            bayes_optim_summary=machine.bayes_optim_summary,
            estimator=estimator,
            model_iter=model_iter,
        )
        # NOTE(review): binary panel with labels=[0, 1] on a three-class
        # target — confirm this is the intended report
        machine.binary_classification_panel(
            model=model,
            X_train=machine.training_features,
            y_train=machine.training_target,
            labels=[0, 1],
            n_folds=4,
        )

# Model explainability

In [None]:
# build and fit the model used by the explainability cells
estimator = "ExtraTreesClassifier"
model_iter = 145

# the optimization summary lives on the Machine object; the bare name
# `bayes_optim_summary` is never assigned in this notebook
modelE = machine.BayesOptimModelBuilder(
    bayes_optim_summary=machine.bayes_optim_summary,
    estimator=estimator,
    model_iter=model_iter,
)

# fit on the raw arrays of the prepared training data
modelE.fit(machine.training_features.values, machine.training_target.values)

## Permutation importance

In [None]:
# permutation importance - how much does performance decrease when shuffling a certain feature?
# NOTE(review): `eli5` and `PermutationImportance` are not imported in the
# notebook's import cell — they must be imported before this cell can run
# `modelE` is the only fitted explainability model in this notebook (the
# previous `modelR` was never defined); `feature_names` is derived here so the
# cell does not depend on a later cell having run
feature_names = machine.training_features.columns.tolist()
perm = PermutationImportance(modelE.model, random_state=1).fit(
    machine.training_features, machine.training_target
)
eli5.show_weights(perm, feature_names=feature_names)

## Partial dependence plots

In [None]:
# partial dependence plot for every training feature
# NOTE(review): `pdp` is not imported in the notebook's import cell — it must
# be imported before this cell can run
# `modelE` replaces the undefined `modelR`; `feature_names` is derived here so
# the cell does not depend on a later cell having run
feature_names = machine.training_features.columns.tolist()
for feature in feature_names:
    pdpFeature = pdp.pdp_isolate(
        model=modelE.model,
        dataset=machine.training_features,
        model_features=feature_names,
        feature=feature,
    )

    pdp.pdp_plot(pdpFeature, feature)
    plt.rcParams["axes.facecolor"] = "white"
    plt.rcParams["figure.facecolor"] = "white"

    # passed positionally: the keyword was renamed from `b` to `visible` in
    # matplotlib, and the positional form works under both names
    plt.grid(None)
    plt.show()

## SHAP

### Training

In [None]:
# SHAP visual for each of the first four training observations
# `modelE` is the only fitted explainability model in this notebook; the
# previously referenced `modelR` is never defined
for i in np.arange(0, 4):
    machine.single_shap_viz_tree(obsIx=i, model=modelE, data=machine.training_features)

In [None]:
# SHAP visual across all training observations
# `modelE` replaces the undefined `modelR`, and the observation range follows
# the size of the training data instead of a hard-coded 0-800 range
# NOTE(review): assumes `obs_ixs` are positional row numbers — confirm
visual = machine.multi_shap_viz_tree(
    obs_ixs=np.arange(0, machine.training_features.shape[0]),
    model=modelE,
    data=machine.training_features,
)
visual

In [None]:
# SHAP values for all training observations
# `modelE` replaces the undefined `modelR`, and the observation range follows
# the size of the training data instead of a hard-coded 0-800 range
# NOTE(review): assumes `obs_ixs` are positional row numbers — confirm
obs_data, _, obs_shap_values = machine.multi_shap_value_tree(
    obs_ixs=np.arange(0, machine.training_features.shape[0]),
    model=modelE,
    data=machine.training_features,
)

# dependence plot for one feature pair; the pair is taken from the training
# columns because the previous "Fare" / "Age" names do not exist in this data
feature_names = machine.training_features.columns.tolist()
machine.shap_dependence_plot(
    obs_data=obs_data,
    obs_shap_values=obs_shap_values,
    scatter_feature=feature_names[0],
    color_feature=feature_names[1],
    feature_names=feature_names,
)

In [None]:
# SHAP values for all training observations
# `modelE` replaces the undefined `modelL`, and the observation range follows
# the size of the training data instead of a hard-coded 0-800 range
# NOTE(review): assumes `obs_ixs` are positional row numbers — confirm
obs_data, _, obs_shap_values = machine.multi_shap_value_tree(
    obs_ixs=np.arange(0, machine.training_features.shape[0]),
    model=modelE,
    data=machine.training_features,
)
feature_names = machine.training_features.columns.tolist()

# feature positions ordered by total absolute SHAP value, largest first
top_shap = np.argsort(-np.sum(np.abs(obs_shap_values), 0))

# one dependence plot per feature, colored by the highest-ranked feature
# (the previous color feature "Age" does not exist in this data)
for top_ix in top_shap:
    machine.shap_dependence_plot(
        obs_data=obs_data,
        obs_shap_values=obs_shap_values,
        scatter_feature=feature_names[top_ix],
        color_feature=feature_names[top_shap[0]],
        feature_names=feature_names,
    )

In [None]:
# SHAP summary plot over all training observations
# `modelE` replaces the undefined `modelG`, and the observation range follows
# the size of the training data instead of a hard-coded 0-800 range
# NOTE(review): assumes `obs_ixs` are positional row numbers — confirm
obs_data, _, obs_shap_values = machine.multi_shap_value_tree(
    obs_ixs=np.arange(0, machine.training_features.shape[0]),
    model=modelE,
    data=machine.training_features,
)
feature_names = machine.training_features.columns.tolist()
machine.shap_summary_plot(
    obs_data=obs_data,
    obs_shap_values=obs_shap_values,
    feature_names=feature_names,
)

### Validation

# Save objects

In [None]:
# persist the Machine object
with open(os.path.join(machine.current_experiment_dir, "machine", "machine.pkl"), "wb") as handle:
    pickle.dump(machine, handle, protocol=pickle.HIGHEST_PROTOCOL)

# persist the transformer pipelines, one "<name>.pkl" file per pipeline.
# Several of these are only built in cells that are currently commented out,
# so a pipeline that is not defined in this session is skipped with a message
# rather than raising NameError after an empty file has been created.
for pipe_name in (
    "impute_pipe",
    "polynomial_pipe",
    "encode_pipe",
    "target_encode_pipe",
    # "skew_pipe",
    "scale_pipe",
):
    if pipe_name not in globals():
        print(f"skipping {pipe_name}: not defined in this session")
        continue
    with open(
        os.path.join(machine.current_experiment_dir, "transformers", f"{pipe_name}.pkl"),
        "wb",
    ) as handle:
        pickle.dump(globals()[pipe_name], handle, protocol=pickle.HIGHEST_PROTOCOL)

# persist the feature selector
with open(os.path.join(machine.current_experiment_dir, "feature_selection", "FeatureSelector.pkl"), "wb") as handle:
    pickle.dump(fs, handle, protocol=pickle.HIGHEST_PROTOCOL)


# Stacking

## Primary models

In [None]:
# get out-of-fold predictions from the selected top models
# the optimization summary lives on the Machine object; the bare name
# `bayes_optim_summary` is never assigned in this notebook
oof_train, oof_valid, columns = machine.model_stacker(
    models=top_models,
    bayes_optim_summary=machine.bayes_optim_summary,
    X_train=machine.training_features.values,
    y_train=machine.training_target.values,
    X_valid=machine.validation_features.values,
    n_folds=10,
    n_jobs=10,
)

In [None]:
# correlation heat map of the out-of-fold predictions, one column per entry in
# `columns`, with vmin=0
p = PrettierPlot()
ax = p.make_canvas()
p.corr_heatmap(
    df=pd.DataFrame(oof_train, columns=columns),
    ax=ax,
    vmin=0,
    annot=True,
)

## Meta model

In [None]:
# parameter space for the meta (second-level) models
# maps each estimator class name to the hyperopt search space for its parameters
estimator_parameter_space = {
    "LGBMClassifier": {
        "class_weight": hp.choice("class_weight", [None]),
        "colsample_bytree": hp.uniform("colsample_bytree", 0.4, 0.7),
        "boosting_type": hp.choice("boosting_type", ["dart"]),
        "subsample": hp.uniform("subsample", 0.5, 1),
        "learning_rate": hp.uniform("learning_rate", 0.15, 0.25),
        "max_depth": hp.choice("max_depth", np.arange(4, 20, dtype=int)),
        "min_child_samples": hp.quniform("min_child_samples", 50, 150, 5),
        "n_estimators": hp.choice("n_estimators", np.arange(100, 4000, 10, dtype=int)),
        "num_leaves": hp.quniform("num_leaves", 30, 70, 1),
        "reg_alpha": hp.uniform("reg_alpha", 0.75, 1.25),
        "reg_lambda": hp.uniform("reg_lambda", 0.0, 1.0),
        "subsample_for_bin": hp.quniform("subsample_for_bin", 100000, 350000, 20000),
    },
    "XGBClassifier": {
        "colsample_bytree": hp.uniform("colsample_bytree", 0.4, 0.7),
        "gamma": hp.quniform("gamma", 0.0, 10, 0.05),
        "learning_rate": hp.quniform("learning_rate", 0.01, 0.2, 0.01),
        "max_depth": hp.choice("max_depth", np.arange(2, 15, dtype=int)),
        "min_child_weight": hp.quniform("min_child_weight", 2.5, 7.5, 1),
        "n_estimators": hp.choice("n_estimators", np.arange(100, 4000, 10, dtype=int)),
        "subsample": hp.uniform("subsample", 0.4, 0.7),
    },
    "RandomForestClassifier": {
        "bootstrap": hp.choice("bootstrap", [True, False]),
        "max_depth": hp.choice("max_depth", np.arange(2, 10, dtype=int)),
        "n_estimators": hp.choice("n_estimators", np.arange(100, 8000, 10, dtype=int)),
        "max_features": hp.choice("max_features", ["sqrt"]),
        "min_samples_split": hp.choice(
            "min_samples_split", np.arange(15, 25, dtype=int)
        ),
        "min_samples_leaf": hp.choice("min_samples_leaf", np.arange(2, 20, dtype=int)),
    },
    "GradientBoostingClassifier": {
        "n_estimators": hp.choice("n_estimators", np.arange(100, 4000, 10, dtype=int)),
        "max_depth": hp.choice("max_depth", np.arange(2, 11, dtype=int)),
        "max_features": hp.choice("max_features", ["sqrt"]),
        "learning_rate": hp.quniform("learning_rate", 0.01, 0.09, 0.01),
        # "loss" is deliberately not searched: "exponential" only supports
        # binary targets (the wine target has three classes), and the remaining
        # option is the estimator's default loss
        "min_samples_split": hp.choice(
            "min_samples_split", np.arange(2, 40, dtype=int)
        ),
        "min_samples_leaf": hp.choice("min_samples_leaf", np.arange(2, 40, dtype=int)),
    },
    "SVC": {
        "C": hp.uniform("C", 0.00000001, 15),
        "decision_function_shape": hp.choice("decision_function_shape", ["ovr", "ovo"]),
        "gamma": hp.uniform("gamma", 0.00000001, 1.5),
    },
}

In [None]:
# execute bayesian optimization grid search for the meta models
# `analysis` labels the results file; it is set here because it was otherwise
# only assigned in the later cell that reads the results back, which left this
# cell failing with NameError on a top-to-bottom run
analysis = "wine"

# NOTE(review): the keyword names used here (X, y, results_dir, verbose) differ
# from the earlier exec_bayes_optim_search call in this notebook
# (training_features, training_target, ...) — confirm which signature the
# installed mlmachine version expects
machine.exec_bayes_optim_search(
    estimator_parameter_space=estimator_parameter_space,
    results_dir="{}_hyperopt_meta_{}.csv".format(rundate, analysis),
    X=oof_train,
    y=machine.training_target,
    scoring="accuracy",
    n_folds=8,
    n_jobs=10,
    iters=1000,
    verbose=0,
)

In [None]:
# load the meta-model scores summary written for a specific analysis / run date
# (both are pinned here, overriding any values set earlier) and preview 5 rows
analysis = "wine"
rundate = "20190807"
bayes_optim_summary_meta = pd.read_csv(
    "{}_hyperopt_meta_{}.csv".format(rundate, analysis)
)
bayes_optim_summary_meta.head(5)

In [None]:
# model loss plot for each distinct estimator in the meta-model summary
# the estimator is passed as `estimator_class`, the keyword used by the other
# model_loss_plot call in this notebook
for estimator in np.unique(bayes_optim_summary_meta["estimator"]):
    machine.model_loss_plot(
        bayes_optim_summary=bayes_optim_summary_meta,
        estimator_class=estimator,
    )

In [None]:
# estimator parameter plots for each distinct estimator in the meta-model summary
# uses the `model_param_plot` method name and `estimator_class` keyword from
# the other parameter-plot call in this notebook, in place of the older
# `modelParamPlot` / `estimator` spelling
for estimator in np.unique(bayes_optim_summary_meta["estimator"]):
    machine.model_param_plot(
        bayes_optim_summary=bayes_optim_summary_meta,
        estimator_class=estimator,
        estimator_parameter_space=estimator_parameter_space,
        n_iter=100,
        chart_scale=15,
    )

## Model performance evaluation - stacked models

In [None]:
# pick the top meta model per estimator (num_models=1) from the meta summary
top_models = machine.top_bayes_optim_models(
    num_models=1,
    bayes_optim_summary=bayes_optim_summary_meta,
)
top_models

In [None]:
# best second level learning model
estimator = "LGBMClassifier"
model_iter = 668
# estimator = "XGBClassifier"; model_iter = 380
# estimator = "RandomForestClassifier"; model_iter = 411
# estimator = "GradientBoostingClassifier"; model_iter = 590
# estimator = "SVC"; model_iter = 135

# build the model for the chosen estimator / iteration from the meta summary
model = machine.BayesOptimModelBuilder(
    model_iter=model_iter,
    estimator=estimator,
    bayes_optim_summary=bayes_optim_summary_meta,
)

# classification panel on the out-of-fold training predictions
machine.binary_classification_panel(
    model=model,
    X_train=oof_train,
    y_train=machine.training_target,
    labels=[0, 1],
)

In [None]:
# classification panel (4 folds) for every selected top meta model, evaluated
# on the out-of-fold training predictions
for estimator, model_iters in top_models.items():
    for model_iter in model_iters:
        model = machine.BayesOptimModelBuilder(
            model_iter=model_iter,
            estimator=estimator,
            bayes_optim_summary=bayes_optim_summary_meta,
        )
        machine.binary_classification_panel(
            model=model,
            X_train=oof_train,
            y_train=machine.training_target,
            n_folds=4,
            labels=[0, 1],
        )

## Validation set evaluation - stacked models

In [None]:
## fit the chosen meta model and predict on the validation set
# select estimator and iteration
estimator = "LGBMClassifier"
model_iter = 668
# estimator = "XGBClassifier"; model_iter = 380
# estimator = "RandomForestClassifier"; model_iter = 411
# estimator = "GradientBoostingClassifier"; model_iter = 590
# estimator = "SVC"; model_iter = 135

# build the model for the chosen estimator / iteration from the meta summary
model = machine.BayesOptimModelBuilder(
    model_iter=model_iter,
    estimator=estimator,
    bayes_optim_summary=bayes_optim_summary_meta,
)

# fit on the out-of-fold training predictions
model.fit(oof_train, machine.training_target.values)

# predict from the validation-set stacked predictions
y_pred = model.predict(oof_valid)

In [None]:
# classification panel for the fitted meta model, with both the out-of-fold
# training predictions and the validation predictions supplied
machine.binary_classification_panel(
    model=model,
    X_train=oof_train,
    X_valid=oof_valid,
    y_train=machine.training_target,
    y_valid=machine.validation_target,
    labels=[0, 1],
)

In [None]:
# classification panel with validation data for every selected top meta model
for estimator, model_iters in top_models.items():
    for model_iter in model_iters:
        model = machine.BayesOptimModelBuilder(
            model_iter=model_iter,
            estimator=estimator,
            bayes_optim_summary=bayes_optim_summary_meta,
        )
        machine.binary_classification_panel(
            model=model,
            X_train=oof_train,
            X_valid=oof_valid,
            y_train=machine.training_target,
            y_valid=machine.validation_target,
            labels=[0, 1],
        )