# TPOTWXYZ

A full example of a rich application using:
- [TPOT](https://github.com/EpistasisLab/tpot)
- [lime](https://github.com/marcotcr/lime)
- [yellowbrick](https://github.com/DistrictDataLabs/yellowbrick)
- lots of wxyz widgets

> ☠️ _This notebook accounts for a huge proportion of full test dependencies, but **doesn't always work**._

In [None]:
from wxyz.datagrid import SelectGrid
from wxyz.lab import DockBox

## Handle upstream API drift

> _A future release of [yellowbrick](https://github.com/DistrictDataLabs/yellowbrick/issues/1137) should fix this!_

In [None]:
import sklearn.utils

# Compatibility shim for libraries (yellowbrick, per the linked issue) that still
# look up ``sklearn.utils.safe_indexing``. The alias must point at the indexing
# *function*; binding the ``sklearn.utils`` module itself would make every call
# through the shim fail with "module object is not callable".
# NOTE(review): relies on the private ``_safe_indexing`` name still being exported.
if not hasattr(sklearn.utils, "safe_indexing") and hasattr(sklearn.utils, "_safe_indexing"):
    sklearn.utils.safe_indexing = sklearn.utils._safe_indexing

## The rest of the imports

In [None]:
import os, re, yellowbrick.features, tpot, IPython as I, ipywidgets as W, numpy as np, pandas as pd, traitlets as T, dask.distributed, multiprocessing, sklearn, warnings
from lime.lime_tabular import LimeTabularExplainer 
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

## Configure matplotlib

In [None]:
%config InlineBackend.figure_formats = ['svg']
%matplotlib inline

## Vanity CSS

In [None]:
%%html
<style>
.widget-hslider, .widget-hprogress, .widget-inline-hbox .widget-label{ width: auto;}
.jp-RenderedSVG img { max-width: 100%; max-height: 100%; }
</style>

## A master class 

In [None]:
class App(T.HasTraits):
    """Observable state shared by the widgets and callbacks in this notebook.

    Every trait other than ``dataset`` is an ``Instance`` trait that also
    accepts ``None``.
    """

    def _optional(klass):
        # Class-body helper only: an Instance trait that tolerates ``None``.
        return T.Instance(klass, allow_none=True)

    # whatever the selected dataset loader returned
    dataset = T.Any()
    # the distributed client
    dask = _optional(dask.distributed.Client)
    # feature table built from the dataset
    df = _optional(pd.DataFrame)
    # train/test feature arrays
    train = _optional(np.ndarray)
    test = _optional(np.ndarray)
    # train/test target arrays
    labels_train = _optional(np.ndarray)
    labels_test = _optional(np.ndarray)
    # the classifier being (or last) trained
    model = _optional(tpot.TPOTClassifier)
    # the tabular explainer for the current training data
    explainer = _optional(LimeTabularExplainer)

    # keep the helper out of the finished class namespace
    del _optional


app = App()

## The Dask

In [None]:
# Start a client with two workers, one thread each, and keep it on the app.
app.dask = dask.distributed.Client(n_workers=2, threads_per_worker=1)

# Remember which port the scheduler's dashboard service reports.
_scheduler_services = app.dask.scheduler_info()["services"]
daskboard_port = _scheduler_services["dashboard"]

If we're running on JupyterHub (e.g. MyBinder), we will want to use the `JUPYTERHUB_SERVICE_PREFIX`. Otherwise, There's No Place Like `8888` 👠👠...

In [None]:
# Editable base URL: the JupyterHub service prefix when the environment
# provides one, otherwise a local server on port 8888.
base_url = W.Text(
    value=os.environ.get("JUPYTERHUB_SERVICE_PREFIX", "http://127.0.0.1:8888/"),
    description="base_url",
)
base_url

In [None]:
def _dask_iframe_html(url):
    """Return the iframe markup embedding the dashboard task view under *url*."""
    return f"""
    <iframe src="{url}proxy/{daskboard_port}/tasks" width="100%" height="300" border="0" style="border: 0"></iframe>
"""


# Re-render the iframe whenever the base URL text changes.
dask_iframe = W.HTML()
T.dlink((base_url, "value"), (dask_iframe, "value"), _dask_iframe_html)

iframe_client = W.VBox([base_url, dask_iframe])

## Build grids

In [None]:
# One selectable grid per split: features and labels, train and test.
grid_train, grid_test, grid_labels_train, grid_labels_test = (
    SelectGrid(description=title)
    for title in ("Training Data", "Test Data", "Training Labels", "Test Labels")
)

## Link Grids

In [None]:
# Link each data grid to its label grid on the front end so that selection
# and vertical scroll position stay in step.
for trait_name in ("selections", "scroll_y"):
    for data_grid, label_grid in (
        (grid_train, grid_labels_train),
        (grid_test, grid_labels_test),
    ):
        W.jslink((data_grid, trait_name), (label_grid, trait_name))

In [None]:
def _flex_int_slider(default, upper, label):
    """Build an ``IntSlider`` ranging from 1 to *upper* with a ``flex: 1`` layout."""
    return W.IntSlider(default, min=1, max=upper, description=label, layout=dict(flex="1"))


correlation = W.Output()
# Read-only slider used as a score display.
fitness = W.FloatSlider(0, min=0, max=1, description="🎯 Score", disabled=True)

train_output = W.Output(description="Training Output")

# Hyperparameter controls read by the training callback.
generations = _flex_int_slider(10, 100, "👴 Generations")
population_size = _flex_int_slider(7, 100, "👶 Population")
cv = _flex_int_slider(2, 10, "❌ Cross Validation")
random_state = _flex_int_slider(42, 1e6, "🎰 Random")
verbosity = _flex_int_slider(2, 11, "📣 Verbosity")

## A dataset loader

In [None]:
@W.interact
def loader(
    dataset_name=W.SelectionSlider(
        # Offer only the loaders the installed scikit-learn actually provides;
        # a missing ``load_<name>`` would otherwise raise AttributeError as soon
        # as the slider lands on it.
        options=[
            name
            for name in ["iris", "breast_cancer", "boston", "diabetes", "wine"]
            if hasattr(sklearn.datasets, f"load_{name}")
        ],
        description="🗄 Dataset",
    ),
    test_size=W.FloatSlider(0.8, min=0.000001, max=0.99999, step=0.01, description="⚖ Train/Test"),
    algorithm=W.SelectionSlider(options=['pearson', 'covariance', 'spearman'], description="🌡️ Correlation"),
):
    """Load a dataset, split it, refresh the grids and explainer, and plot feature correlation.

    Parameters
    ----------
    dataset_name:
        Suffix of the ``sklearn.datasets.load_*`` function to call.
    test_size:
        Fraction of the samples held out for testing; the rest is used for training.
    algorithm:
        Ranking algorithm handed to yellowbrick's ``Rank2D``.

    Side effects: updates ``app`` (dataset, df, train/test arrays, explainer),
    the four grids, and renders the ``Rank2D`` figure.
    """
    app.dataset = getattr(sklearn.datasets, f"load_{dataset_name}")()
    app.df = pd.DataFrame(app.dataset.data, columns=app.dataset.feature_names)

    # Only ``test_size`` is passed: scikit-learn then uses the exact complement
    # for training. Supplying ``train_size=1 - test_size`` as well can silently
    # drop a sample, because the float product is floored (e.g. 0.8 on 150 rows
    # gives 29 training rows + 120 test rows).
    (
        app.train,
        app.test,
        app.labels_train,
        app.labels_test,
    ) = sklearn.model_selection.train_test_split(
        app.dataset.data,
        app.dataset.target,
        test_size=test_size,
    )

    grid_train.value = pd.DataFrame(app.train)
    grid_test.value = pd.DataFrame(app.test)
    grid_labels_train.value = pd.DataFrame(app.labels_train)
    grid_labels_test.value = pd.DataFrame(app.labels_test)

    app.explainer = LimeTabularExplainer(
        app.train,
        feature_names=app.dataset.feature_names,
        # not every loader's result carries ``target_names``
        class_names=getattr(app.dataset, "target_names", None),
        discretize_continuous=True,
    )

    visualizer = yellowbrick.features.Rank2D(features=app.dataset.feature_names, algorithm=algorithm)
    visualizer.fit(app.train, app.labels_train)
    visualizer.transform(app.train)
    # ``poof`` is yellowbrick's legacy name for ``show``; prefer the current
    # method and fall back only when it is absent.
    render = getattr(visualizer, "show", None) or visualizer.poof
    render()

## The Training Hyperparameters

In [None]:
# Full-width button that starts a training run.
btn_train = W.Button(
    description="🚂 Train",
    button_style="primary",
    layout={"width": "100%"},
)

def trainer(*args, **kwargs):
    """Build a TPOT classifier from the slider values, fit it, and show its score.

    Registered as the train button's click handler; any arguments it receives
    are ignored. Output produced while building and fitting is captured in
    ``train_output``, and ``fitness`` is updated as training progresses and
    again with the final test-set score.
    """
    train_output.clear_output()
    with train_output:
        hyperparameters = dict(
            generations=generations.value,
            population_size=population_size.value,
            cv=cv.value,
            n_jobs=-1,
            random_state=random_state.value,
            verbosity=verbosity.value,
            use_dask=True,
        )
        app.model = tpot.TPOTClassifier(**hyperparameters)

        # Wrap TPOT's periodic check so each call also publishes the current
        # best pipeline score to the slider.
        original_check = app.model._check_periodic_pipeline

        def check_and_report(gen):
            original_check(gen)
            fitness.value = app.model._optimized_pipeline_score

        app.model._check_periodic_pipeline = check_and_report

    train_output.clear_output()

    with train_output:
        with warnings.catch_warnings():
            fitness.value = 0
            # silence warnings for the duration of the fit and scoring
            warnings.simplefilter("ignore")
            app.model.fit(app.train, app.labels_train)
            held_out_score = app.model.score(app.test, app.labels_test)
            fitness.value = held_out_score
        

# Clicking the button runs a training pass.
btn_train.on_click(trainer)

# Button, score read-out, then the hyperparameter sliders, stacked vertically.
_train_controls = [
    btn_train,
    fitness,
    generations,
    population_size,
    cv,
    random_state,
    verbosity,
]
box_train = W.VBox(
    _train_controls,
    description="Parameters",
    layout=dict(display="flex"),
)

display(box_train)
display(train_output)

## A fancy explainer

In [None]:
explain_instance = W.IntSlider(0, min=0, max=100, description="🤔 'Splain")

# Keep the slider and the test grid's selection pointing at the same row.
# NOTE(review): assumes each selection is a 4-item sequence whose third item is
# the row index; confirm against SelectGrid.
W.dlink((grid_test, "selections"), (explain_instance, "value"), lambda ts: ts[0][2] if ts else 0)
W.dlink((explain_instance, "value"), (grid_test, "selections"), lambda i: [[1, 1, i, i]])

# The slider holds a row *index*, so its upper bound is the last valid row
# (row count - 1), never the row count itself.
W.dlink(
    (grid_test, "value"),
    (explain_instance, "max"),
    lambda frame: max(len(frame) - 1, 0),
)

# The interact UI is created inside a throwaway Output so it is not rendered here.
with W.Output():
    @W.interact
    def explain(instance=explain_instance):
        """Show the LIME explanation for test row *instance*, when one can be made.

        Does nothing until test data, an explainer, and a model exposing
        ``predict_proba`` are all available, or when *instance* is outside the
        current test set.
        """
        row_count = 0 if app.test is None else app.test.shape[0]
        if row_count:
            # ``explain_instance`` is the slider driving this function, so it is
            # adjusted directly rather than being looked up through
            # ``explain.widget``, which is not bound yet on the very first call.
            explain_instance.max = row_count - 1

        if not (app.explainer and app.model and hasattr(app.model, "predict_proba")):
            return
        if not 0 <= instance < row_count:
            # stale slider value for a smaller dataset; nothing to explain
            return

        explanation = app.explainer.explain_instance(app.test[instance], app.model.predict_proba)
        explanation.show_in_notebook()

## A dock layout

In [None]:
# Dock arrangement: four side-by-side columns, each a vertical stack of
# single-widget tab areas.
# NOTE(review): the ``widgets`` numbers presumably index into the DockBox's
# ``children`` list; confirm against DockBox.
dock_layout = {
    "type": "split-area",
    "orientation": "horizontal",
    "children": [
        {
            "type": "split-area",
            "orientation": "vertical",
            "children": [
                {"type": "tab-area", "widgets": [0], "currentIndex": 0},
                {"type": "tab-area", "widgets": [5], "currentIndex": 0},
            ],
            "sizes": [1, 1],
        },
        {
            "type": "split-area",
            "orientation": "vertical",
            "children": [
                {"type": "tab-area", "widgets": [3], "currentIndex": 0},
                {"type": "tab-area", "widgets": [1], "currentIndex": 0},
            ],
            "sizes": [1, 1],
        },
        {
            "type": "split-area",
            "orientation": "vertical",
            "children": [
                {"type": "tab-area", "widgets": [4], "currentIndex": 0},
                {"type": "tab-area", "widgets": [2], "currentIndex": 0},
            ],
            "sizes": [1, 1],
        },
        {
            "type": "split-area",
            "orientation": "vertical",
            "children": [
                {"type": "tab-area", "widgets": [7], "currentIndex": 0},
                {"type": "tab-area", "widgets": [6], "currentIndex": 0},
                {"type": "tab-area", "widgets": [8], "currentIndex": 0},
            ],
            "sizes": [4, 1, 5],
        },
    ],
    "sizes": [2, 2, 1, 4],
}

## The Dock Panel

In [None]:
# Children in the order the dock layout's widget numbers expect.
_dock_children = [
    loader.widget,
    grid_test,
    grid_labels_test,
    grid_train,
    grid_labels_train,
    box_train,
    train_output,
    iframe_client,
    explain.widget,
]

pg = DockBox(
    layout=dict(height="90vh"),
    dock_layout=dock_layout,
    children=_dock_children,
)
train_output.description = "Training Output"

## We made it

In [None]:
pg

In [None]:
if __name__ == "__main__":
    import importnb

    # With the notebook importer active, load the shared notebook utilities
    # and let them log widget counts.
    with importnb.Notebook():
        from wxyz.notebooks import Utils

        Utils.maybe_log_widget_counts()