In [None]:
from pathlib import Path

import pandas as pd

import joblib
import category_encoders
from sklearn.manifold import TSNE

import plotly.express as px

from jupyter_dash import JupyterDash

import dash_table as dt
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, ALL

from tqdm.auto import tqdm

# Parameters

In [None]:
# Path of the benchmark results table. NOTE(review): `snakemake` is not defined
# anywhere in this notebook; presumably it is injected by the Snakemake rule
# that executes the notebook.
fname_in = snakemake.input.fname

# Names of the models and datasets covered by the dashboard, taken from
# `snakemake.params`.
model_list, dataset_list = (
    snakemake.params.model_list,
    snakemake.params.dataset_list,
)

# Base directory for the relative 'resources/...' and 'results/...' lookups
# performed by the helper functions (the current working directory).
notebook_wd = Path()

# Read data

In [None]:
# Load the benchmark results table; later cells read its 'model', 'dataset'
# and 'accuracy' columns.
df = pd.read_csv(fname_in)
# Last expression of the cell: displays the first rows as a quick sanity check.
df.head()

# Create Dashboard

## Helper functions

In [None]:
def load_dataset(dataset):
    """Read the CSV of one benchmark dataset into a DataFrame.

    The file is looked up at ``resources/datasets/<dataset>.csv`` relative to
    ``notebook_wd``.
    """
    csv_path = notebook_wd.joinpath('resources', 'datasets', f'{dataset}.csv')
    return pd.read_csv(csv_path)

In [None]:
def do_tsne(df):
    """Project the covariates of ``df`` onto two dimensions with t-SNE.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose response column(s) are prefixed with ``target__``; every
        other column is treated as a covariate. ``df`` itself is not modified.

    Returns
    -------
    tsne : sklearn.manifold.TSNE
        The estimator that was fitted on the covariates.
    df_proj : pandas.DataFrame
        One row per input row with columns ``TSNE_0`` and ``TSNE_1``, indexed
        by the target values.
    """
    # Keep covariates only (the negative lookahead drops the response) and
    # replace missing values by -1. ``fillna`` returns a new frame here instead
    # of mutating the freshly filtered frame in place.
    df_sub = df.filter(regex='^(?!target__)').fillna(-1)

    # Squeeze along the column axis only: a bare ``squeeze()`` would collapse a
    # single-row frame to a scalar, which cannot be used as an index below.
    target = df.filter(regex='^target__').squeeze(axis='columns')

    # Ordinal-encode with the encoder's default settings so that t-SNE receives
    # numeric input (presumably only string/categorical columns are touched).
    df_sub = category_encoders.OrdinalEncoder().fit_transform(df_sub)

    # Fixed random_state keeps the projection reproducible between runs.
    tsne = TSNE(n_components=2, random_state=0)
    proj = tsne.fit_transform(df_sub)

    return tsne, pd.DataFrame(proj, columns=['TSNE_0', 'TSNE_1'], index=target)

In [None]:
def load_model(model, dataset):
    """Load the persisted model for one (model, dataset) combination.

    The file is looked up at ``results/models/<model>__<dataset>.pkl`` relative
    to ``notebook_wd`` and deserialized with ``joblib.load``.
    """
    model_path = notebook_wd.joinpath('results', 'models', f'{model}__{dataset}.pkl')
    # NOTE(review): joblib.load unpickles the file, so it must only be pointed
    # at trusted artifacts.
    return joblib.load(model_path)

## Setup environment

### Precompute t-SNE projections

In [None]:
# Per-dataset cache of the base projection:
#   TSNE_CACHE[dataset] -> {'model': fitted TSNE estimator,
#                           'df_proj': 2-D projection,
#                           'df_data': table as loaded from disk}
TSNE_CACHE = {}

for dataset in tqdm(dataset_list):
    df_data = load_dataset(dataset)
    model, df_proj = do_tsne(df_data)

    TSNE_CACHE[dataset] = dict(model=model, df_proj=df_proj, df_data=df_data)

### Preload models

In [None]:
# Two-level lookup of the preloaded models: MODEL_CACHE[dataset][model].
MODEL_CACHE = {}

for dataset in tqdm(dataset_list):
    # Start every dataset with a fresh inner mapping and fill it model by model.
    models_of_dataset = MODEL_CACHE[dataset] = {}
    for model in tqdm(model_list, leave=False):
        models_of_dataset[model] = load_model(model, dataset)

## The actual App

In [None]:
# plotting
def update_figure():
    """Build the overview bar chart: accuracy per model, one facet per dataset.

    Reads the notebook-level results table ``df``.
    """
    bar_options = dict(
        x='model',
        y='accuracy',
        facet_col='dataset',
        barmode='group',
        title='Overall accuracy',
    )
    return px.bar(df, **bar_options)

# create app
app = JupyterDash(__name__)

# Preselect 'iris' when it is part of the benchmark; otherwise fall back to the
# first configured dataset (or to no selection if none is configured), so the
# dropdown never starts on a dataset that is not among its own options.
default_dataset = 'iris' if 'iris' in dataset_list else next(iter(dataset_list), None)

# Page structure, top to bottom: static overview chart, dataset selector,
# covariate inputs (filled by a callback), t-SNE projection, prediction table.
app.layout = html.Div([
    html.H1('Auto-ML Benchmark results'),
    dcc.Graph(figure=update_figure()),
    html.Hr(),
    dcc.Dropdown(
        id='dataset-selector',
        options=[{'label': name, 'value': name} for name in dataset_list],
        value=default_dataset
    ),
    html.Div(id='covariate-container'),
    dcc.Graph(id='projection-container'),
    dt.DataTable(
        id='prediction-container',
        columns=[
            {'name': 'Model', 'id': 'model'},
            {'name': 'Prediction', 'id': 'prediction'},
            {'name': 'Performance', 'id': 'performance'}
        ],
        sort_action='native'
    )
])

# plots with callbacks
@app.callback(
    Output('covariate-container', 'children'),
    [Input('dataset-selector', 'value')]
)
def assmeble_input_fields(dataset):
    """Create one input field per covariate of the selected dataset.

    Parameters
    ----------
    dataset : str or None
        Current value of the dataset dropdown.

    Returns
    -------
    list
        ``dcc.Input`` components with pattern-matching ids
        ``{'type': 'covariate', 'name': <column>}``, one for every column that
        does not start with ``target__``. Empty when no dataset is selected.
    """
    # NOTE(review): dcc.Dropdown is clearable unless configured otherwise, in
    # which case the callback receives None; show no inputs then instead of
    # failing on a lookup for a non-existent dataset.
    if dataset is None:
        return []

    # Reuse the table that was loaded while precomputing the projections
    # instead of re-reading the CSV on every selection change.
    df_data = TSNE_CACHE[dataset]['df_data']

    return [
        dcc.Input(
            id={'type': 'covariate', 'name': col},
            placeholder=col,
            debounce=True
        )
        for col in df_data.columns
        if not col.startswith('target__')
    ]

@app.callback(
    Output('projection-container', 'figure'),
    [
        Input('dataset-selector', 'value'),
        Input({'type': 'covariate', 'name': ALL}, 'value')
    ]
)
def plot_tsne_projection(dataset, *covariates):
    """Draw the t-SNE projection of a dataset, optionally with a manual point.

    Parameters
    ----------
    dataset : str or None
        Current value of the dataset dropdown.
    *covariates
        ``covariates[0]`` is the list of values of all covariate input fields.

    Returns
    -------
    A plotly scatter figure of ``TSNE_0`` vs ``TSNE_1`` colored by the target
    value, or an empty figure dict when no dataset is selected. If every
    covariate field is filled, the entered point is added with the target
    label ``'MANUAL'`` and the projection is recomputed.
    """
    # A cleared dropdown yields None; there is nothing to plot in that case.
    if dataset is None:
        return {}

    cached = TSNE_CACHE[dataset]
    df_proj = cached['df_proj']
    df_data = cached['df_data']

    values = covariates[0]
    if len(values) > 0 and None not in values:
        # when adding a new point, we do have to refit the projection
        cov_columns = df_data.filter(regex='^(?!target__)').columns
        target_column = df_data.filter(regex='^target__').columns[0]

        manual_row = {target_column: 'MANUAL'}
        for col, raw in zip(cov_columns, values):
            # NOTE(review): input fields presumably deliver text. Convert it
            # for numeric columns, otherwise one string would turn the whole
            # column into object dtype and the ordinal encoder in do_tsne
            # would treat every value as a category. Unparseable input
            # becomes NaN, which do_tsne replaces by -1.
            if pd.api.types.is_numeric_dtype(df_data[col]):
                raw = pd.to_numeric(raw, errors='coerce')
            manual_row[col] = raw

        # DataFrame.append was removed in pandas 2.0; concat is the
        # replacement. The cached frame is not modified, only the local name
        # is rebound.
        df_data = pd.concat(
            [df_data, pd.DataFrame([manual_row])],
            ignore_index=True
        )

        _, df_proj = do_tsne(df_data)

    return px.scatter(
        df_proj, x='TSNE_0', y='TSNE_1',
        color=df_proj.index,
        title='t-SNE projection'
    )

@app.callback(
    Output('prediction-container', 'data'),
    [
        Input('dataset-selector', 'value'),
        Input({'type': 'covariate', 'name': ALL}, 'value')
    ]
)
def plot_model_predictions(dataset, *covariates):
    """Fill the prediction table for the manually entered data point.

    Parameters
    ----------
    dataset : str or None
        Current value of the dataset dropdown.
    *covariates
        ``covariates[0]`` is the list of values of all covariate input fields.

    Returns
    -------
    list of dict
        One record per entry of ``model_list`` with the keys ``model``,
        ``prediction`` and ``performance``. Empty when no dataset is selected
        or not every covariate field is filled.

    Raises
    ------
    ValueError
        Raised by ``pd.to_numeric`` if an entered value is not numeric.
    AssertionError
        If the results table does not hold exactly one accuracy value for a
        (model, dataset) pair.
    """
    values = covariates[0]
    if dataset is None or len(values) == 0 or None in values:
        return []

    # assemble current input; the column names come from the table that was
    # loaded while precomputing the projections (no CSV re-read per callback)
    df_data = TSNE_CACHE[dataset]['df_data']
    cov_columns = df_data.filter(regex='^(?!target__)').columns
    df_cur = pd.DataFrame(
        dict(zip(cov_columns, values)), index=[0]
    ).apply(pd.to_numeric)

    # query the preloaded models
    rows = []
    for model in model_list:
        # model prediction
        clf = MODEL_CACHE[dataset][model]
        # NOTE(review): assumes predict() returns an array-like with one entry
        # per input row. A table cell has to hold a scalar, so take the single
        # prediction and convert numpy scalars to plain Python values.
        pred = clf.predict(df_cur)[0]
        if hasattr(pred, 'item'):
            pred = pred.item()

        # model performance over whole dataset
        accuracy = df.loc[
            (df['model'] == model) & (df['dataset'] == dataset), 'accuracy'
        ]
        assert accuracy.shape[0] == 1
        perf = accuracy.iloc[0]

        # assemble table
        rows.append({
            'model': model,
            'prediction': pred,
            'performance': perf
        })

    return rows

# run app
# Start the Dash server for the app defined above.
# NOTE(review): mode='jupyterlab' presumably displays the app inside
# JupyterLab rather than an external browser tab — confirm against JupyterDash.
app.run_server(mode='jupyterlab')