# DashboardX Demo

This is a demo of the DashboardX project for visualising the learning curves during hyperparameter optimisation.

We will use a random classification generated by the sklearn's dataset package to demonstrate this tool for the output of the __GridSearchCV__. The interactive graph is based on __Plotly__ library.

#### Import libraries

In [12]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from bokeh.layouts import column, row
from bokeh.models import CustomJS, Slider, Band, Select, RadioGroup, CheckboxGroup, HoverTool, RadioButtonGroup
from bokeh.plotting import ColumnDataSource, figure, output_file, show
from bokeh.io import output_notebook
output_notebook()

from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import roc_auc_score, roc_curve, recall_score, confusion_matrix

import time

#### Create dataset for classification

In [2]:
X, y = make_classification(n_samples=100000, n_features=50, n_informative=15, n_redundant=5,
                           n_classes=2, random_state=17)

# split train and test set 80-20%
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=17)

### Perform GridSearchCV to tune the model

In [20]:
# set grid parameters
grid_param = {
    'n_estimators': range(20, 201, 20),
    'criterion': ['gini', 'entropy'],
    'max_features': range(4, 11),
    'max_depth': range(4, 7),
    'n_jobs': [4]
}

skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=17)
scoring = ['accuracy', 'f1', 'precision', 'recall', 'roc_auc']

gs = GridSearchCV(RandomForestClassifier(random_state=17), grid_param, scoring=scoring, n_jobs=6,
                  cv=skf, refit='roc_auc', verbose=2, return_train_score=True)

In [21]:
%%time
t1 = time.time()
gs.fit(X, y)
t2 = time.time()

Fitting 3 folds for each of 6 candidates, totalling 18 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  18 out of  18 | elapsed:   51.8s finished


Wall time: 59.8 s


In [None]:
# print results and best parameters
print(f"Time for grid search was {round((t2-t1)/60, 1)} seconds.")
print()

print(f"Best AUC score was {round(100*gs.best_score_, 2)} %.")
print()

print('Best parameters:')
print(gs.best_params_)

In [None]:
results = pd.DataFrame(gs.cv_results_)
results.head()[['param_criterion', 'param_max_depth', 'param_max_features', 'param_n_estimators', 'mean_train_roc_auc']]

In [4]:
# save/load results (pre-run etc.)
#results.to_pickle('cv_results.pkl')
results = pd.read_pickle('cv_results.pkl')

In [5]:
test = results[['param_criterion', 'param_max_depth', 'param_max_features',
                'param_n_estimators', 'mean_train_roc_auc', 'std_train_roc_auc']]
test.to_dict(orient='list')

{'param_criterion': ['gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
  'gini',
 

In [None]:
#import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [None]:
# Linear scale
df_gridsearch = results
trace = go.Scatter3d(
    x=df_gridsearch['param_n_estimators'],
    y=df_gridsearch['param_max_features'],
    z=df_gridsearch['param_max_depth'],
    mode='markers', 
    marker=dict(
        size=df_gridsearch.mean_fit_time ** (1 / 3),
        color=df_gridsearch.mean_train_roc_auc,
        opacity=0.99,
        colorscale='Viridis',
        colorbar=dict(title = 'Test score'),
        line=dict(color='rgb(140, 140, 170)')
    ),
)

data = [trace]
layout = go.Layout(
    title='3D visualization of the grid search results',
    margin=dict(
        l=30,
        r=30,
        b=30,
        t=30
    ),
#     height=600,
#     width=960,
    scene = dict(
        xaxis = dict(
            title='param_n_estimators',
            nticks=10
        ),
        yaxis = dict(
            title='mean_train_roc_auc',
        ),
        zaxis = dict(
            title='param_max_depth',

        ),
    ),
 
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [6]:
results.columns

Index(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time',
       'param_criterion', 'param_max_depth', 'param_max_features',
       'param_n_estimators', 'param_n_jobs', 'params', 'split0_test_accuracy',
       'split1_test_accuracy', 'split2_test_accuracy', 'mean_test_accuracy',
       'std_test_accuracy', 'rank_test_accuracy', 'split0_train_accuracy',
       'split1_train_accuracy', 'split2_train_accuracy', 'mean_train_accuracy',
       'std_train_accuracy', 'split0_test_f1', 'split1_test_f1',
       'split2_test_f1', 'mean_test_f1', 'std_test_f1', 'rank_test_f1',
       'split0_train_f1', 'split1_train_f1', 'split2_train_f1',
       'mean_train_f1', 'std_train_f1', 'split0_test_precision',
       'split1_test_precision', 'split2_test_precision', 'mean_test_precision',
       'std_test_precision', 'rank_test_precision', 'split0_train_precision',
       'split1_train_precision', 'split2_train_precision',
       'mean_train_precision', 'std_train_precision', 'split0_

In [23]:
pd.DataFrame(gs1.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_n_jobs,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
0,11.386669,0.100669,0.353648,0.087794,gini,4,4,"{'criterion': 'gini', 'max_depth': 4, 'n_jobs'...",0.866713,0.867219,0.871059,0.86833,0.001941,5,0.871884,0.872621,0.871646,0.87205,0.000415
1,12.96494,0.055147,0.420443,0.012326,gini,5,4,"{'criterion': 'gini', 'max_depth': 5, 'n_jobs'...",0.886392,0.885099,0.891189,0.88756,0.00262,3,0.892629,0.890876,0.892481,0.891995,0.000794
2,13.955121,0.150355,0.423088,0.01273,gini,6,4,"{'criterion': 'gini', 'max_depth': 6, 'n_jobs'...",0.899562,0.902889,0.904539,0.90233,0.00207,1,0.907734,0.909115,0.9091,0.90865,0.000648
3,15.161724,0.055839,0.391178,0.061757,entropy,4,4,"{'criterion': 'entropy', 'max_depth': 4, 'n_jo...",0.865783,0.865989,0.868749,0.86684,0.001352,6,0.870714,0.869846,0.869351,0.86997,0.000563
4,18.72416,0.042106,0.388533,0.043045,entropy,5,4,"{'criterion': 'entropy', 'max_depth': 5, 'n_jo...",0.882732,0.883929,0.889239,0.8853,0.002828,4,0.890004,0.888551,0.891401,0.889985,0.001164
5,19.854448,0.14677,0.125536,0.000492,entropy,6,4,"{'criterion': 'entropy', 'max_depth': 6, 'n_jo...",0.899952,0.900159,0.903369,0.90116,0.001564,2,0.907914,0.905335,0.90688,0.90671,0.00106


In [7]:
test.columns

Index(['param_criterion', 'param_max_depth', 'param_max_features',
       'param_n_estimators', 'mean_train_roc_auc', 'std_train_roc_auc'],
      dtype='object')

In [29]:
results.columns

Index(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time',
       'param_criterion', 'param_max_depth', 'param_max_features',
       'param_n_estimators', 'param_n_jobs', 'params', 'split0_test_accuracy',
       'split1_test_accuracy', 'split2_test_accuracy', 'mean_test_accuracy',
       'std_test_accuracy', 'rank_test_accuracy', 'split0_train_accuracy',
       'split1_train_accuracy', 'split2_train_accuracy', 'mean_train_accuracy',
       'std_train_accuracy', 'split0_test_f1', 'split1_test_f1',
       'split2_test_f1', 'mean_test_f1', 'std_test_f1', 'rank_test_f1',
       'split0_train_f1', 'split1_train_f1', 'split2_train_f1',
       'mean_train_f1', 'std_train_f1', 'split0_test_precision',
       'split1_test_precision', 'split2_test_precision', 'mean_test_precision',
       'std_test_precision', 'rank_test_precision', 'split0_train_precision',
       'split1_train_precision', 'split2_train_precision',
       'mean_train_precision', 'std_train_precision', 'split0_

### Dashboard Design

In [17]:
#parameters
#check if results is dict(convert) or pandas
scoring = ['accuracy', 'f1', 'precision', 'recall', 'roc_auc'] # Default will be "score" or None
train_score = True
colors = ['b', 'm'] #None default
plot_width=600
plot_height=600

#==================================
#check scoring
if scoring is None:
    scoring = ['score']
df_plot = test.copy()
cols = results.columns.to_numpy()
to_remove = ['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'params']
params = list()
params_ = list()
for col in cols:
    #skip non-important columns
    if ('split' in col) or (col in to_remove):
        continue
    #get params
    if 'param_' in col:
        if len(results[col].unique())==1:
            continue
        else:
            params_.append(col)
            col.replace('param_', '')
            params.append(col)

for item in params_[1:]:
    df_plot = df_plot[df_plot[item]==df_plot[item].unique()[0]]

#set data sources and tools
tools = "pan,wheel_zoom,box_zoom,hover,reset,save"
source = ColumnDataSource(data=test.to_dict(orient='list'))
xaxis_param = params_[0]


for score in scoring:
    df_plot['mean_test_'+score+'_lower'] = df_plot['mean_test_'+score]-df_plot['std_test_'+score]
    df_plot['mean_test_'+score+'_upper'] = df_plot['mean_test_'+score]+df_plot['std_test_'+score] 
source_plot = ColumnDataSource(data=df_plot[params_[1:]].to_dict(orient='list'))

plot = figure(y_range=(0, 1), plot_width=plot_width, plot_height=plot_height, tools=tools,
              x_axis_label=xaxis_param, y_axis_label='Score Performance')
for score in scoring:
    #test lines
    plot.line(xaxis_param, 'mean_test_'+score, source=source_plot, line_width=3, line_alpha=0.8)
    #test bands
    
    
    if train_score:
        #TODO: add training scores


        
        

plot.line('param_n_estimators', 'mean_train_roc_auc', source=source_plot, line_width=3, line_alpha=0.8)
#TODO: Add more metrics plots based on scoring???


#TODO: Add std as filling???
df_plot['lower'] = df_plot['mean_train_roc_auc']-df_plot['std_train_roc_auc']
df_plot['upper'] = df_plot['mean_train_roc_auc']+df_plot['std_train_roc_auc']
source_plot_band = ColumnDataSource(data=df_plot[['param_n_estimators', 'lower',
                                                  'upper']].to_dict(orient='list'))
band = Band(base='param_n_estimators', lower='lower', upper='upper', source=source_plot_band, 
            level='underlay', fill_alpha=1.0, line_width=1, line_color='black')
plot.add_layout(band)


#TODO: for changing axis name based on selection of RadioGroup
change_xaxis = CustomJS(args=dict(x_axis=plot.xaxis[0]),
                             code="""
                             x_axis.axis_label = "TEST";
                             """)


#TODO: Checkbox for all metrics needed???
checkbox_group = CheckboxGroup(labels=['LABELS','mean_train_roc_auc','mean_train_accuracy'], active=[0, 1])

'''
criterion_dict = {0: 'gini', 1: 'entropy'}
max_depth_dict = {0: 4, 1: 5, 2: 6}
max_feat_dict = {0: 4, 1: 5, 2: 6, 3: 7, 4: 8, 5: 9, 6: 10}
'''

#TODO: Dynamic slider(select) population?
criterion_select = Select(options=["gini", "entropy"], value='gini', title="Criterion")
max_depth_select = Select(options=['4','5','6'], value='4', title="Max Depth")
max_feat_select = Select(options=['4','5','6','7','8','9','10'], value='4', title="Max Features")

#TODO: Build Radio button group for x-axis parameter
radio_button_group = RadioButtonGroup(labels=['n_estimators','LABELS','LABELS1','LABELS2','LABELS3'], active=0)

#TODO: Checkbox for metrics dynamic???


#TODO: Dynamic slider population here too?
callback = CustomJS(args=dict(source=source, source_plot=source_plot, criterion=criterion_select,
                              max_depth=max_depth_select, max_feat=max_feat_select, elements=[max_feat_select]),
                    code="""
    function getAllIndexes(arr, val) {
        var indexes = [], i = -1;
        while ((i = arr.indexOf(val, i+1)) != -1){
            indexes.push(i);
        }
        return indexes;
    }
    
    const data = source.data;
    var plot_data = source_plot.data;
    const S1 = criterion.value;
    const S2 = max_depth.value;
    const S2_int = parseInt(S2, 10);
    const S3 = max_feat.value;
    const S3_int = parseInt(S3, 10);
    
    var indexes = getAllIndexes(data['param_criterion'], S1);
    var indexes2 = getAllIndexes(data['param_max_depth'], S2_int);
    var indexes3 = getAllIndexes(data['param_max_features'], S3_int);
    
    var filteredArray = indexes.filter(value => indexes2.includes(value));
    filteredArray = filteredArray.filter(value => indexes3.includes(value));
    
    var x_res = [];
    var y_res = [];
    for(var i = 0; i < filteredArray.length; i++) {
        x_res.push(data['param_n_estimators'][filteredArray[i]]);
        y_res.push(data['mean_train_roc_auc'][filteredArray[i]]);
    }
    
    plot_data['param_n_estimators'] = x_res;
    plot_data['mean_train_roc_auc'] = y_res;
    
    console.log(plot_data); //TODO: Remove
    source_plot.change.emit();
""")


'''
    for (var i = 0; i < x.length; i++) {
        y[i] = B + A*Math.sin(k*x[i]+phi);
    } 

listo = ['param_criterion', 'param_max_depth', 'param_max_features']

[['param_criterion', 'param_max_depth', 'param_max_features',
                'param_n_estimators', 'mean_train_roc_auc']]
'''

criterion_select.js_on_change('value', callback)
criterion_select.js_on_change('value', change_xaxis)

max_depth_select.js_on_change('value', callback)
max_feat_select.js_on_change('value', callback)

layout = row(
    column(plot, radio_button_group),
    column(criterion_select, max_depth_select, max_feat_select,
           checkbox_group), #TODO: Dynamic population?
)

#TODO: Save option
output_file("dashboardX.html", title="DashboardX")

#fig.show()
show(layout)

In [None]:
plot.xaxis[0]

In [None]:
df_plot

In [None]:
test['param_max_depth'].unique()

In [None]:
params_ = list()
cols = results.columns.to_numpy()
for col in cols: # maybe merge with next loop
    if 'param_' in col:
        if len(results[col].unique())==1 or 'n_estimators' in col: #TODO: remove testing parameter and add to dropbox??
            continue
        else:
            params_.append(col)
            col.replace('param_', '')
            params.append(col)

df_plot = test.copy()
for item in params_:
    df_plot = df_plot[df_plot[item]==df_plot[item].unique()[0]]

tools = "pan,wheel_zoom,box_zoom,hover,reset,save"
source = ColumnDataSource(data=test.to_dict(orient='list'))
source_plot = ColumnDataSource(data=df_plot[['param_n_estimators', 'mean_train_roc_auc',
                                             'std_train_roc_auc']].to_dict(orient='list'))

plot = figure(y_range=(0, 1), plot_width=600, plot_height=600, tools=tools,
              x_axis_label='n_estimators', y_axis_label='Score Performance')
plot.line('param_n_estimators', 'mean_train_roc_auc', source=source_plot, line_width=3, line_alpha=0.8)
#TODO: Add more metrics plots based on scoring???


#TODO: Add std as filling???
df_plot['lower'] = df_plot['mean_train_roc_auc']-df_plot['std_train_roc_auc']
df_plot['upper'] = df_plot['mean_train_roc_auc']+df_plot['std_train_roc_auc']
source_plot_band = ColumnDataSource(data=df_plot[['param_n_estimators', 'lower',
                                                  'upper']].to_dict(orient='list'))
band = Band(base='param_n_estimators', lower='lower', upper='upper', source=source_plot_band, 
            level='underlay', fill_alpha=1.0, line_width=1, line_color='black')
plot.add_layout(band)


#TODO: for changing axis name based on selection of RadioGroup
change_xaxis = CustomJS(args=dict(x_axis=plot.xaxis[0]),
                             code="""
                             x_axis.axis_label = "TEST";
                             """)


#TODO: Checkbox for all metrics needed???
checkbox_group = CheckboxGroup(labels=['LABELS','mean_train_roc_auc','mean_train_accuracy'], active=[0, 1])

'''
criterion_dict = {0: 'gini', 1: 'entropy'}
max_depth_dict = {0: 4, 1: 5, 2: 6}
max_feat_dict = {0: 4, 1: 5, 2: 6, 3: 7, 4: 8, 5: 9, 6: 10}
'''

#TODO: Dynamic slider(select) population?
criterion_select = Select(options=["gini", "entropy"], value='gini', title="Criterion")
max_depth_select = Select(options=['4','5','6'], value='4', title="Max Depth")
max_feat_select = Select(options=['4','5','6','7','8','9','10'], value='4', title="Max Features")

#TODO: Build Radio button group for x-axis parameter
radio_button_group = RadioButtonGroup(labels=['n_estimators','LABELS','LABELS1','LABELS2','LABELS3'], active=0)

#TODO: Checkbox for metrics dynamic???


#TODO: Dynamic slider population here too?
callback = CustomJS(args=dict(source=source, source_plot=source_plot, criterion=criterion_select,
                              max_depth=max_depth_select, max_feat=max_feat_select, elements=[max_feat_select]),
                    code="""
    function getAllIndexes(arr, val) {
        var indexes = [], i = -1;
        while ((i = arr.indexOf(val, i+1)) != -1){
            indexes.push(i);
        }
        return indexes;
    }
    
    const data = source.data;
    var plot_data = source_plot.data;
    const S1 = criterion.value;
    const S2 = max_depth.value;
    const S2_int = parseInt(S2, 10);
    const S3 = max_feat.value;
    const S3_int = parseInt(S3, 10);
    
    var indexes = getAllIndexes(data['param_criterion'], S1);
    var indexes2 = getAllIndexes(data['param_max_depth'], S2_int);
    var indexes3 = getAllIndexes(data['param_max_features'], S3_int);
    
    var filteredArray = indexes.filter(value => indexes2.includes(value));
    filteredArray = filteredArray.filter(value => indexes3.includes(value));
    
    var x_res = [];
    var y_res = [];
    for(var i = 0; i < filteredArray.length; i++) {
        x_res.push(data['param_n_estimators'][filteredArray[i]]);
        y_res.push(data['mean_train_roc_auc'][filteredArray[i]]);
    }
    
    plot_data['param_n_estimators'] = x_res;
    plot_data['mean_train_roc_auc'] = y_res;
    
    console.log(plot_data); //TODO: Remove
    source_plot.change.emit();
""")


'''
    for (var i = 0; i < x.length; i++) {
        y[i] = B + A*Math.sin(k*x[i]+phi);
    } 

listo = ['param_criterion', 'param_max_depth', 'param_max_features']

[['param_criterion', 'param_max_depth', 'param_max_features',
                'param_n_estimators', 'mean_train_roc_auc']]
'''

criterion_select.js_on_change('value', callback)
criterion_select.js_on_change('value', change_xaxis)

max_depth_select.js_on_change('value', callback)
max_feat_select.js_on_change('value', callback)

layout = row(
    column(plot, radio_button_group),
    column(criterion_select, max_depth_select, max_feat_select,
           checkbox_group), #TODO: Dynamic population?
)

#TODO: Save option
output_file("dashboardX.html", title="DashboardX")

#fig.show()
show(layout)

In [None]:
listo = ['param_criterion', 'param_max_depth', 'param_max_features']

def fun(): 
    letters = listo
    print(letters)

fun()

In [None]:
df_plot = test.copy()

for item in listo:
    df_plot = df_plot[df_plot[item]==df_plot[item].unique()[0]]

df_plot

In [None]:
test[(test['param_criterion']=='gini') & (test['param_max_depth']==4) & (test['param_max_features']==4)]

In [None]:
def listToString(s):  
    
    # initialize an empty string 
    str1 = " " 
    
    # return string   
    return (str1.join(s))

cols = results.columns.to_numpy()
listToString(cols)

In [None]:
sns.set_style('whitegrid')
fig = go.Figure()

if w==None:
    w = [None for i in range(len(models))]
theta_labels = ['Accuracy', 'Precision', 'Recall', 'Specificity',
                'F-Score', 'Accuracy']
color_pallete = ['peru', 'darkviolet', 'deepskyblue', 'black', 'yellow'
                 'red', 'green', 'blue']

for cutoff in np.arange(0.2, 0.825, 0.025):
    for i, model in enumerate(models):
        metrics = [0, 0, 0, 0, 0, 0]
        # calculate gini
        y_pred_proba = clf[i].predict_proba(X_test[i])[::, 1]
        fpr, tpr, _ = roc_curve(y_test[i], y_pred_proba, sample_weight=w[i])
        AUC = roc_auc_score(y_test[i], y_pred_proba, sample_weight=w[i])
        Gini = (2*AUC-1)*100

        # calculate other metrics based on cut-off point
        y_pred = np.where(y_pred_proba < cutoff, 0, 1)
        cm = confusion_matrix(y_test[i], y_pred)
        tn, fp, fn, tp = confusion_matrix(y_test[i], y_pred).ravel()
        print(cm)
        print(len(tn),len(fp),len(fn),len(tp))
        total = np.sum(cm)
        metrics[0] = (tp+tn)/total*100 # Accuracy
        metrics[1] = tp/(tp+fp)*100 # Precision
        metrics[2] = tp/(tp+fn)*100 # Recall/Sensitivity/TPR
        metrics[3] = tn/(tn+fp)*100 # Specificity
        metrics[4] = 2*metrics[1]*metrics[2]/(metrics[1]+metrics[2]) # F-Score
        metrics[5] = metrics[0]

        # add radar plots
        fig.add_trace(go.Scatterpolar(r=metrics, theta=theta_labels,
                                      mode='lines', visible=False,
                                      line_color=color_pallete[i],
                                      name=model+'(Gini:'+str(round(Gini, 1))+'%)'))

# make one step visible (n models in total)
for j in range(len(models)):
    fig.data[12*len(models)+j].visible = True

# Create and add slider
cutoffs = []
for i in range(int(len(fig.data)/len(models))):
    cutoff = dict(method='restyle', args=['visible', [False]*len(fig.data)],
                  label=str(20+i*2.5)+'%',)
    for j in range(len(models)):
        cutoff['args'][1][i*len(models)+j] = True # Toggle (i*n*j)'th trace to "visible"
    cutoffs.append(cutoff)

sliders = [dict(active=12, currentvalue={'prefix': 'Cut-off: '},
                pad={'t': 50}, steps=cutoffs)]

fig.update_layout(sliders=sliders, showlegend=True,
                  title='Model Comparison on Different Cut-offs',
                  polar=dict(radialaxis_angle=30, radialaxis_range=[20, 100],
                             angularaxis=dict(direction='clockwise', period=5)))

fig.show()

In [None]:
def metrics_slider_plot(X_test, y_test, clf, models, w=None):
    '''
    Interactive plot for metrics and Gini with slider for different
    cut-off points. Works with Plotly and Javascript (in Jupyter
    Notebooks).
    Parameters
    ----------
    X_test: list of all X_test datasets for prediction.
    
    y_test: list of all y_test datasets.
    
    clf: list of trained  classifiers.
    
    models: list of strings with a description about the model.
    
    w: list of sample weights, if applicable to the model.
    For mixed models, use [w1, None, w3, ...].
    '''
    sns.set_style('whitegrid')
    fig = go.Figure()

    if w==None:
        w = [None for i in range(len(models))]
    theta_labels = ['Accuracy', 'Precision', 'Recall', 'Specificity',
                    'F-Score', 'Accuracy']
    color_pallete = ['peru', 'darkviolet', 'deepskyblue', 'black', 'yellow'
                     'red', 'green', 'blue']
    
    for cutoff in np.arange(0.2, 0.825, 0.025):
        for i, model in enumerate(models):
            metrics = [0, 0, 0, 0, 0, 0]
            # calculate gini
            y_pred_proba = clf[i].predict_proba(X_test[i])[::, 1]
            fpr, tpr, _ = roc_curve(y_test[i], y_pred_proba, sample_weight=w[i])
            AUC = roc_auc_score(y_test[i], y_pred_proba, sample_weight=w[i])
            Gini = (2*AUC-1)*100

            # calculate other metrics based on cut-off point
            y_pred = np.where(y_pred_proba < cutoff, 0, 1)
            cm = confusion_matrix(y_test[i], y_pred)
            tn, fp, fn, tp = confusion_matrix(y_test[i], y_pred).ravel()
            total = np.sum(cm)
            metrics[0] = (tp+tn)/total*100 # Accuracy
            metrics[1] = tp/(tp+fp)*100 # Precision
            metrics[2] = tp/(tp+fn)*100 # Recall/Sensitivity/TPR
            metrics[3] = tn/(tn+fp)*100 # Specificity
            metrics[4] = 2*metrics[1]*metrics[2]/(metrics[1]+metrics[2]) # F-Score
            metrics[5] = metrics[0]

            # add radar plots
            fig.add_trace(go.Scatterpolar(r=metrics, theta=theta_labels,
                                          mode='lines', visible=False,
                                          line_color=color_pallete[i],
                                          name=model+'(Gini:'+str(round(Gini, 1))+'%)'))
    print(fig)
    #print(type(fig))
    
    # make one step visible (n models in total)
    for j in range(len(models)):
        fig.data[12*len(models)+j].visible = True
    
    # Create and add slider
    cutoffs = []
    for i in range(int(len(fig.data)/len(models))):
        cutoff = dict(method='restyle', args=['visible', [False]*len(fig.data)],
                      label=str(20+i*2.5)+'%',)
        for j in range(len(models)):
            cutoff['args'][1][i*len(models)+j] = True # Toggle (i*n*j)'th trace to "visible"
        cutoffs.append(cutoff)
    
    sliders = [dict(active=12, currentvalue={'prefix': 'Cut-off: '},
                    pad={'t': 50}, steps=cutoffs)]
    
    fig.update_layout(sliders=sliders, showlegend=True,
                      title='Model Comparison on Different Cut-offs',
                      polar=dict(radialaxis_angle=30, radialaxis_range=[20, 100],
                                 angularaxis=dict(direction='clockwise', period=5)))
    print('===================================')
    print(fig)
    
    fig.show()

In [None]:
metrics_slider_plot([X_test], [y_test], gs.best_estimator_, ['test'])

In [None]:
metrics_slider_plot([X_test], [y_test], gs.best_estimator_, ['test'])

In [None]:
import numpy as np

from bokeh.layouts import column, row
from bokeh.models import CustomJS, Slider
from bokeh.plotting import ColumnDataSource, figure, output_file, show
from bokeh.io import output_notebook

output_notebook()

x = np.linspace(0, 10, 500)
y = np.sin(x)

source = ColumnDataSource(data=dict(x=x, y=y))

plot = figure(y_range=(-10, 10), plot_width=600, plot_height=600)

plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

amp_slider = Slider(start=0.1, end=10, value=1, step=.1, title="Amplitude")
freq_slider = Slider(start=0.1, end=10, value=1, step=.1, title="Frequency")
phase_slider = Slider(start=0, end=6.4, value=0, step=.1, title="Phase")
offset_slider = Slider(start=-5, end=5, value=0, step=.1, title="Offset")

callback = CustomJS(args=dict(source=source, amp=amp_slider, freq=freq_slider, phase=phase_slider, offset=offset_slider),
                    code="""
    const data = source.data;
    const A = amp.value;
    const k = freq.value;
    const phi = phase.value;
    const B = offset.value;
    const x = data['x']
    const y = data['y']
    for (var i = 0; i < x.length; i++) {
        y[i] = B + A*Math.sin(k*x[i]+phi);
    }
    source.change.emit();
""")

amp_slider.js_on_change('value', callback)
freq_slider.js_on_change('value', callback)
phase_slider.js_on_change('value', callback)
offset_slider.js_on_change('value', callback)

layout = row(
    plot,
    column(amp_slider, freq_slider, phase_slider, offset_slider),
)

output_file("slider.html", title="slider.py example")

show(layout)

In [None]:
import numpy as np

from bokeh.layouts import column, row
from bokeh.models import CustomJS, Slider
from bokeh.plotting import ColumnDataSource, figure, output_file, show
from bokeh.io import output_notebook

output_notebook()

x = np.linspace(0, 10, 500)
y = np.sin(x)

source = ColumnDataSource(data=dict(x=x, y=y))

tools = "pan,wheel_zoom,box_zoom,reset,save"
plot = figure(y_range=(-10, 10), plot_width=600, plot_height=600, tools=tools)

plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

amp_slider = Slider(start=0.1, end=10, value=1, step=.1, title="Amplitude")
freq_slider = Slider(start=0.1, end=10, value=1, step=.1, title="Frequency")
phase_slider = Slider(start=0, end=6.4, value=0, step=.1, title="Phase")
offset_slider = Slider(start=-5, end=5, value=0, step=.1, title="Offset")

callback = CustomJS(args=dict(source=source, amp=amp_slider, freq=freq_slider, phase=phase_slider, offset=offset_slider),
                    code="""
    const data = source.data;
    const A = amp.value;
    const k = freq.value;
    const phi = phase.value;
    const B = offset.value;
    const x = data['x']
    const y = data['y']
    for (var i = 0; i < x.length; i++) {
        y[i] = B + A*Math.sin(k*x[i]+phi);
    }
    source.change.emit();
""")

amp_slider.js_on_change('value', callback)
freq_slider.js_on_change('value', callback)
phase_slider.js_on_change('value', callback)
offset_slider.js_on_change('value', callback)

layout = row(
    plot,
    column(amp_slider, freq_slider, phase_slider, offset_slider),
)

output_file("slider.html", title="slider.py example")

show(layout)