<img src="./qarnot_ligne.png" 
     width="30%" 
     align=right
     alt="Qarnot logo">
     

# MVP AutoML

## Add your Qarnot token

In [1]:
import os
import io
import pandas as pd
import ipywidgets as widgets
from tkinter import Tk, filedialog
from IPython.display import clear_output, display, HTML

In [2]:
# Masked input for the Qarnot API token: the value is typed at runtime and
# read later through `token.value`, so it is never written in a code cell.
token = widgets.Password(
    description='Qarnot token:',
    placeholder='Enter token',
    disabled=False,
)
display(token)

Password(description='Qarnot token:', placeholder='Enter token')

# Upload your data to binder 

In [3]:
# Upload widget restricted to a single CSV file; the form cell further down
# reads the uploaded content from `file.value`.
file = widgets.FileUpload(
    multiple=False,  # one file only
    accept='.csv',   # accepted extension / MIME filter, e.g. '.txt', 'image/*', 'image/*,.pdf'
)
display(file)

FileUpload(value={}, accept='.csv', description='Upload')

## Specify Autosklearn training parameters

* Only the first two fields, *Target Column* and *Positive Label*, are required, so make sure to upload your data file and re-run the cell below. The rest are optional and have default values.

* Multiple values can be selected, for the lists of estimators and preprocessors to include/exclude, with <kbd>shift</kbd> and/or <kbd>ctrl</kbd> (or <kbd>command</kbd>) pressed and mouse clicks or arrow keys.

    > Note that the include and exclude parameters are incompatible with each other, meaning that only one of them should be set, i.e. you cannot include and exclude the same estimator.

* You can check out the [Auto-sklearn documentation](https://automl.github.io/auto-sklearn/master/manual.html) for more info.

In [4]:
# Choices offered in the include/exclude selectors, as (displayed label, value) pairs.
# The value is the identifier forwarded to the training task; the string 'None'
# is the "Default" choice, i.e. no explicit selection.
estimators = [
    ('Default', 'None'),
    ('Adaboost', 'adaboost'),
    ('Bernoulli Naive Bayes', 'bernoulli_nb'),
    ('Decision Tree', 'decision_tree'),
    ('Extra Trees', 'extra_trees'),
    ('Gaussian Naive Bayes', 'gaussian_nb'),
    ('Gradient Boosting', 'gradient_boosting'),
    ('K Nearest Neighbors', 'k_nearest_neighbors'),
    ('LDA', 'lda'),
    ('Linear SVC', 'liblinear_svc'),
    ('SVM SVC', 'libsvm_svc'),
    ('MLP', 'mlp'),
    ('Multinomial Naive Bayes', 'multinomial_nb'),  # label typo fixed (was "Multinominal")
    ('Passive Aggressive', 'passive_aggressive'),
    ('QDA', 'qda'),
    ('Random Forest', 'random_forest'),
    ('SGD', 'sgd'),
]

preprocessors = [
    ('Default', 'None'),
    ('Balancing', 'balancing'),
    ('Extra Trees', 'extra_trees_preproc_for_classification'),
    ('Fast ICA', 'fast_ica'),
    ('Feature Agglomeration', 'feature_agglomeration'),
    ('Kernel PCA', 'kernel_pca'),
    ('Kitchen Sinks', 'kitchen_sinks'),
    ('Linear SVM preprocessor', 'liblinear_svc_preprocessor'),
    ('No Preprocessing', 'no_preprocessor'),  # label typo fixed (was "Preporcessing")
    ('Nystroem Sampler', 'nystroem_sampler'),
    ('One Hot Encoding', 'one_hot_encoding'),
    ('PCA', 'pca'),
    ('Polynomial', 'polynomial'),
    ('Random Trees Embedding', 'random_trees_embedding'),
    ('Select Percentile', 'select_percentile'),
    ('Select Rates', 'select_rates_classification'),
]

In [5]:
from ipywidgets import Layout, Button, Box, Label, BoundedIntText, IntSlider, Dropdown, SelectMultiple

# Layout shared by every form row: caption on the left, input(s) on the right.
form_item_layout = Layout(
    display='flex',
    flex_flow='row',
    justify_content='space-between'
)

# Optional training parameters. The launch callback reads these rows by
# position, so their order must not change.
form_items = [
    Box([Label(value='Number of nodes in cluster:'),
        IntSlider(value=3,min=2,max=20,step=1)], layout=form_item_layout),
    Box([Label(value='Total training time (min):'), 
         BoundedIntText(value=15, min=0)], layout=form_item_layout),
    Box([Label(value='Per run training time (min):'), 
         BoundedIntText(value=5, min=0)], layout=form_item_layout),
    Box([Label(value='Number of cross validation folds:'),
        IntSlider(value=3,min=2,max=10,step=1)], layout=form_item_layout),
    Box([Label(value='Maximum ensemble size:'),
        BoundedIntText(value=50, min=1)], layout=form_item_layout),
    Box([Label(value='Ensemble nbest:'),
        BoundedIntText(value=50, min=0)], layout=form_item_layout),
    Box([Label(value='Include Estimators and Preprocessors:'),
        SelectMultiple(options=estimators,value=['None']),
        SelectMultiple(options=preprocessors,value=['None'])], layout=form_item_layout),
    Box([Label(value='Exclude Estimators and Preprocessors:'),
        SelectMultiple(options=estimators,value=['None']),
        SelectMultiple(options=preprocessors,value=['None'])], layout=form_item_layout)
]

if len(file.value) == 0:
    # Nothing uploaded yet: show a reminder in place of the target selector.
    target_col = Box([widgets.HTML(value="<b><font color=#f39c12>Import file and re-run cell</b>")],
                     layout=form_item_layout)
else:
    # create input folder (pure Python instead of a shell escape)
    os.makedirs('input_binder', exist_ok=True)

    # FileUpload.value is a dict keyed by file name in ipywidgets 7 and a
    # tuple of per-file dicts in ipywidgets 8; take the first upload either way.
    uploads = file.value
    first_upload = uploads[next(iter(uploads))] if isinstance(uploads, dict) else uploads[0]

    # write uploaded data file to input
    data = pd.read_csv(io.BytesIO(first_upload['content']))
    data.to_csv('input_binder/data.csv')

    # Preselect a column named 'class' when there is one, otherwise the first column.
    column_names = list(data.columns)
    target_dropdown = Dropdown(
        options=column_names,
        value='class' if 'class' in column_names else column_names[0]
    )
    positive_dropdown = Dropdown()

    def _refresh_positive_labels(change=None):
        """Offer the two classes of the selected target column as positive label.

        When the selected column is not binary the dropdown is emptied and
        disabled rather than removed, so the row order of the form stays fixed.
        """
        classes = data[target_dropdown.value].unique().tolist()
        is_binary = len(classes) == 2
        positive_dropdown.options = classes if is_binary else []
        positive_dropdown.disabled = not is_binary

    # Keep the positive-label choices in sync with the chosen target column
    # instead of assuming the target is always called 'class'.
    target_dropdown.observe(_refresh_positive_labels, names='value')
    _refresh_positive_labels()

    target_col = Box([Label(value='Target column for classification:'),
                      target_dropdown], layout=form_item_layout)
    label_list = Box([Label(value='Positive Label:'),
                      positive_dropdown], layout=form_item_layout)
    form_items.insert(0, label_list)

# The target row (or the upload reminder) always comes first.
form_items.insert(0, target_col)

form = Box(form_items, layout=Layout(
    display='flex',
    flex_flow='column',
    align_items='stretch',
    width='100%'
))
form

Box(children=(Box(children=(HTML(value='<b><font color=#f39c12>Import file and re-run cell</b>'),), layout=Lay…

## Launch the Qarnot computation

In [6]:
from run_qarnot import submit_task
from ipywidgets import Output

# Launch button plus the output area that receives everything printed by the callback.
button = Button(description="Start Training on Qarnot!", layout=Layout(width='auto'))
output = Output()
display(button, output)

def on_button_clicked(b):
    """Collect the form values and hand them to `submit_task`.

    The form is read by position: every row except the last two is a
    "caption + single input" row, and the last two rows hold the include and
    exclude selectors (estimators first, preprocessors second).

    Args:
        b: the clicked button (unused, required by the `on_click` signature).
    """
    # Dictionary for data storage
    param_dict = {
        'token':'', 'target':'', 'pos_label':'', 'nodes':'', 'total_time':'', 'per_run_time':'', 'cv':'', 
        'ensemble_size':'', 'ensemble_nbest':'', 'incl_estim':(), 'incl_preproc':(), 'excl_estim':(), 
        'excl_preproc':()
    }
    scalar_keys = ['target', 'pos_label', 'nodes', 'total_time', 'per_run_time', 'cv',
                   'ensemble_size', 'ensemble_nbest']

    *scalar_rows, include_row, exclude_row = form.children

    output.clear_output()
    with output:
        # Before a file is uploaded the first row is a one-child reminder with
        # no input widget: report it instead of failing silently on an index error.
        if any(len(row.children) < 2 for row in scalar_rows):
            print("Some fields were not properly filled")
            return

        # The Positive Label row may be missing from the form; drop its key so
        # the remaining keys stay aligned with their rows.
        if len(scalar_rows) < len(scalar_keys):
            scalar_keys.remove('pos_label')

        # Retrieve data from form in dictionary
        param_dict['token'] = token.value
        for key, row in zip(scalar_keys, scalar_rows):
            param_dict[key] = str(row.children[1].value)
        for key, selector in zip(('incl_estim', 'incl_preproc'), include_row.children[1:]):
            param_dict[key] = selector.value
        for key, selector in zip(('excl_estim', 'excl_preproc'), exclude_row.children[1:]):
            param_dict[key] = selector.value

        # Launch computation
        try:
            os.makedirs('logs', exist_ok=True)
            os.makedirs('output_binder', exist_ok=True)
            submit_task(param_dict)

        except AttributeError:
            print("Some fields were not properly filled") 
        
button.on_click(on_button_clicked)

Button(description='Start Training on Qarnot!', layout=Layout(width='auto'), style=ButtonStyle())

Output()

## Display outputs

In [7]:
from ipywidgets import HBox, VBox, Image

# Button that renders the result images into its own output area.
output_button = Button(description="Display outputs", layout=Layout(width='auto'))
image_output = Output()
display(output_button, image_output)

def on_output_button_clicked(b):
    """Show the confusion matrix and accuracy-over-time plots side by side.

    Both PNG files are read from `output_binder/`; if either one is missing a
    short message is printed instead.

    Args:
        b: the clicked button (unused, required by the `on_click` signature).
    """
    # (path, display width) of each result image, in left-to-right order
    result_images = (
        ('output_binder/confusion_matrix.png', 500),
        ('output_binder/acc_over_time.png', 450),
    )

    image_output.clear_output()
    with image_output:
        try:
            vb = VBox()
            title = widgets.HTML(value='<h3>AutoML results</h3>')
            vb.layout.align_items = 'center'
            image_widgets = []
            for path, width in result_images:
                # context manager so the file handle is closed right after reading
                with open(path, 'rb') as png_file:
                    image_widgets.append(
                        Image(value=png_file.read(), format='png', width=width, height=500)
                    )
            # Side by side display
            vb.children = [title, HBox(image_widgets)]
            display(vb)

        except FileNotFoundError:
            print("Result files not available")
        
output_button.on_click(on_output_button_clicked)

Button(description='Display outputs', layout=Layout(width='auto'), style=ButtonStyle())

Output()

In [8]:
# Render a script that hides the `div.input` elements once the document is ready
# and a link that toggles them on each click.
# NOTE(review): the script calls `$`, so it presumably needs a front-end that
# exposes jQuery — confirm before relying on it elsewhere.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')