In [12]:
%%html
<script>
    // AUTORUN ALL CELLS ON NOTEBOOK-LOAD!
    require(
        ['base/js/namespace', 'jquery'], 
        function(jupyter, $) {
            $(jupyter.events).on("kernel_ready.Kernel", function () {
                console.log("Auto-running all cells-below...");
                jupyter.actions.call('jupyter-notebook:run-all-cells-below');
                jupyter.actions.call('jupyter-notebook:save-notebook');
            });
        }
    );
</script>

In [None]:
%%html
<script>
    $([IPython.events]).on("kernel_ready.Kernel", function () {
        $('div#header-container').hide();
        $('div#maintoolbar').hide();
    });
</script>

In [None]:
%%html
<script>
    code_show=true; 
    function code_toggle() {
        if (code_show){
            $('div.input').hide();
        } else {
            $('div.input').show();
        }
        code_show = !code_show
    } 
    $( document ).ready(code_toggle);
</script>

<img src="./qarnot_ligne.png" 
     width="30%" 
     align=right
     alt="Qarnot logo">
     

# AutoML on Qarnot

## Add your Qarnot token

In [None]:
import os
import io
import pandas as pd
import ipywidgets as widgets
from tkinter import Tk, filedialog
from IPython.display import clear_output, display, HTML

In [None]:
# Masked text input for the Qarnot API token; its value is read when the
# training task is submitted.
token = widgets.Password(
    description='Qarnot token:',
    placeholder='Enter token',
    disabled=False,
)
display(token)

## Upload your data to Binder

In [None]:
from IPython.display import Javascript, display

# Upload widget limited to a single file with the '.csv' extension.
# `accept` also supports other patterns, e.g. '.txt', '.pdf', 'image/*',
# 'image/*,.pdf'; set `multiple=True` to allow several files at once.
file = widgets.FileUpload(accept='.csv', multiple=False)
display(file)

def on_upload_change(change):
    """Re-execute every cell located after the currently selected one.

    Used as the observer callback of the upload widget. ``change`` is the
    trait-change notification and is not inspected.
    """
    rerun_cells_below = (
        'IPython.notebook.execute_cell_range('
        'IPython.notebook.get_selected_index()+1, IPython.notebook.ncells())'
    )
    display(Javascript(rerun_cells_below))


# Fire the callback whenever the widget's `_counter` trait changes.
file.observe(on_upload_change, names='_counter')

## Specify Autosklearn training parameters

* The AutoML parameters are divided into two categories, **Basic** and **Optional**. You can choose to set only the basic parameters for your task to work, the rest are optional.

* To select multiple values in the lists of estimators and preprocessors to include/exclude, hold <kbd>shift</kbd> and/or <kbd>ctrl</kbd> (or <kbd>command</kbd>) while clicking or using the arrow keys.

    > Note that the include and exclude parameters are incompatible with each other, meaning that only one of them should be set. For example, you cannot include the `Adaboost` estimator and also exclude the `Decision Tree` and `Extra Trees` estimators, as they are already excluded by setting the include parameter.

* You can check out the [Auto-sklearn documentation](https://automl.github.io/auto-sklearn/master/manual.html) for more info.

In [None]:
# (label, value) pairs offered in the estimator selection lists.
# The label is what the user sees; the value is what the form returns.
# 'None' (as a string) is the value of the "Default" entry.
estimators = [
    ('Default', 'None'),
    ('Adaboost', 'adaboost'),
    ('Bernoulli Naive Bayes', 'bernoulli_nb'),
    ('Decision Tree', 'decision_tree'),
    ('Extra Trees', 'extra_trees'),
    ('Gaussian Naive Bayes', 'gaussian_nb'),
    ('Gradient Boosting', 'gradient_boosting'),
    ('K Nearest Neighbors', 'k_nearest_neighbors'),
    ('LDA', 'lda'),
    ('Linear SVC', 'liblinear_svc'),
    ('SVM SVC', 'libsvm_svc'),
    ('MLP', 'mlp'),
    ('Multinomial Naive Bayes', 'multinomial_nb'),  # label typo fixed (was "Multinominal")
    ('Passive Aggressive', 'passive_aggressive'),
    ('QDA', 'qda'),
    ('Random Forest', 'random_forest'),
    ('SGD', 'sgd'),
]

# (label, value) pairs offered in the preprocessor selection lists.
# The label is what the user sees; the value is what the form returns.
# 'None' (as a string) is the value of the "Default" entry.
preprocessors = [
    ('Default', 'None'),
    ('Balancing', 'balancing'),
    ('Extra Trees', 'extra_trees_preproc_for_classification'),
    ('Fast ICA', 'fast_ica'),
    ('Feature Agglomeration', 'feature_agglomeration'),
    ('Kernel PCA', 'kernel_pca'),
    ('Kitchen Sinks', 'kitchen_sinks'),
    ('Linear SVM preprocessor', 'liblinear_svc_preprocessor'),
    ('No Preprocessing', 'no_preprocessor'),  # label typo fixed (was "Preporcessing")
    ('Nystroem Sampler', 'nystroem_sampler'),
    ('One Hot Encoding', 'one_hot_encoding'),
    ('PCA', 'pca'),
    ('Polynomial', 'polynomial'),
    ('Random Trees Embedding', 'random_trees_embedding'),
    ('Select Percentile', 'select_percentile'),
    ('Select Rates', 'select_rates_classification'),
]

In [None]:
from ipywidgets import Layout, Button, Box, Label, BoundedIntText, IntSlider, Dropdown, SelectMultiple, Text

# Layout shared by every form row: a horizontal flex container whose
# children are spread out with 'space-between'.
form_item_layout = Layout(display='flex', flex_flow='row', justify_content='space-between')

# Rows of the "Basic Parameters" form. Each row is a Box holding a Label
# followed by its input widget.
# The order matters: on_button_clicked maps the rows positionally onto
# parameter keys, and a target-column row is inserted at index 0 later on.
form_items_1 = [
    Box([Label(value='Task name:'),
         Text(value='automl-binder', placeholder='Enter task name')],
        layout=form_item_layout),
    Box([Label(value='Input bucket name:'),
         Text(value='automl-binder-in', placeholder='Enter input bucket name')],
        layout=form_item_layout),
    # Placeholder fixed: it previously read "Enter input bucket name".
    Box([Label(value='Output bucket name:'),
         Text(value='automl-binder-out', placeholder='Enter output bucket name')],
        layout=form_item_layout),
    Box([Label(value='Number of nodes in cluster:'),
         IntSlider(value=3, min=2, max=20, step=1)],
        layout=form_item_layout),
    Box([Label(value='Total training time (min):'),
         BoundedIntText(value=15, min=0)],
        layout=form_item_layout),
    Box([Label(value='Per run training time (min):'),
         BoundedIntText(value=5, min=0)],
        layout=form_item_layout),
]

# Rows of the "Optional Parameters" form. Each row is a Box holding a Label
# followed by one or two input widgets; on_button_clicked reads them by position.
form_items_2 = [
    Box(
        [Label(value='Number of cross validation folds:'),
         IntSlider(value=3, min=2, max=10, step=1)],
        layout=form_item_layout,
    ),
    # Two bounded integer inputs that differ only in caption and lower bound.
    *[
        Box([Label(value=caption), BoundedIntText(value=50, min=lower_bound)],
            layout=form_item_layout)
        for caption, lower_bound in (('Maximum ensemble size:', 1), ('Ensemble nbest:', 0))
    ],
    # Include row first, then exclude row: each has an estimator list and a
    # preprocessor list, both preselected on the 'None' ("Default") entry.
    *[
        Box([Label(value=f'{mode} Estimators and Preprocessors:'),
             SelectMultiple(options=estimators, value=['None']),
             SelectMultiple(options=preprocessors, value=['None'])],
            layout=form_item_layout)
        for mode in ('Include', 'Exclude')
    ],
]

# Build the "target column" row. Its content depends on whether a file has
# already been uploaded through the `file` widget.
if len(file.value) == 0:
    # Nothing uploaded yet: show a notice instead of a column selector.
    target_col = Box(
        [widgets.HTML(
            value="<b style='color:orange;font-size:15px;'>"
                  "Target column will be available once you have uploaded your file</b>"
        )],
        layout=form_item_layout,
    )
else:
    # Create the input folder (pure Python instead of shelling out to `mkdir -p`).
    os.makedirs('input_binder', exist_ok=True)

    # Parse the first uploaded file and write it back as input_binder/data.csv.
    first_upload = next(iter(file.value))
    data = pd.read_csv(io.BytesIO(file.value[first_upload]['content']))
    data.to_csv('input_binder/data.csv', index=False)

    # Offer the CSV columns as candidates; a plain list is passed rather than
    # the pandas Index object.
    target_col = Box(
        [Label(value='Target column for classification:'),
         Dropdown(options=list(data.columns))],
        layout=form_item_layout,
    )

# The target row goes first so it lines up with the 'target' parameter key
# when on_button_clicked reads the rows positionally.
form_items_1.insert(0, target_col)

# Wrap each list of rows in a vertical flex container that stretches its
# rows across the full width; each form gets its own Layout instance.
form_1, form_2 = (
    Box(rows, layout=Layout(display='flex', flex_flow='column',
                            align_items='stretch', width='100%'))
    for rows in (form_items_1, form_items_2)
)

# One tab per form; the bare `tab` expression at the end renders the widget.
tab = widgets.Tab()
tab.children = [form_1, form_2]
tab.set_title(0, 'Basic Parameters')
tab.set_title(1, 'Optional Parameters')
tab

## Launch the Qarnot computation

In [None]:
from run_qarnot import submit_task
from ipywidgets import Output

# Submit button and the output area that on_button_clicked writes into.
button = Button(layout=Layout(width='auto'), description="Start Training on Qarnot!")
output = Output()
display(button, output)

def on_button_clicked(b):
    """Collect the form values and submit the training task.

    Reads the token widget and the rows of ``form_1`` / ``form_2`` into a
    parameter dictionary, then passes it to ``submit_task``. Everything
    printed here, including the error message, goes to the ``output`` widget.

    If a row lacks the expected input widget (for example the target-column
    row before any file has been uploaded), an ``IndexError`` occurs. In that
    case a message is shown and the task is NOT submitted.

    Args:
        b: The clicked button (unused).
    """
    # Keys are listed in the same order as the widgets they are filled from;
    # the slices below rely on this ordering.
    param_dict = {
        'token':'', 'target':'', 'task':'', 'in_bucket':'', 'out_bucket':'','nodes':'', 'total_time':'',
        'per_run_time':'', 'cv':'', 'ensemble_size':'', 'ensemble_nbest':'', 'incl_estim':(),
        'incl_preproc':(), 'excl_estim':(), 'excl_preproc':()
    }
    param_list = list(param_dict)

    output.clear_output()
    with output:
        try:
            param_dict['token'] = token.value
            # Basic form: one input widget per row, stored as a string.
            for key, row in zip(param_list[1:8], form_1.children):
                param_dict[key] = str(row.children[1].value)
            # Optional form, single-input rows (all rows but the last two).
            for key, row in zip(param_list[8:-4], form_2.children[:-2]):
                param_dict[key] = str(row.children[1].value)
            # Include row: estimator and preprocessor selections.
            for key, selector in zip(param_list[-4:-2], form_2.children[-2].children[1:]):
                param_dict[key] = selector.value
            # Exclude row: estimator and preprocessor selections.
            for key, selector in zip(param_list[-2:], form_2.children[-1].children[1:]):
                param_dict[key] = selector.value
        except IndexError:
            print("Some fields were not properly filled")
            # Do not submit a task built from incomplete parameters.
            return

        # Launch computation
        os.makedirs('logs', exist_ok=True)
        submit_task(param_dict)


button.on_click(on_button_clicked)

## Abort Task

If you want to monitor the task's progress or abort it, you can do so from the [Console](https://console.qarnot.com/app/tasks).

## Display outputs

In [None]:
from ipywidgets import HBox, VBox, Image

# Button that shows the result images, plus the output area they render in.
output_button = Button(layout=Layout(width='auto'), description="Display outputs")
image_output = Output()
display(output_button, image_output)

def on_output_button_clicked(b):
    """Show the two result images side by side under a title.

    Reads ``outputs/confusion_matrix.png`` and ``outputs/acc_over_time.png``
    and renders them in the ``image_output`` widget. If either file is
    missing, a message is printed there instead.

    Args:
        b: The clicked button (unused).
    """
    image_output.clear_output()
    with image_output:
        # Only the file reads can raise FileNotFoundError, so only they are
        # guarded; `with` makes sure the handles are closed.
        try:
            with open('outputs/confusion_matrix.png', 'rb') as confusion_file:
                img1 = confusion_file.read()
            with open('outputs/acc_over_time.png', 'rb') as accuracy_file:
                img2 = accuracy_file.read()
        except FileNotFoundError:
            print("Output files not available")
            return

        title = widgets.HTML(value='<h3>AutoML results</h3>')
        # Set image variable, image format and dimension.
        wi1 = Image(value=img1, format='png', width=500, height=500)
        wi2 = Image(value=img2, format='png', width=450, height=500)

        # Title on top, images side by side below, everything centered.
        vb = VBox()
        vb.layout.align_items = 'center'
        vb.children = [title, HBox([wi1, wi2])]
        display(vb)


output_button.on_click(on_output_button_clicked)

## Download outputs

In [None]:
from shutil import make_archive
from IPython.display import FileLink

# Button that builds the archive, plus the output area for the download link.
download_button = Button(layout=Layout(width='auto'), description="Download outputs")
download_output = Output()
display(download_button, download_output)

def on_download_button_clicked(b):
    """Zip the ``outputs/`` folder into ``output.zip`` and show a download link.

    Messages and the link are rendered in the ``download_output`` widget. If
    the ``outputs/`` folder is missing, a message is printed and no archive is
    created.

    Args:
        b: The clicked button (unused).
    """
    download_output.clear_output()
    with download_output:
        # Check explicitly instead of relying on make_archive to raise for a
        # missing root directory.
        # NOTE(review): I believe some Python versions do not raise in that
        # case and would produce an empty archive; confirm.
        if not os.path.isdir('outputs/'):
            print("Output files not available")
            return

        try:
            print('Compressing outputs into .zip file...')
            make_archive('output', 'zip', 'outputs/')
        except FileNotFoundError:
            print("Output files not available")
            return

        # Adjacent literals avoid the stray indentation that a backslash
        # continuation embeds in the string.
        link = FileLink(
            path='output.zip',
            result_html_prefix='Your output .zip file is ready ! '
                               'Click the following link to download it: '
        )
        display(link)


download_button.on_click(on_download_button_clicked)