In [1]:
from sklearn.kernel_approximation import Nystroem
from sklearn.kernel_approximation import RBFSampler
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression

from sklearn.ensemble import BaggingClassifier

from sklearn.pipeline import Pipeline

import ipywidgets as widgets
from ipywidgets import Layout
import matplotlib.pyplot as plt

In [2]:
def get_model(model,
              sampler = None,
              pca = False,
              ensemble = None,
              box_type = None):
    '''
    Build an unfitted classifier from a declarative description

    Every result chains three stages named 'sampler', 'pca' and 'model'.
    A stage that is switched off is replaced by an identity
    FunctionTransformer, so the three step names are always present.

    Parameters
    ----------
    model: string, 'dt', 'linear_svc' or 'logit'
    sampler: string, 'rbf' or 'nystroem', or  None
    pca: bool
    ensemble: integer >= 1 (number of bagged estimators) or None
    box_type: string, 'black' or 'grey' when ensemble is an integer,
        None when ensemble is None
        'black': Pipeline(sampler, pca, Bagging(model))
        'grey':  Bagging(Pipeline(sampler, pca, model))

    Returns
    -------
    clf: a model with the parameters specified

    Raises
    ------
    ValueError: if an argument has an unsupported value, or if ensemble and
        box_type are not given together
    '''
    if model not in ('dt', 'linear_svc', 'logit'):
        raise ValueError('model {0} is not supported'.format(model))
    if sampler not in ('rbf', 'nystroem', None):
        raise ValueError('sampler {0} is not supported'.format(sampler))
    if not isinstance(pca, bool):
        raise ValueError('pca is a boolean')
    # type() rather than isinstance() so that True/False are rejected
    if ensemble is not None and type(ensemble) is not int:
        raise ValueError('Wrong value for ensemble')
    if ensemble is not None and ensemble < 1:
        raise ValueError('Number of estimators must be greater than 0')
    if box_type not in ('black', 'grey', None):
        raise ValueError("box_type must be 'black', 'grey' or None")
    # Both or neither: an ensemble without a box type used to fall through
    # every branch below and fail with UnboundLocalError on return.
    if (box_type is None) != (ensemble is None):
        raise ValueError("box_type doesn't match with ensemble")

    if sampler == 'rbf':
        s = RBFSampler(gamma = 0.2)
    elif sampler == 'nystroem':
        s = Nystroem(gamma = 0.2)
    else:
        s = FunctionTransformer(None, validate = False)

    if pca:
        p = PCA(n_components = 0.9, svd_solver = "full")
    else:
        p = FunctionTransformer(None, validate = False)

    if model == 'dt':
        m = DecisionTreeClassifier()
    elif model == 'linear_svc':
        m = LinearSVC(C = 1)
    else:
        m = LogisticRegression(C = 1, multi_class = 'multinomial', solver = 'lbfgs')

    def _chain(final_step):
        # The three stages, always under the same step names
        return Pipeline([
            ('sampler', s),
            ('pca', p),
            ('model', final_step),
        ])

    if ensemble is None:
        return _chain(m)

    # The wrapped estimator is passed positionally: its keyword was renamed
    # from base_estimator to estimator in newer scikit-learn releases.
    if box_type == 'black':
        return _chain(BaggingClassifier(m, n_estimators = ensemble))
    return BaggingClassifier(_chain(m), n_estimators = ensemble)

In [3]:
def get_label(model, sampler, pca, box_type, train_test):
    '''Compose the legend label of a score curve

    The label is the underscore-joined list of the active options, in the
    order model, sampler, pca, box type, followed by '<train_test> score'.

    Parameters
    ----------
    model: string, model name
    sampler: string, 'rbf' or 'nystroem', or  None
    pca: bool
    box_type: string, 'black', 'grey' or None
    train_test: string, 'train' or 'test'

    Raises
    ------
    ValueError: if sampler or train_test has an unsupported value
    '''
    if not (sampler is None or sampler in ('rbf', 'nystroem')):
        raise ValueError("sampler must be 'rbf', 'nystroem' or None")
    if train_test not in ('train', 'test'):
        raise ValueError("train_test must be 'train' or 'test'")

    # Collect only the options that are switched on
    pieces = [model]
    if sampler is not None:
        pieces.append(sampler)
    if pca:
        pieces.append("pca")
    if box_type is not None:
        pieces.append(box_type)
    pieces.append(train_test + " score")

    return "_".join(pieces)

In [4]:
def _sampler_n_components_param(clf):
    '''Return the set_params key that reaches the sampler's n_components in clf'''
    target = 'sampler__n_components'
    # The pipeline may be the top-level estimator or nested inside a bagging
    # ensemble, so accept the bare name or any '<prefix>__' + name.
    names = [name for name in clf.get_params(deep = True)
             if name == target or name.endswith('__' + target)]
    if len(names) != 1:
        raise ValueError('the model has no unique sampler n_components parameter')
    return names[0]


def get_model_scores(model,
              dataset,
              features = None,
              sampler = None,
              pca = False,
              ensemble = None,
              box_type = None):
    '''
    Fit the model described by the arguments and score it on train and test

    Without features the model is fitted once. With features it is refitted
    once per value, each time setting the sampler's n_components to that value.

    Parameters
    ----------
    model: string, 'dt', 'linear_svc' or 'logit'
    dataset: dictionary with keys 'data_train', 'data_test', 'target_train', 'target_test'
    features: list with the numbers of sampler features to test, or None.
        Must be given if and only if sampler is given
    sampler: string, 'rbf' or 'nystroem', or  None
    pca: bool
    ensemble: integer or None
    box_type: string, 'black', 'grey' or None (see get_model)

    Returns
    -------
    A tuple of two dictionarys, (train_dic,test_dic), each one with
    keys 'absi', 'ord', 'label'. 'absi' holds the x values (features, or
    [-1, -1] when there are no features) and 'ord' the matching scores

    Raises
    ------
    ValueError: if features is not a list or None, or if features and
        sampler are not given together
    '''
    if not isinstance(features, (list, type(None))):
        raise ValueError('features must be an ordered list of integers or None')

    if features is None and sampler is not None:
        raise ValueError('features is needed with sampler')
    # Without a sampler there is no n_components to vary; fail here instead
    # of deep inside set_params.
    if features is not None and sampler is None:
        raise ValueError('features can only be used with a sampler')

    data_train = dataset['data_train']
    data_test = dataset['data_test']
    target_train = dataset['target_train']
    target_test = dataset['target_test']

    clf = get_model(model, sampler, pca, ensemble, box_type)

    def fit_and_score():
        # Refit clf with its current parameters and score both splits
        clf.fit(data_train, target_train)
        return (clf.score(data_train, target_train),
                clf.score(data_test, target_test))

    if features is None:
        train_score, test_score = fit_and_score()
        # A single score is stored twice, under the placeholder abscissa -1
        train_absi, test_absi = [-1, -1], [-1, -1]
        train_scores = [train_score, train_score]
        test_scores = [test_score, test_score]
    else:
        # In a grey box the pipeline lives inside the bagging ensemble, so
        # the plain 'sampler__n_components' key does not exist there.
        param_name = _sampler_n_components_param(clf)
        train_absi = test_absi = features
        train_scores = []
        test_scores = []
        for f in features:
            clf.set_params(**{param_name: f})
            train_score, test_score = fit_and_score()
            train_scores.append(train_score)
            test_scores.append(test_score)

    train_dic = {
        'absi': train_absi,
        'ord': train_scores,
        'label': get_label(model, sampler, pca, box_type, 'train')
    }
    test_dic = {
        'absi': test_absi,
        'ord': test_scores,
        'label': get_label(model, sampler, pca, box_type, 'test')
    }
    return train_dic, test_dic

In [5]:
# currentwork
def get_params_from_models_bar(mod_bar):
    '''
    Returns a dictionary with the needed keys to pass to get_model_scores

    Parameters
    ----------
    mod_bar: a HBox with the widgets, in the order produced by
        get_new_model_bar: model, sampler, box type, number of estimators, pca

    Return
    ------
    A dictionary with keys [model, dataset, features, sampler, pca, ensemble, box_type]
    '''
    # TODO: works for now, but it is ugly and fragile (widgets are located by
    # position); it will need a rework
    model_w, sampler_w, box_type_w, n_estimators_w, pca_w = mod_bar.children[:5]
    sampler = sampler_w.value
    box_type = box_type_w.value

    # TODO: use the real dataset instead of this placeholder
    data = np.arange(35).reshape(5,7)
    target = [1,2,1,1,2]
    di = {
        'data_train': data,
        'data_test': data,
        'target_train': target,
        'target_test': target,
    }

    if sampler is None:
        features = None
    else:
        # The range comes from the module-level features_selector slider.
        # Casting the evenly spaced points to integers can repeat values on
        # narrow ranges; np.unique drops the repeats and keeps the order.
        low, high = features_selector.value
        features = np.unique(np.linspace(low, high, dtype = np.int64)).tolist()

    # As before, a bar without a sampler describes the plain model. A bar
    # whose box type is None must also disable the ensemble, otherwise
    # get_model receives an ensemble size with no box type to apply it to.
    if sampler is None or box_type is None:
        ensemble = None
        box_type = None
    else:
        ensemble = n_estimators_w.value

    d = {
        'model': model_w.value,
        'dataset': di,
        'features': features,
        'sampler': sampler,
        'pca': pca_w.value,
        'ensemble': ensemble,
        'box_type': box_type,
    }
    return d

In [6]:
# currentwork
def get_all_model_scores():
    '''
    Score every model configured in the GUI and plot the results

    Each child of models_bar (a HBox) is turned into parameters with
    get_params_from_models_bar and scored with get_model_scores. All models
    are scored before the figure is created.

    Returns
    -------
    A matplotlib figure with two subplots, test scores on the left and
    train scores on the right, with one labelled line per model
    '''
    # One (train_dic, test_dic) pair per model bar
    score_pairs = [
        get_model_scores(**get_params_from_models_bar(bar))
        for bar in models_bar.children
    ]

    fig = plt.figure(figsize=(12.8, 4.8))
    test_sp = fig.add_subplot(1, 2, 1)
    train_sp = fig.add_subplot(1, 2, 2)

    test_sp.set_title("Test scores")
    train_sp.set_title("Train scores")
    # TODO: add the correct title
    fig.suptitle("Esto es un subtítulo")

    for tr, te in score_pairs:
        test_sp.plot(te['absi'], te['ord'], label=te['label'])
        train_sp.plot(tr['absi'], tr['ord'], label=tr['label'])

    test_sp.legend()
    train_sp.legend()

    # Closes the current pyplot figure (the one just created); presumably so
    # that only the caller's explicit display() renders it - confirm.
    plt.close()
    return fig

In [7]:
from sklearn.preprocessing import FunctionTransformer
import numpy as np
from sklearn.decomposition import PCA

In [8]:
# Tiny synthetic dataset (5 samples x 7 features, two classes) used to
# smoke-test the helpers; train and test deliberately share the same data.
data = np.arange(35).reshape(5, 7)
target = [1, 2, 1, 1, 2]

di = dict(
    data_train=data,
    data_test=data,
    target_train=target,
    target_test=target,
)

In [9]:
# Smoke test: score a LinearSVC with no sampler and no features (single fit)
# on the toy dataset.
train_dic, test_dic = get_model_scores('linear_svc', di)



In [10]:
# Show the train-score dictionary produced by the previous cell
train_dic

{'absi': [-1, -1], 'ord': [0.6, 0.6], 'label': 'linear_svc_train score'}

In [11]:
#from IPython.display import HTML, display

In [12]:
# Disabled scratch code: the widget definitions below are wrapped in a string
# literal, so none of them is created at notebook scope. get_new_model_bar
# builds the per-bar widgets and features_selector is defined in a later cell.
# NOTE(review): the options here spell 'linear_scv', unlike 'linear_svc' used
# by get_new_model_bar and get_model.
'''
model_selector = widgets.Dropdown(
    options=['dt', 'logit', 'linear_scv'],
    value='dt',
    layout = Layout(flex = '0 3 auto'),
    #description=':',
)

sampler_selector = widgets.Dropdown(
    options={'None': None, 'rbf': 'rbf', 'nystroem': 'nystroem'},
    value='rbf',
    layout = Layout(flex = '1 3 auto'),
    #description=':',
)
features_selector = widgets.IntRangeSlider(
    value=[30, 100],
    min=30,
    max=400,
    step=10,
    layout = Layout(flex = '0 1 auto'),
    #description=':',
)
n_estimators_selector = widgets.IntSlider(
    value=30,
    min=2,
    max=200,
    step=1,
    layout = Layout(flex = '1 1 auto'),
    #description=':',
)
pca_checkbox = widgets.Checkbox(
    value=False,
    layout = Layout(flex = '0 3 auto'),
    #description='',
)
'''

"\nmodel_selector = widgets.Dropdown(\n    options=['dt', 'logit', 'linear_scv'],\n    value='dt',\n    layout = Layout(flex = '0 3 auto'),\n    #description=':',\n)\n\nsampler_selector = widgets.Dropdown(\n    options={'None': None, 'rbf': 'rbf', 'nystroem': 'nystroem'},\n    value='rbf',\n    layout = Layout(flex = '1 3 auto'),\n    #description=':',\n)\nfeatures_selector = widgets.IntRangeSlider(\n    value=[30, 100],\n    min=30,\n    max=400,\n    step=10,\n    layout = Layout(flex = '0 1 auto'),\n    #description=':',\n)\nn_estimators_selector = widgets.IntSlider(\n    value=30,\n    min=2,\n    max=200,\n    step=1,\n    layout = Layout(flex = '1 1 auto'),\n    #description=':',\n)\npca_checkbox = widgets.Checkbox(\n    value=False,\n    layout = Layout(flex = '0 3 auto'),\n    #description='',\n)\n"

In [13]:
def get_new_model_bar():
    '''
    Build the row of widgets that describes one model to train

    Returns
    -------
    A new HBox whose children are, in this order: the model dropdown, the
    sampler dropdown, the box type dropdown, the number of estimators
    slider and the PCA checkbox
    '''
    def flex(spec):
        # Layout that only sets the CSS flex shorthand
        return Layout(flex=spec)

    # The feature range is not part of a bar: it is chosen once for all
    # models with the notebook-level features_selector slider.
    pca_checkbox = widgets.Checkbox(value=False, layout=flex('0 3 auto'))
    n_estimators_selector = widgets.IntSlider(
        value=30, min=2, max=200, step=1,
        layout=flex('1 1 auto'),
    )
    box_type_selector = widgets.Dropdown(
        options={'None': None, 'black': 'black', 'grey': 'grey'},
        value='black',
        layout=flex('1 3 auto'),
    )
    sampler_selector = widgets.Dropdown(
        options={'None': None, 'rbf': 'rbf', 'nystroem': 'nystroem'},
        value='rbf',
        layout=flex('1 3 auto'),
    )
    model_selector = widgets.Dropdown(
        options=['dt', 'logit', 'linear_svc'],
        value='dt',
        layout=flex('0 3 auto'),
    )

    # The child order is relied upon by get_params_from_models_bar
    bar_widgets = [
        model_selector,
        sampler_selector,
        box_type_selector,
        n_estimators_selector,
        pca_checkbox,
    ]
    return widgets.HBox(bar_widgets)

In [14]:
def add_model_bar(m):
    '''
    Append a new model bar to m, which has the same values as the last
    model bar in m

    Parameters
    ----------
    m: Is a VBox containing 1 or more HBox describing the new model

    Raises
    ------
    ValueError: if m has no model bar to copy from
    '''
    if len(m.children) < 1:
        raise ValueError('At least one model bar is needed')
    copy_bar = m.children[-1]

    new_model_bar = get_new_model_bar()

    # Both bars hold the same widgets in the same order, so copying the
    # values pairwise makes the new bar start as a duplicate of the last one.
    for new_widget, old_widget in zip(new_model_bar.children, copy_bar.children):
        new_widget.value = old_widget.value

    # children is reassigned (not mutated) so the change is picked up
    m.children = tuple(m.children) + (new_model_bar,)

In [15]:
def remove_model_bar(m):
    '''
    Drop the last model bar of m, always keeping at least one

    Parameters
    ----------
    m: Is a VBox containing 2 or more HBox describing models

    Raises
    ------
    ValueError: if m has fewer than 2 model bars
    '''
    if len(m.children) < 2:
        raise ValueError('minimum number of model bars reached')
    remaining = list(m.children)
    remaining.pop()
    # children is reassigned with a fresh tuple rather than mutated
    m.children = tuple(remaining)

# NOTE(review): this snippet has no "In [..]" prompt of its own. It reads
# model_selector, sampler_selector, n_estimators_selector and pca_checkbox,
# which in the visible cells are only created as locals of
# get_new_model_bar, so running it as-is at notebook scope would presumably
# raise NameError - confirm whether it is leftover scratch code.
hb = widgets.HBox([
    model_selector,
    sampler_selector,
    features_selector,
    n_estimators_selector,
    pca_checkbox
])

# Replace every child's layout with one that only draws a red border
for c in hb.children:
    c.layout = Layout(
        #margin = '0px 0px 0px 0px',
        border =  '1px solid red')
    pass

In [16]:
# Disabled scratch code: kept inside a string literal, so no HBox is built here
'''
hb = widgets.HBox([
    model_selector,
    sampler_selector,
    features_selector,
    n_estimators_selector,
    pca_checkbox,
])
'''

'\nhb = widgets.HBox([\n    model_selector,\n    sampler_selector,\n    features_selector,\n    n_estimators_selector,\n    pca_checkbox,\n])\n'

In [17]:
# Disabled scratch code: kept inside a string literal, so no Box is built here
'''
b = widgets.Box([
    model_selector,
    sampler_selector,
    features_selector,
    n_estimators_selector,
    pca_checkbox,
], layout = Layout(flex_flow = 'row nowrap', justify_content = 'space-between'))
'''

"\nb = widgets.Box([\n    model_selector,\n    sampler_selector,\n    features_selector,\n    n_estimators_selector,\n    pca_checkbox,\n], layout = Layout(flex_flow = 'row nowrap', justify_content = 'space-between'))\n"

In [18]:
#display(hb)

In [19]:
#display(b)

In [20]:
# Column titles shown above the model bars, in the same order as the
# widgets of a bar built by get_new_model_bar
headers = widgets.HBox(
    [
        widgets.Label(title)
        for title in ("Model", "Sampling", "Box Type", "Number estimators", "PCA")
    ],
    layout=widgets.Layout(justify_content='space-between'),
)

In [21]:
# Range of sampler feature counts, shared by every model bar; it is read by
# get_params_from_models_bar
features_selector = widgets.IntRangeSlider(
    min=30,
    max=400,
    step=10,
    value=[30, 100],
    layout=Layout(flex='0 1 auto'),
)

In [22]:
#display(headers)

In [23]:
# Container of model bars; starts with a single bar. The add/remove button
# handlers and get_all_model_scores read and replace its children.
models_bar = widgets.VBox([get_new_model_bar()])

In [24]:
# Column titles stacked on top of the model bars
cool_models_bar = widgets.VBox([headers, models_bar])

In [25]:
def add_model_bar_wraper(e):
    '''
    Click handler of the "Add model" button: appends a bar to models_bar

    Parameters
    ----------
    e: the argument passed by on_click (unused)
    '''
    add_model_bar(models_bar)

add_model_bar_bt = widgets.Button(
    description='Add model',
    button_style='success', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Add a new model bar to train',
    #icon='check'
)
add_model_bar_bt.on_click(add_model_bar_wraper)

In [26]:
def remove_model_bar_wraper(e):
    '''
    Click handler of the "Remove model" button: drops the last bar of
    models_bar, doing nothing when only one bar is left

    Parameters
    ----------
    e: the argument passed by on_click (unused)
    '''
    # Guarded here so a click on the last remaining bar is a no-op instead
    # of the ValueError raised by remove_model_bar
    if len(models_bar.children) > 1:
        remove_model_bar(models_bar)

remove_model_bar_bt = widgets.Button(
    description='Remove model',
    button_style='warning', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Remove the last model bar, if possible',
    #icon='check'
)
remove_model_bar_bt.on_click(remove_model_bar_wraper)

In [27]:
def calculate_bt_wrapper(e):
    '''
    Click handler of the "Calculate" button: scores every configured model
    and shows the resulting figure in graphs_output

    Parameters
    ----------
    e: the argument passed by on_click (unused)
    '''
    scores_figure = get_all_model_scores()
    # TODO: clearing should eventually depend on a parameter instead of
    # always happening
    # TODO: avoid flicker
    graphs_output.clear_output(wait=True)
    with graphs_output:
        display(scores_figure)

In [28]:
# "Calculate" button; a click runs calculate_bt_wrapper
calculate_bt = widgets.Button(
    tooltip='Calculate the models',
    button_style='success',  # 'success', 'info', 'warning', 'danger' or ''
    description='Calculate',
)
calculate_bt.on_click(calculate_bt_wrapper)

In [29]:
# Whole control panel, top to bottom: add/remove buttons, feature range
# slider, titled model bars and the calculate button
gui = widgets.VBox(children=[
    widgets.HBox(children=[add_model_bar_bt, remove_model_bar_bt]),
    features_selector,
    cool_models_bar,
    calculate_bt,
])

In [30]:
# Output area that calculate_bt_wrapper clears and fills with the score figure
graphs_output = widgets.Output(layout={'border': '1px solid black'})

In [31]:
# Render the control panel, then the output area below it
display(gui)
display(graphs_output)

VBox(children=(HBox(children=(Button(button_style='success', description='Add model', style=ButtonStyle(), too…

Output(layout=Layout(border='1px solid black'))