In [None]:
from demo_utils.learning import get_model
from demo_utils.general import get_data
from demo_utils.general import gamest
from demo_utils.general import SUPPORTED_DATASETS
from sklearn.model_selection import GridSearchCV
from IPython.display import Markdown as md

In [None]:
import warnings
# Silence every warning for the rest of the kernel session so the rendered
# output only contains the markdown headers and results.
# NOTE(review): this also hides deprecation warnings emitted by the libraries
# used below; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [None]:
%%javascript
// Replace the output area's _should_scroll hook with one that always answers false.
// NOTE(review): presumably this stops long cell outputs from being collapsed into a
// scrollable box in the classic notebook front end -- confirm it is still needed.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false
}

In [None]:
def get_prefixes(box_name):
    """Return the parameter-name prefixes that match a box type.

    The prefixes are prepended to hyperparameter names so they address the
    sampler and the model inside the estimator built for that box.

    Parameters
    ----------
    box_name : str
        One of 'none', 'grey_bag', 'grey_ens', 'black_bag' or 'black_ens'.

    Returns
    -------
    tuple of (str, str)
        ``(sampler_prefix, model_prefix)``.

    Raises
    ------
    ValueError
        If ``box_name`` is not one of the supported box types. Previously this
        case surfaced as an UnboundLocalError at the return statement.
    """
    grey_prefixes = ('base_estimator__sampler__', 'base_estimator__model__')
    black_prefixes = ('sampler__', 'model__base_estimator__')
    prefixes_by_box = {
        'none': ('sampler__', 'model__'),
        'grey_bag': grey_prefixes,
        'grey_ens': grey_prefixes,
        'black_bag': black_prefixes,
        'black_ens': black_prefixes,
    }

    prefixes = prefixes_by_box.get(box_name)
    if prefixes is None:
        raise ValueError(
            'Unsupported box_name {!r}; expected one of {}'.format(
                box_name, sorted(prefixes_by_box)))
    return prefixes

In [None]:
# Experiment configuration shared by the cells below.
box_name = 'none'     # box type used by the per-dataset cells at the bottom
dts_size = 1000       # forwarded to get_data as n_ins
n_components = 500    # only referenced from a commented-out line in the visible cells

# Forwarded to get_model as n_estim by test_no_sampler and test_with_sampler.
# It used to be commented out, which made both functions fail with NameError
# on a fresh kernel (Restart & Run All).
# NOTE(review): 50 is the value that was commented out -- confirm it is the intended one.
n_estim = 50

# Candidate values for the hyperparameter tuned on each model.
model_h_params = {
    'dt': {'min_impurity_decrease': [10**i for i in range(-4, 0)]},
    # Alternative: 'dt': {'min_impurity_decrease': [0]}  (overfit on purpose)
    'logit': {'C': [10**3]},  # fixed: a large C so there is effectively no regularization
    'linear_svc': {'C': [10**i for i in range(-1, 3)]},
    # Alternative: 'linear_svc': {'C': [10**3]}
}

# Candidate gamma values per sampler.
gamma_options = {
    # Alternative for both samplers: {'gamma': [10**i for i in range(-5, 0)]}
    'rbf': {'gamma': [5]},
    'nystroem': {'gamma': [5]},
}

In [None]:
def test_no_sampler(dts_name, box_name, model_name):
    display(md('#'*4 + ' ' + 'no sampler'))
    model_params = {
        'model_name': model_name,
        'model_params': {},
        'rbfsampler_gamma': None,
        'nystroem_gamma': None,
        'sampler_name': 'identity',
        'pca_bool': False,
        'pca_first': None,
        'n_estim': n_estim,
        'box_type': box_name
    }
    model = get_model(**model_params)
    # model.set_params(sampler__n_components=n_components)
    tunning_params = dict(model_h_params[model_name])
    sampler_prefix, model_prefix = get_prefixes(box_name=box_name)
    param_k = next(iter(tunning_params))
    #new_param_k = 'model__' + param_k
    new_param_k = model_prefix + param_k
    tunning_params[new_param_k] = tunning_params.pop(param_k)
    clf = GridSearchCV(model, tunning_params, cv=10, iid=False)

    data = get_data(dts_name, n_ins=dts_size)
    data_train = data['data_train']
    target_train = data['target_train']

    clf.fit(data_train, target_train)

    bp = clf.best_params_

    bp[param_k] = bp.pop(new_param_k)

    return bp


def test_with_sampler(dts_name, box_name, model_name, sampler_name):
    display(md('#'*4 + ' ' + 'with sampler'))
    model_params = {
        'model_name': model_name,
        'model_params': {},
        'rbfsampler_gamma': None,
        'nystroem_gamma': None,
        'sampler_name': sampler_name,
        'pca_bool': False,
        'pca_first': None,
        'n_estim': n_estim,
        'box_type': box_name
    }
    model = get_model(**model_params)
    tunning_params = dict(model_h_params[model_name])

    sampler_prefix, model_prefix = get_prefixes(box_name=box_name)

    param_k = next(iter(tunning_params))
    #new_param_k = 'model__' + param_k
    new_param_k = model_prefix + param_k
    tunning_params[new_param_k] = tunning_params.pop(param_k)

    sampler_params = dict(gamma_options[sampler_name])
    sampler_k = next(iter(sampler_params))
    #new_sampler_k = 'sampler__' + sampler_k
    new_sampler_k = sampler_prefix + sampler_k

    sampler_params[new_sampler_k] = sampler_params.pop(sampler_k)

    tunning_params.update(sampler_params)

    clf = GridSearchCV(model, tunning_params, cv=10, iid=False)

    data = get_data(dts_name, n_ins=dts_size)
    data_train = data['data_train']
    target_train = data['target_train']
    
    ###############
    gamma_estimation = gamest(data_train)
    sampler_params[new_sampler_k] = gamma_estimation
    ###############

    clf.fit(data_train, target_train)

    bp = clf.best_params_

    bp[param_k] = bp.pop(new_param_k)
    bp[sampler_k] = bp.pop(new_sampler_k)
    return bp

In [None]:
def test_model(dts_name, box_name, model_name):
    display(md('#'*3 + ' ' + model_name))
    d_no_sampler = test_no_sampler(
        dts_name=dts_name, box_name=box_name, model_name=model_name)
    d_rff = test_with_sampler(
        dts_name=dts_name, box_name=box_name, model_name=model_name, sampler_name='rbf')
    d_nystroem = test_with_sampler(
        dts_name=dts_name, box_name=box_name, model_name=model_name, sampler_name='nystroem')

    ret_dic = {
        'identity': d_no_sampler,
        'rff': d_rff,
        'nystrem': d_nystroem,
    }

    return ret_dic

In [None]:
def test_box(dts_name, box_name):
    display(md('#'*2 + ' ' + box_name))
    d_dt = test_model(dts_name=dts_name, box_name=box_name, model_name='dt')
    d_logit = test_model(
        dts_name=dts_name, box_name=box_name, model_name='logit')
    d_linear_svc = test_model(
        dts_name=dts_name, box_name=box_name, model_name='linear_svc')

    ret_dic = {
        'dt': d_dt,
        'logit': d_logit,
        'linear_svc': d_linear_svc,
    }
    return ret_dic

In [None]:
def test_dataset(dts_name):
    display(md('#'*1 + ' ' + dts_name))

    d_none = test_box(dts_name=dts_name, box_name='none')
    d_black_bag = test_box(dts_name=dts_name, box_name='black_bag')
    d_grey_bag = test_box(dts_name=dts_name, box_name='grey_bag')
    d_grey_ens = test_box(dts_name=dts_name, box_name='grey_ens')

    ret_dic = {
        'none': d_none,
        'black_bag': d_black_bag,
        'grey_bag': d_grey_bag,
        'grey_ens': d_grey_ens,
    }

    return ret_dic

In [None]:
def test_everything():
    ret_dic = {}
    for dts_name in SUPPORTED_DATASETS:
        d = test_dataset(dts_name=dts_name)
        ret_dic[dts_name] = d
        print(d)
    return ret_dic

In [None]:
# Sweep every model on the 'segment' dataset using the configured box_name.
# The bare last expression displays the nested best-parameter dict.
dts_name = 'segment'
segment_none = test_box(dts_name=dts_name, box_name=box_name)
print(dts_name)
segment_none

In [None]:
# Sweep every model on the 'mnist' dataset using the configured box_name.
# The bare last expression displays the nested best-parameter dict.
dts_name = 'mnist'
mnist_none = test_box(dts_name=dts_name, box_name=box_name)
print(dts_name)
mnist_none

In [None]:
# Sweep every model on the 'digits' dataset using the configured box_name.
# The bare last expression displays the nested best-parameter dict.
dts_name = 'digits'
digits_none = test_box(dts_name=dts_name, box_name=box_name)
print(dts_name)
digits_none

In [None]:
# Sweep every model on the 'covertype' dataset using the configured box_name.
# The bare last expression displays the nested best-parameter dict.
dts_name = 'covertype'
covertype_none = test_box(dts_name=dts_name, box_name=box_name)
print(dts_name)
covertype_none

In [None]:
# Sweep every model on the 'fall_detection' dataset using the configured box_name.
# The bare last expression displays the nested best-parameter dict.
dts_name = 'fall_detection'
fall_detection_none = test_box(dts_name=dts_name, box_name=box_name)
print(dts_name)
fall_detection_none

In [None]:
# Sweep every model on the 'pen_digits' dataset using the configured box_name.
# The bare last expression displays the nested best-parameter dict.
dts_name = 'pen_digits'
pen_digits_none = test_box(dts_name=dts_name, box_name=box_name)
print(dts_name)
pen_digits_none

In [None]:
# Sweep every model on the 'satellite' dataset using the configured box_name.
# The bare last expression displays the nested best-parameter dict.
dts_name = 'satellite'
satellite_none = test_box(dts_name=dts_name, box_name=box_name)
print(dts_name)
satellite_none

In [None]:
# Sweep every model on the 'vowel' dataset using the configured box_name.
# The bare last expression displays the nested best-parameter dict.
dts_name = 'vowel'
vowel_none = test_box(dts_name=dts_name, box_name=box_name)
print(dts_name)
vowel_none