In [1]:
from demo_utils.learning import get_model
from demo_utils.general import get_data
from demo_utils.general import SUPPORTED_DATASETS
from sklearn.model_selection import GridSearchCV
from IPython.display import Markdown as md

In [2]:
import warnings
# Suppress every warning raised from here on. Note that this also hides
# deprecation warnings emitted by the libraries used below.
warnings.filterwarnings('ignore')

In [3]:
%%javascript
// Replace the output area's _should_scroll hook with a function that always
// returns false.
// NOTE(review): presumably this keeps long cell outputs from being collapsed
// into a scrollable box -- confirm against the notebook front-end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false
}

<IPython.core.display.Javascript object>

In [4]:
def get_prefixes(box_name):
    """Return the parameter-name prefixes for the sampler and the model.

    The prefixes are what must be prepended to a hyper-parameter name so
    that it addresses the sampler or the model inside the estimator built
    for the given box type.

    Parameters
    ----------
    box_name : str
        One of 'none', 'grey_bag', 'grey_ens', 'black_bag' or 'black_ens'.

    Returns
    -------
    tuple of (str, str)
        ``(sampler_prefix, model_prefix)``.

    Raises
    ------
    ValueError
        If ``box_name`` is not one of the supported box types.
    """
    if box_name == 'none':
        sampler_prefix = 'sampler__'
        model_prefix = 'model__'
    elif box_name in ['grey_bag', 'grey_ens']:
        sampler_prefix = 'base_estimator__sampler__'
        model_prefix = 'base_estimator__model__'
    elif box_name in ['black_bag', 'black_ens']:
        sampler_prefix = 'sampler__'
        model_prefix = 'model__base_estimator__'
    else:
        # Fail loudly here instead of hitting an UnboundLocalError at the
        # return statement below.
        raise ValueError('Unsupported box_name: {!r}'.format(box_name))

    return sampler_prefix, model_prefix

In [5]:
# Settings shared by every tuning helper in this notebook.
dts_size = 1000      # forwarded to get_data as n_ins
n_components = 500   # only referenced from a commented-out set_params call in the visible cells
n_estim = 20         # forwarded to get_model as n_estim

# Candidate values, per model, for the hyper-parameter that is grid-searched.
model_h_params = {
    'dt': {
        'min_impurity_decrease': [pow(10, exponent) for exponent in range(-10, 1)],
    },
    'logit': {
        # (3) fixed (a large C so that there is no regularisation)
        'C': [pow(10, exponent) for exponent in range(-5, 4)],
    },
    'linear_svc': {
        'C': [pow(10, exponent) for exponent in range(-5, 4)],  # (-1, 3)
    },
}

# Candidate gamma values, per sampler.
gamma_options = {
    'rbf': {
        'gamma': [pow(10, exponent) for exponent in range(-5, 2)],  # (-5, 0)
    },
    'nystroem': {
        'gamma': [pow(10, exponent) for exponent in range(-5, 2)],
    },
}

In [6]:
def _prefixed(grid, prefix):
    """Return a copy of ``grid`` with ``prefix`` prepended to every key."""
    return {prefix + name: values for name, values in grid.items()}


def _grid_search_best_params(dts_name, box_name, model_name, sampler_name,
                             pca_bool, pca_first, tune_sampler):
    """Grid-search one model configuration and return its best parameters.

    Parameters
    ----------
    dts_name : str
        Dataset name forwarded to ``get_data``.
    box_name : str
        Box type forwarded to ``get_model`` and ``get_prefixes``.
    model_name : str
        Key into ``model_h_params`` and model name forwarded to ``get_model``.
    sampler_name : str
        Sampler name forwarded to ``get_model``; also the key into
        ``gamma_options`` when ``tune_sampler`` is true.
    pca_bool, pca_first
        Forwarded unchanged to ``get_model``.
    tune_sampler : bool
        Whether the sampler grid from ``gamma_options`` is searched too.

    Returns
    -------
    dict
        Best value found for each searched hyper-parameter, keyed by the
        un-prefixed parameter name (model parameters first, then sampler
        parameters).
    """
    model = get_model(
        model_name=model_name,
        model_params={},
        rbfsampler_gamma=None,
        nystroem_gamma=None,
        sampler_name=sampler_name,
        pca_bool=pca_bool,
        pca_first=pca_first,
        n_estim=n_estim,
        box_type=box_name,
    )

    model_grid = model_h_params[model_name]
    sampler_prefix, model_prefix = get_prefixes(box_name=box_name)
    sampler_grid = gamma_options[sampler_name] if tune_sampler else {}

    # Prefix every key (not just the first one) so that grids holding more
    # than one hyper-parameter are still addressed correctly.
    tunning_params = _prefixed(model_grid, model_prefix)
    tunning_params.update(_prefixed(sampler_grid, sampler_prefix))

    # NOTE(review): `iid` was deprecated in scikit-learn 0.22 and removed in
    # 0.24, so this call needs an older release -- confirm the pinned version
    # before dropping the argument.
    clf = GridSearchCV(model, tunning_params, cv=10, iid=False)

    data = get_data(dts_name, n_ins=dts_size)
    clf.fit(data['data_train'], data['target_train'])

    # Build a fresh dict instead of renaming keys inside clf.best_params_.
    best = clf.best_params_
    result = {name: best[model_prefix + name] for name in model_grid}
    result.update(
        {name: best[sampler_prefix + name] for name in sampler_grid})
    return result


def test_no_sampler_generic_pca(dts_name, box_name, model_name, with_pca):
    """Tune ``model_name`` with the 'identity' sampler, with or without PCA.

    Displays a level-5 markdown heading and returns the best model
    hyper-parameters as a dict keyed by un-prefixed parameter name.
    """
    display(md('#'*5 + ' ' + 'with pca=' + str(with_pca)))
    return _grid_search_best_params(
        dts_name=dts_name,
        box_name=box_name,
        model_name=model_name,
        sampler_name='identity',
        pca_bool=with_pca,
        pca_first=None,
        tune_sampler=False,
    )


def test_with_sampler_no_pca(dts_name, box_name, model_name, sampler_name):
    """Tune ``model_name`` together with ``sampler_name``, without PCA.

    Displays a level-5 markdown heading and returns the best model and
    sampler hyper-parameters as a dict keyed by un-prefixed parameter name.
    """
    display(md('#'*5 + ' ' + 'without pca'))
    return _grid_search_best_params(
        dts_name=dts_name,
        box_name=box_name,
        model_name=model_name,
        sampler_name=sampler_name,
        pca_bool=False,
        pca_first=None,
        tune_sampler=True,
    )


def test_with_sampler_with_pca(dts_name, box_name, model_name, sampler_name, pca_first):
    """Tune ``model_name`` together with ``sampler_name``, with PCA enabled.

    ``pca_first`` is forwarded to ``get_model``. Displays a level-5 markdown
    heading and returns the best model and sampler hyper-parameters as a
    dict keyed by un-prefixed parameter name.
    """
    # NOTE(review): the heading is the same for both values of pca_first, so
    # the two runs cannot be told apart in the rendered output.
    display(md('#'*5 + ' ' + 'with pca'))
    return _grid_search_best_params(
        dts_name=dts_name,
        box_name=box_name,
        model_name=model_name,
        sampler_name=sampler_name,
        pca_bool=True,
        pca_first=pca_first,
        tune_sampler=True,
    )

In [7]:
def test_no_sampler(dts_name, box_name, model_name):
    """Tune ``model_name`` without a sampler, first without PCA, then with it.

    Displays a level-4 markdown heading and returns a dict with the keys
    'no_pca' and 'pca', each holding the result of
    ``test_no_sampler_generic_pca`` for that setting.
    """
    display(md('#'*4 + ' ' + 'without sampler'))
    results = {}
    # Order matters for the rendered headings: no PCA first, then PCA.
    for label, use_pca in (('no_pca', False), ('pca', True)):
        results[label] = test_no_sampler_generic_pca(
            dts_name=dts_name,
            box_name=box_name,
            model_name=model_name,
            with_pca=use_pca,
        )
    return results


def test_with_sampler(dts_name, box_name, model_name, sampler_name):
    """Tune ``model_name`` with ``sampler_name`` under three PCA settings.

    Displays a level-4 markdown heading and returns a dict with the keys
    'no_pca', 'pca_first' and 'pca_last', evaluated in that order.
    """
    display(md('#'*4 + ' ' + 'with sampler'))
    shared_kwargs = dict(
        dts_name=dts_name,
        box_name=box_name,
        model_name=model_name,
        sampler_name=sampler_name,
    )
    # Dict displays evaluate their values top to bottom, which keeps the
    # original run order.
    return {
        'no_pca': test_with_sampler_no_pca(**shared_kwargs),
        'pca_first': test_with_sampler_with_pca(pca_first=True, **shared_kwargs),
        'pca_last': test_with_sampler_with_pca(pca_first=False, **shared_kwargs),
    }

In [8]:
def test_model(dts_name, box_name, model_name):
    """Tune ``model_name`` without a sampler and with each sampler.

    Displays a level-3 markdown heading with the model name, then runs
    ``test_no_sampler`` followed by ``test_with_sampler`` for the 'rbf' and
    'nystroem' samplers.

    Returns
    -------
    dict
        Results keyed by 'no_sampler', 'rff' and 'nystroem'.
    """
    display(md('#'*3 + ' ' + model_name))
    d_no_sampler = test_no_sampler(
        dts_name=dts_name, box_name=box_name, model_name=model_name)
    d_rff = test_with_sampler(
        dts_name=dts_name, box_name=box_name, model_name=model_name, sampler_name='rbf')
    d_nystroem = test_with_sampler(
        dts_name=dts_name, box_name=box_name, model_name=model_name, sampler_name='nystroem')

    ret_dic = {
        'no_sampler': d_no_sampler,
        'rff': d_rff,
        # Key was misspelled 'nystrem'; the hand-built result dicts later in
        # the notebook use 'nystroem'.
        'nystroem': d_nystroem,
    }

    return ret_dic

In [9]:
def test_box(dts_name, box_name):
    """Run ``test_model`` for every model under one box type.

    Displays a level-2 markdown heading with the box name and returns a dict
    keyed by model name ('dt', 'logit', 'linear_svc'), evaluated in that
    order.
    """
    display(md('#'*2 + ' ' + box_name))
    per_model = {}
    for current_model in ('dt', 'logit', 'linear_svc'):
        per_model[current_model] = test_model(
            dts_name=dts_name, box_name=box_name, model_name=current_model)
    return per_model

In [10]:
def test_dataset(dts_name):
    """Run ``test_box`` for each box type on one dataset.

    Displays a level-1 markdown heading with the dataset name and returns a
    dict keyed by 'box_none', 'box_black_bag', 'box_grey_bag' and
    'box_grey_ens', evaluated in that order.
    """
    display(md('#'*1 + ' ' + dts_name))

    per_box = {}
    for current_box in ('none', 'black_bag', 'grey_bag', 'grey_ens'):
        per_box['box_' + current_box] = test_box(
            dts_name=dts_name, box_name=current_box)

    return per_box

In [11]:
def test_everything():
    """Run ``test_dataset`` on every entry of ``SUPPORTED_DATASETS``.

    Prints each dataset's result as soon as it is available and returns a
    dict mapping dataset name to that result.
    """
    all_results = {}
    for current_dataset in SUPPORTED_DATASETS:
        all_results[current_dataset] = test_dataset(dts_name=current_dataset)
        print(all_results[current_dataset])
    return all_results

In [None]:
# Full sweep over every supported dataset.
# NOTE(review): this cell carries no execution count, so it was not run to
# completion in the saved session.
d = test_everything()

In [12]:
# NOTE(review): `fail` is not defined in any visible cell, so this cell raises
# NameError. Presumably a deliberate stop so that "Run All" halts here --
# confirm, and consider an explicit `raise` with a message instead.
fail

NameError: name 'fail' is not defined

In [31]:
# Configuration for the manual decision-tree run in the cells below.
dts_name, box_name, model_name = 'mnist', 'none', 'dt'

In [32]:
# Tune the selected model without a sampler.
d1 = test_no_sampler(
    dts_name=dts_name,
    box_name=box_name,
    model_name=model_name,
)

#### without sampler

##### with pca=False

##### with pca=True

In [33]:
# Tune the selected model together with the 'rbf' sampler.
d2 = test_with_sampler(
    dts_name=dts_name,
    box_name=box_name,
    model_name=model_name,
    sampler_name='rbf',
)

#### with sampler

##### without pca

##### with pca

##### with pca

In [34]:
# Tune the selected model together with the 'nystroem' sampler.
d3 = test_with_sampler(
    dts_name=dts_name,
    box_name=box_name,
    model_name=model_name,
    sampler_name='nystroem',
)

#### with sampler

##### without pca

##### with pca

##### with pca

In [36]:
# Collect the three decision-tree runs under one dict.
dt_dic = dict(no_sampler=d1, rff=d2, nystroem=d3)

In [37]:
# Show the best hyper-parameters collected for the decision tree.
dt_dic

{'no_sampler': {'no_pca': {'min_impurity_decrease': 1e-05},
  'pca': {'min_impurity_decrease': 0.01}},
 'rff': {'no_pca': {'min_impurity_decrease': 1e-06, 'gamma': 1e-05},
  'pca_first': {'min_impurity_decrease': 0.0001, 'gamma': 1e-05},
  'pca_last': {'min_impurity_decrease': 0.01, 'gamma': 0.0001}},
 'nystroem': {'no_pca': {'min_impurity_decrease': 1e-08, 'gamma': 0.001},
  'pca_first': {'min_impurity_decrease': 0.0001, 'gamma': 0.0001},
  'pca_last': {'min_impurity_decrease': 1e-06, 'gamma': 0.001}}}

In [38]:
# Configuration for the manual logistic-regression run in the cells below.
dts_name, box_name, model_name = 'mnist', 'none', 'logit'

In [39]:
# Tune the selected model without a sampler.
d1 = test_no_sampler(
    dts_name=dts_name,
    box_name=box_name,
    model_name=model_name,
)

#### without sampler

##### with pca=False

##### with pca=True

In [40]:
# Tune the selected model together with the 'rbf' sampler.
d2 = test_with_sampler(
    dts_name=dts_name,
    box_name=box_name,
    model_name=model_name,
    sampler_name='rbf',
)

#### with sampler

##### without pca

##### with pca

##### with pca

In [41]:
# Tune the selected model together with the 'nystroem' sampler.
d3 = test_with_sampler(
    dts_name=dts_name,
    box_name=box_name,
    model_name=model_name,
    sampler_name='nystroem',
)

#### with sampler

##### without pca

##### with pca

##### with pca

In [42]:
# Collect the three logistic-regression runs under one dict.
logit_dic = dict(no_sampler=d1, rff=d2, nystroem=d3)

In [43]:
# Show the best hyper-parameters collected for logistic regression.
logit_dic

{'no_sampler': {'no_pca': {'C': 0.01}, 'pca': {'C': 0.01}},
 'rff': {'no_pca': {'C': 1000, 'gamma': 1e-05},
  'pca_first': {'C': 100, 'gamma': 0.0001},
  'pca_last': {'C': 100, 'gamma': 0.0001}},
 'nystroem': {'no_pca': {'C': 100, 'gamma': 0.0001},
  'pca_first': {'C': 1000, 'gamma': 1e-05},
  'pca_last': {'C': 100, 'gamma': 0.0001}}}

In [44]:
# Configuration for the manual linear-SVC run in the cells below.
dts_name, box_name, model_name = 'mnist', 'none', 'linear_svc'

In [45]:
# Tune the selected model without a sampler.
d1 = test_no_sampler(
    dts_name=dts_name,
    box_name=box_name,
    model_name=model_name,
)

#### without sampler

##### with pca=False

##### with pca=True

In [46]:
# Tune the selected model together with the 'rbf' sampler.
d2 = test_with_sampler(
    dts_name=dts_name,
    box_name=box_name,
    model_name=model_name,
    sampler_name='rbf',
)

#### with sampler

##### without pca

##### with pca

##### with pca

In [47]:
# Tune the selected model together with the 'nystroem' sampler.
d3 = test_with_sampler(
    dts_name=dts_name,
    box_name=box_name,
    model_name=model_name,
    sampler_name='nystroem',
)

#### with sampler

##### without pca

##### with pca

##### with pca

In [48]:
# Collect the three linear-SVC runs under one dict.
linear_svc_dic = dict(no_sampler=d1, rff=d2, nystroem=d3)

In [49]:
# Show the best hyper-parameters collected for the linear SVC.
linear_svc_dic

{'no_sampler': {'no_pca': {'C': 0.0001}, 'pca': {'C': 0.0001}},
 'rff': {'no_pca': {'C': 100, 'gamma': 1e-05},
  'pca_first': {'C': 100, 'gamma': 1e-05},
  'pca_last': {'C': 10, 'gamma': 0.0001}},
 'nystroem': {'no_pca': {'C': 10, 'gamma': 0.0001},
  'pca_first': {'C': 1, 'gamma': 0.001},
  'pca_last': {'C': 10, 'gamma': 0.001}}}

In [50]:
# Per-model results for the 'none' box, keyed by model name.
none_dic = dict(dt=dt_dic, logit=logit_dic, linear_svc=linear_svc_dic)

In [51]:
# Show the combined results for every model under the 'none' box.
none_dic

{'dt': {'no_sampler': {'no_pca': {'min_impurity_decrease': 1e-05},
   'pca': {'min_impurity_decrease': 0.01}},
  'rff': {'no_pca': {'min_impurity_decrease': 1e-06, 'gamma': 1e-05},
   'pca_first': {'min_impurity_decrease': 0.0001, 'gamma': 1e-05},
   'pca_last': {'min_impurity_decrease': 0.01, 'gamma': 0.0001}},
  'nystroem': {'no_pca': {'min_impurity_decrease': 1e-08, 'gamma': 0.001},
   'pca_first': {'min_impurity_decrease': 0.0001, 'gamma': 0.0001},
   'pca_last': {'min_impurity_decrease': 1e-06, 'gamma': 0.001}}},
 'logit': {'no_sampler': {'no_pca': {'C': 0.01}, 'pca': {'C': 0.01}},
  'rff': {'no_pca': {'C': 1000, 'gamma': 1e-05},
   'pca_first': {'C': 100, 'gamma': 0.0001},
   'pca_last': {'C': 100, 'gamma': 0.0001}},
  'nystroem': {'no_pca': {'C': 100, 'gamma': 0.0001},
   'pca_first': {'C': 1000, 'gamma': 1e-05},
   'pca_last': {'C': 100, 'gamma': 0.0001}}},
 'linear_svc': {'no_sampler': {'no_pca': {'C': 0.0001}, 'pca': {'C': 0.0001}},
  'rff': {'no_pca': {'C': 100, 'gamma': 1e-

In [18]:
# NOTE(review): this cell's execution count (18) is lower than that of the
# cells above, and the values shown below differ from the `dt_dic` displayed
# earlier, so this output comes from a different run. Re-run or remove it.
dt_dic

{'no_sampler': {'no_pca': {'min_impurity_decrease': 0.0001},
  'pca': {'min_impurity_decrease': 1e-05}},
 'rff': {'no_pca': {'min_impurity_decrease': 1e-09, 'gamma': 1e-05},
  'pca_first': {'min_impurity_decrease': 1e-06, 'gamma': 1e-05},
  'pca_last': {'min_impurity_decrease': 0.001, 'gamma': 0.01}},
 'nystroem': {'no_pca': {'min_impurity_decrease': 0.001, 'gamma': 0.0001},
  'pca_first': {'min_impurity_decrease': 1e-05, 'gamma': 1e-05},
  'pca_last': {'min_impurity_decrease': 1e-08, 'gamma': 0.1}}}