In [None]:
import pandas as pd
import os
import numpy as np
import time

from BPt import *
from BPt.extensions import SurfLabels, SurfMaps
from sklearn.linear_model import ElasticNetCV

import nevergrad as ng
import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [None]:
def get_l_model(parcel):
    """Build a 'light gbm' model nested inside a parcel-specific pipeline.

    Parameters
    ----------
    parcel : str
        Parcellation name. It is concatenated into the labels path
        '../extra_random_parcels/<parcel>.npy' and into the loader's
        cache location '/home/sage/cache/<parcel>'.

    Returns
    -------
    Model
        A `Model` wrapping a `Model_Pipeline` made of the surface-label
        loader, a 'robust' scaler and the 'light gbm' model (no imputers).
    """
    labels_file = '../extra_random_parcels/' + parcel + '.npy'
    cache_dir = '/home/sage/cache/' + parcel

    # Hyper-parameter search attached to the inner model; the CV object is
    # configured with groups='rel_family_id'.
    family_cv = CV(groups='rel_family_id')
    search = Param_Search(search_type='TwoPointsDE',
                          n_iter=180,
                          splits=.25,
                          n_repeats=1,
                          cv=family_cv)
    lgbm = Model('light gbm', params=1, param_search=search)

    # Loader extracting ROIs from the parcel's label file, cached per parcel.
    surf_loader = Loader(SurfLabels(labels=labels_file), cache_loc=cache_dir)

    inner_pipeline = Model_Pipeline(imputers=None,
                                    loaders=surf_loader,
                                    scalers=Scaler('robust'),
                                    model=lgbm)

    # The whole pipeline is wrapped as a single Model.
    return Model(inner_pipeline)

In [None]:
def get_p_model(parcel):
    """Build an 'elastic' model nested inside a parcel-specific pipeline.

    Parameters
    ----------
    parcel : str
        Parcellation name. It is concatenated into the labels path
        '../extra_random_parcels/<parcel>.npy' and into the loader's
        cache location '/home/sage/cache/<parcel>'.

    Returns
    -------
    Model
        A `Model` wrapping a `Model_Pipeline` made of the surface-label
        loader, a 'robust' scaler and the 'elastic' model (no imputers).
    """
    labels_file = '../extra_random_parcels/' + parcel + '.npy'
    cache_dir = '/home/sage/cache/' + parcel

    # Random search (60 iterations, splits=3) with a CV object configured
    # with groups='rel_family_id'.
    family_cv = CV(groups='rel_family_id')
    search = Param_Search(search_type='RandomSearch',
                          n_iter=60,
                          splits=3,
                          n_repeats=1,
                          cv=family_cv)

    # 'tol' is passed through extra_params to the underlying estimator.
    elastic_net = Model('elastic',
                        params=1,
                        param_search=search,
                        extra_params={'tol': 1e-3})

    surf_loader = Loader(SurfLabels(labels=labels_file), cache_loc=cache_dir)

    inner_pipeline = Model_Pipeline(imputers=None,
                                    loaders=surf_loader,
                                    scalers=Scaler('robust'),
                                    model=elastic_net)

    # The whole pipeline is wrapped as a single Model.
    return Model(inner_pipeline)

In [None]:
def get_voting(parcels):
    """Build a pipeline whose model is a voting ensemble over parcels.

    Parameters
    ----------
    parcels : iterable of str
        Parcellation names; one `get_p_model` sub-model is created per entry.

    Returns
    -------
    Model_Pipeline
        Pipeline with no imputers and a "voting regressor" `Ensemble`
        as its model.
    """
    per_parcel_models = list(map(get_p_model, parcels))

    ensemble = Ensemble(obj="voting regressor",
                        models=per_parcel_models,
                        n_jobs_type='models')

    return Model_Pipeline(imputers=None, model=ensemble)

def get_stacking(parcels):
    """Build a pipeline whose model is a stacking ensemble over parcels.

    Parameters
    ----------
    parcels : iterable of str
        Parcellation names; one `get_p_model` sub-model is created per entry.

    Returns
    -------
    Model_Pipeline
        Pipeline with no imputers and a "stacking regressor" `Ensemble`
        as its model. The ensemble's base model is a 'ridge' `Model`
        with its own random search.
    """
    # Search settings for the 'ridge' base model only (no cv argument given).
    ridge_search = Param_Search(search_type='RandomSearch',
                                n_iter=60,
                                splits=3,
                                n_repeats=1)
    ridge = Model('ridge', params=1, param_search=ridge_search)

    # Splits handed to the ensemble; the CV is configured with
    # groups='rel_family_id'.
    ensemble_splits = CV_Splits(cv=CV(groups='rel_family_id'),
                                splits=3,
                                n_repeats=1)

    per_parcel_models = list(map(get_p_model, parcels))

    ensemble = Ensemble(obj="stacking regressor",
                        models=per_parcel_models,
                        cv_splits=ensemble_splits,
                        base_model=ridge,
                        n_jobs_type='models')

    return Model_Pipeline(imputers=None, model=ensemble)

def get_loader(parcel, scope):
    """Create a surface-label `Loader` for one parcel, bound to a scope.

    Parameters
    ----------
    parcel : str
        Parcellation name, concatenated into
        '../extra_random_parcels/<parcel>.npy' and '/home/sage/cache/<parcel>'.
    scope
        Value forwarded unchanged as the loader's `scope` argument.

    Returns
    -------
    Loader
    """
    labels_file = '../extra_random_parcels/' + parcel + '.npy'
    cache_dir = '/home/sage/cache/' + parcel
    return Loader(SurfLabels(labels=labels_file),
                  cache_loc=cache_dir,
                  scope=scope)

def get_stacking_alt(parcels, search_type='RandomSearch', n_iter=60):
    """Build a stacking pipeline with one scoped loader and model per parcel.

    The i-th parcel (0-based) is paired with scope ``str(i + 1)``: an
    'elastic' model and a loader are both created with that scope.

    Parameters
    ----------
    parcels : sequence of str
        Parcellation names (must support ``len``).
    search_type : str, default 'RandomSearch'
        Forwarded to `Param_Search`.
    n_iter : int, default 60
        Forwarded to `Param_Search`.

    Returns
    -------
    Model_Pipeline
        Pipeline with no imputers, the per-parcel loaders, and a
        "stacking regressor" `Ensemble` carrying the parameter search.
    """
    family_cv = CV(groups='rel_family_id')

    # A single search object attached to the ensemble as a whole.
    search = Param_Search(search_type=search_type,
                          n_iter=n_iter,
                          splits=3,
                          n_repeats=1)

    # Scope names '1', '2', ... -- one per parcel, shared by model and loader.
    scope_names = [str(position) for position in range(1, len(parcels) + 1)]

    scoped_models = [
        Model('elastic', params=1, extra_params={'tol': 1e-3}, scope=name)
        for name in scope_names
    ]

    ensemble = Ensemble(obj="stacking regressor",
                        models=scoped_models,
                        cv_splits=CV_Splits(cv=family_cv, splits=3, n_repeats=1),
                        base_model=Model('ridge', params=1),
                        param_search=search)

    scoped_loaders = [
        get_loader(parcel, name)
        for parcel, name in zip(parcels, scope_names)
    ]

    return Model_Pipeline(imputers=None,
                          loaders=scoped_loaders,
                          model=ensemble)


def evaluate(pipeline, base_dtype='float32', target=0):
    """Run `ML.Evaluate` on a pipeline with this notebook's fixed settings.

    Uses the notebook-global `ML` object, so `ML` must already be loaded
    when this function is called.

    Parameters
    ----------
    pipeline
        The pipeline object passed as the first argument to `ML.Evaluate`.
    base_dtype : str, default 'float32'
        Forwarded as `base_dtype`.
    target : default 0
        Forwarded to `Problem_Spec(target=...)`.

    Returns
    -------
    Whatever `ML.Evaluate` returns.
    """
    problem = Problem_Spec(target=target)
    family_cv = CV(groups='rel_family_id')

    # splits=5 with only_fold=0 is passed as-is to ML.Evaluate.
    return ML.Evaluate(pipeline,
                       problem,
                       splits=5,
                       n_repeats=1,
                       cv=family_cv,
                       only_fold=0,
                       base_dtype=base_dtype)

In [None]:
def eval_choice(choices, search_type='RandomSearch', n_iter=180):
    """Evaluate an 'elastic' pipeline where the labels file is a searched choice.

    Parameters
    ----------
    choices : sequence of str
        Candidate label-file paths. ``choices[0]`` initialises the
        `SurfLabels` object and the full sequence is given to the loader
        as ``ng.p.Choice(choices)`` under the 'labels' parameter.
    search_type : str, default 'RandomSearch'
        Forwarded to `Param_Search`.
    n_iter : int, default 180
        Forwarded to `Param_Search`.

    Returns
    -------
    None
        The result of `evaluate` is not returned.
    """
    search = Param_Search(search_type=search_type,
                          n_iter=n_iter,
                          splits=3,
                          n_repeats=1)

    # The initial labels value is only a starting point; the 'labels'
    # parameter is exposed to the search as a nevergrad Choice.
    initial_rois = SurfLabels(labels=choices[0])
    labels_space = {'labels': ng.p.Choice(choices)}
    surf_loader = Loader(initial_rois,
                         params=labels_space,
                         cache_loc='/home/sage/cache/search_test')

    elastic_net = Model('elastic', params=1, extra_params={'tol': 1e-3})

    # The search is attached at the pipeline level.
    searched_pipeline = Model_Pipeline(imputers=None,
                                       loaders=surf_loader,
                                       model=elastic_net,
                                       scalers=Scaler('robust'),
                                       param_search=search)

    evaluate(searched_pipeline)

09:26
38:45

Scorer:  matthews
Mean Validation score:  0.5562196772456983
Std in Validation score:  0.0

Scorer:  roc_auc
Mean Validation score:  0.8673422488518034
Std in Validation score:  0.0

Scorer:  balanced_accuracy
Mean Validation score:  0.7780889467083174
Std in Validation score:  0.0


In [None]:
# Deliberate barrier: abort "Run All" at this point so the experiment cells
# below are only executed by hand. An explicit raise replaces the bare name
# `stop`, which presumably halted only by raising NameError and would silently
# do nothing if a wildcard import ever defined a name called `stop`.
raise RuntimeError("Intentional stop: run the cells below manually.")

### Parcel as a hyper-parameter

In [None]:
# Load the saved ML object and set its n_jobs attribute to 8.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 8
# Candidate label files: random_300_0.npy ... random_300_9.npy.
choices = ['../extra_random_parcels/random_300_' + str(i) + '.npy' for i in range(10)]

In [None]:
# Reload the saved ML object from disk.
# NOTE(review): unlike the other setup cells in this notebook, n_jobs is not
# set after this reload -- confirm that is intended.
ML = Load('../data/Base_consol.ML')

# Search settings handed to the 'elastic' model below.
param_search = Param_Search(search_type='RandomSearch',
                            n_iter=60,
                            splits=3,
                            n_repeats=1)

# Initial labels file; the loader's 'labels' parameter is additionally given a
# nevergrad Choice over `choices`.
# NOTE(review): `choices` is not defined in this cell -- it must already exist
# from an earlier cell.
roi = SurfLabels(labels='../extra_random_parcels/random_100_0.npy')
loader = Loader(roi, params={'labels': ng.p.Choice(choices)}, cache_loc='/home/sage/cache/search_test')


model = Model('elastic', params=1, extra_params={'tol': 1e-3}, param_search=param_search)

# The pipeline gets its own, separate Param_Search('grid').
# NOTE(review): presumably this grid search is what iterates over the loader's
# label choices while the model's search is nested inside -- confirm.
pipeline = Model_Pipeline(loaders=loader,
                          model=model,
                          scalers=Scaler('robust'),
                          param_search=Param_Search('grid'))

evaluate(pipeline)

In [None]:
# Label file as a searched choice, using 'RandomSearch' with 360 iterations.
eval_choice(choices, search_type='RandomSearch', n_iter=360)

In [None]:
# Label file as a searched choice, using 'TwoPointsDE' with 360 iterations.
eval_choice(choices, search_type='TwoPointsDE', n_iter=360)

In [None]:
# Deliberate barrier: abort "Run All" at this point so the ensemble
# experiments below are only executed by hand. An explicit raise replaces the
# bare name `stop`, which presumably halted only by raising NameError and would
# silently do nothing if a wildcard import ever defined a name called `stop`.
raise RuntimeError("Intentional stop: run the ensemble cells below manually.")

In [None]:
# Load the saved ML object and set its n_jobs attribute to 8.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 8
# Parcel names random_100_0 ... random_100_9.
parcels = ['random_100_' + str(i) for i in range(10)]

In [None]:
# Voting ensemble over the current `parcels` list, evaluated via ML.Evaluate.
pipeline = get_voting(parcels)
results = evaluate(pipeline)

In [None]:
# Stacking ensemble over the current `parcels` list, evaluated via ML.Evaluate.
pipeline = get_stacking(parcels)
results = evaluate(pipeline)

In [None]:
# Switch to parcel names random_200_0 ... random_200_9, then evaluate the
# voting ensemble on them.
parcels = ['random_200_' + str(i) for i in range(10)]
pipeline = get_voting(parcels)
results = evaluate(pipeline)

In [None]:
# Stacking ensemble over the current `parcels` list (set in an earlier cell).
pipeline = get_stacking(parcels)
results = evaluate(pipeline)

In [None]:
# Switch to parcel names random_300_0 ... random_300_9, then evaluate the
# voting ensemble on them.
parcels = ['random_300_' + str(i) for i in range(10)]
pipeline = get_voting(parcels)
results = evaluate(pipeline)

In [None]:
# Stacking ensemble over the current `parcels` list (set in an earlier cell).
pipeline = get_stacking(parcels)
results = evaluate(pipeline)

In [None]:
# Reload the saved ML object and set its n_jobs attribute to 8.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 8

# Parcel names random_100_0, random_200_0, ..., random_1000_0
# (presumably one parcellation per size -- TODO confirm).
parcels = ['random_' + str(i+1) + '00_0' for i in range(10)]

In [None]:
# Voting ensemble over the current `parcels` list, evaluated via ML.Evaluate.
pipeline = get_voting(parcels)
results = evaluate(pipeline)

In [None]:
# Stacking ensemble over the current `parcels` list, evaluated via ML.Evaluate.
pipeline = get_stacking(parcels)
results = evaluate(pipeline)

Keep for now: experimental stacking variant (`get_stacking_alt`) that uses one data scope per parcel.

In [None]:
# Reload the saved ML object and set its n_jobs attribute to 8.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 8

# get_stacking_alt gives the i-th parcel the scope str(i + 1) for both its
# model and its loader, so one copy of the 'consolidated' column is needed per
# parcel. The keys were previously hard-coded to '1', '2', '3', which only
# matches a 3-element `parcels`; deriving them from len(parcels) keeps that
# case identical and covers longer lists.
# NOTE(review): `parcels` is not defined in this cell -- it must already exist
# from an earlier cell.
scope_keys = [str(i + 1) for i in range(len(parcels))]
for key in scope_keys:
    ML.all_data[key] = ML.all_data['consolidated'].copy()
ML.all_data.drop('consolidated', axis=1, inplace=True)
ML.Data_Scopes.data_keys = scope_keys

pipeline = get_stacking_alt(parcels, search_type='RandomSearch', n_iter=180)
results = evaluate(pipeline)