In [1]:
import pandas as pd
import os
import numpy as np
import time

from BPt import *
from BPt.extensions import SurfLabels, SurfMaps
from sklearn.linear_model import ElasticNetCV

import nevergrad as ng
import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [2]:
def get_l_model(parcel):
    """Build a nested 'light gbm' model fed by a single surface parcellation.

    Parameters
    ----------
    parcel : str
        Base name of the parcellation. It is resolved to
        '../extra_random_parcels/<parcel>.npy' and also names the cache
        sub-folder under '/home/sage/cache/'.

    Returns
    -------
    Model
        A Model wrapping a Model_Pipeline made of the parcellation loader,
        a 'robust' scaler and the 'light gbm' model, where the 'light gbm'
        model carries its own TwoPointsDE parameter search.
    """
    # Search attached to the light gbm model itself; its CV is grouped on
    # 'rel_family_id'.
    search = Param_Search(
        search_type='TwoPointsDE',
        n_iter=180,
        splits=.25,
        n_repeats=1,
        cv=CV(groups='rel_family_id'),
    )
    gbm = Model('light gbm', params=1, param_search=search)

    # Loader that applies the parcellation and caches under a per-parcel folder.
    surf_labels = SurfLabels(labels='../extra_random_parcels/' + parcel + '.npy')
    parcel_loader = Loader(surf_labels, cache_loc='/home/sage/cache/' + parcel)

    inner_pipe = Model_Pipeline(
        imputers=None,
        loaders=parcel_loader,
        scalers=Scaler('robust'),
        model=gbm,
    )
    return Model(inner_pipe)

In [3]:
def get_p_model(parcel):
    """Build a nested elastic-net model fed by a single surface parcellation.

    Parameters
    ----------
    parcel : str
        Base name of the parcellation. It is resolved to
        '../extra_random_parcels/<parcel>.npy' and also names the cache
        sub-folder under '/home/sage/cache/'.

    Returns
    -------
    Model
        A Model wrapping a Model_Pipeline made of the parcellation loader,
        a 'robust' scaler and the 'elastic' model, where the 'elastic'
        model carries its own RandomSearch parameter search.
    """
    # Search attached to the elastic model itself; its CV is grouped on
    # 'rel_family_id'.
    search = Param_Search(
        search_type='RandomSearch',
        n_iter=60,
        splits=3,
        n_repeats=1,
        cv=CV(groups='rel_family_id'),
    )
    elastic_net = Model(
        'elastic',
        params=1,
        param_search=search,
        extra_params={'tol': 1e-3},
    )

    # Loader that applies the parcellation and caches under a per-parcel folder.
    surf_labels = SurfLabels(labels='../extra_random_parcels/' + parcel + '.npy')
    parcel_loader = Loader(surf_labels, cache_loc='/home/sage/cache/' + parcel)

    inner_pipe = Model_Pipeline(
        imputers=None,
        loaders=parcel_loader,
        scalers=Scaler('robust'),
        model=elastic_net,
    )
    return Model(inner_pipe)

In [4]:
def get_voting(parcels):
    """Build a pipeline whose model is a voting ensemble over parcellations.

    Parameters
    ----------
    parcels : iterable of str
        Parcellation base names; one sub-model is built per entry with
        get_p_model.

    Returns
    -------
    Model_Pipeline
        Pipeline with no imputers whose model is a "voting regressor"
        Ensemble of the per-parcel sub-models.
    """
    sub_models = []
    for name in parcels:
        sub_models.append(get_p_model(name))

    ensemble = Ensemble(
        obj="voting regressor",
        models=sub_models,
        n_jobs_type='models',
    )
    return Model_Pipeline(imputers=None, model=ensemble)

def get_stacking(parcels):
    """Build a pipeline whose model is a stacking ensemble over parcellations.

    Parameters
    ----------
    parcels : iterable of str
        Parcellation base names; one sub-model is built per entry with
        get_p_model.

    Returns
    -------
    Model_Pipeline
        Pipeline with no imputers whose model is a "stacking regressor"
        Ensemble. The ensemble's base_model is a 'ridge' model with its own
        RandomSearch, and its cv_splits use 3 splits grouped on
        'rel_family_id'.
    """
    # 'ridge' model passed as the ensemble's base_model, tuned by random search.
    ridge_search = Param_Search(
        search_type='RandomSearch',
        n_iter=60,
        splits=3,
        n_repeats=1,
    )
    ridge = Model('ridge', params=1, param_search=ridge_search)

    # Splits handed to the ensemble, grouped on 'rel_family_id'.
    family_splits = CV_Splits(
        cv=CV(groups='rel_family_id'),
        splits=3,
        n_repeats=1,
    )

    sub_models = []
    for name in parcels:
        sub_models.append(get_p_model(name))

    ensemble = Ensemble(
        obj="stacking regressor",
        models=sub_models,
        cv_splits=family_splits,
        base_model=ridge,
        n_jobs_type='models',
    )
    return Model_Pipeline(imputers=None, model=ensemble)

def get_loader(parcel, scope):
    """Return a Loader that applies one surface parcellation to a given scope.

    Parameters
    ----------
    parcel : str
        Base name of the parcellation. It is resolved to
        '../extra_random_parcels/<parcel>.npy' and also names the cache
        sub-folder under '/home/sage/cache/'.
    scope
        Passed through unchanged as the Loader's `scope` argument.

    Returns
    -------
    Loader
    """
    surf_labels = SurfLabels(labels='../extra_random_parcels/' + parcel + '.npy')
    return Loader(
        surf_labels,
        cache_loc='/home/sage/cache/' + parcel,
        scope=scope,
    )

def get_stacking_alt(parcels, search_type='RandomSearch', n_iter=60):
    """Build a stacking pipeline with one scoped loader/model pair per parcel.

    The i-th parcel (counting from 1) gets a loader and an 'elastic' model
    that both use the scope string str(i). A single parameter search is
    attached to the ensemble rather than to each sub-model.

    Parameters
    ----------
    parcels : sequence of str
        Parcellation base names (must support len()).
    search_type : str, default 'RandomSearch'
        Forwarded to Param_Search as `search_type`.
    n_iter : int, default 60
        Forwarded to Param_Search as `n_iter`.

    Returns
    -------
    Model_Pipeline
        Pipeline with no imputers, the scoped loaders, and the
        "stacking regressor" Ensemble as its model.
    """
    family_cv = CV(groups='rel_family_id')

    # Search attached to the ensemble as a whole.
    search = Param_Search(
        search_type=search_type,
        n_iter=n_iter,
        splits=3,
        n_repeats=1,
    )

    # One elastic model per parcel, scoped '1', '2', ... in parcel order.
    sub_models = [
        Model('elastic', params=1, extra_params={'tol': 1e-3}, scope=str(number))
        for number in range(1, len(parcels) + 1)
    ]

    ensemble = Ensemble(
        obj="stacking regressor",
        models=sub_models,
        cv_splits=CV_Splits(cv=family_cv, splits=3, n_repeats=1),
        base_model=Model('ridge', params=1),
        param_search=search,
    )

    # Matching loaders: the parcel at position i feeds scope str(i + 1).
    scoped_loaders = [
        get_loader(name, str(number))
        for number, name in enumerate(parcels, start=1)
    ]

    return Model_Pipeline(
        imputers=None,
        loaders=scoped_loaders,
        model=ensemble,
    )


def evaluate(pipeline, base_dtype='float32', target=0):
    """Evaluate `pipeline` with the notebook-global `ML` object.

    Calls ML.Evaluate with splits=5, n_repeats=1, only_fold=0 and a CV
    grouped on 'rel_family_id'. `ML` must already be defined in the notebook
    namespace when this function is called.

    Parameters
    ----------
    pipeline
        The pipeline object handed to ML.Evaluate.
    base_dtype : str, default 'float32'
        Forwarded to ML.Evaluate as `base_dtype`.
    target : default 0
        Forwarded to Problem_Spec as `target`.

    Returns
    -------
    Whatever ML.Evaluate returns.
    """
    eval_settings = dict(
        splits=5,
        n_repeats=1,
        cv=CV(groups='rel_family_id'),
        only_fold=0,
        base_dtype=base_dtype,
    )
    spec = Problem_Spec(target=target)
    return ML.Evaluate(pipeline, spec, **eval_settings)

In [5]:
def eval_choice(choices, search_type='RandomSearch', n_iter=180):
    """Evaluate an elastic-net pipeline where the parcellation is a searched parameter.

    The loader's 'labels' parameter is declared as a nevergrad Choice over
    `choices`, and a single pipeline-level parameter search covers the
    whole pipeline.

    Parameters
    ----------
    choices : sequence of str
        Paths of candidate parcellation label files. The first entry is only
        used to initialise the SurfLabels object, so the sequence must be
        non-empty.
    search_type : str, default 'RandomSearch'
        Forwarded to Param_Search as `search_type`.
    n_iter : int, default 180
        Forwarded to Param_Search as `n_iter`.

    Returns
    -------
    The value returned by evaluate(pipeline). Previously the result was
    computed and discarded, so callers could not inspect the scores.
    """
    param_search = Param_Search(search_type=search_type,
                                n_iter=n_iter,
                                splits=3,
                                n_repeats=1)

    # Placeholder initial labels; the 'labels' parameter below is what the
    # search varies.
    roi = SurfLabels(labels=choices[0])

    loader = Loader(roi,
                    params={'labels': ng.p.Choice(choices)},
                    cache_loc='/home/sage/cache/search_test')

    # Elastic net
    model = Model('elastic', params=1, extra_params={'tol': 1e-3})

    pipeline = Model_Pipeline(imputers=None,
                              loaders=loader,
                              model=model,
                              scalers=Scaler('robust'),
                              param_search=param_search)

    # Hand the evaluation results back to the caller instead of dropping them.
    return evaluate(pipeline)

In [6]:
# Load the previously saved ML object from disk; a confirmation message is
# printed when the load succeeds.
ML = Load('../data/Base_consol.ML')

ML object loaded from save!


In [8]:
# Display the data table held by the loaded ML object.
# NOTE(review): the rendered output lists subject identifiers; consider
# clearing it before sharing the notebook.
ML.all_data

Unnamed: 0_level_0,consolidated,rel_family_id_Strat,anthro_height_calc,anthro_weight_calc,anthro_waist_cm,cbcl_scr_syn_rulebreak_r,interview_age,pea_wiscv_trs,neighb_phenx_ss_mean_p,macvs_ss_r_p,...,devhx_18_mnths_breast_fed_p_binary,devhx_distress_at_birth_binary,devhx_mother_probs_binary,devhx_ss_alcohol_avg_p_binary,devhx_ss_marijuana_amt_p_binary,screentime_week_p_binary,screentime_weekend_p_binary,ksads_adhd_composite_binary,ksads_bipolar_composite_binary,ksads_OCD_composite_binary
src_subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NDAR_INV003RTV85,3052,7326,56.5,93.000000,31.00,1.0,131.0,21.0,5.000000,4.14,...,1,1,1,0,0,0,0,0,0,0
NDAR_INV007W6H7B,492,3976,56.5,82.800000,26.75,0.0,126.0,18.0,4.666667,2.00,...,1,1,0,0,0,0,0,0,0,0
NDAR_INV00BD7VDC,7764,3143,57.5,76.800000,23.50,0.0,112.0,21.0,5.000000,1.29,...,1,0,0,1,0,0,0,1,0,1
NDAR_INV00CY2MDM,5921,4548,56.5,91.500000,30.00,3.0,130.0,16.0,3.666667,3.86,...,0,0,0,0,0,0,0,1,0,0
NDAR_INV00HEV6HB,6819,1937,57.3,70.866667,28.00,2.0,124.0,13.0,3.000000,4.57,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NDAR_INVZZLZCKAY,2661,7843,59.5,123.000000,31.00,2.0,110.0,15.0,2.666667,1.00,...,0,0,0,1,0,0,0,1,0,1
NDAR_INVZZNX6W2P,6203,3132,56.0,73.000000,26.00,3.0,131.0,,5.000000,1.29,...,0,0,1,0,0,0,0,0,0,0
NDAR_INVZZZ2ALR6,3739,5912,54.7,59.500000,19.00,0.0,120.0,23.0,4.000000,1.71,...,1,0,0,1,0,0,0,1,0,0
NDAR_INVZZZNB0XC,5571,5599,49.0,63.000000,25.00,0.0,108.0,12.0,2.000000,4.29,...,0,0,1,0,0,1,0,1,1,0


In [10]:
# Nested SVM run: univariate selection with params=2.
# Start from a freshly loaded ML object and use 16 jobs.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 16

# Random search attached to the nested SVM pipeline.
base_param_search = Param_Search(search_type='RandomSearch',
                                 n_iter=60,
                                 splits=3,
                                 n_repeats=1)

# Extra parameter that is only applied during the parameter search.
# NOTE(review): the key names 'svm classifier', while the logged output of
# this run reports the problem type as regression — confirm the key applies.
base_param_search.search_only_params = {'svm classifier__probability': False}

# Feature selection steps placed before the SVM.
feat_selector = [
    Feat_Selector('variance threshold'),
    Feat_Selector('univariate selection', params=2),
]

# Inner pipeline: feature selection + SVM, tuned by the search above.
nested_svm_pipe = Model_Pipeline(
    imputers=None,
    feat_selectors=feat_selector,
    model=Model('svm', params=1, extra_params={'cache_size': 2000}),
    param_search=base_param_search,
)

# Wrap the inner pipeline so it can act as the model of the outer pipeline.
model = Model(nested_svm_pipe)

# Outer pipeline: parcellation loader over scope 'all', then the nested model.
loader = get_loader('random_1000_0', scope='all')
pipeline = Model_Pipeline(
    imputers=None,
    loaders=loader,
    model=model,
)

evaluate(pipeline)

Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/1 [00:00<?, ?it/s][AML object loaded from save!
problem_spec problem_type ==  default, setting as: regression
problem_spec scorer ==  default, setting as: ['explained_variance', 'neg_mean_squared_error']
Model_Pipeline
--------------
loaders=\
Loader(cache_loc='/home/sage/cache/random_1000_0',
       obj=SurfLabels(labels='../extra_random_parcels/random_1000_0.npy'),
       scope='all')

model=\
Model(obj=Model_Pipeline(feat_selectors=[Feat_Selector(obj='variance '
                                                           'threshold'),
                                         Feat_Selector(obj='univariate '
                                                           'selection',
                                                       params=2)],
                         imputers=None,
                         model=Model(extra_params={'cache_size': 2000},
                                     obj='svm', params=1),


KeyboardInterrupt: 

In [None]:
# Inspect the parameter distributions stored on the 'Custom 1' entry of the
# evaluator's model.
# NOTE(review): presumably requires a prior evaluate() run to have populated
# ML.evaluator — confirm.
ML.evaluator.model['Custom 1'].param_distributions

In [None]:
# Nested SVM run: univariate selection with params=1.
# Start from a freshly loaded ML object and use 16 jobs.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 16

# Random search attached to the nested SVM pipeline.
base_param_search = Param_Search(search_type='RandomSearch',
                                 n_iter=60,
                                 splits=3,
                                 n_repeats=1)

# Extra parameter that is only applied during the parameter search.
# NOTE(review): the key names 'svm classifier' — confirm it matches the
# problem type being evaluated.
base_param_search.search_only_params = {'svm classifier__probability': False}

# Feature selection steps placed before the SVM.
feat_selector = [
    Feat_Selector('variance threshold'),
    Feat_Selector('univariate selection', params=1),
]

# Inner pipeline: feature selection + SVM, tuned by the search above.
nested_svm_pipe = Model_Pipeline(
    imputers=None,
    feat_selectors=feat_selector,
    model=Model('svm', params=1, extra_params={'cache_size': 2000}),
    param_search=base_param_search,
)

# Wrap the inner pipeline so it can act as the model of the outer pipeline.
model = Model(nested_svm_pipe)

# Outer pipeline: parcellation loader over scope 'all', then the nested model.
loader = get_loader('random_1000_0', scope='all')
pipeline = Model_Pipeline(
    imputers=None,
    loaders=loader,
    model=model,
)

evaluate(pipeline)

In [None]:
# Nested SVM run: univariate selection fixed at {'percentile': 10}.
# Start from a freshly loaded ML object and use 16 jobs.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 16

# Random search attached to the nested SVM pipeline.
base_param_search = Param_Search(search_type='RandomSearch',
                                 n_iter=60,
                                 splits=3,
                                 n_repeats=1)

# Extra parameter that is only applied during the parameter search.
# NOTE(review): the key names 'svm classifier' — confirm it matches the
# problem type being evaluated.
base_param_search.search_only_params = {'svm classifier__probability': False}

# Feature selection steps placed before the SVM.
feat_selector = [
    Feat_Selector('variance threshold'),
    Feat_Selector('univariate selection', params={'percentile': 10}),
]

# Inner pipeline: feature selection + SVM, tuned by the search above.
nested_svm_pipe = Model_Pipeline(
    imputers=None,
    feat_selectors=feat_selector,
    model=Model('svm', params=1, extra_params={'cache_size': 2000}),
    param_search=base_param_search,
)

# Wrap the inner pipeline so it can act as the model of the outer pipeline.
model = Model(nested_svm_pipe)

# Outer pipeline: parcellation loader over scope 'all', then the nested model.
loader = get_loader('random_1000_0', scope='all')
pipeline = Model_Pipeline(
    imputers=None,
    loaders=loader,
    model=model,
)

evaluate(pipeline)

### Parcel as a hyper-parameter

In [None]:
# Reload the ML object and use 8 jobs.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 8
# Candidate parcellation files: '../extra_random_parcels/random_300_<i>.npy'
# for i = 0..9.
choices = ['../extra_random_parcels/random_300_' + str(i) + '.npy' for i in range(10)]

In [None]:
# Parcellation treated as a searched parameter, with the elastic-net search
# nested inside. Uses `choices` defined in the preceding cell.

# Start from a fresh ML object. Reloading replaces the object on which
# n_jobs was set in the preceding cell, so the setting is applied again here;
# otherwise this run (and the eval_choice runs that follow) would silently
# fall back to whatever n_jobs the saved object carries.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 8

# Search attached to the elastic model itself.
param_search = Param_Search(search_type='RandomSearch',
                            n_iter=60,
                            splits=3,
                            n_repeats=1)

# The initial labels are only a placeholder; the 'labels' parameter is
# declared as a nevergrad Choice over `choices`.
roi = SurfLabels(labels='../extra_random_parcels/random_100_0.npy')
loader = Loader(roi, params={'labels': ng.p.Choice(choices)}, cache_loc='/home/sage/cache/search_test')


model = Model('elastic', params=1, extra_params={'tol': 1e-3}, param_search=param_search)

# Pipeline-level search is Param_Search('grid')
# (presumably a grid over the label choices — confirm).
pipeline = Model_Pipeline(loaders=loader,
                          model=model,
                          scalers=Scaler('robust'),
                          param_search=Param_Search('grid'))

evaluate(pipeline)

In [None]:
# Parcellation-as-parameter run using RandomSearch with 360 iterations.
eval_choice(choices, search_type='RandomSearch', n_iter=360)

In [None]:
# Same run as the RandomSearch variant, but using the TwoPointsDE search type.
eval_choice(choices, search_type='TwoPointsDE', n_iter=360)

In [None]:
# NOTE(review): `stop` is not defined anywhere in this notebook's visible code,
# so evaluating it presumably raises NameError and halts a "Run All" here —
# apparently a deliberate barrier before the ensemble experiments. Confirm.
stop

In [None]:
# Reload the ML object, use 8 jobs, and select the ten parcellations
# 'random_100_0' .. 'random_100_9'.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 8
parcels = ['random_100_' + str(i) for i in range(10)]

In [None]:
# Evaluate the voting ensemble built over the current `parcels`.
pipeline = get_voting(parcels)
results = evaluate(pipeline)

In [None]:
# Evaluate the stacking ensemble built over the current `parcels`.
pipeline = get_stacking(parcels)
results = evaluate(pipeline)

In [None]:
# Switch to the ten parcellations 'random_200_0' .. 'random_200_9' and
# evaluate the voting ensemble over them.
parcels = ['random_200_' + str(i) for i in range(10)]
pipeline = get_voting(parcels)
results = evaluate(pipeline)

In [None]:
# Evaluate the stacking ensemble over the current `parcels`
# (set in the preceding cell).
pipeline = get_stacking(parcels)
results = evaluate(pipeline)

In [None]:
# Switch to the ten parcellations 'random_300_0' .. 'random_300_9' and
# evaluate the voting ensemble over them.
parcels = ['random_300_' + str(i) for i in range(10)]
pipeline = get_voting(parcels)
results = evaluate(pipeline)

In [None]:
# Evaluate the stacking ensemble over the current `parcels`
# (set in the preceding cell).
pipeline = get_stacking(parcels)
results = evaluate(pipeline)

In [None]:
# Reload the ML object and use 8 jobs.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 8

# One parcellation per size: 'random_100_0', 'random_200_0', ..., 'random_1000_0'.
parcels = ['random_' + str(i+1) + '00_0' for i in range(10)]

In [None]:
# Evaluate the voting ensemble over the current `parcels`.
pipeline = get_voting(parcels)
results = evaluate(pipeline)

In [None]:
# Evaluate the stacking ensemble over the current `parcels`.
pipeline = get_stacking(parcels)
results = evaluate(pipeline)

Keep for now: experimental run of `get_stacking_alt` (one scoped loader/model pair per parcel).

In [None]:
# Reload the ML object and use 8 jobs.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 8

# Replace the single 'consolidated' column with three copies named '1', '2'
# and '3', then register those names as the data keys so that each can be
# addressed as its own scope.
ML.all_data['1'] = ML.all_data['consolidated'].copy()
ML.all_data['2'] = ML.all_data['consolidated'].copy()
ML.all_data['3'] = ML.all_data['consolidated'].copy()
ML.all_data.drop('consolidated', axis=1, inplace=True)
ML.Data_Scopes.data_keys = ['1', '2', '3']

# NOTE(review): get_stacking_alt creates one scope per entry of `parcels`
# ('1' .. str(len(parcels))), but only keys '1'..'3' are created here. If
# `parcels` still holds the ten names from the preceding setup cell, scopes
# '4'..'10' have no matching column — confirm the intended `parcels`.
pipeline = get_stacking_alt(parcels, search_type='RandomSearch', n_iter=180)
results = evaluate(pipeline)