In [1]:
import pandas as pd
import os
import numpy as np
import time

from BPt import *
from BPt.extensions import SurfLabels, SurfMaps
from sklearn.linear_model import ElasticNetCV

import warnings
from sklearn.exceptions import ConvergenceWarning

# Suppress every warning of category ConvergenceWarning (imported above from
# sklearn.exceptions) so such warnings do not appear in the cell outputs.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [2]:
def get_l_model(parcel):
    """Build a 'light gbm' model wrapped in its own per-parcel pipeline.

    Parameters
    ----------
    parcel : str
        Parcellation name. It is concatenated into the label file path
        ('../extra_random_parcels/<parcel>.npy') and into the loader cache
        location ('/home/sage/cache/<parcel>').

    Returns
    -------
    Model
        A ``Model`` whose object is a ``Model_Pipeline`` made of a
        ``SurfLabels`` loader, a 'robust' scaler and the 'light gbm' model.

    Notes
    -----
    The hyper-parameter search uses search_type 'TwoPointsDE' with 180
    iterations, ``splits=.25`` and a CV keyed on 'rel_family_id'
    (presumably a single 25% hold-out that keeps families together --
    confirm against the BPt documentation).
    """
    family_cv = CV(groups='rel_family_id')
    search = Param_Search(search_type='TwoPointsDE',
                          n_iter=180,
                          splits=.25,
                          n_repeats=1,
                          cv=family_cv)
    lgbm = Model('light gbm', params=1, param_search=search)

    # Label file and cache directory are both derived from the parcel name.
    label_file = '../extra_random_parcels/' + parcel + '.npy'
    cache_dir = '/home/sage/cache/' + parcel
    surf_loader = Loader(SurfLabels(labels=label_file), cache_loc=cache_dir)

    # The nested pipeline is wrapped in Model before being returned.
    return Model(Model_Pipeline(imputers=None,
                                loaders=surf_loader,
                                scalers=Scaler('robust'),
                                model=lgbm))

In [3]:
def get_p_model(parcel):
    """Build an 'elastic' model wrapped in its own per-parcel pipeline.

    Parameters
    ----------
    parcel : str
        Parcellation name. It is concatenated into the label file path
        ('../extra_random_parcels/<parcel>.npy') and into the loader cache
        location ('/home/sage/cache/<parcel>').

    Returns
    -------
    Model
        A ``Model`` whose object is a ``Model_Pipeline`` made of a
        ``SurfLabels`` loader, a 'robust' scaler and the 'elastic' model
        (tol=1e-3) tuned by a 60-iteration 'RandomSearch' with 3 splits and
        a CV keyed on 'rel_family_id'.
    """
    search = Param_Search(search_type='RandomSearch',
                          n_iter=60,
                          splits=3,
                          n_repeats=1,
                          cv=CV(groups='rel_family_id'))
    enet = Model('elastic',
                 params=1,
                 param_search=search,
                 extra_params={'tol': 1e-3})

    # Label file and cache directory are both derived from the parcel name.
    surf_rois = SurfLabels(labels='../extra_random_parcels/' + parcel + '.npy')
    parcel_loader = Loader(surf_rois, cache_loc='/home/sage/cache/' + parcel)

    nested = Model_Pipeline(imputers=None,
                            loaders=parcel_loader,
                            scalers=Scaler('robust'),
                            model=enet)
    return Model(nested)

In [4]:
def get_voting(parcels):
    """Build a pipeline whose model is a voting ensemble over per-parcel models.

    Parameters
    ----------
    parcels : iterable of str
        Parcellation names; one sub-model is created per name by
        ``get_p_model``.

    Returns
    -------
    Model_Pipeline
        A pipeline with ``imputers=None`` and an ``Ensemble`` of type
        "voting regressor" (``n_jobs_type='models'``) as its model.
    """
    per_parcel_models = list(map(get_p_model, parcels))

    ensemble = Ensemble(obj="voting regressor",
                        models=per_parcel_models,
                        n_jobs_type='models')

    return Model_Pipeline(imputers=None, model=ensemble)

def get_stacking(parcels):
    """Build a pipeline whose model is a stacking ensemble over per-parcel models.

    Parameters
    ----------
    parcels : iterable of str
        Parcellation names; one sub-model is created per name by
        ``get_p_model``.

    Returns
    -------
    Model_Pipeline
        A pipeline with ``imputers=None`` and an ``Ensemble`` of type
        "stacking regressor" as its model. The ensemble's ``base_model`` is a
        'ridge' model tuned by a 60-iteration 'RandomSearch', and its
        ``cv_splits`` use 3 splits with a CV keyed on 'rel_family_id'.
    """
    # NOTE(review): this search is built without cv=, unlike the per-parcel
    # search in get_p_model -- confirm that is intended.
    ridge_search = Param_Search(search_type='RandomSearch',
                                n_iter=60,
                                splits=3,
                                n_repeats=1)
    ridge = Model('ridge', params=1, param_search=ridge_search)

    family_splits = CV_Splits(cv=CV(groups='rel_family_id'),
                              splits=3,
                              n_repeats=1)

    per_parcel_models = list(map(get_p_model, parcels))

    ensemble = Ensemble(obj="stacking regressor",
                        models=per_parcel_models,
                        cv_splits=family_splits,
                        base_model=ridge,
                        n_jobs_type='models')

    return Model_Pipeline(imputers=None, model=ensemble)

def get_loader(parcel, scope):
    """Create a ``SurfLabels`` loader for one parcellation, limited to ``scope``.

    Parameters
    ----------
    parcel : str
        Parcellation name; concatenated into the label file path
        ('../extra_random_parcels/<parcel>.npy') and the cache location
        ('/home/sage/cache/<parcel>').
    scope
        Passed through unchanged as the ``scope`` argument of ``Loader``.

    Returns
    -------
    Loader
    """
    label_file = '../extra_random_parcels/' + parcel + '.npy'
    cache_dir = '/home/sage/cache/' + parcel
    return Loader(SurfLabels(labels=label_file),
                  cache_loc=cache_dir,
                  scope=scope)

def get_stacking_alt(parcels, search_type='RandomSearch', n_iter=60):
    """Build a stacking pipeline with scoped loaders and one ensemble-level search.

    Instead of nesting a full pipeline inside every sub-model, this variant
    puts one loader per parcel at the top level of the pipeline and gives each
    'elastic' sub-model the matching scope. Scopes are the 1-based parcel
    positions rendered as strings ('1', '2', ...).

    Parameters
    ----------
    parcels : sequence of str
        Parcellation names; must support ``len``.
    search_type : str, optional
        Forwarded to ``Param_Search(search_type=...)``.
    n_iter : int, optional
        Forwarded to ``Param_Search(n_iter=...)``.

    Returns
    -------
    Model_Pipeline
        A pipeline with ``imputers=None``, the scoped loaders, and a
        "stacking regressor" ``Ensemble`` as its model.
    """
    family_cv = CV(groups='rel_family_id')

    # NOTE(review): family_cv is only passed to CV_Splits below; the
    # Param_Search itself is built without cv= -- confirm that is intended.
    search = Param_Search(search_type=search_type,
                          n_iter=n_iter,
                          splits=3,
                          n_repeats=1)

    scopes = [str(position) for position in range(1, len(parcels) + 1)]

    base_models = [Model('elastic', params=1, extra_params={'tol': 1e-3}, scope=scope)
                   for scope in scopes]

    ensemble = Ensemble(obj="stacking regressor",
                        models=base_models,
                        cv_splits=CV_Splits(cv=family_cv, splits=3, n_repeats=1),
                        base_model=Model('ridge', params=1),
                        param_search=search)

    # Pair each parcel with the same scope string given to its sub-model.
    scoped_loaders = [get_loader(name, scope)
                      for name, scope in zip(parcels, scopes)]

    return Model_Pipeline(imputers=None,
                          loaders=scoped_loaders,
                          model=ensemble)


def evaluate(pipeline, base_dtype='float32'):
    """Run ``ML.Evaluate`` on ``pipeline`` with this notebook's fixed settings.

    Uses the module-level ``ML`` object, which must be defined before this
    function is called.

    Parameters
    ----------
    pipeline
        The pipeline handed to ``ML.Evaluate`` as its first argument.
    base_dtype : str, optional
        Forwarded to ``ML.Evaluate(base_dtype=...)``.

    Returns
    -------
    Whatever ``ML.Evaluate`` returns.

    Notes
    -----
    Fixed settings: ``Problem_Spec(target=0)``, ``splits=5``,
    ``n_repeats=1``, a CV keyed on 'rel_family_id', and ``only_fold=0``
    (presumably restricts the run to the first of the five folds -- confirm
    against the BPt documentation).
    """
    problem = Problem_Spec(target=0)
    family_cv = CV(groups='rel_family_id')

    return ML.Evaluate(pipeline,
                       problem,
                       splits=5,
                       n_repeats=1,
                       cv=family_cv,
                       only_fold=0,
                       base_dtype=base_dtype)

In [5]:
# Load the saved ML object and set its n_jobs attribute to 8.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 8
# Ten parcellation names: 'random_100_0' through 'random_100_9'.
parcels = ['random_100_' + str(i) for i in range(10)]

ML object loaded from save!


In [6]:
# Voting ensemble over the current `parcels` list, scored with evaluate().
pipeline = get_voting(parcels)
results = evaluate(pipeline)

problem_spec problem_type ==  default, setting as: regression
problem_spec scorer ==  default, setting as: ['explained_variance', 'neg_mean_squared_error']
Model_Pipeline
--------------
model=\
Ensemble(models=[Model(obj=Model_Pipeline(imputers=None,
                                          loaders=Loader(cache_loc='/home/sage/cache/random_100_0',
                                                         obj=SurfLabels(labels='../extra_random_parcels/random_100_0.npy')),
                                          model=Model(extra_params={'tol': 0.001},
                                                      obj='elastic',
                                                      param_search=Param_Search(cv=CV(groups='rel_family_id'),
                                                                                n_iter=60),
                                                      params=1),
                                          scalers=Scaler(obj='robust'))),
                 Model(obj...


Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/1 [00:00<?, ?it/s][A


Repeat: 1/1 Fold: 1/1


Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/1 [00:00<?, ?it/s][A

Train shape: (7528, 2)
Val/Test shape: (1896, 2)
Making predictions for additional target NaN subjects: 1
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   2.1s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  19.7s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.7s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  19.6s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.7s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  21.5s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.7s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, 

Repeats: 100%|██████████| 1/1 [03:45<00:00, 225.34s/it]

Folds: 100%|██████████| 1/1 [03:45<00:00, 225.34s/it][A


Validation Scores
_________________
Scorer:  explained_variance
Mean Validation score:  0.13684583130936823
Std in Validation score:  0.0

Scorer:  neg_mean_squared_error
Mean Validation score:  -8.43279246654004
Std in Validation score:  0.0






In [7]:
# Stacking ensemble over the same `parcels` list, scored with evaluate().
pipeline = get_stacking(parcels)
results = evaluate(pipeline)

problem_spec problem_type ==  default, setting as: regression
problem_spec scorer ==  default, setting as: ['explained_variance', 'neg_mean_squared_error']
Model_Pipeline
--------------
model=\
Ensemble(base_model=Model(obj='ridge', param_search=Param_Search(n_iter=60),
                          params=1),
         cv_splits=CV_Splits(cv=CV(groups='rel_family_id')),
         models=[Model(obj=Model_Pipeline(imputers=None,
                                          loaders=Loader(cache_loc='/home/sage/cache/random_100_0',
                                                         obj=SurfLabels(labels='../extra_random_parcels/random_100_0.npy')),
                                          model=Model(extra_params={'tol': 0.001},
                                                      obj=...
                 Model(obj=Model_Pipeline(imputers=None,
                                          loaders=Loader(cache_loc='/home/sage/cache/random_100_9',
                                               

Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/1 [00:00<?, ?it/s][A


Repeat: 1/1 Fold: 1/1


Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/1 [00:00<?, ?it/s][A

Train shape: (7528, 2)
Val/Test shape: (1896, 2)
Making predictions for additional target NaN subjects: 1
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.7s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  19.4s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.7s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  19.9s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.7s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  22.3s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.8s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, 

[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  12.9s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.5s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  12.9s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.5s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  14.3s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.5s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  15.4s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.5s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.1s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  16.5s
[Pipeli

Repeats: 100%|██████████| 1/1 [11:20<00:00, 680.45s/it]

Folds: 100%|██████████| 1/1 [11:20<00:00, 680.45s/it][A


Validation Scores
_________________
Scorer:  explained_variance
Mean Validation score:  0.14536304698003222
Std in Validation score:  0.0

Scorer:  neg_mean_squared_error
Mean Validation score:  -8.343733407524642
Std in Validation score:  0.0






In [8]:
# Switch to the ten 'random_200_*' parcellations and rerun the voting ensemble.
# Rebinding `parcels` here also changes what later cells operate on.
parcels = ['random_200_' + str(i) for i in range(10)]
pipeline = get_voting(parcels)
results = evaluate(pipeline)

problem_spec problem_type ==  default, setting as: regression
problem_spec scorer ==  default, setting as: ['explained_variance', 'neg_mean_squared_error']
Model_Pipeline
--------------
model=\
Ensemble(models=[Model(obj=Model_Pipeline(imputers=None,
                                          loaders=Loader(cache_loc='/home/sage/cache/random_200_0',
                                                         obj=SurfLabels(labels='../extra_random_parcels/random_200_0.npy')),
                                          model=Model(extra_params={'tol': 0.001},
                                                      obj='elastic',
                                                      param_search=Param_Search(cv=CV(groups='rel_family_id'),
                                                                                n_iter=60),
                                                      params=1),
                                          scalers=Scaler(obj='robust'))),
                 Model(obj...


Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/1 [00:00<?, ?it/s][A


Repeat: 1/1 Fold: 1/1


Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/1 [00:00<?, ?it/s][A

Train shape: (7528, 2)
Val/Test shape: (1896, 2)
Making predictions for additional target NaN subjects: 1
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=  21.5s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.3s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  32.9s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=  22.1s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.3s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  41.9s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=  22.2s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.2s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  41.5s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=  22.3s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.3s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, 

Repeats: 100%|██████████| 1/1 [11:16<00:00, 676.52s/it]

Folds: 100%|██████████| 1/1 [11:16<00:00, 676.52s/it][A


Validation Scores
_________________
Scorer:  explained_variance
Mean Validation score:  0.14257420139154486
Std in Validation score:  0.0

Scorer:  neg_mean_squared_error
Mean Validation score:  -8.376816396678711
Std in Validation score:  0.0






In [9]:
# Stacking ensemble over the current `parcels` list, scored with evaluate().
pipeline = get_stacking(parcels)
results = evaluate(pipeline)

problem_spec problem_type ==  default, setting as: regression
problem_spec scorer ==  default, setting as: ['explained_variance', 'neg_mean_squared_error']
Model_Pipeline
--------------
model=\
Ensemble(base_model=Model(obj='ridge', param_search=Param_Search(n_iter=60),
                          params=1),
         cv_splits=CV_Splits(cv=CV(groups='rel_family_id')),
         models=[Model(obj=Model_Pipeline(imputers=None,
                                          loaders=Loader(cache_loc='/home/sage/cache/random_200_0',
                                                         obj=SurfLabels(labels='../extra_random_parcels/random_200_0.npy')),
                                          model=Model(extra_params={'tol': 0.001},
                                                      obj=...
                 Model(obj=Model_Pipeline(imputers=None,
                                          loaders=Loader(cache_loc='/home/sage/cache/random_200_9',
                                               

Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/1 [00:00<?, ?it/s][A


Repeat: 1/1 Fold: 1/1


Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/1 [00:00<?, ?it/s][A

Train shape: (7528, 2)
Val/Test shape: (1896, 2)
Making predictions for additional target NaN subjects: 1
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.6s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.3s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  34.0s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.5s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.3s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  42.2s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.5s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.2s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  41.2s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.6s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.3s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, 

[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  27.0s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.3s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.2s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  25.7s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.3s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.2s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  28.6s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.3s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.2s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  28.9s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=   0.3s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.2s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  26.4s
[Pipeli

Repeats: 100%|██████████| 1/1 [20:32<00:00, 1232.95s/it]

Folds: 100%|██████████| 1/1 [20:32<00:00, 1232.96s/it][A


Validation Scores
_________________
Scorer:  explained_variance
Mean Validation score:  0.14894385413099842
Std in Validation score:  0.0

Scorer:  neg_mean_squared_error
Mean Validation score:  -8.309845896233016
Std in Validation score:  0.0






In [None]:
# Switch to the ten 'random_300_*' parcellations and rerun the voting ensemble.
# Rebinding `parcels` here also changes what later cells operate on.
parcels = ['random_300_' + str(i) for i in range(10)]
pipeline = get_voting(parcels)
results = evaluate(pipeline)

problem_spec problem_type ==  default, setting as: regression
problem_spec scorer ==  default, setting as: ['explained_variance', 'neg_mean_squared_error']
Model_Pipeline
--------------
model=\
Ensemble(models=[Model(obj=Model_Pipeline(imputers=None,
                                          loaders=Loader(cache_loc='/home/sage/cache/random_300_0',
                                                         obj=SurfLabels(labels='../extra_random_parcels/random_300_0.npy')),
                                          model=Model(extra_params={'tol': 0.001},
                                                      obj='elastic',
                                                      param_search=Param_Search(cv=CV(groups='rel_family_id'),
                                                                                n_iter=60),
                                                      params=1),
                                          scalers=Scaler(obj='robust'))),
                 Model(obj...


Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/1 [00:00<?, ?it/s][A


Repeat: 1/1 Fold: 1/1


Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/1 [00:00<?, ?it/s][A

Train shape: (7528, 2)
Val/Test shape: (1896, 2)
Making predictions for additional target NaN subjects: 1
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=  29.9s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.4s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  54.3s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=  30.9s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.4s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  54.9s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=  31.0s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.4s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, total=  55.7s
[Pipeline] .......... (step 1 of 3) Processing Custom 0, total=  30.7s
[Pipeline] ........ (step 2 of 3) Processing col_robust, total=   0.4s
[Pipeline]  (step 3 of 3) Processing elastic net regressor_NGSearchCV, 

In [None]:
# Stacking ensemble over the current `parcels` list, scored with evaluate().
pipeline = get_stacking(parcels)
results = evaluate(pipeline)

In [None]:
# `stop` is not defined anywhere in the visible notebook, so evaluating it
# raises NameError -- presumably a deliberate way to halt a "Run All" before
# the cells that follow. TODO confirm and consider an explicit raise instead.
stop

Keep for now~~~

In [None]:
# Rebind ML to a fresh load of the saved object and set its n_jobs attribute to 8.
ML = Load('../data/Base_consol.ML')
ML.n_jobs = 8

# Copy the 'consolidated' column of ML.all_data under the keys '1', '2' and '3',
# drop the original column in place, then set those three keys as the data keys.
ML.all_data['1'] = ML.all_data['consolidated'].copy()
ML.all_data['2'] = ML.all_data['consolidated'].copy()
ML.all_data['3'] = ML.all_data['consolidated'].copy()
ML.all_data.drop('consolidated', axis=1, inplace=True)
ML.Data_Scopes.data_keys = ['1', '2', '3']

# NOTE(review): `parcels` is not defined in this cell; it comes from whichever
# earlier cell last assigned it. get_stacking_alt assigns one scope per parcel
# ('1'..str(len(parcels))), while only three keys are created above -- confirm
# that the length of `parcels` is meant to match.
pipeline = get_stacking_alt(parcels, search_type='RandomSearch', n_iter=180)
results = evaluate(pipeline)