In [1]:
import os

import nevergrad as ng
import nibabel as nib
import numpy as np
import pandas as pd

from BPt import BPt_ML, ProblemSpec, ParamSearch, Loader, Pipe, Model, Model_Pipeline
from BPt.extensions import SurfLabels, SingleConnectivityMeasure, ThresholdNetworkMeasures

In [2]:
def my_load_func(loc):
    """Load one headerless, tab-separated file as a NumPy array.

    Parameters
    ----------
    loc : path to (or file-like object for) the tab-separated file.

    Returns
    -------
    numpy.ndarray holding every column of the file except the first one.
    """
    table = pd.read_csv(loc, header=None, sep='\t')
    # The leading column is discarded; only the remaining columns are returned.
    return np.array(table.iloc[:, 1:])

def file_to_subject_func(file):
    """Derive the subject identifier from a file path.

    The identifier is the text of the final '/'-separated path component
    that precedes its first underscore (the whole component when it
    contains no underscore).
    """
    # Text after the last '/', or the whole string when there is no '/'.
    basename = file.rpartition('/')[2]
    # Text before the first '_', or the whole basename when there is no '_'.
    return basename.partition('_')[0]

ML = BPt_ML(log_dr=None, verbose=False)

# This is data from the developing Human Connectome Project;
# the text files represent derived SST volumetric timeseries ROIs.
# NOTE: machine-specific absolute path -- point it at your local copy of the data.
data_loc = '/home/sage/Downloads/TS/'

# Keep only the entries whose name contains '_01.txt'. os.listdir returns
# entries in arbitrary order, so the listing is sorted to make the file order
# (and everything derived from it) reproducible across runs.
files = {'run1': [os.path.join(data_loc, f)
                  for f in sorted(os.listdir(data_loc)) if '_01.txt' in f]}

file_to_subject = file_to_subject_func

ML.Load_Data_Files(files=files,
                   file_to_subject=file_to_subject,
                   clear_existing=True,
                   load_func=my_load_func)

# Synthetic placeholder target: one uniform value in [0, 1) per loaded row.
# A locally seeded generator (arbitrary fixed seed) is used instead of the
# global NumPy RNG so that Restart & Run All yields the same targets.
copy = ML.data.copy()
copy['target'] = np.random.RandomState(0).random_sample(len(ML.data))

ML.Load_Targets(df=copy,
                col_name=['target'],
                data_type='f',
                clear_existing=True)

ML.Train_Test_Split(test_size=0.1)

INFO:numexpr.utils:Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [3]:
problem_spec = ProblemSpec(problem_type='regression',
                           n_jobs=16, random_state=2)

# First loader step: correlation-based connectivity, kept as a matrix
# (vectorize=False) for the network-measure step that follows.
connectivity = SingleConnectivityMeasure(vectorize=False, kind='correlation',
                                         discard_diagonal=True)

# Search space for the network threshold: one of nine values from .1 to .9.
nets_params = {'threshold': ng.p.Choice([.1, .2, .3, .4, .5, .6, .7, .8, .9])}

# Second loader step: thresholded network measures. The threshold given here
# is only the starting value; nets_params supplies the values to search over.
nets = ThresholdNetworkMeasures(threshold=.1,
                                threshold_method='density',
                                to_compute=['avg_degree', 'avg_cluster'])

# params lists one entry per Pipe step, in order (0 for connectivity,
# nets_params for nets) -- presumably 0 means "no search"; confirm in BPt docs.
# NOTE: cache_loc is a machine-specific absolute path.
loader = Loader(obj=Pipe([connectivity, nets]),
                params=[0, nets_params],
                cache_loc='/home/sage/test')

dt = Model('dt regressor', params=1)

search = ParamSearch(search_type='RandomSearch', n_iter=10)

# The constructor name was missing here (a bare parenthesised keyword list is
# a syntax error); the pipeline is built with BPt's Model_Pipeline.
model_pipeline = Model_Pipeline(loaders=loader,
                                scalers=None,
                                model=dt,
                                param_search=search)

results = ML.Evaluate(model_pipeline, problem_spec, n_repeats=1, splits=2)

Passed default imputers, setting to: [Imputer(obj='mean', scope='float'), Imputer(obj='median', scope='cat')]


HBox(children=(HTML(value='Repeats'), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value='Folds'), FloatProgress(value=0.0, max=2.0), HTML(value='')))



