In [1]:
from BPt import BPt_ML, Problem_Spec, Param_Search, Loader, Pipe, Model, Model_Pipeline
import nibabel as nib
import numpy as np
import pandas as pd
import os
import nevergrad as ng
from BPt.extensions import SurfLabels, Connectivity, Networks

In [2]:
def my_load_func(loc):
    """Load one tab-separated data file as a NumPy array.

    The file at `loc` is parsed without a header row, and its first
    column is discarded before the remaining columns are converted.
    """
    table = pd.read_csv(loc, sep='\t', header=None)
    # Keep every column except the leading one.
    return np.array(table.iloc[:, 1:])

def file_to_subject_func(file):
    """Derive the subject name from a data file path.

    Takes the final '/'-separated component of `file` and returns the
    text that precedes its first underscore.
    """
    filename = file.rsplit('/', 1)[-1]
    subject, _, _ = filename.partition('_')
    return subject

ML = BPt_ML(log_dr=None, verbose=False)

# This is data from the developing Human Connectome Project;
# the text files represent derived SST volumetric timeseries ROIs.
data_loc = '/home/sage/Downloads/TS/'

# os.listdir returns entries in arbitrary order, so sort the listing to
# hand the files to the loader in the same order on every run.
files = {'run1': [os.path.join(data_loc, f)
                  for f in sorted(os.listdir(data_loc))
                  if '_01.txt' in f]}

file_to_subject = file_to_subject_func

ML.Load_Data_Files(files=files,
                   file_to_subject=file_to_subject,
                   clear_existing=True,
                   load_func=my_load_func)

# Synthetic regression target: one uniform [0, 1) value per loaded row.
# Drawn from a dedicated, seeded generator (rather than the unseeded global
# NumPy state) so that Restart Kernel -> Run All reproduces the same targets.
TARGET_SEED = 0
copy = ML.data.copy()
copy['target'] = np.random.RandomState(TARGET_SEED).random_sample(len(ML.data))

ML.Load_Targets(df=copy,
                col_name=['target'],
                data_type='f',
                clear_existing=True)

# Hold out 10% of the data as the test set.
ML.Train_Test_Split(test_size=0.1)

In [3]:
# --- Loader: chain a correlation Connectivity step into a Networks step ---
connectivity = Connectivity(kind='correlation',
                            vectorize=False,
                            discard_diagonal=True)

nets = Networks(to_compute=['avg_degree', 'avg_cluster'],
                threshold_method='density',
                threshold=0.1)

# Candidate values for the Networks `threshold` argument.
nets_params = {
    'threshold': ng.p.Choice([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
}

# `params` has one entry per step of the Pipe.
# NOTE(review): presumably 0 means "no hyper-parameters" for the
# Connectivity step -- confirm against the BPt Loader documentation.
loader = Loader(obj=Pipe([connectivity, nets]),
                params=[0, nets_params],
                cache_loc='/home/sage/test')

# --- Model and hyper-parameter search ---
# NOTE(review): params=1 presumably selects a preset parameter
# distribution for the model -- confirm against the BPt documentation.
dt = Model('dt regressor', params=1)

search = Param_Search(search_type='RandomSearch', n_iter=10)

model_pipeline = Model_Pipeline(loaders=loader,
                                scalers=None,
                                model=dt,
                                param_search=search)

# --- Problem definition and evaluation ---
problem_spec = Problem_Spec(problem_type='regression',
                            n_jobs=16,
                            random_state=2)

results = ML.Evaluate(model_pipeline, problem_spec, splits=2, n_repeats=1)

[Imputer(obj='mean', scope='float'), Imputer(obj='median', scope='cat')]
Passed default imputers, setting to: None


HBox(children=(FloatProgress(value=0.0, description='Repeats', max=1.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Folds', max=2.0, style=ProgressStyle(description_width='i…



