In [1]:
from BPt import *
from sklearn.cluster import FeatureAgglomeration
from nilearn.surface import load_surf_data
from nilearn.surface import load_surf_mesh
import numpy as np
import nibabel as nib
import nilearn.datasets
import os
import networkx as nx
from sklearn.base import BaseEstimator, TransformerMixin

In [2]:
def get_conn(loc):
    
    mesh = load_surf_mesh(loc)[1]
    G = nx.Graph()
    
    for tri in mesh:
        G.add_edge(tri[0], tri[1])
        G.add_edge(tri[0], tri[2])
        G.add_edge(tri[1], tri[2])
        
    return nx.adjacency_matrix(G)

class SplitFeatureAgglomeration(FeatureAgglomeration):
        
    def fit(self, X, y=None, **params):
        
        X = X.reshape(X.shape[0], X.shape[1] // 4, 4)
        self.lhs, self.rhs = [], []
        for i in range(4):
        
            self.lhs.append(FeatureAgglomeration(
                **self.get_params()).fit(X=X[:,:32492,i], y=y, **params))
            self.rhs.append(FeatureAgglomeration(
                **self.get_params()).fit(X=X[:,32492:,i], y=y, **params))
        
    def transform(self, X):
        
        X = X.reshape(X.shape[0], X.shape[1] // 4, 4)
        
        X_trans_list = []
        for i in range(4):
            X_trans_list.append(self.lhs[i].transform(X[:,:32492,i]))
            X_trans_list.append(self.rhs[i].transform(X[:,32492:,i]))
            
        X_trans = np.hstack(X_trans_list)
            
        return X_trans

In [3]:
standard_mesh_loc = '../raw/standard_mesh_atlases/'
loc = os.path.join(standard_mesh_loc, 'L.sphere.32k_fs_LR.surf.gii')
con = get_conn(loc)

ML = Load('../data/Base_consol.ML')
ML.log_dr = None
ML.n_jobs = 1

ML object loaded from save!


In [4]:
loader = Loader('identity')

feat_agg = SplitFeatureAgglomeration(connectivity=con, n_clusters=50, compute_full_tree=True, linkage='ward')
transformer = Transformer(feat_agg)

pipeline = Model_Pipeline(loaders=loader,
                          imputers=None,
                          transformers=transformer)

ps = Problem_Spec(target=0)

Model(obj='ridge')
Passed default model, setting to: None


In [None]:
results = ML.Evaluate(pipeline, ps, return_models=True, splits=5, n_repeats=1)

Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/5 [00:00<?, ?it/s][A

problem_spec problem_type ==  default, setting as: regression
problem_spec scorer ==  default, setting as: ['explained_variance', 'neg_mean_squared_error']
Model_Pipeline
--------------
loaders=\
Loader(obj='identity')

transformers=\
Transformer(obj=SplitFeatureAgglomeration(compute_full_tree=True,
                                          connectivity=<32492x32492 sparse matrix of type '<class 'numpy.longlong'>'
	with 194940 stored elements in Compressed Sparse Row format>,
                                          n_clusters=50))

model=\
Model(obj='ridge')

param_search=\
None



Problem_Spec
------------
problem_type = regression
target = anthro_height_calc
scorer = ['explained_variance', 'neg_mean_squared_error']
weight_scorer = False
scope = all
subjects = all
len(subjects) = 9432 (before overlap w/ train/test subjects)
n_jobs = 1
random_state = 5

Evaluate Params
---------------
splits = 5
n_repeats = 1
cv = default
train_subjects = train
feat_importances = None
len(train_subje

Repeats:   0%|          | 0/1 [00:00<?, ?it/s]
Folds:   0%|          | 0/5 [00:00<?, ?it/s][A

Train shape: (7539, 2)
Val/Test shape: (1885, 2)
Making predictions for additional target NaN subjects: 2
[Pipeline] .......... (step 1 of 3) Processing identity, total= 1.2min
[Pipeline] .......... (step 2 of 3) Processing Custom 0, total= 2.9min
[Pipeline] ... (step 3 of 3) Processing ridge regressor, total=   0.1s
Time Elapsed: 00:04:28
val explained_variance: 0.04249796872170497
val neg_mean_squared_error: -9.661683069645614

Repeat: 1/1 Fold: 2/5


Repeats:   0%|          | 0/1 [04:28<?, ?it/s]
Folds:  20%|██        | 1/5 [04:28<17:53, 268.44s/it][A

Train shape: (7538, 2)
Val/Test shape: (1886, 2)
Making predictions for additional target NaN subjects: 1
[Pipeline] .......... (step 1 of 3) Processing identity, total= 1.1min


Could make a custom transformer, that builds in the loading that would allow caching the full X input, as Data Files, and load the clustered... OR... skip altogether for this paper.