In [1]:
import numpy as np
import polpo.preprocessing.dict as ppdict
import polpo.preprocessing.pd as ppd
from polpo.model_eval import (
    MeshEuclideanR2Score,
    MeshR2Score,
    MultiEvaluator,
    OlsPValues,
    PcaEvaluator,
    R2Score,
    ReconstructionError,
    ResultsExtender,
    VertexReconstructionError,
    collect_eval_results,
)
from polpo.models import ObjectRegressor, SupervisedEmbeddingRegressor
from polpo.preprocessing import (
    PartiallyInitializedStep,
)
from polpo.preprocessing.learning import DictsToXY
from polpo.preprocessing.load.pregnancy import (
    DenseMaternalCsvDataLoader,
    DenseMaternalMeshLoader,
)
from polpo.preprocessing.mesh.conversion import PvFromData
from polpo.preprocessing.mesh.io import FreeSurferReader
from polpo.preprocessing.mesh.registration import PvAlign
from polpo.sklearn.adapter import AdapterPipeline, EvaluatedModel
from polpo.sklearn.mesh import BiMeshesToVertices
from polpo.sklearn.np import BiFlattenButFirst
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import FunctionTransformer, StandardScaler



## Load data

In [2]:
# Subject identifier; forwarded to both the CSV loader and the mesh loader.
subject_id = "01"

# TODO: per structure
# Structure key forwarded as `struct=` to the mesh loader
# ("Hipp" presumably stands for hippocampus -- confirm).
struct = "Hipp"
# Forwarded as `left=` to the mesh loader
# (presumably selects the left hemisphere -- confirm).
left = True

Gestational weeks (the predictor).

In [3]:
# Subject "01" is treated as the pilot; the flag is forwarded to the loader.
pilot = subject_id == "01"

pipe = DenseMaternalCsvDataLoader(pilot=pilot, subject_id=subject_id)

# Run the loader; the result is the table filtered by the selectors below
# (presumably a pandas DataFrame -- confirm).
df = pipe()

INFO: Data has already been downloaded... using cached file ('/home/luisfpereira/.herbrain/data/maternal/raw/28Baby_Hormones.csv').


In [4]:
# Row filter on the "stage" column with the value "post", negated
# (presumably keeps every session that is not "post" -- confirm).
session_selector = ppd.DfIsInFilter("stage", ["post"], negate=True)

# Column holding the predictor.
week_column = ppd.ColumnsSelector("gestWeek")

# Chain: filter sessions -> pick the week column -> convert to a dict.
predictor_selector = session_selector + week_column + ppd.SeriesToDict()

x_dict = predictor_selector(df)

Meshes (the regression target).

In [5]:
def _aligned_dict_map(**kwargs):
    """Wrap ``PvAlign(**kwargs)`` in a ``ppdict.DictMap``."""
    return ppdict.DictMap(PvAlign(**kwargs))


def _first_mesh(meshes):
    """Return the value stored under the first key of ``meshes``."""
    first_key = list(meshes.keys())[0]
    return meshes[first_key]


# Mesh loader for the configured subject, structure and side.
file_finder = DenseMaternalMeshLoader(
    subject_id=subject_id,
    as_dict=True,
    left=left,
    struct=struct,
    derivative="enigma",
)

# Per-file reading pipeline, mapped over the dict produced by the loader.
read_single_mesh = FreeSurferReader() + PvFromData()
mesh_reader = ppdict.DictMap(read_single_mesh)

# NOTE(review): the underscore-prefixed `_target` is presumably resolved from
# the incoming data at call time (here: the first mesh), so that every mesh is
# aligned to the first one -- confirm against PartiallyInitializedStep.
prep_pipe = PartiallyInitializedStep(
    Step=_aligned_dict_map,
    _target=_first_mesh,
    max_iterations=500,
)

pipe = file_finder + mesh_reader + prep_pipe
meshes = pipe()

## Stats (PCA)

In [6]:
# PCA step (4 components) together with the evaluators attached to it.
pca_evaluators = [
    PcaEvaluator(),
    ReconstructionError(),
    VertexReconstructionError(prefix="vertex"),
]
pca = EvaluatedModel(PCA(n_components=4), MultiEvaluator(pca_evaluators))

# Target-side pipeline: meshes -> stacked vertices -> flattened features ->
# mean-centred features (with_std=False, so no rescaling) -> PCA.
objs2y_steps = [
    BiMeshesToVertices(index=0),
    FunctionTransformer(func=np.stack),
    BiFlattenButFirst(),
    StandardScaler(with_std=False),
    ("y-pca", pca),
]
objs2y = AdapterPipeline(steps=objs2y_steps)

# Linear regression with its own evaluators (OLS p-values and R2).
ols_evaluator = MultiEvaluator([OlsPValues(), R2Score()])
linear_model = EvaluatedModel(LinearRegression(), ols_evaluator)

# Full model: the regressor paired with the mesh-to-target pipeline,
# with mesh-level R2 evaluators on top.
mesh_evaluator = MultiEvaluator(
    [MeshEuclideanR2Score(), MeshR2Score()],
    extender=ResultsExtender(),
)
obj_model = EvaluatedModel(ObjectRegressor(linear_model, objs2y), mesh_evaluator)

In [7]:
# Inspect the named sub-steps of the assembled model.
obj_model.named_steps

{'pre': AdapterPipeline(steps=[('step_0',
                         TransformerAdapter(step=<built-in function asarray>)),
                        ('step_1',
                         TransformerAdapter(step=<function atleast_2d at 0x76ff32d8dab0>))]),
 'model': ObjectBasedTransformedTargetRegressor(check_inverse=False,
                                       regressor=EvaluatedModel(evaluator=<polpo.model_eval.MultiEvaluator object at 0x76ff5016b740>,
                                                                model=LinearRegression()),
                                       transformer=AdapterPipeline(steps=[('step_0',
                                                                           BiMeshesToVertices()),
                                                                          ('step_1',
                                                                           FunctionTransformer(func=<function stack at 0x76ff32dc47f0>)),
                                                 

In [8]:
# Build the (X, y) pair from the week dict and the mesh dict
# (presumably matched on their shared keys -- confirm in DictsToXY).
dataset_pipe = DictsToXY()

X, meshes_ = dataset_pipe((x_dict, meshes))

# Sanity check: one predictor row per mesh. A bare `X.shape, len(meshes_)`
# in the middle of a cell is evaluated and then discarded (Jupyter only
# displays the last expression), so the invariant is asserted explicitly.
assert X.shape[0] == len(meshes_), (X.shape, len(meshes_))

obj_model.fit(X, meshes_)

0,1,2
,model,ObjectRegress...onents=4)))]))
,evaluator,<polpo.model_...x76fde387a720>

0,1,2
,steps,"[('step_0', ...), ('step_1', ...)]"

0,1,2
,step,<built-in function asarray>

0,1,2
,step,<function atl...x76ff32d8dab0>

0,1,2
,regressor,EvaluatedMode...rRegression())
,transformer,AdapterPipeli...ponents=4)))])
,func,
,inverse_func,
,check_inverse,False

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False

0,1,2
,index,0

0,1,2
,func,<function sta...x76ff32dc47f0>
,inverse_func,
,validate,False
,accept_sparse,False
,check_inverse,True
,feature_names_out,
,kw_args,
,inv_kw_args,

0,1,2
,copy,True
,with_mean,True
,with_std,False

0,1,2
,model,PCA(n_components=4)
,evaluator,<polpo.model_...x76fde387ae70>

0,1,2
,n_components,4
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,


In [9]:
# Gather the evaluation results of the fitted model into a single mapping
# whose outermost key is "obj_regr".
eval_results = collect_eval_results(
    obj_model,
    unnest=True,
    outer_key="obj_regr",
)

# List the available result groups.
print([*eval_results.keys()])

['obj_regr', 'obj_regr/model/regr', 'obj_regr/model/transformer/y-pca']


In [10]:
# Results stored under the outermost key (vertex-wise and feature-wise R2).
eval_results["obj_regr"]

{'vertexwise_r2': array([-0.03228643, -0.05362358, -0.27612588, ..., -0.09031338,
        -0.10056753, -0.05165041], shape=(2502,)),
 'featurewise_r2': array([0.05660715, 0.1512915 , 0.0363127 , ..., 0.08767026, 0.1592111 ,
        0.30784691], shape=(7506,)),
 'vertexwise_r2-mean': np.float64(-0.17853002547948998),
 'vertexwise_r2-max': np.float64(0.33028468177563786),
 'vertexwise_r2-min': np.float64(-0.4521713731169097),
 'featurewise_r2-mean': np.float64(0.09335550296759608),
 'featurewise_r2-max': np.float64(0.7197365633275299),
 'featurewise_r2-min': np.float64(-0.07354767878398749)}

In [11]:
# Results of the linear-regression step (OLS statistics and R2 per component).
eval_results["obj_regr/model/regr"]

{'mse': array([150.60038666,  45.37364532,  41.45630666,  32.3456453 ]),
 'res_var': array([168.31807921,  50.71172124,  46.33351921,  36.15101534]),
 'std_err': array([[0.24474753],
        [0.13434055],
        [0.12841051],
        [0.11342615]]),
 't': array([[0.27230154],
        [3.33566207],
        [2.02617786],
        [1.47878226]]),
 'pvals': array([[0.78866988],
        [0.0039165 ],
        [0.05873375],
        [0.15748895]]),
 'adj-pvals': array([[1.        ],
        [0.01566602],
        [0.234935  ],
        [0.6299558 ]]),
 'r2': array([0.00434271, 0.39559083, 0.19451881, 0.11397405])}

In [12]:
# Results of the "y-pca" step (explained variance and reconstruction errors).
eval_results["obj_regr/model/transformer/y-pca"]

{'expl_var': array([159.66043428,  79.241689  ,  54.32707248,  38.53456623]),
 'expl_var_ratio': array([0.30365417, 0.15070778, 0.10332329, 0.07328792]),
 'expl_var_ratio-cum': array([0.30365417, 0.45436195, 0.55768524, 0.63097316]),
 'featurewise_rec_error': array([0.35178353, 1.08138066, 0.34126734, ..., 0.65839818, 0.47742466,
        0.41839553], shape=(7506,)),
 'rec_error_sum': np.float64(3492.5973145702014),
 'rec_error_mse': np.float64(0.024489862948730148),
 'vertex-vertexwise_rec_error': array([1.77443153, 1.74282216, 1.90990074, ..., 1.60032016, 1.58820905,
        1.55421837], shape=(2502,)),
 'vertex-rec_error_sum': np.float64(3492.597314570202),
 'vertex-rec_error_mse': np.float64(0.07346958884619045)}

## Stats (PLS)

In [13]:
# Target-side pipeline: meshes -> stacked vertices -> flattened features.
# Unlike the PCA variant there is no centring or PCA step here.
objs2y_steps = [
    BiMeshesToVertices(index=0),
    FunctionTransformer(func=np.stack),
    BiFlattenButFirst(),
]
objs2y = AdapterPipeline(steps=objs2y_steps)

# First component of the supervised embedding regressor: PLS with
# 2 components, evaluated by its reconstruction errors.
pls_evaluator = MultiEvaluator(
    [
        ReconstructionError(),
        VertexReconstructionError(prefix="vertex"),
    ]
)
pls_encoder = EvaluatedModel(PLSRegression(n_components=2), pls_evaluator)

# Second component: linear regression with OLS p-values and R2.
ols_evaluator = MultiEvaluator([OlsPValues(), R2Score()])
latent_regressor = EvaluatedModel(LinearRegression(), ols_evaluator)

model = SupervisedEmbeddingRegressor(pls_encoder, latent_regressor)

# Full model: the regressor paired with the mesh-to-target pipeline,
# with mesh-level R2 evaluators on top.
mesh_evaluator = MultiEvaluator(
    [MeshEuclideanR2Score(), MeshR2Score()],
    extender=ResultsExtender(),
)
obj_model = EvaluatedModel(ObjectRegressor(model, objs2y), mesh_evaluator)

In [14]:
# Build the (X, y) pair from the week dict and the mesh dict
# (presumably matched on their shared keys -- confirm in DictsToXY).
dataset_pipe = DictsToXY()

X, meshes_ = dataset_pipe((x_dict, meshes))

# Sanity check: one predictor row per mesh. A bare `X.shape, len(meshes_)`
# in the middle of a cell is evaluated and then discarded (Jupyter only
# displays the last expression), so the invariant is asserted explicitly.
assert X.shape[0] == len(meshes_), (X.shape, len(meshes_))

obj_model.fit(X, meshes_)

0,1,2
,model,ObjectRegress...ButFirst())]))
,evaluator,<polpo.model_...x76fde38e3770>

0,1,2
,steps,"[('step_0', ...), ('step_1', ...)]"

0,1,2
,step,<built-in function asarray>

0,1,2
,step,<function atl...x76ff32d8dab0>

0,1,2
,regressor,SupervisedEmb...Regression()))
,transformer,AdapterPipeli...nButFirst())])
,func,
,inverse_func,
,check_inverse,False

0,1,2
,n_components,2
,scale,True
,max_iter,500
,tol,1e-06
,copy,True

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False

0,1,2
,index,0

0,1,2
,func,<function sta...x76ff32dc47f0>
,inverse_func,
,validate,False
,accept_sparse,False
,check_inverse,True
,feature_names_out,
,kw_args,
,inv_kw_args,


In [15]:
# Gather the evaluation results of the fitted model into a single mapping
# whose outermost key is "obj_regr".
eval_results = collect_eval_results(
    obj_model,
    unnest=True,
    outer_key="obj_regr",
)

# List the available result groups.
print([*eval_results.keys()])

['obj_regr', 'obj_regr/model/regr/regr', 'obj_regr/model/regr/encoder']


In [16]:
# Results stored under the outermost key (vertex-wise and feature-wise R2).
eval_results["obj_regr"]

{'vertexwise_r2': array([-0.01845181, -0.03854586, -0.27739872, ..., -0.10147212,
        -0.11228732, -0.0597578 ], shape=(2502,)),
 'featurewise_r2': array([0.05092154, 0.16428687, 0.05251928, ..., 0.074925  , 0.15747433,
        0.30590612], shape=(7506,)),
 'vertexwise_r2-mean': np.float64(-0.18043143604330505),
 'vertexwise_r2-max': np.float64(0.3319811967352385),
 'vertexwise_r2-min': np.float64(-0.4589236191509354),
 'featurewise_r2-mean': np.float64(0.09320214015000124),
 'featurewise_r2-max': np.float64(0.7365367160291725),
 'featurewise_r2-min': np.float64(-0.04670595673033584)}

In [17]:
# Results of the inner linear-regression step (OLS statistics and R2 per component).
eval_results["obj_regr/model/regr/regr"]

{'mse': array([198.06906719, 487.14422257]),
 'res_var': array([221.37131039, 544.45530758]),
 'std_err': array([[0.28068138],
        [0.4401838 ]]),
 't': array([[7.83323888],
        [1.45359719]]),
 'pvals': array([[4.85655816e-07],
        [1.64269951e-01]]),
 'adj-pvals': array([[9.71311631e-07],
        [3.28539903e-01]]),
 'r2': array([0.78305156, 0.11055046])}

In [18]:
# Results of the encoder step (feature-wise and vertex-wise reconstruction errors).
eval_results["obj_regr/model/regr/encoder"]

{'featurewise_rec_error': array([0.39485296, 3.14490972, 0.67029197, ..., 1.21611889, 0.95001583,
        0.47812717], shape=(7506,)),
 'rec_error_sum': np.float64(6819.855029070748),
 'rec_error_mse': np.float64(0.04782037548256656),
 'vertex-vertexwise_rec_error': array([4.21005465, 4.17553701, 5.68375541, ..., 2.58543428, 2.65112276,
        2.64426189], shape=(2502,)),
 'vertex-rec_error_sum': np.float64(6819.855029070746),
 'vertex-rec_error_mse': np.float64(0.14346112644769965)}