In [1]:
import numpy as np
import polpo.preprocessing.dict as ppdict
import polpo.preprocessing.pd as ppd
from polpo.model_eval import (
    MeshEuclideanR2Score,
    MeshR2Score,
    MultiEvaluator,
    OlsPValues,
    PcaEvaluator,
    R2Score,
    ReconstructionError,
    ResultsExtender,
    ShapeCollector,
    VertexReconstructionError,
    collect_eval_results,
)
from polpo.models import ObjectRegressor, SupervisedEmbeddingRegressor
from polpo.preprocessing import (
    PartiallyInitializedStep,
)
from polpo.preprocessing.learning import DictsToXY
from polpo.preprocessing.load.pregnancy import (
    DenseMaternalCsvDataLoader,
    DenseMaternalMeshLoader,
)
from polpo.preprocessing.mesh.conversion import PvFromData
from polpo.preprocessing.mesh.io import FreeSurferReader
from polpo.preprocessing.mesh.registration import PvAlign
from polpo.sklearn.adapter import AdapterPipeline, EvaluatedModel
from polpo.sklearn.mesh import BiMeshesToVertices
from polpo.sklearn.np import BiFlattenButFirst
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import FunctionTransformer, StandardScaler



## Load data

In [2]:
# Subject to analyse; passed to both the CSV loader and the mesh loader below.
subject_id = "01"

# TODO: per structure
# Structure code and side flag forwarded to the mesh loader.
# NOTE(review): `left=True` presumably selects the left hemisphere — confirm.
struct = "Hipp"
left = True

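Weeks: the predictor is the `gestWeek` column, restricted to the sessions kept by the `stage` filter below.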

In [3]:
# Subject "01" is loaded with `pilot=True`; every other id with `pilot=False`.
pilot = subject_id == "01"

# Callable loader for the subject's tabular data (reuses a cached CSV when present).
pipe = DenseMaternalCsvDataLoader(pilot=pilot, subject_id=subject_id)

# NOTE(review): presumably one row per session — confirm against the loader.
df = pipe()

INFO: Data has already been downloaded... using cached file ('/home/luisfpereira/.herbrain/data/maternal/raw/28Baby_Hormones.csv').


In [4]:
# NOTE(review): reads as "keep rows whose `stage` is NOT 'post'" (is-in filter
# with `negate=True`) — confirm against `DfIsInFilter`.
session_selector = ppd.DfIsInFilter("stage", ["post"], negate=True)


# Steps are chained with `+`: filter rows, keep the `gestWeek` column, then
# convert the resulting series to a dict.
predictor_selector = (
    session_selector + ppd.ColumnsSelector("gestWeek") + ppd.SeriesToDict()
)

# Dict of gestational-week predictors; paired with the meshes later on.
x_dict = predictor_selector(df)

Meshes: the response objects, read from FreeSurfer files and aligned to the first mesh of the collection.

In [5]:
# Mesh-file lookup for the configured subject / structure, returned as a dict
# (`as_dict=True`).
file_finder = DenseMaternalMeshLoader(
    subject_id=subject_id, as_dict=True, left=left, struct=struct, derivative="enigma"
)

# Applied to every dict value: read the FreeSurfer file, then convert the raw
# data to a mesh object (presumably PyVista, going by the `Pv` prefix — confirm).
mesh_reader = ppdict.DictMap(FreeSurferReader() + PvFromData())

# Alignment step whose reference mesh is only known once the meshes are loaded.
# NOTE(review): `_target` appears to be evaluated on the incoming dict to supply
# the `target` keyword of `PvAlign` — confirm against `PartiallyInitializedStep`.
# The reference is the first mesh in the dict's iteration order; it is fetched
# with `next(iter(...))` so no intermediate list of keys is built.
prep_pipe = PartiallyInitializedStep(
    Step=lambda **kwargs: ppdict.DictMap(PvAlign(**kwargs)),
    _target=lambda meshes: next(iter(meshes.values())),
    max_iterations=500,
)

# Full pipeline: find files -> read meshes -> align to the reference mesh.
pipe = file_finder + mesh_reader + prep_pipe

meshes = pipe()

## Stats (PCA)

In [6]:
# PCA on the flattened vertex coordinates, wrapped so that PCA, reconstruction
# error and shape diagnostics are recorded alongside the fitted model.
# `random_state` is pinned because, for a wide matrix such as the (15, 7506)
# training data reported in the results below, scikit-learn's default
# `svd_solver="auto"` selects the randomized solver, whose output otherwise
# changes from run to run.
pca = EvaluatedModel(
    PCA(n_components=4, random_state=0),
    MultiEvaluator(
        [
            PcaEvaluator(),
            ReconstructionError(),
            VertexReconstructionError(prefix="vertex"),
            ShapeCollector(),
        ]
    ),
)

# Response-side transform: meshes -> stacked array -> flat feature matrix ->
# mean-centred features (`with_std=False`, so no rescaling) -> PCA scores.
# NOTE(review): `BiMeshesToVertices` / `BiFlattenButFirst` are presumably
# invertible ("Bi") so predictions can be mapped back to meshes — confirm.
objs2y = AdapterPipeline(
    steps=[
        BiMeshesToVertices(index=0),
        FunctionTransformer(func=np.stack),
        BiFlattenButFirst(),
        StandardScaler(with_std=False),
        ("y-pca", pca),
    ],
)

# Ordinary least squares from the predictor to the PCA scores, with p-values,
# R² and array shapes recorded.
linear_model = EvaluatedModel(
    LinearRegression(), MultiEvaluator([OlsPValues(), R2Score(), ShapeCollector()])
)

# Outer model: regress in PCA space via `objs2y`, and score the result with
# mesh-level R² metrics.
model = EvaluatedModel(
    ObjectRegressor(linear_model, objs2y),
    MultiEvaluator(
        [MeshEuclideanR2Score(), MeshR2Score()],
        extender=ResultsExtender(),
    ),
)

model

0,1,2
,model,ObjectRegress...onents=4)))]))
,evaluator,<polpo.model_...x71af04979970>

0,1,2
,steps,"[('step_0', ...), ('step_1', ...)]"

0,1,2
,step,<built-in function asarray>

0,1,2
,step,<function atl...x71b068395ff0>

0,1,2
,regressor,EvaluatedMode...rRegression())
,transformer,AdapterPipeli...ponents=4)))])
,func,
,inverse_func,
,check_inverse,False

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False

0,1,2
,index,0

0,1,2
,func,<function sta...x71b0683a4bf0>
,inverse_func,
,validate,False
,accept_sparse,False
,check_inverse,True
,feature_names_out,
,kw_args,
,inv_kw_args,

0,1,2
,copy,True
,with_mean,True
,with_std,False

0,1,2
,model,PCA(n_components=4)
,evaluator,<polpo.model_...x71af049a98b0>

0,1,2
,n_components,4
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,


In [7]:
# Pair the week predictors with the meshes to obtain the X / y collections.
# NOTE(review): presumably matched on shared dict keys — confirm against `DictsToXY`.
dataset_pipe = DictsToXY()
X, y = dataset_pipe((x_dict, meshes))

# Ordered 80/20 hold-out: with `shuffle=False` the data keeps its order, so the
# test set is the tail of the sequence rather than a random sample.
split = train_test_split(X, y, train_size=0.8, shuffle=False)
X_train, X_test, y_train, y_test = split

# Trailing `;` suppresses the repr of the fitted model.
model.fit(X_train, y_train);

In [8]:
# Predict on the held-out data and evaluate against `y_test`; the results are
# read back below with `train=False`. Trailing `;` suppresses the cell output.
model.predict_eval(X_test, y_test);

In [9]:
# Collect the evaluation results into a flat mapping keyed by component path,
# rooted at "obj_regr". No `train=False` here, unlike the test-split call that follows.
eval_res_train = collect_eval_results(model, unnest=True, outer_key="obj_regr")

# List which components reported results.
print(list(eval_res_train.keys()))

['obj_regr', 'obj_regr/model/regr', 'obj_regr/model/transformer/y-pca']


In [10]:
# Same collection as for the training results, but with `train=False` to read
# the results recorded for the held-out data.
eval_res_test = collect_eval_results(
    model, unnest=True, outer_key="obj_regr", train=False
)

# The component paths should match those of the training results.
print(list(eval_res_test.keys()))

['obj_regr', 'obj_regr/model/regr', 'obj_regr/model/transformer/y-pca']


Eval results of full pipeline.

In [11]:
# Training results of the outermost evaluator (mesh-level R² scores).
eval_res_train["obj_regr"]

{'vertexwise_r2': array([-0.07626004, -0.08528793, -0.31559072, ..., -0.11659424,
        -0.12178401, -0.07376704], shape=(2502,)),
 'featurewise_r2': array([0.07124301, 0.08158696, 0.02095568, ..., 0.05770743, 0.07815725,
        0.33783253], shape=(7506,)),
 'vertexwise_r2-mean': np.float64(-0.14526338604612252),
 'vertexwise_r2-max': np.float64(0.4178066143367052),
 'vertexwise_r2-min': np.float64(-0.5029289728511106),
 'featurewise_r2-mean': np.float64(0.12259985820474104),
 'featurewise_r2-max': np.float64(0.7506972073373914),
 'featurewise_r2-min': np.float64(-0.06094162565950367)}

In [12]:
# Held-out results of the outermost evaluator (mesh-level R² scores).
eval_res_test["obj_regr"]

{'vertexwise_r2': array([-4.68838863, -2.45410471, -2.69875114, ..., -0.64134903,
        -0.80206404, -0.43824218], shape=(2502,)),
 'featurewise_r2': array([-0.60415621, -0.10245889, -5.46139403, ...,  0.07845252,
         0.08019206, -0.64427971], shape=(7506,)),
 'vertexwise_r2-mean': np.float64(-1.9081412927743833),
 'vertexwise_r2-max': np.float64(-0.08694855117109013),
 'vertexwise_r2-min': np.float64(-17.13388654056056),
 'featurewise_r2-mean': np.float64(-2.130184758116297),
 'featurewise_r2-max': np.float64(0.5754567452582793),
 'featurewise_r2-min': np.float64(-303.4245859860113)}

Eval results of inner regression.

In [13]:
# Training results of the inner `linear_model` (OLS p-values, R², shapes).
eval_res_train["obj_regr/model/regr"]

{'mse': array([169.79004935,  46.96187643,  41.01027604,  33.37429034]),
 'res_var': array([195.9115954 ,  54.1867805 ,  47.31954928,  38.50879655]),
 'std_err': array([[0.38247402],
        [0.20114931],
        [0.18797157],
        [0.1695711 ]]),
 't': array([[0.36420616],
        [3.23860746],
        [2.59120045],
        [0.2280466 ]]),
 'pvals': array([[0.72155939],
        [0.00646853],
        [0.02237639],
        [0.82315772]]),
 'adj-pvals': array([[1.        ],
        [0.02587413],
        [0.08950555],
        [1.        ]]),
 'r2': array([0.01010049, 0.44653951, 0.34058085, 0.00398446]),
 'X-shape': (15, 1),
 'y-shape': (15, 4),
 'y_pred-shape': (15, 4)}

In [14]:
# Held-out results of the inner `linear_model` (OLS p-values, R², shapes).
eval_res_test["obj_regr/model/regr"]

{'mse': array([ 68.14771819, 136.85642586,  70.72347099,  26.05570977]),
 'res_var': array([136.29543638, 273.71285173, 141.44694199,  52.11141954]),
 'std_err': array([[2.25724533],
        [3.19878979],
        [2.29950781],
        [1.39574011]]),
 't': array([[0.06171212],
        [0.20365316],
        [0.21181577],
        [0.02770581]]),
 'pvals': array([[0.95640443],
        [0.85746577],
        [0.85187585],
        [0.98041279]]),
 'adj-pvals': array([[1.],
        [1.],
        [1.],
        [1.]]),
 'r2': array([ -0.05978541,  -4.5278892 , -13.89800874,  -1.04718884]),
 'X-shape': (4, 1),
 'y-shape': (4, 4),
 'y_pred-shape': (4, 4)}

Eval results of PCA reconstruction.

In [15]:
# Training results of the "y-pca" step inside the `objs2y` transformer.
eval_res_train["obj_regr/model/transformer/y-pca"]

{'expl_var': array([183.77411817,  90.91217384,  66.63376609,  35.90121535]),
 'expl_var_ratio': array([0.33087022, 0.16367991, 0.11996863, 0.06463719]),
 'expl_var_ratio-cum': array([0.33087022, 0.49455013, 0.61451876, 0.67915595]),
 'featurewise_rec_error': array([0.30704027, 0.78957371, 0.23944828, ..., 0.48069691, 0.27782666,
        0.36018969], shape=(7506,)),
 'rec_error_sum': np.float64(2494.874425509467),
 'rec_error_mse': np.float64(0.022158934412554107),
 'vertex-vertexwise_rec_error': array([1.33606225, 1.33088348, 1.70634746, ..., 1.22835423, 1.10048143,
        1.11871325], shape=(2502,)),
 'vertex-rec_error_sum': np.float64(2494.874425509467),
 'vertex-rec_error_mse': np.float64(0.06647680323766232),
 'X-shape': (15, 7506)}

In [16]:
# Held-out results of the "y-pca" step. The `expl_var*` entries describe the
# fitted PCA, so they repeat the training values; only the reconstruction
# errors are computed on the held-out data.
eval_res_test["obj_regr/model/transformer/y-pca"]

{'expl_var': array([183.77411817,  90.91217384,  66.63376609,  35.90121535]),
 'expl_var_ratio': array([0.33087022, 0.16367991, 0.11996863, 0.06463719]),
 'expl_var_ratio-cum': array([0.33087022, 0.49455013, 0.61451876, 0.67915595]),
 'featurewise_rec_error': array([0.04856432, 0.39915214, 0.24070289, ..., 0.1930572 , 0.276112  ,
        0.10926995], shape=(7506,)),
 'rec_error_sum': np.float64(1233.2635753551988),
 'rec_error_mse': np.float64(0.04107592510508922),
 'vertex-vertexwise_rec_error': array([0.68841935, 0.6115182 , 0.29745092, ..., 0.52078645, 0.61485071,
        0.57843915], shape=(2502,)),
 'vertex-rec_error_sum': np.float64(1233.263575355199),
 'vertex-rec_error_mse': np.float64(0.12322777531526768),
 'X-shape': (4, 7506)}

## Stats (PLS)

In [17]:
# Response-side transform: meshes -> stacked array -> flat feature matrix.
# No explicit centring or scaling step is included in this pipeline.
objs2y = AdapterPipeline(
    steps=[
        BiMeshesToVertices(index=0),
        FunctionTransformer(func=np.stack),
        BiFlattenButFirst(),
    ],
)

# Two-component PLS embedding; reconstruction errors and shapes are recorded.
pls_encoder = EvaluatedModel(
    PLSRegression(n_components=2),
    MultiEvaluator(
        [
            ReconstructionError(),
            VertexReconstructionError(prefix="vertex"),
            ShapeCollector(),
        ]
    ),
)

# OLS regressor used together with the embedding; p-values, R² and shapes are
# recorded.
latent_regressor = EvaluatedModel(
    LinearRegression(),
    MultiEvaluator(
        [
            OlsPValues(),
            R2Score(),
            ShapeCollector(),
        ],
    ),
)

# Encoder first, regressor second (they surface as ".../encoder" and
# ".../regr" in the collected results).
inner_model = SupervisedEmbeddingRegressor(pls_encoder, latent_regressor)

# Outer model scored with mesh-level R² metrics.
model = EvaluatedModel(
    ObjectRegressor(inner_model, objs2y),
    MultiEvaluator(
        [MeshEuclideanR2Score(), MeshR2Score()],
        extender=ResultsExtender(),
    ),
)

model

0,1,2
,model,ObjectRegress...ButFirst())]))
,evaluator,<polpo.model_...x71af04500410>

0,1,2
,steps,"[('step_0', ...), ('step_1', ...)]"

0,1,2
,step,<built-in function asarray>

0,1,2
,step,<function atl...x71b068395ff0>

0,1,2
,regressor,SupervisedEmb...Regression()))
,transformer,AdapterPipeli...nButFirst())])
,func,
,inverse_func,
,check_inverse,False

0,1,2
,n_components,2
,scale,True
,max_iter,500
,tol,1e-06
,copy,True

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False

0,1,2
,index,0

0,1,2
,func,<function sta...x71b0683a4bf0>
,inverse_func,
,validate,False
,accept_sparse,False
,check_inverse,True
,feature_names_out,
,kw_args,
,inv_kw_args,


In [18]:
# Rebuild X / y from the predictor dict and the meshes (the original cell
# assigned `dataset_pipe` twice in a row; the redundant assignment is removed).
dataset_pipe = DictsToXY()

X, y = dataset_pipe((x_dict, meshes))

# Ordered 80/20 hold-out: with `shuffle=False` the data keeps its order, so the
# test set is the tail of the sequence rather than a random sample.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    shuffle=False,
)


# Trailing `;` suppresses the repr of the fitted model.
model.fit(X_train, y_train);

In [19]:
# Predict on the held-out data and evaluate against `y_test`; the results are
# read back below with `train=False`. Trailing `;` suppresses the cell output.
model.predict_eval(X_test, y_test);

In [20]:
# Collect the evaluation results of the PLS-based model into a flat mapping
# keyed by component path, rooted at "obj_regr".
eval_res_train = collect_eval_results(model, unnest=True, outer_key="obj_regr")

# List which components reported results.
print(list(eval_res_train.keys()))

['obj_regr', 'obj_regr/model/regr/regr', 'obj_regr/model/regr/encoder']


In [21]:
# Same collection, but with `train=False` to read the results recorded for the
# held-out data.
eval_res_test = collect_eval_results(
    model, unnest=True, outer_key="obj_regr", train=False
)

# The component paths should match those of the training results.
print(list(eval_res_test.keys()))

['obj_regr', 'obj_regr/model/regr/regr', 'obj_regr/model/regr/encoder']


Eval results of full pipeline.

In [22]:
# Training results of the outermost evaluator (mesh-level R² scores).
eval_res_train["obj_regr"]

{'vertexwise_r2': array([-0.07712323, -0.08579493, -0.31994874, ..., -0.12173731,
        -0.12478516, -0.07599897], shape=(2502,)),
 'featurewise_r2': array([0.07041269, 0.08086461, 0.02014693, ..., 0.04903384, 0.08468547,
        0.33846019], shape=(7506,)),
 'vertexwise_r2-mean': np.float64(-0.1439592749948356),
 'vertexwise_r2-max': np.float64(0.42065345176808044),
 'vertexwise_r2-min': np.float64(-0.4693111974550044),
 'featurewise_r2-mean': np.float64(0.12520694278642142),
 'featurewise_r2-max': np.float64(0.7519565312498093),
 'featurewise_r2-min': np.float64(-0.029595787030437082)}

In [23]:
# Held-out results of the outermost evaluator (mesh-level R² scores).
eval_res_test["obj_regr"]

{'vertexwise_r2': array([-4.91033046, -2.44306112, -3.66442101, ..., -0.64462169,
        -0.81065766, -0.46646868], shape=(2502,)),
 'featurewise_r2': array([-0.37174233,  0.07180676, -5.98327261, ..., -0.02056241,
         0.04966358, -0.51750956], shape=(7506,)),
 'vertexwise_r2-mean': np.float64(-2.1322589496395374),
 'vertexwise_r2-max': np.float64(-0.10872163777181565),
 'vertexwise_r2-min': np.float64(-19.60840664788509),
 'featurewise_r2-mean': np.float64(-2.439392128217504),
 'featurewise_r2-max': np.float64(0.586847796942294),
 'featurewise_r2-min': np.float64(-320.6458062609668)}

Eval results of inner regression.

In [24]:
# Training results of the linear regression inside the supervised-embedding
# regressor (OLS p-values, R², shapes).
eval_res_train["obj_regr/model/regr/regr"]

{'mse': array([137.5427467 , 588.55193263]),
 'res_var': array([158.70316927, 679.0983838 ]),
 'std_err': array([[0.34424265],
        [0.71209547]]),
 't': array([[9.2912069 ],
        [1.03353929]]),
 'pvals': array([[4.19298647e-07],
        [3.20196510e-01]]),
 'adj-pvals': array([[8.38597293e-07],
        [6.40393020e-01]]),
 'r2': array([0.86911855, 0.07593034]),
 'X-shape': (15, 1),
 'y-shape': (15, 2),
 'y_pred-shape': (15, 2)}

In [25]:
# Held-out results of the linear regression inside the supervised-embedding
# regressor (OLS p-values, R², shapes).
eval_res_test["obj_regr/model/regr/regr"]

{'mse': array([2998.50444003,  642.67548474]),
 'res_var': array([5997.00888006, 1285.35096949]),
 'std_err': array([[14.97288375],
        [ 6.93184705]]),
 't': array([[0.21361481],
        [0.10617353]]),
 'pvals': array([[0.85064572],
        [0.92513467]]),
 'adj-pvals': array([[1.],
        [1.]]),
 'r2': array([-12.59531868,  -1.035036  ]),
 'X-shape': (4, 1),
 'y-shape': (4, 2),
 'y_pred-shape': (4, 2)}

Eval results of PLS reconstruction.

In [26]:
# Training results of the PLS encoder (reconstruction errors and shapes).
eval_res_train["obj_regr/model/regr/encoder"]

{'featurewise_rec_error': array([0.33692486, 2.95584937, 0.52982969, ..., 0.86962626, 0.50298584,
        0.3990546 ], shape=(7506,)),
 'rec_error_sum': np.float64(5290.451377417203),
 'rec_error_mse': np.float64(0.04698864355108982),
 'vertex-vertexwise_rec_error': array([3.82260393, 3.75296039, 5.41181497, ..., 1.91428711, 1.80805478,
        1.77166669], shape=(2502,)),
 'vertex-rec_error_sum': np.float64(5290.451377417203),
 'vertex-rec_error_mse': np.float64(0.14096593065326946),
 'X-shape': (15, 7506),
 'y-shape': (15, 1),
 'y_pred-shape': (15, 1)}

In [27]:
# Held-out results of the PLS encoder (reconstruction errors and shapes).
eval_res_test["obj_regr/model/regr/encoder"]

{'featurewise_rec_error': array([0.0394429 , 0.3541551 , 0.30241622, ..., 0.22544856, 0.3757194 ,
        0.07282806], shape=(7506,)),
 'rec_error_sum': np.float64(1430.5086823678805),
 'rec_error_mse': np.float64(0.0476455063405236),
 'vertex-vertexwise_rec_error': array([0.69601422, 0.57176883, 0.3810604 , ..., 0.60287419, 0.64291558,
        0.67399602], shape=(2502,)),
 'vertex-rec_error_sum': np.float64(1430.5086823678805),
 'vertex-rec_error_mse': np.float64(0.14293651902157078),
 'X-shape': (4, 7506),
 'y_pred-shape': (4, 1)}