In [1]:
import numpy as np
import polpo.preprocessing.dict as ppdict
import polpo.preprocessing.pd as ppd
from polpo.model_eval import (
    MeshEuclideanR2Score,
    MeshR2Score,
    MultiEvaluator,
    OlsPValues,
    PcaEvaluator,
    R2Score,
    ReconstructionError,
    ResultsExtender,
    VertexReconstructionError,
    collect_obj_regr_eval_results,
)
from polpo.models import ObjectRegressor, SupervisedEmbeddingRegressor
from polpo.preprocessing import (
    PartiallyInitializedStep,
)
from polpo.preprocessing.learning import DictsToXY
from polpo.preprocessing.load.pregnancy import (
    DenseMaternalCsvDataLoader,
    DenseMaternalMeshLoader,
)
from polpo.preprocessing.mesh.conversion import PvFromData
from polpo.preprocessing.mesh.io import FreeSurferReader
from polpo.preprocessing.mesh.registration import PvAlign
from polpo.sklearn.adapter import AdapterPipeline, EvaluatedModel
from polpo.sklearn.mesh import BiMeshesToVertices
from polpo.sklearn.np import BiFlattenButFirst
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import FunctionTransformer, StandardScaler



## Load data

In [2]:
# Subject whose data is loaded; passed to both the CSV loader and the mesh loader.
subject_id = "01"

# TODO: per structure
# Structure and hemisphere flag handed to DenseMaternalMeshLoader
# (as `struct=` and `left=`).
struct = "Hipp"
left = True

Predictor: gestational week (the `gestWeek` column of the CSV data).

In [3]:
# Subject "01" is treated as the pilot; the flag is forwarded to the CSV loader.
pilot = subject_id == "01"

# Build the loader and call it right away to obtain the tabular data.
pipe = DenseMaternalCsvDataLoader(subject_id=subject_id, pilot=pilot)
df = pipe()

INFO: Data has already been downloaded... using cached file ('/home/luisfpereira/.herbrain/data/maternal/raw/28Baby_Hormones.csv').


In [4]:
# Row filter on the "stage" column; with negate=True it presumably keeps the
# rows whose stage is NOT "post" (confirm against polpo's DfIsInFilter).
session_selector = ppd.DfIsInFilter("stage", ["post"], negate=True)

# Remaining steps of the predictor pipeline, named individually.
week_column = ppd.ColumnsSelector("gestWeek")
series_to_dict = ppd.SeriesToDict()

# Compose left to right: filter rows -> select "gestWeek" -> convert to dict.
predictor_selector = session_selector + week_column + series_to_dict

x_dict = predictor_selector(df)

Response: meshes of the selected structure (`struct`, `left`).

In [5]:
# Mesh loader configured for the subject, structure and hemisphere chosen above.
# as_dict=True: the downstream steps are DictMap-based and index the data by key.
file_finder = DenseMaternalMeshLoader(
    subject_id=subject_id, as_dict=True, left=left, struct=struct, derivative="enigma"
)

# FreeSurferReader followed by PvFromData, wrapped in DictMap.
# NOTE(review): presumably DictMap applies the composed step to every dict value
# - confirm in polpo.
mesh_reader = ppdict.DictMap(FreeSurferReader() + PvFromData())

# Alignment step built through PartiallyInitializedStep.
# - `Step` receives keyword arguments and wraps PvAlign(**kwargs) in a DictMap.
# - `_target` selects the first entry (insertion order) of the dict it is given.
# - `max_iterations=500` is passed along as a plain keyword.
# NOTE(review): presumably `_target` is evaluated on the incoming data and its
# result is forwarded to `Step` as `target=` - confirm in polpo.
prep_pipe = PartiallyInitializedStep(
    Step=lambda **kwargs: ppdict.DictMap(PvAlign(**kwargs)),
    _target=lambda meshes: meshes[list(meshes.keys())[0]],
    max_iterations=500,
)

# Full mesh pipeline: locate files -> read/convert -> align.
# Note: this rebinds `pipe`, which previously held the CSV loader.
pipe = file_finder + mesh_reader + prep_pipe

meshes = pipe()

## Stats (PCA)

In [6]:
# --- Response side: meshes -> flat vertex array -> centred -> 4 PCA scores ---
pca_evaluators = [
    PcaEvaluator(),
    ReconstructionError(),
    VertexReconstructionError(prefix="vertex"),
]
pca = EvaluatedModel(PCA(n_components=4), MultiEvaluator(pca_evaluators))

mesh_to_scores_steps = [
    BiMeshesToVertices(index=0),
    FunctionTransformer(func=np.stack),
    BiFlattenButFirst(),
    # with_std=False: features are centred but not rescaled.
    StandardScaler(with_std=False),
    # Named step; "y-pca" also appears as a key of the collected results.
    ("y-pca", pca),
]
objs2y = AdapterPipeline(steps=mesh_to_scores_steps)

# --- Predictor side: ordinary least squares with p-values and R2 ---
regr_evaluator = MultiEvaluator([OlsPValues(), R2Score()])
linear_model = EvaluatedModel(LinearRegression(), regr_evaluator)

# --- Full model: regression onto the transformed meshes, scored on meshes ---
obj_evaluator = MultiEvaluator(
    [MeshEuclideanR2Score(), MeshR2Score()],
    extender=ResultsExtender(),
)
obj_model = EvaluatedModel(ObjectRegressor(linear_model, objs2y), obj_evaluator)

In [7]:
# Turn the (predictor dict, mesh dict) pair into the X / y inputs used for fitting.
dataset_pipe = DictsToXY()

X, meshes_ = dataset_pipe((x_dict, meshes))

# Dataset size check. A bare expression in the middle of a cell is evaluated but
# never displayed (only the last expression is), so print it explicitly.
print(X.shape, len(meshes_))

# Last expression of the cell: fits the model and displays the returned object.
obj_model.fit(X, meshes_)

In [8]:
# Collect the evaluation results of the fitted `obj_model` into a dict-like
# object (it is indexed by string keys in the cells that follow).
eval_results = collect_obj_regr_eval_results(obj_model)

# Show which groups of results are available.
eval_results.keys()

dict_keys(['obj_regr', 'regr', 'y-pca'])

In [9]:
# Results stored under "obj_regr": vertex-wise and feature-wise R2 arrays plus
# their mean/max/min summaries (see the output below).
eval_results["obj_regr"]

{'vertexwise_r2': array([-0.03240006, -0.0537206 , -0.27616035, ..., -0.09033173,
        -0.10056075, -0.0516526 ], shape=(2502,)),
 'featurewise_r2': array([0.05652044, 0.15122018, 0.0360839 , ..., 0.08766353, 0.15921599,
        0.30784527], shape=(7506,)),
 'vertexwise_r2-mean': np.float64(-0.17857131642747998),
 'vertexwise_r2-max': np.float64(0.3302593966975834),
 'vertexwise_r2-min': np.float64(-0.4521754152624069),
 'featurewise_r2-mean': np.float64(0.09331713362819538),
 'featurewise_r2-max': np.float64(0.7195687620127902),
 'featurewise_r2-min': np.float64(-0.07390080400106536)}

In [10]:
# Results stored under "regr": OLS statistics (std_err, t, pvals, adj-pvals) and
# R2, with four entries each in the output below - presumably one per PCA
# component (n_components=4).
eval_results["regr"]

{'mse': array([150.60038841,  45.37330149,  41.45054695,  32.36675619]),
 'res_var': array([168.31808117,  50.71133695,  46.32708189,  36.17460986]),
 'std_err': array([[0.24474754],
        [0.13434004],
        [0.12840159],
        [0.11346316]]),
 't': array([[0.27230117],
        [3.33569406],
        [2.02690121],
        [1.47448236]]),
 'pvals': array([[0.78867016],
        [0.00391623],
        [0.05865263],
        [0.15863006]]),
 'adj-pvals': array([[1.        ],
        [0.01566494],
        [0.23461052],
        [0.63452026]]),
 'r2': array([0.0043427 , 0.39559542, 0.19463069, 0.11338725])}

In [11]:
# Results stored under "y-pca" (the name of the PCA step in `objs2y`):
# explained variance and reconstruction errors (see the output below).
eval_results["y-pca"]

{'expl_var': array([159.66043428,  79.2416896 ,  54.3270702 ,  38.53419571]),
 'expl_var_ratio': array([0.30365417, 0.15070778, 0.10332329, 0.07328722]),
 'expl_var_ratio-cum': array([0.30365417, 0.45436195, 0.55768524, 0.63097245]),
 'featurewise_rec_error': array([0.35175787, 1.08139102, 0.34153269, ..., 0.65838701, 0.47756534,
        0.41834242], shape=(7506,)),
 'rec_error_sum': np.float64(3492.604014255106),
 'rec_error_mse': np.float64(0.02448990992648061),
 'vertex-vertexwise_rec_error': array([1.77468158, 1.74279175, 1.91025922, ..., 1.60045836, 1.58819548,
        1.55429477], shape=(2502,)),
 'vertex-rec_error_sum': np.float64(3492.604014255106),
 'vertex-rec_error_mse': np.float64(0.07346972977944184)}

## Stats (PLS)

In [12]:
# --- Response side: meshes -> flat vertex array (no centring or PCA step) ---
mesh_to_vertices_steps = [
    BiMeshesToVertices(index=0),
    FunctionTransformer(func=np.stack),
    BiFlattenButFirst(),
]
objs2y = AdapterPipeline(steps=mesh_to_vertices_steps)

# --- Supervised embedding: PLS encoder (2 components) + linear regression ---
# The results of these two parts appear under "regr-encoder" and "regr-regr".
pls_encoder = EvaluatedModel(
    PLSRegression(n_components=2),
    MultiEvaluator(
        [ReconstructionError(), VertexReconstructionError(prefix="vertex")]
    ),
)
latent_regressor = EvaluatedModel(
    LinearRegression(),
    MultiEvaluator([OlsPValues(), R2Score()]),
)
model = SupervisedEmbeddingRegressor(pls_encoder, latent_regressor)

# --- Full model: scored on the meshes ---
obj_evaluator = MultiEvaluator(
    [MeshEuclideanR2Score(), MeshR2Score()],
    extender=ResultsExtender(),
)
obj_model = EvaluatedModel(ObjectRegressor(model, objs2y), obj_evaluator)

In [13]:
# Turn the (predictor dict, mesh dict) pair into the X / y inputs used for fitting.
dataset_pipe = DictsToXY()

X, meshes_ = dataset_pipe((x_dict, meshes))

# Dataset size check. A bare expression in the middle of a cell is evaluated but
# never displayed (only the last expression is), so print it explicitly.
print(X.shape, len(meshes_))

# Last expression of the cell: fits the PLS-based model and displays the result.
obj_model.fit(X, meshes_)

In [14]:
# Collect the evaluation results of the fitted PLS-based `obj_model`
# (this rebinds `eval_results`, replacing the PCA results).
eval_results = collect_obj_regr_eval_results(obj_model)

# Show which groups of results are available.
eval_results.keys()

dict_keys(['obj_regr', 'regr-regr', 'regr-encoder'])

In [15]:
# Results stored under "obj_regr" for the PLS-based model: vertex-wise and
# feature-wise R2 arrays plus their mean/max/min summaries (see the output below).
eval_results["obj_regr"]

{'vertexwise_r2': array([-0.01845181, -0.03854586, -0.27739872, ..., -0.10147212,
        -0.11228732, -0.0597578 ], shape=(2502,)),
 'featurewise_r2': array([0.05092154, 0.16428687, 0.05251928, ..., 0.074925  , 0.15747433,
        0.30590612], shape=(7506,)),
 'vertexwise_r2-mean': np.float64(-0.18043143604330505),
 'vertexwise_r2-max': np.float64(0.3319811967352385),
 'vertexwise_r2-min': np.float64(-0.4589236191509354),
 'featurewise_r2-mean': np.float64(0.09320214015000124),
 'featurewise_r2-max': np.float64(0.7365367160291725),
 'featurewise_r2-min': np.float64(-0.04670595673033584)}

In [16]:
# NB: the p-values are not meaningful in this context; they are shown only for
# illustration.
eval_results["regr-regr"]

{'mse': array([198.06906719, 487.14422257]),
 'res_var': array([221.37131039, 544.45530758]),
 'std_err': array([[0.28068138],
        [0.4401838 ]]),
 't': array([[7.83323888],
        [1.45359719]]),
 'pvals': array([[4.85655816e-07],
        [1.64269951e-01]]),
 'adj-pvals': array([[9.71311631e-07],
        [3.28539903e-01]]),
 'r2': array([0.78305156, 0.11055046])}

In [17]:
# Results stored under "regr-encoder": feature-wise and vertex-wise
# reconstruction errors (see the output below).
eval_results["regr-encoder"]

{'featurewise_rec_error': array([0.39485296, 3.14490972, 0.67029197, ..., 1.21611889, 0.95001583,
        0.47812717], shape=(7506,)),
 'rec_error_sum': np.float64(6819.855029070748),
 'rec_error_mse': np.float64(0.04782037548256656),
 'vertex-vertexwise_rec_error': array([4.21005465, 4.17553701, 5.68375541, ..., 2.58543428, 2.65112276,
        2.64426189], shape=(2502,)),
 'vertex-rec_error_sum': np.float64(6819.855029070746),
 'vertex-rec_error_mse': np.float64(0.14346112644769965)}