In [1]:
import numpy as np
import polpo.preprocessing.dict as ppdict
import polpo.preprocessing.pd as ppd
from polpo.model_eval import (
    MeshEuclideanR2Score,
    MeshR2Score,
    MultiEvaluator,
    OlsPValues,
    PcaEvaluator,
    R2Score,
    ReconstructionError,
    ResultsExtender,
    VertexReconstructionError,
    collect_obj_regr_eval_results,
)
from polpo.models import ObjectRegressor
from polpo.preprocessing import (
    PartiallyInitializedStep,
)
from polpo.preprocessing.learning import DictsToXY
from polpo.preprocessing.load.pregnancy import (
    DenseMaternalCsvDataLoader,
    DenseMaternalMeshLoader,
)
from polpo.preprocessing.mesh.conversion import PvFromData
from polpo.preprocessing.mesh.io import FreeSurferReader
from polpo.preprocessing.mesh.registration import PvAlign
from polpo.sklearn.adapter import AdapterPipeline, EvaluatedModel
from polpo.sklearn.mesh import BiMeshesToVertices
from polpo.sklearn.np import BiFlattenButFirst
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import FunctionTransformer, StandardScaler



## Load data

In [2]:
# Subject whose data is analysed; the same id is passed to both the CSV loader
# and the mesh loader further down.
subject_id = "01"

# TODO: per structure
# Structure and side requested from the mesh loader.
# NOTE(review): "Hipp" presumably denotes the hippocampus and `left = True` the
# left hemisphere — confirm against DenseMaternalMeshLoader.
struct = "Hipp"
left = True

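Weeks: load the session table and keep the gestational week (`gestWeek`) of every session outside the `post` stage as the predictor.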

In [3]:
# The pilot flag is switched on only for subject "01".
pilot = (subject_id == "01")

# Build the tabular data loader for this subject, then run it to get the table.
pipe = DenseMaternalCsvDataLoader(
    subject_id=subject_id,
    pilot=pilot,
)
df = pipe()

INFO: Data has already been downloaded... using cached file ('/home/luisfpereira/.herbrain/data/maternal/raw/28Baby_Hormones.csv').


In [4]:
# Negated is-in filter on the "stage" column.
# NOTE(review): presumably keeps every row whose stage is not "post" — confirm
# against ppd.DfIsInFilter.
session_selector = ppd.DfIsInFilter("stage", ["post"], negate=True)

# Remaining stages of the predictor pipeline: pick the "gestWeek" column and
# turn the resulting series into a dict.
week_column = ppd.ColumnsSelector("gestWeek")
series_to_dict = ppd.SeriesToDict()

predictor_selector = session_selector + week_column + series_to_dict
x_dict = predictor_selector(df)

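Meshes: load the subject's meshes for the selected structure and align them, using the first mesh as the target.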

In [5]:
def _align_each(**kwargs):
    """Build a step that applies ``PvAlign(**kwargs)`` to every dict value."""
    return ppdict.DictMap(PvAlign(**kwargs))


def _first_mesh(meshes):
    """Return the value stored under the first key of ``meshes``."""
    first_key = list(meshes)[0]
    return meshes[first_key]


# Loader for this subject / structure / side; results come back as a dict.
file_finder = DenseMaternalMeshLoader(
    subject_id=subject_id,
    struct=struct,
    left=left,
    derivative="enigma",
    as_dict=True,
)

# Read each entry with the FreeSurfer reader and convert it with PvFromData.
single_mesh_reader = FreeSurferReader() + PvFromData()
mesh_reader = ppdict.DictMap(single_mesh_reader)

# Alignment step whose target is only known once the meshes are loaded.
# NOTE(review): presumably the underscore-prefixed `_target` is evaluated on the
# incoming data and forwarded to `Step` as `target` — confirm against
# PartiallyInitializedStep.
prep_pipe = PartiallyInitializedStep(
    Step=_align_each,
    _target=_first_mesh,
    max_iterations=500,
)

pipe = file_finder + mesh_reader + prep_pipe
meshes = pipe()

## Stats

In [6]:
# --- Mesh side: PCA with its own evaluators ---------------------------------
pca_evaluators = MultiEvaluator(
    [
        PcaEvaluator(),
        ReconstructionError(),
        # Results of this evaluator are reported under a "vertex" prefix.
        VertexReconstructionError(prefix="vertex"),
    ]
)
pca = EvaluatedModel(PCA(n_components=4), pca_evaluators)

# Steps that turn the meshes into the regression target.
mesh_to_target_steps = [
    BiMeshesToVertices(index=0),
    FunctionTransformer(func=np.stack),
    BiFlattenButFirst(),
    # Centre the features only; variance scaling is disabled.
    StandardScaler(with_std=False),
    # Named step: its evaluation results are later looked up as "y-pca".
    ("y-pca", pca),
]
objs2y = AdapterPipeline(steps=mesh_to_target_steps)

# --- Regression in the reduced space -----------------------------------------
regression_evaluators = MultiEvaluator([OlsPValues(), R2Score()])
linear_model = EvaluatedModel(LinearRegression(), regression_evaluators)

# --- Full object regressor, scored on the meshes -----------------------------
mesh_evaluators = MultiEvaluator(
    [MeshEuclideanR2Score(), MeshR2Score()],
    extender=ResultsExtender(),
)
obj_model = EvaluatedModel(
    ObjectRegressor(linear_model, objs2y),
    mesh_evaluators,
)

In [7]:
# Turn the predictor dict and the mesh dict into a design matrix `X` and a
# matching collection of meshes.
# NOTE(review): presumably entries are paired by shared dict key — confirm
# against DictsToXY.
dataset_pipe = DictsToXY()

X, meshes_ = dataset_pipe((x_dict, meshes))

# Sanity check on the dataset size. Jupyter only displays the *last* expression
# of a cell, so a bare `X.shape, len(meshes_)` here would be evaluated and
# silently discarded; print it so the check is actually visible.
print(X.shape, len(meshes_))

# Fit the full object regressor; the returned value stays the cell's last
# expression, so it is still displayed as before.
obj_model.fit(X, meshes_)

In [8]:
# Gather the evaluation results stored on the fitted model into one mapping.
eval_results = collect_obj_regr_eval_results(obj_model)

# Show which groups of results are available.
eval_results.keys()

dict_keys(['obj_regr', 'regr', 'y-pca'])

In [9]:
# Results of the evaluators attached to the full object regressor (`obj_model`).
eval_results["obj_regr"]

{'vertexwise_r2': array([-0.03227287, -0.05362399, -0.27610936, ..., -0.09031142,
        -0.10054417, -0.05165244], shape=(2502,)),
 'featurewise_r2': array([0.0565667 , 0.15127948, 0.03647594, ..., 0.08765756, 0.15921566,
        0.30785551], shape=(7506,)),
 'vertexwise_r2-mean': np.float64(-0.1784779738971237),
 'vertexwise_r2-max': np.float64(0.3303222939485332),
 'vertexwise_r2-min': np.float64(-0.4521633941713281),
 'featurewise_r2-mean': np.float64(0.09340302887787998),
 'featurewise_r2-max': np.float64(0.7200362344640026),
 'featurewise_r2-min': np.float64(-0.0729484571827892)}

In [10]:
# Results for the regression stage.
# NOTE(review): presumably produced by the OlsPValues and R2Score evaluators
# attached to `linear_model` — confirm against collect_obj_regr_eval_results.
eval_results["regr"]

{'mse': array([150.60038314,  45.37191193,  41.45184563,  32.33291826]),
 'res_var': array([168.31807528,  50.70978392,  46.32853335,  36.13679099]),
 'std_err': array([[0.24474753],
        [0.13433798],
        [0.1284036 ],
        [0.11340383]]),
 't': array([[0.27230227],
        [3.33582311],
        [2.02673772],
        [1.48127591]]),
 'pvals': array([[0.78866933],
        [0.00391514],
        [0.05867096],
        [0.1568303 ]]),
 'adj-pvals': array([[1.        ],
        [0.01566057],
        [0.23468383],
        [0.62732118]]),
 'r2': array([0.00434274, 0.39561392, 0.1946054 , 0.11431478])}

In [11]:
# Results for the pipeline step named "y-pca" (the evaluated PCA model).
eval_results["y-pca"]

{'expl_var': array([159.66043428,  79.24168847,  54.32706653,  38.53422278]),
 'expl_var_ratio': array([0.30365417, 0.15070778, 0.10332328, 0.07328727]),
 'expl_var_ratio-cum': array([0.30365417, 0.45436194, 0.55768523, 0.6309725 ]),
 'featurewise_rec_error': array([0.35178003, 1.08137788, 0.34105258, ..., 0.65854283, 0.47796508,
        0.41840021], shape=(7506,)),
 'rec_error_sum': np.float64(3492.603613290653),
 'rec_error_mse': np.float64(0.024489907114944205),
 'vertex-vertexwise_rec_error': array([1.77421048, 1.74229868, 1.90977017, ..., 1.60099045, 1.58878095,
        1.55490812], shape=(2502,)),
 'vertex-rec_error_sum': np.float64(3492.603613290653),
 'vertex-rec_error_mse': np.float64(0.07346972134483262)}