# Choroid Plexus and CSF

### Set Up

#### Imports

In [1]:
from warnings import simplefilter

import pandas as pd

simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
simplefilter(action="ignore", category=DeprecationWarning)
simplefilter(action="ignore", category=FutureWarning)

import re
import textwrap
from pathlib import Path
from pyprocessmacro import Process

import numpy as np
import pyperclip
import statsmodels.api as sm
from matplotlib import colormaps
from scipy import stats
from IPython.display import Markdown, HTML, display
from io import StringIO
import matplotlib.pyplot as plt
from collections import defaultdict

from reload_recursive import reload_recursive
import sys

sys.path.insert(0, "/home/srs-9/Projects/ms_mri/analysis/thalamus/helpers")

import helpers
import utils
import regression_utils as regutils
import my_namespace


In [2]:
# Refresh the locally developed helper modules so that edits made on disk are
# picked up without restarting the kernel. (reload_recursive comes from the
# external `reload_recursive` package; presumably it reloads the module and its
# submodules -- confirm against that package.)
reload_recursive(regutils)
reload_recursive(helpers)
reload_recursive(utils)
reload_recursive(my_namespace)

# Re-bind names after the reload: anything pulled in with `from ... import`
# before a reload would otherwise keep pointing at the old objects.
import utils
from utils import load_df, zscore, get_colors
from my_namespace import presentation_cols
# NOTE(review): the wildcard import hides where later names come from (for
# example, run_R_script is called in this notebook but is not imported
# explicitly in the visible cells) -- consider listing the required names.
from my_namespace import *

import regression_utils as regutils
from regression_utils import (
    quick_regression,
    quick_regression2,
    residualize_vars,
    run_regressions,
)

#### Load Data

##### Clinical and Volumes

In [3]:
# Root of the thalamus analysis outputs on the analysis machine.
# NOTE(review): absolute, user-specific path -- the notebook will not run
# elsewhere unless this is adjusted.
RESULTS_DIR = Path("/home/srs-9/Projects/ms_mri/analysis/thalamus/results")

# Figure/table directory associated with this analysis.
fig_path = RESULTS_DIR / "figures_tables" / "3-CP_CSF_analysis"

# One row per subject, indexed by subject id.
data = pd.read_csv(RESULTS_DIR / "data.csv", index_col="subid")
# dataT = pd.read_csv(RESULTS_DIR / "data_transformed.csv", index_col="subid")

# Boolean row masks over `data` (aligned on its index) for the diagnostic groups.
# dz_type2 distinguishes "MS" from "!MS"; dz_type5 carries the finer labels.
MS_patients = data["dz_type2"].eq("MS")
NONMS_patients = data["dz_type2"].eq("!MS")
NIND_patients = data["dz_type5"].eq("NIND")
OIND_patients = data["dz_type5"].eq("OIND")
RMS_patients = data["dz_type5"].eq("RMS")
PMS_patients = data["dz_type5"].eq("PMS")

#### Functions

In [4]:
def thalamus_influence_analysis(model_data, influenced_var, covariates, alpha=0.05):
    """Compare how strongly the whole thalamus and its nuclei groups correlate with a variable.

    Each of the four nuclei groups ("posterior", "medial", "anterior",
    "ventral"), the whole thalamus ("THALAMUS_1") and ``influenced_var`` is
    first passed through ``residualize_vars`` with ``covariates`` as the
    independent variables. Rows with a missing value in any of the resulting
    columns are dropped. Pearson correlations with the residualized
    ``influenced_var`` are then computed, and pairs of correlations are
    compared with ``run_R_script``, whose return value is treated as a p-value.

    Parameters
    ----------
    model_data : pandas.DataFrame
        Data handed unchanged to ``residualize_vars``; must contain the five
        thalamic columns, ``influenced_var`` and whatever ``covariates`` needs.
    influenced_var : str
        Column whose association with the thalamic volumes is examined.
    covariates : list of str
        Passed to ``residualize_vars`` as ``independent_vars``.
    alpha : float, default 0.05
        Threshold applied to the value returned by ``run_R_script``. A
        comparison is labelled with ">" or "<" only when that value is strictly
        below ``alpha``.

    Returns
    -------
    pearson_results : dict
        ``scipy.stats.pearsonr`` result for "THALAMUS_1" and for each nuclei
        group against ``influenced_var`` (all residualized).
    thalamus_comps : dict
        Keys of the form ``"THALAMUS_1 <sign> <struct>"`` mapped to the value
        returned by ``run_R_script`` for that pair.
    nuclei_comps : dict
        Same layout for every unordered pair of nuclei groups, keyed
        ``"<struct1> <sign> <struct2>"``.

    ``<sign>`` is "=" when the comparison is not below ``alpha`` (including a
    NaN result); otherwise ">" or "<" according to which absolute correlation
    is larger.

    Notes
    -----
    ``run_R_script`` receives (r of first variable with the target, r of second
    variable with the target, r between the two variables, number of
    observations). Presumably this is a test for the difference between two
    dependent correlations -- confirm against its definition, which is not
    visible here.
    """
    whole = "THALAMUS_1"
    structs = ["posterior", "medial", "anterior", "ventral"]

    # dict.fromkeys keeps first-seen order and avoids residualizing the same
    # column twice when influenced_var is itself one of the thalamic columns.
    columns = dict.fromkeys(
        ["medial", "posterior", "anterior", "ventral", whole, influenced_var]
    )
    residualized_vars = pd.DataFrame(
        {
            var: residualize_vars(
                model_data,
                dependent_var=var,
                independent_vars=covariates,
            )
            for var in columns
        }
    ).dropna()

    target = residualized_vars[influenced_var]
    nobs = target.shape[0]

    pearson_results = {
        var: stats.pearsonr(residualized_vars[var], target)
        for var in [whole] + structs
    }

    def _compare(first, second):
        """Return (labelled key, p-value) for the comparison of two correlations."""
        r_first = pearson_results[first][0]
        r_second = pearson_results[second][0]
        r_between = stats.pearsonr(
            residualized_vars[first], residualized_vars[second]
        )[0]
        p_value = run_R_script(r_first, r_second, r_between, nobs)
        # `not (p < alpha)` rather than `p >= alpha` so that a NaN p-value is
        # reported as "no detectable difference" instead of a directional one.
        if not (p_value < alpha):
            sign = "="
        elif abs(r_first) > abs(r_second):
            sign = ">"
        else:
            sign = "<"
        return f"{first} {sign} {second}", p_value

    # Build each key with its final sign directly; no need to rewrite earlier
    # keys with a regex on every iteration.
    thalamus_comps = dict(_compare(whole, struct) for struct in structs)

    nuclei_comps = {}
    for position, struct1 in enumerate(structs):
        for struct2 in structs[position + 1 :]:
            key, p_value = _compare(struct1, struct2)
            nuclei_comps[key] = p_value

    return pearson_results, thalamus_comps, nuclei_comps

## Analysis around CP

Peripheral CSF segmentation was obtained by starting from the `FAST` CSF segmentation and subtracting the lateral, third, and fourth ventricles, as well as label 24 of FreeSurfer's `aseg.auto_noCCseg` segmentation (technically FastSurfer's).

### Does CP drive central atrophy?

Previous analysis showed that CP is very strongly associated with LV; in fact, of all the variables I'm looking at, LV is the one with the strongest association with CP. Both CP and LV enlarge in MS. We know that LV enlargement can be indicative of a few different pathologic processes.

1. Overproduction of CSF
2. Atrophy
3. CSF obstruction

Also, atrophy associated with ventricular enlargement could be global or central. 

#### Relationship between CP and CSF compartment volumes

**Does CP predict the volume of the third ventricle, fourth ventricle, and peripheral CSF, and which ones is it most strongly associated with?**

- CP has the strongest relationship with the LVs, then aseg label 24\*, then the third ventricle, and finally the fourth ventricle.

\**Freesurfer's aseg segmentation has a label 24 called "CSF" which is a misnomer because it is actually pretty small and located at some periaqueductal/interthalamic CSF pocket.*

In [34]:
# MS cohort only; zscore is asked to leave the PRL column untouched.
model_data_z = zscore(data[MS_patients], skip_vars=["PRL"])

covariates = ["age", "Female", "tiv"]
predictors = ["CP", "t2lv_log", "THALAMUS_1", "brain"]
test_outcomes = ["LV_log", "asegCSF_log", "thirdV_log", "fourthV_log", "periCSF"]
predictor_to_show = "CP"

# Regressions are run for every predictor listed above; only the
# per-predictor results (second return value) are used here.
_, results, _ = regutils.run_regressions(
    model_data_z, test_outcomes, predictors, covariates=covariates
)

# Optional display names for the outcome rows; all currently disabled, so the
# rename below leaves the index as it is.
index_labels = {
    # "asegCSF_log": "aseg24_CSF_log",
    # "thirdV_log": "aseg14_thirdV",
    # "fourthV_log": "aseg15_fourthV",
}
results_present = results[predictor_to_show].rename(index=index_labels)
formula = regutils.formula_string("outcome", predictor_to_show, covariates)
coef_table = regutils.present_model(results_present, presentation_cols)

display(
    Markdown(f"**Effect of {predictor_to_show} on outcomes**"),
    Markdown(f"`{formula}`"),
    Markdown(coef_table.to_markdown()),
)

**Effect of CP on outcomes**

`outcome ~ CP + age + Female + tiv`

| outcome     |   coef |    pval |   p_fdr |     se | ci              |   R2 |
|:------------|-------:|--------:|--------:|-------:|:----------------|-----:|
| LV_log      | 0.6457 | 1.9e-62 | 9.5e-62 | 0.033  | [0.581, 0.711]  | 0.57 |
| asegCSF_log | 0.4676 | 2.5e-23 | 6.2e-23 | 0.0445 | [0.38, 0.555]   | 0.27 |
| thirdV_log  | 0.4264 | 1.1e-21 | 1.8e-21 | 0.0424 | [0.343, 0.51]   | 0.35 |
| fourthV_log | 0.23   | 6.2e-06 | 7.8e-06 | 0.0503 | [0.131, 0.329]  | 0.14 |
| periCSF     | 0.1491 | 2.6e-05 | 2.6e-05 | 0.0351 | [0.0801, 0.218] | 0.58 |

CP also predicts these CSF volumes after accounting for both brain and thalamus volumes.

In [35]:
# Same outcomes and predictors as the previous cell (model_data_z,
# test_outcomes and predictors are reused from it), now additionally
# adjusting for whole-brain and thalamic volume.
# NOTE(review): `predictors` still contains "brain" and "THALAMUS_1", which
# are also covariates here; only the CP results are displayed -- confirm the
# other fits are not relied upon.
covariates = ["brain", "THALAMUS_1", "age", "Female", "tiv"]
predictor_to_show = "CP"

_, results, _ = regutils.run_regressions(
    model_data_z, test_outcomes, predictors, covariates=covariates
)

# Optional display names for the outcome rows; all currently disabled.
index_labels = {
    # "asegCSF_log": "aseg24_CSF_log",
    # "thirdV_log": "aseg14_thirdV",
    # "fourthV_log": "aseg15_fourthV",
}
results_present = results[predictor_to_show].rename(index=index_labels)
formula = regutils.formula_string("outcome", predictor_to_show, covariates)
coef_table = regutils.present_model(results_present, presentation_cols)

display(
    Markdown(f"**Effect of {predictor_to_show} on outcomes after accounting for brain and THALAMUS_1 as well**"),
    Markdown(f"`{formula}`"),
    Markdown(coef_table.to_markdown()),
)

**Effect of CP on outcomes after accounting for brain and THALAMUS_1 as well**

`outcome ~ CP + brain + THALAMUS_1 + age + Female + tiv`

| outcome     |   coef |    pval |   p_fdr |     se | ci               |   R2 |
|:------------|-------:|--------:|--------:|-------:|:-----------------|-----:|
| LV_log      | 0.4816 | 2.9e-34 | 1.4e-33 | 0.0363 | [0.41, 0.553]    | 0.67 |
| asegCSF_log | 0.2857 | 2.9e-09 | 7.3e-09 | 0.0472 | [0.193, 0.378]   | 0.37 |
| thirdV_log  | 0.2205 | 1.7e-07 | 2.8e-07 | 0.0415 | [0.139, 0.302]   | 0.48 |
| fourthV_log | 0.1328 | 0.02    | 0.025   | 0.0568 | [0.0211, 0.244]  | 0.17 |
| periCSF     | 0.0437 | 0.23    | 0.23    | 0.0367 | [-0.0284, 0.116] | 0.62 |

#### Sanity check on peripheral CSF

The following is more of a sanity check. If all the CSF volumes are defined properly, particularly peripheral CSF, we'd expect thalamic volume to be related more to the central CSF compartments and brain volume to be related more to the peripheral CSF volume. This is borne out: peripheral CSF has a much stronger relationship to whole brain volume (WBV), and LV has a much stronger relationship to thalamus.

In [36]:
model_data_z = zscore(data[MS_patients], skip_vars=["PRL"])

# Bold the two predictors of interest in the rendered coefficient tables.
rename_index = {name: f"**{name}**" for name in ("brain", "THALAMUS_1")}

# Model label -> volume term(s) placed ahead of the shared covariates.
volume_terms = {
    "brain": "brain",
    "thalamus": "THALAMUS_1",
    "brain+thalamus": "brain + THALAMUS_1",
}

test_outcomes = ["LV_log", "thirdV_log", "fourthV_log", "periCSF"]
all_results = defaultdict(dict)
all_models = defaultdict(dict)
all_formulas = defaultdict(dict)
for outcome in test_outcomes:
    formula_list = {
        label: f"{outcome} ~ {terms} + age + Female + tiv"
        for label, terms in volume_terms.items()
    }
    fitted = regutils.run_regressions3(
        model_data_z,
        formula_list.values(),
        model_names=formula_list.keys(),
    )
    all_results[outcome], all_models[outcome], all_formulas[outcome] = fitted

# Show only the joint brain+thalamus model for the peripheral and the lateral
# ventricular compartment; the single-predictor fits stay available in
# all_results / all_formulas. The periCSF table keeps its trailing blank lines
# so the emitted markdown is unchanged.
model_name = "brain+thalamus"
for outcome, trailer in (("periCSF", "\n\n"), ("LV_log", "")):
    results = all_results[outcome]
    formulas = all_formulas[outcome]
    model = results[model_name]
    formula = formulas[model_name]
    table_md = regutils.present_model(
        model, presentation_cols, rename_index=rename_index
    ).to_markdown()
    display(
        Markdown(f"##### {outcome}"),
        Markdown(f"{model_name}"),
        Markdown(f"`{formula}`"),
        Markdown(table_md + trailer),
    )

display(Markdown("*Note: brain has a significant association with LV_log before controlling for THALAMUS_1*"))

##### periCSF

brain+thalamus

`periCSF ~ brain + THALAMUS_1 + age + Female + tiv`

|                |    coef |    pval |     se | ci                |
|:---------------|--------:|--------:|-------:|:------------------|
| Intercept      |  0.003  | 0.92    | 0.0288 | [-0.0535, 0.0595] |
| **brain**      | -0.2327 | 1.9e-10 | 0.0357 | [-0.303, -0.162]  |
| **THALAMUS_1** | -0.1109 | 0.013   | 0.0445 | [-0.198, -0.0234] |
| age            |  0.0609 | 0.075   | 0.0341 | [-0.00615, 0.128] |
| Female         |  0.0816 | 0.02    | 0.035  | [0.013, 0.15]     |
| tiv            |  0.8337 | 9.2e-75 | 0.0376 | [0.76, 0.908]     |



##### LV_log

brain+thalamus

`LV_log ~ brain + THALAMUS_1 + age + Female + tiv`

|                |    coef |    pval |     se | ci                |
|:---------------|--------:|--------:|-------:|:------------------|
| Intercept      |  0.0023 | 0.94    | 0.0321 | [-0.0608, 0.0655] |
| **brain**      | -0.0151 | 0.72    | 0.0426 | [-0.0989, 0.0687] |
| **THALAMUS_1** | -0.6317 | 6.8e-47 | 0.0391 | [-0.709, -0.555]  |
| age            |  0.2105 | 2.7e-09 | 0.0347 | [0.142, 0.279]    |
| Female         |  0.03   | 0.42    | 0.0371 | [-0.0428, 0.103]  |
| tiv            |  0.4906 | 4e-26   | 0.0436 | [0.405, 0.576]    |

*Note: brain has a significant association with LV_log before controlling for THALAMUS_1*

### CP drives preferential central expansion

Does the relationship between CP and LV volumes represent a global process whereby CP influences overall atrophy or CSF hydrodynamics, or is CP causing regional changes? With the degree of preferential central expansion defined as the central CSF ratio (CCR):

$CCR = LV / periCSF$

**CP is associated with increasing CCR, beyond what is predicted by brain or thalamus volumes.**

In [5]:
model_data_z = zscore(data[MS_patients], skip_vars=["PRL"])

test_outcomes = ["CCR_log"]
predictors = ["CP"]
predictor_to_show = "CP"

# The same CP -> CCR_log regression is fitted twice, once adjusting for
# thalamic volume and once for whole-brain volume. The second code fence keeps
# its trailing newline so the emitted markdown is unchanged.
for covariates, fence_tail in (
    (["THALAMUS_1", "age", "Female", "tiv"], ""),
    (["brain", "age", "Female", "tiv"], "\n"),
):
    _, results, formulas = regutils.run_regressions(
        model_data_z, test_outcomes, predictors, covariates=covariates
    )
    formula = regutils.formula_string("outcome", predictor_to_show, covariates)
    cp_results = results[predictor_to_show]

    # The results are indexed by outcome name, so `formula[0]` only worked
    # through pandas' deprecated positional fallback (a FutureWarning that is
    # silenced at the top of this notebook). `.iloc[0]` states the intent and
    # keeps working on newer pandas.
    fitted_formula = cp_results.formula.iloc[0]

    display(
        Markdown(f"```R\n{fitted_formula}\n```" + fence_tail),
        Markdown(
            regutils.present_model(cp_results, presentation_cols).to_markdown()
        ),
    )


```R
CCR_log ~ CP + THALAMUS_1 + age + Female + tiv
```

| outcome   |   coef |    pval |   p_fdr |     se | ci             |   R2 |
|:----------|-------:|--------:|--------:|-------:|:---------------|-----:|
| CCR_log   | 0.4713 | 8.2e-31 | 8.2e-31 | 0.0379 | [0.397, 0.546] | 0.61 |

```R
CCR_log ~ CP + brain + age + Female + tiv
```


| outcome   |   coef |    pval |   p_fdr |     se | ci             |   R2 |
|:----------|-------:|--------:|--------:|-------:|:---------------|-----:|
| CCR_log   |  0.635 | 6.7e-48 | 6.7e-48 | 0.0387 | [0.559, 0.711] | 0.51 |

Alternative presentation for the above

In [11]:
model_data_z = zscore(data[MS_patients], skip_vars=["PRL"])

outcome = "CCR_log"
covariates = ["age", "Female", "tiv"]

# Model label -> predictors of interest; `covariates` are passed alongside
# every model.
exog_list = {
    "CP alone": ["CP"],
    "CP, control for brain": ["CP", "brain"],
    "CP, control for CT": ["CP", "cortical_thickness"],
    "CP, control for thalamus": ["CP", "THALAMUS_1"],
    "Just brain": ["brain"],
    "Just thalamus": ["THALAMUS_1"],
    "t2lv and brain": ["t2lv_log", "brain"],
    "t2lv and thalamus": ["t2lv_log", "THALAMUS_1"],
}

results, models, formulas = regutils.run_regressions2(
    model_data_z,
    outcome,
    exog_list.values(),
    model_names=exog_list.keys(),
    covariates=covariates,
)

# One section per model: heading, fitted formula, coefficient table, rule.
# Results and formulas are paired by position, as returned above.
for (model_name, model), formula in zip(results.items(), formulas.values()):
    display(
        Markdown(f"#### {model_name}"),
        Markdown(f"```R\n{formula}\n```\n"),
        Markdown(regutils.present_model(model, presentation_cols).to_markdown()),
        Markdown("---"),
    )

#### CP alone

```R
CCR_log ~ CP + age + Female + tiv
```


|           |   coef |    pval |     se | ci                |
|:----------|-------:|--------:|-------:|:------------------|
| Intercept | 0.0013 | 0.97    | 0.0325 | [-0.0625, 0.0651] |
| CP        | 0.6401 | 2.8e-57 | 0.0347 | [0.572, 0.708]    |
| age       | 0.1737 | 5e-06   | 0.0376 | [0.0998, 0.248]   |
| Female    | 0.0526 | 0.15    | 0.0361 | [-0.0183, 0.123]  |
| tiv       | 0.0533 | 0.2     | 0.0414 | [-0.028, 0.135]   |

---

#### CP, control for brain

```R
CCR_log ~ CP + brain + age + Female + tiv
```


|           |    coef |    pval |     se | ci                |
|:----------|--------:|--------:|-------:|:------------------|
| Intercept |  0.0013 | 0.97    | 0.0326 | [-0.0626, 0.0653] |
| CP        |  0.635  | 6.7e-48 | 0.0387 | [0.559, 0.711]    |
| brain     | -0.0174 | 0.67    | 0.0407 | [-0.0975, 0.0626] |
| age       |  0.1681 | 1.1e-05 | 0.0379 | [0.0937, 0.242]   |
| Female    |  0.0547 | 0.12    | 0.0353 | [-0.0147, 0.124]  |
| tiv       |  0.0568 | 0.17    | 0.0413 | [-0.0243, 0.138]  |

---

#### CP, control for CT

```R
CCR_log ~ CP + cortical_thickness + age + Female + tiv
```


|                    |    coef |    pval |     se | ci                |
|:-------------------|--------:|--------:|-------:|:------------------|
| Intercept          |  0.0015 | 0.96    | 0.0309 | [-0.0592, 0.0621] |
| CP                 |  0.5526 | 1.5e-39 | 0.0381 | [0.478, 0.628]    |
| cortical_thickness | -0.2693 | 1.9e-11 | 0.0391 | [-0.346, -0.192]  |
| age                |  0.0858 | 0.012   | 0.0341 | [0.0188, 0.153]   |
| Female             |  0.0746 | 0.029   | 0.034  | [0.00774, 0.142]  |
| tiv                |  0.106  | 0.0078  | 0.0397 | [0.0281, 0.184]   |

---

#### CP, control for thalamus

```R
CCR_log ~ CP + THALAMUS_1 + age + Female + tiv
```


|            |    coef |    pval |     se | ci               |
|:-----------|--------:|--------:|-------:|:-----------------|
| Intercept  |  0.0016 | 0.96    | 0.0293 | [-0.056, 0.0591] |
| CP         |  0.4713 | 8.2e-31 | 0.0379 | [0.397, 0.546]   |
| THALAMUS_1 | -0.4034 | 1.1e-19 | 0.0424 | [-0.487, -0.32]  |
| age        |  0.1017 | 0.002   | 0.0327 | [0.0374, 0.166]  |
| Female     |  0.0828 | 0.015   | 0.0341 | [0.0158, 0.15]   |
| tiv        |  0.2516 | 3.4e-09 | 0.0417 | [0.17, 0.334]    |

---

#### Just brain

```R
CCR_log ~ brain + age + Female + tiv
```


|           |    coef |    pval |     se | ci                |
|:----------|--------:|--------:|-------:|:------------------|
| Intercept |  0.0022 | 0.96    | 0.0411 | [-0.0785, 0.0829] |
| brain     | -0.2133 | 1.4e-05 | 0.0486 | [-0.309, -0.118]  |
| age       |  0.3171 | 3.7e-11 | 0.0468 | [0.225, 0.409]    |
| Female    | -0.0439 | 0.3     | 0.0426 | [-0.128, 0.0399]  |
| tiv       |  0.1199 | 0.017   | 0.05   | [0.0216, 0.218]   |

---

#### Just thalamus

```R
CCR_log ~ THALAMUS_1 + age + Female + tiv
```


|            |    coef |    pval |     se | ci                |
|:-----------|--------:|--------:|-------:|:------------------|
| Intercept  |  0.002  | 0.95    | 0.034  | [-0.0649, 0.0689] |
| THALAMUS_1 | -0.6256 | 1.9e-50 | 0.0369 | [-0.698, -0.553]  |
| age        |  0.2008 | 2.2e-08 | 0.0353 | [0.132, 0.27]     |
| Female     |  0.0193 | 0.62    | 0.0387 | [-0.0568, 0.0953] |
| tiv        |  0.3765 | 1.2e-15 | 0.0454 | [0.287, 0.466]    |

---

#### t2lv and brain

```R
CCR_log ~ t2lv_log + brain + age + Female + tiv
```


|           |    coef |    pval |     se | ci                |
|:----------|--------:|--------:|-------:|:------------------|
| Intercept |  0.0026 | 0.94    | 0.0376 | [-0.0712, 0.0765] |
| t2lv_log  |  0.3846 | 1.7e-11 | 0.0557 | [0.275, 0.494]    |
| brain     | -0.1566 | 0.001   | 0.0474 | [-0.25, -0.0634]  |
| age       |  0.2228 | 3.1e-08 | 0.0396 | [0.145, 0.301]    |
| Female    | -0.0519 | 0.23    | 0.043  | [-0.136, 0.0326]  |
| tiv       |  0.0853 | 0.077   | 0.0482 | [-0.00933, 0.18]  |

---

#### t2lv and thalamus

```R
CCR_log ~ t2lv_log + THALAMUS_1 + age + Female + tiv
```


|            |    coef |    pval |     se | ci                |
|:-----------|--------:|--------:|-------:|:------------------|
| Intercept  |  0.0022 | 0.95    | 0.0337 | [-0.064, 0.0683]  |
| t2lv_log   |  0.1563 | 0.0079  | 0.0586 | [0.0412, 0.271]   |
| THALAMUS_1 | -0.5323 | 3.2e-24 | 0.0495 | [-0.63, -0.435]   |
| age        |  0.1836 | 1.4e-07 | 0.0343 | [0.116, 0.251]    |
| Female     |  0.005  | 0.9     | 0.0389 | [-0.0715, 0.0814] |
| tiv        |  0.3226 | 5.1e-12 | 0.0455 | [0.233, 0.412]    |

---

##### CP fully mediates the relationship between brain and CCR.

CP enlargement appeared to account for the relationship between whole brain atrophy and central CSF predominance. While brain volume alone predicted CCR (β\*=-0.204, p<0.001), this association became non-significant when CP was included in the model (β\*=-0.020, p=0.63), suggesting CP-associated processes mediate the central component of brain atrophy. This contrasts with T2 lesion volume, which showed independent effects alongside brain volume (both p<0.001), consistent with T2 lesions representing more diffuse pathology.

In [11]:
# Mediation of the brain -> CCR2_log association through CP, adjusting every
# equation for age, sex and TIV. Model number 4 is the simple-mediation
# template in PROCESS numbering (per the pyprocessmacro documentation).
# NOTE(review): the outcome here is "CCR2_log", while the regressions earlier
# in this section use "CCR_log" -- confirm this is intended.
# NOTE(review): the indirect-effect interval in the output is bootstrapped and
# no seed is set in this call, so it may differ slightly between runs -- confirm.
mediation_spec = dict(
    model=4,
    x="brain",
    m=["CP"],
    y="CCR2_log",
    controls=["age", "Female", "tiv"],
    controls_in="all",
    suppr_init=True,
)
p2 = Process(data=model_data_z, **mediation_spec)

outcome, mediation = utils.read_pyprocess_output(p2)

# Frame the mediation summary between two rules.
banner = "=" * 80
print(banner, mediation, banner, sep="\n")



Direct effect of brain on CCR2_log:

  Effect     SE       t      p    LLCI   ULCI
 -0.0220 0.0391 -0.5620 0.5744 -0.0986 0.0547

Indirect effect of brain on CCR2_log:

      Effect  Boot SE  BootLLCI  BootULCI
  CP -0.1940   0.0323   -0.2589   -0.1338




In [15]:
# Mediation of the t2lv_log -> CCR2_log association through brain volume,
# adjusting every equation for age, sex and TIV. Model number 4 is the
# simple-mediation template in PROCESS numbering (per the pyprocessmacro
# documentation).
# NOTE(review): the outcome here is "CCR2_log", while the regressions earlier
# in this section use "CCR_log" -- confirm this is intended.
mediation_spec = dict(
    model=4,
    x="t2lv_log",
    m=["brain"],
    y="CCR2_log",
    controls=["age", "Female", "tiv"],
    controls_in="all",
    suppr_init=True,
)
p2 = Process(data=model_data_z, **mediation_spec)

outcome, mediation = utils.read_pyprocess_output(p2)

# Frame the mediation summary between two rules.
banner = "=" * 80
print(banner, mediation, banner, sep="\n")



Direct effect of t2lv_log on CCR2_log:

  Effect     SE      t      p   LLCI   ULCI
  0.3856 0.0395 9.7550 0.0000 0.3081 0.4631

Indirect effect of t2lv_log on CCR2_log:

         Effect  Boot SE  BootLLCI  BootULCI
  brain  0.0197   0.0092    0.0059    0.0434




## Analysis around MS Status

### CCR increases in MS

Is CCR specific for MS pathology? We fit a logistic regression of MS status on CCR, controlling for age, sex, and tiv. CCR is increased in MS patients.

In [6]:
# Restrict to subjects labelled MS or !MS, standardise, then append one 0/1
# indicator column per dz_type2 label ("MS" and "!MS").
model_data = data.loc[MS_patients | NONMS_patients].copy()
dx_dummies = pd.get_dummies(model_data["dz_type2"], dtype=int)
model_data_z = zscore(model_data).join([dx_dummies])

outcome = "MS"  # indicator column created by get_dummies above
covariates = ["age", "Female", "tiv"]

# One single-predictor logistic model per candidate marker, labelled by the
# marker's own column name.
predictors = {
    name: [name]
    for name in ("CP", "LV_log", "CCR_log", "periCSF", "periCSF_ratio_log")
}

results, models, formulas = regutils.run_regressions2(
    model_data_z,
    outcome,
    predictors.values(),
    model_names=predictors.keys(),
    covariates=covariates,
    regression_model=sm.Logit,
)

# One section per model: heading, formula, coefficient table. Results and
# formulas are paired by position, as returned above.
for (model_name, model), formula in zip(results.items(), formulas.values()):
    display(
        Markdown(f"#### {model_name}"),
        Markdown(f"`{formula}`"),
        Markdown(regutils.present_model(model, presentation_cols).to_markdown()),
    )


#### CP

`MS ~ CP + age + Female + tiv`

|           |    coef |    pval |     se | ci                |
|:----------|--------:|--------:|-------:|:------------------|
| Intercept |  1.6885 | 3.4e-45 | 0.1197 | [1.45, 1.92]      |
| CP        |  0.257  | 0.05    | 0.1311 | [0.000107, 0.514] |
| age       | -0.2702 | 0.038   | 0.1301 | [-0.525, -0.0152] |
| Female    | -0.2365 | 0.11    | 0.1473 | [-0.525, 0.0521]  |
| tiv       | -0.3618 | 0.0093  | 0.1391 | [-0.634, -0.0891] |

#### LV_log

`MS ~ LV_log + age + Female + tiv`

|           |    coef |    pval |     se | ci                 |
|:----------|--------:|--------:|-------:|:-------------------|
| Intercept |  1.6923 | 1.6e-44 | 0.1209 | [1.46, 1.93]       |
| LV_log    |  0.3332 | 0.015   | 0.137  | [0.0647, 0.602]    |
| age       | -0.327  | 0.016   | 0.1351 | [-0.592, -0.0621]  |
| Female    | -0.2866 | 0.048   | 0.1452 | [-0.571, -0.00205] |
| tiv       | -0.4573 | 0.0016  | 0.1448 | [-0.741, -0.173]   |

#### CCR_log

`MS ~ CCR_log + age + Female + tiv`

|           |    coef |    pval |     se | ci                  |
|:----------|--------:|--------:|-------:|:--------------------|
| Intercept |  1.6914 | 1.5e-44 | 0.1208 | [1.45, 1.93]        |
| CCR_log   |  0.3163 | 0.017   | 0.1326 | [0.0565, 0.576]     |
| age       | -0.315  | 0.018   | 0.1334 | [-0.576, -0.0535]   |
| Female    | -0.2852 | 0.049   | 0.145  | [-0.569, -0.000902] |
| tiv       | -0.419  | 0.003   | 0.1413 | [-0.696, -0.142]    |

#### periCSF

`MS ~ periCSF + age + Female + tiv`

|           |    coef |    pval |     se | ci                |
|:----------|--------:|--------:|-------:|:------------------|
| Intercept |  1.6633 | 3.2e-45 | 0.1179 | [1.43, 1.89]      |
| periCSF   |  0.059  | 0.74    | 0.177  | [-0.288, 0.406]   |
| age       | -0.1904 | 0.12    | 0.1229 | [-0.431, 0.0505]  |
| Female    | -0.2987 | 0.038   | 0.1438 | [-0.58, -0.0169]  |
| tiv       | -0.4055 | 0.032   | 0.1891 | [-0.776, -0.0347] |

#### periCSF_ratio_log

`MS ~ periCSF_ratio_log + age + Female + tiv`

|                   |    coef |    pval |     se | ci                  |
|:------------------|--------:|--------:|-------:|:--------------------|
| Intercept         |  1.6914 | 1.5e-44 | 0.1208 | [1.45, 1.93]        |
| periCSF_ratio_log | -0.3163 | 0.017   | 0.1326 | [-0.576, -0.0565]   |
| age               | -0.315  | 0.018   | 0.1334 | [-0.576, -0.0535]   |
| Female            | -0.2852 | 0.049   | 0.145  | [-0.569, -0.000902] |
| tiv               | -0.419  | 0.003   | 0.1413 | [-0.696, -0.142]    |

---