## Setup

### Imports

In [5]:
import pprint
from warnings import simplefilter

import pandas as pd
from IPython.display import Markdown, display
from statsmodels.stats.multitest import multipletests

# Ignore pandas PerformanceWarning messages.
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
import json
import re
import textwrap
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from statsmodels.stats.multitest import multipletests

import helpers
import matplotlib.pyplot as plt
import numpy as np
import pyperclip
import statsmodels.api as sm
from IPython.display import clear_output
from matplotlib import colormaps
from scipy import stats
from statsmodels.genmod.families import Poisson

from reload_recursive import reload_recursive
from statsmodels.stats.mediation import Mediation
from statsmodels.stats.outliers_influence import variance_inflation_factor
from tqdm.notebook import tqdm

from mri_data import file_manager as fm

In [6]:
# Reload the local `helpers` module, then repeat the imports so the names
# bound below are taken after the reload.
# NOTE(review): presumably reload_recursive also reloads helpers' submodules — confirm.
reload_recursive(helpers)
import helpers
from helpers import load_df, zscore, get_colors

### Load Data

#### Clinical and Volumes

In [None]:
# Local project locations: tabular data directory and figure output directory.
data_dir = Path("/home/srs-9/Projects/ms_mri/data")
fig_path = Path(
    "/home/srs-9/Projects/ms_mri/analysis/thalamus/figures_tables/choroid_associations"
)

# BIDS dataset folder under the drive root reported by the file manager.
drive_root = fm.get_drive_root()
dataroot = drive_root / "3Tpioneer_bids"

# Load the main table and the table read by load_hipsthomas from data_dir,
# then join them into the working frame `data`.
df, df_thomas = load_df(), helpers.load_hipsthomas(data_dir)
data = df.join(df_thomas)

#! See suggestions from assumption_checks.ipynb
# Maps a column name to the transform name passed to helpers.transform_variables.
transformations = dict(
    LV="log",
    thirdV="log",
    fourthV="log",
    t2lv="log",
    PRL="log1p",
)
data = helpers.transform_variables(data, transformations)

# These corrections should ultimately be made to the csv file.
# NOTE(review): this x1000 rescaling runs after transform_variables, so a
# transformed t2lv column was presumably derived from the unscaled values —
# confirm that this ordering is intended.
for struct in ("brain", "white", "grey", "thalamus", "t2lv"):
    data[struct] = data[struct].mul(1000)

# Derived ratios between CSF compartments.
data["CCF"] = data["LV"].div(data["allCSF"])
data["periCSF_ratio"] = data["periCSF"].div(data["LV"])
data["periCSF_frac"] = data["periCSF"].div(data["allCSF"])


# Viridis colormap resampled to 20 entries, plus the colors returned by helpers.
viridis = colormaps["viridis"].resampled(20)
colors = helpers.get_colors()

# Boolean row masks over `data`, one per diagnosis label.
dz_type2, dz_type5 = data["dz_type2"], data["dz_type5"]

MS_patients = dz_type2.eq("MS")
NONMS_patients = dz_type2.eq("!MS")

NIND_patients = dz_type5.eq("NIND")
OIND_patients = dz_type5.eq("OIND")
RMS_patients = dz_type5.eq("RMS")
PMS_patients = dz_type5.eq("PMS")


# Integer structure indices for the two groups of interest.
# NOTE(review): presumably these index into hipsthomas_struct_index.csv — confirm.
thalamic_nuclei = [2, *range(4, 13)]
deep_grey = [13, 14, *range(26, 33)]

# String form of the thalamic nuclei indices.
thalamic_nuclei_str = list(map(str, thalamic_nuclei))

# Lookup tables between structure index and structure name, both derived from
# hipsthomas_struct_index.csv. The file is parsed once and located through
# data_dir instead of repeating the hard-coded absolute path for each table.
_struct_index = pd.read_csv(data_dir / "hipsthomas_struct_index.csv")
hips_thomas_ref = _struct_index.set_index("index")["struct"]  # index -> struct
hips_thomas_invref = _struct_index.set_index("struct")["index"]  # struct -> index

### Select variables and subjects to focus on

In [17]:
# Columns to focus on, written as three groups; the concatenation below fixes
# the column order used when this selection is displayed.
csf_and_cp_vars = [
    "LV_log",
    "CP",
    "periCSF",
    "allCSF",
    "thirdV_log",
    "fourthV_log",
    "asegCSF",
    "CCF",
    "periCSF_ratio",
    "periCSF_frac",
    "thirdV_width",
]
thalamic_vars = ["THALAMUS_1", "medial", "posterior", "ventral", "anterior"]
lesion_and_tissue_vars = ["t2lv_log", "brain", "white", "grey", "PRL_log1p"]

variables_ref = [*csf_and_cp_vars, *thalamic_vars, *lesion_and_tissue_vars]

## Analysis

### Univariate associations with CP

In [21]:
# Keep only the rows flagged by MS_patients and pass them through zscore.
model_data = data.loc[MS_patients]
model_data_z = zscore(model_data)

# Show the columns of interest from the z-scored frame.
model_data_z.loc[:, variables_ref]

Unnamed: 0_level_0,LV_log,CP,periCSF,allCSF,thirdV_log,fourthV_log,asegCSF,CCF,periCSF_ratio,periCSF_frac,...,THALAMUS_1,medial,posterior,ventral,anterior,t2lv_log,brain,white,grey,PRL_log1p
subid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,0.185517,0.236507,-0.429225,-0.383117,-0.065356,-1.030616,-0.508778,0.015102,-0.461396,-0.049576,...,0.069104,0.275775,0.513801,-0.393028,-0.389915,-0.082994,0.517406,0.043572,0.747016,-0.657851
1002,-0.886451,-0.333005,-0.276911,-0.450625,-0.292340,-2.156318,-0.054714,-0.795526,0.735281,0.530708,...,0.974651,0.744614,0.933027,0.943419,1.157345,-0.489286,3.338861,2.042856,3.563665,1.990865
1003,0.788179,0.247777,-0.723246,-0.433991,0.133426,-0.101677,-1.061527,0.764122,-0.890714,-0.651337,...,0.194886,0.783968,0.074988,-0.236669,-0.629691,0.546754,-1.017700,-0.010177,-1.523211,-0.657851
1004,-2.022649,-0.926567,-0.914150,-1.122714,-0.055114,0.784938,-0.202160,-1.217753,2.749657,0.904290,...,0.093044,0.523444,0.678038,-0.376627,0.245755,-0.681738,0.216279,-0.054093,0.363845,-0.657851
1005,1.122959,1.315372,-1.534894,-0.886542,1.196260,0.805517,0.582241,1.473364,-1.123658,-1.501799,...,-2.402779,-2.300008,-1.963063,-2.297572,-1.025610,1.296182,-0.904844,-0.569845,-0.954193,-0.657851
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3016,0.135727,0.174293,-1.627756,-1.342396,0.883701,-0.604852,0.557096,0.188508,-0.600902,-0.398368,...,-1.658857,-1.442170,-1.306842,-1.585042,-1.565386,-0.473744,-0.677059,-0.104218,-0.943843,-0.657851
3017,-0.830717,-0.522734,1.227087,0.716433,-1.116507,-0.824999,-1.411175,-0.887790,1.044594,0.894721,...,0.368050,0.527100,0.322644,0.062903,0.149611,4.382667,0.898435,1.381294,0.365619,-0.657851
3021,1.547097,1.095716,1.968115,2.243509,1.554608,1.584601,2.012243,1.190391,-1.042127,-1.106358,...,-2.216912,-1.814796,-2.167069,-2.121386,-1.301504,0.867061,-1.139559,0.133788,-1.809182,-0.657851
3023,1.346444,0.523342,-0.917358,-0.277090,-0.245407,-0.621776,-1.248844,1.651129,-1.162361,-1.562944,...,-0.514763,-0.371143,-0.728815,-0.274973,-0.090004,2.681388,0.242144,-0.156786,0.476009,-0.657851


In [29]:
model_data = data.loc[MS_patients]
model_data_z = zscore(model_data)

# (heading, frame, column) for each variable correlated against CP.
# Age is taken from the raw frame, the other two from the z-scored frame.
# NOTE(review): Pearson's r is unchanged by linear rescaling, so the mix is
# harmless assuming zscore is a per-column linear standardisation — confirm.
cp_correlates = [
    ("**Age**:", model_data, "age"),
    ("**Female Sex**:", model_data_z, "Female"),
    ("**TIV**:", model_data_z, "tiv"),
]

for heading, frame, column in cp_correlates:
    display(Markdown(heading))
    display(stats.pearsonr(frame["CP"], frame[column]))

**Age**:

PearsonRResult(statistic=np.float64(0.3954628212248364), pvalue=np.float64(5.707661072888709e-19))

**Female Sex**:

PearsonRResult(statistic=np.float64(-0.28448299814132133), pvalue=np.float64(3.6614132824199224e-10))

**TIV**:

PearsonRResult(statistic=np.float64(0.14760012589286067), pvalue=np.float64(0.0013643188892649314))

In [32]:
model_data = data.loc[MS_patients]
model_data_z = zscore(model_data)

# Fit CP on age with no covariates, once on the raw frame and once on the
# z-scored frame, and print the age coefficient of each fit.
raw_fit = helpers.quick_regression("CP", "age", model_data, covariates=[])
standardised_fit = helpers.quick_regression("CP", "age", model_data_z, covariates=[])

print(raw_fit.params["age"])
print(standardised_fit.params["age"])

16.21536628528412
0.39546282122483634


### MRI and clinical measures versus CP 

In [19]:
# Z-scored frame restricted to MS patients. It is built from `data` (as in the
# univariate cells) rather than from a fresh df.join(df_thomas): `data` is the
# frame that went through transform_variables and received the CCF /
# periCSF_* columns, and MS_patients was computed from it.
model_data = zscore(data[MS_patients])

# NOTE(review): run_regressions_refactored is not imported or defined in the
# visible cells — confirm where it comes from before a Restart & Run All.
# For reference, the signature of the earlier run_regressions was:
#
#     def run_regressions(
#         model_data: pd.DataFrame,
#         outcome: str,
#         predictors: list[str],
#         covariates: list[str] = None,
#         robust_cov: str = "HC3",
#         fdr_method: str = "fdr_bh",
#         fdr_alpha: float = 0.05,
#     ):

# Dependent variables, each regressed on the predictor plus the covariates.
outcomes = [
    "LV_log",
    "thirdV_log",
    "medial",
    "posterior",
    "ventral",
    "anterior",
    "t2lv_log",
    "THALAMUS_1",
    "brain",
    "grey",
    "white",
    "cortical_thickness",
    "EDSS_sqrt",
    "SDMT",
    "PRL_log1p",
    "allCSF",
    "periCSF",
    "CCF",
]
covariates = ["age", "Female", "tiv"]
predictor = "CP"

# The second return value is indexed by predictor name; keep it under its own
# name so `results` always refers to the per-outcome table for `predictor`.
_, results_by_predictor = run_regressions_refactored(
    model_data,
    outcomes,
    [predictor],
    covariates,
)
results = results_by_predictor[predictor]

# Show rows ordered by absolute coefficient, largest first.
display_order = results["coef"].abs().sort_values(ascending=False).index
display(results.loc[display_order, :])

Unnamed: 0_level_0,coef,pval,p_fdr,se,llci,ulci,ci,R2,coef_sig
struct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
LV_log,0.614809,2.4994e-58,2.4994e-58,0.032947,0.550064,0.679554,"[0.55, 0.68]",0.5506,0.614809
CCF,0.478278,5.518126e-32,5.518126e-32,0.037604,0.404382,0.552174,"[0.404, 0.552]",0.346259,0.478278
medial,-0.465498,2.650005e-22,2.650005e-22,0.045478,-0.554867,-0.376129,"[-0.555, -0.376]",0.406559,-0.465498
posterior,-0.457083,1.756286e-20,1.756286e-20,0.046977,-0.549396,-0.364769,"[-0.549, -0.365]",0.361738,-0.457083
thirdV_log,0.42644,1.077634e-21,1.077634e-21,0.04235,0.343217,0.509663,"[0.343, 0.51]",0.351978,0.42644
THALAMUS_1,-0.422242,9.287207e-18,9.287207e-18,0.047228,-0.515049,-0.329434,"[-0.515, -0.329]",0.430558,-0.422242
t2lv_log,0.344074,1.324292e-10,1.324292e-10,0.052339,0.241222,0.446926,"[0.241, 0.447]",0.187313,0.344074
cortical_thickness,-0.32453,6.334241e-12,6.334241e-12,0.046001,-0.414926,-0.234134,"[-0.415, -0.234]",0.324296,-0.32453
allCSF,0.312863,2.34505e-20,2.34505e-20,0.032269,0.249452,0.376275,"[0.249, 0.376]",0.661835,0.312863
brain,-0.293461,8.495996e-11,8.495996e-11,0.044157,-0.380235,-0.206688,"[-0.38, -0.207]",0.305914,-0.293461
