## Setup

### Imports

In [1]:
import pprint
from warnings import simplefilter

import pandas as pd
from IPython.display import Markdown, display
from statsmodels.stats.multitest import multipletests

simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
import json
import re
import textwrap
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from statsmodels.stats.multitest import multipletests

import helpers
import matplotlib.pyplot as plt
import numpy as np
import pyperclip
import statsmodels.api as sm
from IPython.display import clear_output
from matplotlib import colormaps
from scipy import stats
from statsmodels.genmod.families import Poisson

from reload_recursive import reload_recursive
from statsmodels.stats.mediation import Mediation
from statsmodels.stats.outliers_influence import variance_inflation_factor
from tqdm.notebook import tqdm
import sys
from mri_data import file_manager as fm
sys.path.insert(0, "/home/srs-9/Projects/ms_mri/analysis/thalamus/helpers")


import helpers
import utils
import regression_utils as regutils
import my_namespace

ModuleNotFoundError: No module named 'my_namespace'

In [None]:
# Reload the locally developed modules so that edits made on disk take effect
# without restarting the kernel (reload_recursive presumably also reloads the
# modules they import -- confirm against its implementation).
reload_recursive(regutils)
reload_recursive(helpers)
reload_recursive(utils)
# NOTE(review): the setup cell fails on `import my_namespace`
# (ModuleNotFoundError); if that is not resolved this line raises NameError.
reload_recursive(my_namespace)

# Re-run the imports after the reload so the names bound in this notebook are
# taken from the modules again.
import utils
from utils import load_df, zscore, get_colors, run_R_script
# NOTE(review): the star import hides which names come from my_namespace.
# Because it runs after the `utils` from-import and before the
# `regression_utils` one, any same-named export shadows the former and is
# shadowed by the latter -- keep this order unless the names are made explicit.
from my_namespace import *

import regression_utils as regutils
from regression_utils import (
    quick_regression,
    quick_regression2,
    residualize_vars,
    run_regressions,
)

### Load Data

#### Clinical and Volumes

In [None]:
# --- Locations ---------------------------------------------------------------
drive_root = fm.get_drive_root()
dataroot = drive_root / "3Tpioneer_bids"
data_dir = Path("/home/srs-9/Projects/ms_mri/data")
fig_path = Path(
    "/home/srs-9/Projects/ms_mri/analysis/thalamus/figures_tables/choroid_associations"
)

# --- Raw tables --------------------------------------------------------------
df = load_df()
df_thomas = helpers.load_hipsthomas(data_dir)

# --- Variable transformations ------------------------------------------------
#! See suggestions from assumption_checks.ipynb
transformations = {
    **dict.fromkeys(["LV", "thirdV", "fourthV", "t2lv"], "log"),
    "PRL": "log1p",
}
# Join the clinical/volume table with the HIPS-THOMAS table on their indexes,
# then apply the transformations requested above.
data = helpers.transform_variables(df.join(df_thomas), transformations)

# --- Unit corrections --------------------------------------------------------
# these corrections should ultimately be made to the csv file
# NOTE(review): t2lv is rescaled here *after* its log transform was requested
# above, so a derived log column would reflect the unscaled values -- confirm
# this is intended.
for struct in ["brain", "white", "grey", "thalamus", "t2lv"]:
    data[struct] = data[struct].mul(1000)

# --- Derived CSF ratios ------------------------------------------------------
data["CCF"] = data["LV"].div(data["allCSF"])
data["periCSF_ratio"] = data["periCSF"].div(data["LV"])
data["periCSF_frac"] = data["periCSF"].div(data["allCSF"])


# Plotting palettes: a 20-step resampling of viridis plus the project colours.
viridis = colormaps["viridis"].resampled(20)
colors = helpers.get_colors()

# Boolean row masks over `data`, one per diagnosis group.
_dz_type2 = data["dz_type2"]
_dz_type5 = data["dz_type5"]

MS_patients = _dz_type2.eq("MS")
NONMS_patients = _dz_type2.eq("!MS")

NIND_patients = _dz_type5.eq("NIND")
OIND_patients = _dz_type5.eq("OIND")
RMS_patients = _dz_type5.eq("RMS")
PMS_patients = _dz_type5.eq("PMS")


# Structure indices for the two groups of regions analysed below
# (presumably keys of hipsthomas_struct_index.csv -- confirm).
thalamic_nuclei = [2, *range(4, 13)]  # 2, then 4 through 12
deep_grey = [13, 14, *range(26, 33)]  # 13, 14, then 26 through 32

# Same thalamic indices as strings, e.g. for use as column labels.
thalamic_nuclei_str = list(map(str, thalamic_nuclei))

# Lookup tables between HIPS-THOMAS structure indices and structure names.
# The index file is read once (from `data_dir`, instead of repeating the
# absolute path) and both directions are derived from that single table.
_hips_thomas_index = pd.read_csv(data_dir / "hipsthomas_struct_index.csv")

# index -> struct name
hips_thomas_ref = _hips_thomas_index.set_index("index")["struct"]
# struct name -> index
hips_thomas_invref = _hips_thomas_index.set_index("struct")["index"]

## Analysis

### Univariate associations with CP

In [29]:
# Restrict to MS patients and keep both the raw and the z-scored frame.
model_data = data[MS_patients]
model_data_z = zscore(model_data)

# One row per covariate: (heading to render, frame to read from, column).
# Age is read from the raw frame and the others from the z-scored one; Pearson's
# r does not change under linear rescaling, so the choice should not affect the
# reported statistics (assuming zscore is a plain standardization).
cp_correlates = [
    ("**Age**:", model_data, "age"),
    ("**Female Sex**:", model_data_z, "Female"),
    ("**TIV**:", model_data_z, "tiv"),
]

for heading, frame, covariate in cp_correlates:
    display(Markdown(heading))
    display(stats.pearsonr(frame["CP"], frame[covariate]))

**Age**:

PearsonRResult(statistic=np.float64(0.3954628212248364), pvalue=np.float64(5.707661072888709e-19))

**Female Sex**:

PearsonRResult(statistic=np.float64(-0.28448299814132133), pvalue=np.float64(3.6614132824199224e-10))

**TIV**:

PearsonRResult(statistic=np.float64(0.14760012589286067), pvalue=np.float64(0.0013643188892649314))

### MRI and clinical measures versus CP 

In [19]:
# Regress each MRI / clinical outcome on choroid plexus volume (CP), adjusting
# for age, sex and TIV, within the MS cohort.
#
# Start from `data` rather than a fresh `df.join(df_thomas)`: derived columns
# used as outcomes below (e.g. "CCF") are only added to `data` in the load
# cell, and the univariate cell above also works from `data[MS_patients]`.
# All variables are z-scored before modelling.
model_data = zscore(data[MS_patients])

outcomes = [
    "LV_log",
    "thirdV_log",
    "medial",
    "posterior",
    "ventral",
    "anterior",
    "t2lv_log",
    "THALAMUS_1",
    "brain",
    "grey",
    "white",
    "cortical_thickness",
    "EDSS_sqrt",
    "SDMT",
    "PRL_log1p",
    "allCSF",
    "periCSF",
    "CCF",
]
covariates = ["age", "Female", "tiv"]
predictor = "CP"

# NOTE(review): `run_regressions_refactored` is not among the names imported
# explicitly in the setup cells (only `run_regressions` is); presumably it
# arrives via `from my_namespace import *` -- confirm, or import it by name.
_, results = run_regressions_refactored(
    model_data,
    outcomes,
    [predictor],
    covariates,
)
# Keep only the table for the CP predictor (one row per outcome).
results = results[predictor]

# Show the outcomes ordered by absolute effect size, largest first.
display_order = results["coef"].abs().sort_values(ascending=False).index
display(results.loc[display_order, :])

Unnamed: 0_level_0,coef,pval,p_fdr,se,llci,ulci,ci,R2,coef_sig
struct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
LV_log,0.614809,2.4994e-58,2.4994e-58,0.032947,0.550064,0.679554,"[0.55, 0.68]",0.5506,0.614809
CCF,0.478278,5.518126e-32,5.518126e-32,0.037604,0.404382,0.552174,"[0.404, 0.552]",0.346259,0.478278
medial,-0.465498,2.650005e-22,2.650005e-22,0.045478,-0.554867,-0.376129,"[-0.555, -0.376]",0.406559,-0.465498
posterior,-0.457083,1.756286e-20,1.756286e-20,0.046977,-0.549396,-0.364769,"[-0.549, -0.365]",0.361738,-0.457083
thirdV_log,0.42644,1.077634e-21,1.077634e-21,0.04235,0.343217,0.509663,"[0.343, 0.51]",0.351978,0.42644
THALAMUS_1,-0.422242,9.287207e-18,9.287207e-18,0.047228,-0.515049,-0.329434,"[-0.515, -0.329]",0.430558,-0.422242
t2lv_log,0.344074,1.324292e-10,1.324292e-10,0.052339,0.241222,0.446926,"[0.241, 0.447]",0.187313,0.344074
cortical_thickness,-0.32453,6.334241e-12,6.334241e-12,0.046001,-0.414926,-0.234134,"[-0.415, -0.234]",0.324296,-0.32453
allCSF,0.312863,2.34505e-20,2.34505e-20,0.032269,0.249452,0.376275,"[0.249, 0.376]",0.661835,0.312863
brain,-0.293461,8.495996e-11,8.495996e-11,0.044157,-0.380235,-0.206688,"[-0.38, -0.207]",0.305914,-0.293461
