In [14]:
from pathlib import Path
import os
import pandas as pd
import pyperclip
import subprocess
from reload_recursive import reload_recursive
import helpers
import statsmodels.api as sm
from scipy import stats

from mri_data import file_manager as fm
from mri_data import utils

In [None]:
# Path configuration for this notebook.
# The drive root is resolved by the project-local `file_manager` helper.
drive_root = fm.get_drive_root()
# Dataset directory: per-session label images and `pineal_labels.csv` are read from here.
dataroot = drive_root / "3Tpioneer_bids"
# Predictions directory; not referenced by the cells visible in this section.
labelroot = drive_root / "srs-9/3Tpioneer_bids_predictions"
# Directory holding the tabular inputs (`t1_data_full.csv`).
# NOTE(review): absolute, user-specific path -- the notebook will not run on another
# machine unless this is changed; consider making it configurable.
data_dir = Path("/home/srs-9/Projects/ms_mri/analysis/paper1/data0")

In [25]:
# --- Load tabular data and restrict to subjects listed in the pineal label file ---
df_full = pd.read_csv(data_dir / "t1_data_full.csv", index_col="subid")
pineal_label_file = dataroot / "pineal_labels.csv"
df_man = pd.read_csv(pineal_label_file, index_col="Subject")
# Take an explicit copy: the selection is mutated below, and without `.copy()`
# pandas may treat it as a slice of `df_full` and emit SettingWithCopyWarning
# on every later column assignment.
df = df_full.loc[df_man.index, :].copy()


def _pineal_label_path(sub_ses):
    """Return the pineal label image for one session directory.

    Prefers ``pineal-SRS_T1.nii.gz`` and falls back to ``pineal-SRS.nii.gz``
    when the T1 variant does not exist. The fallback path is returned without
    checking that it exists, so a missing file surfaces in the caller.
    """
    session_dir = dataroot / sub_ses
    t1_label = session_dir / "pineal-SRS_T1.nii.gz"
    if t1_label.exists():
        return t1_label
    return session_dir / "pineal-SRS.nii.gz"


# --- Label volume per subject ---
# Build the whole column in one pass instead of writing cell-by-cell inside
# `iterrows()`; this is positional, so it also stays correct if an index label
# is repeated. Element [1] of `compute_volume`'s result is stored
# (presumably the volume itself -- TODO confirm against mri_data.utils).
df["vol"] = [
    utils.compute_volume(_pineal_label_path(sub_ses))[1]
    for sub_ses in df["sub-ses"]
]

# --- Derived / cleaned clinical variables (project-local helpers) ---
df = helpers.set_dz_type5(df)
df = helpers.fix_edss(df)
df = df.rename(columns={"extracted_EDSS": "EDSS", "lesion_vol_cubic": "t2lv"})
df = helpers.do_sqrt_transform(df, ["EDSS"])
df = helpers.do_sqrt_transform(df, ["t2lv"])

df = helpers.clean_df(df)
# `assign` returns a new frame, so this is safe even if `clean_df` handed back
# a filtered slice.
df = df.assign(dzdur=df["dzdur"].astype("float"))

numeric_vars = [
    "choroid_volume",
    "pineal_volume",
    "pituitary_volume",
    "Female",
    "age",
    "tiv",
    "EDSS",
    "EDSS_sqrt",
    "t2lv",
    "t2lv_sqrt",
    "dzdur",
    "thalamus",
    "vol",
    "cortical_thickness",
]
keep_cols = numeric_vars + ["dz_type5"]
# Copy the column subset so later cells can modify `df` without warnings.
df = df[keep_cols].copy()

# --- Standardised copy: z-score every numeric column, ignoring NaNs ---
df_z = df.copy()
df_z[numeric_vars] = df_z[numeric_vars].apply(stats.zscore, nan_policy="omit")

  df[f"{var}_sqrt"] = np.sqrt(df[var])


In [26]:
# Fit an ordinary least squares model of cortical thickness on the computed
# label volume (`vol`), with age, sex indicator and `tiv` as covariates,
# using only rows whose `dz_type5` is "RMS" or "PMS".
in_model_groups = df["dz_type5"].isin(["RMS", "PMS"])
model_data = df.loc[in_model_groups]

formula = "cortical_thickness ~ vol + age + Female + tiv"
ols_model = sm.OLS.from_formula(formula, data=model_data)
res = ols_model.fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:     cortical_thickness   R-squared:                       0.447
Model:                            OLS   Adj. R-squared:                  0.375
Method:                 Least Squares   F-statistic:                     6.257
Date:                Fri, 16 May 2025   Prob (F-statistic):           0.000822
Time:                        12:23:51   Log-Likelihood:                 29.008
No. Observations:                  36   AIC:                            -48.02
Df Residuals:                      31   BIC:                            -40.10
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      2.0104      0.309      6.510      0.0