In [38]:
import os
import re
import subprocess
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from matplotlib import colormaps
from scipy import stats

from mri_data import file_manager as fm

sys.path.append("/home/srs-9/Projects/ms_mri/analysis/paper1")

import helpers

In [2]:
# --- Paths -------------------------------------------------------------------
# NOTE(review): these are machine-specific absolute locations; the notebook
# will only run where this directory layout exists.
drive_root = fm.get_drive_root()
msmri_home = Path("/home/srs-9/Projects/ms_mri")
msmri_datadir = msmri_home / "data"
# Input CSVs are read from `data0` next to wherever the kernel was started.
curr_dir = Path.cwd()
data_dir = curr_dir / "data0"
# Derived from `msmri_home` so the project root is spelled out only once.
cp_data_dir = msmri_home / "analysis" / "paper1" / "data0"
dataroot = Path("/media/smbshare/srs-9/thalamus_project/data")

# --- Display options ---------------------------------------------------------
# `showfigs` is not read by any cell visible here; presumably it gates figure
# display further down the notebook -- TODO confirm.
showfigs = False
pd.options.display.precision = 3

# Table of subject sessions, indexed by the `sub` column.
subject_sessions = pd.read_csv(
    msmri_home / "analysis" / "thalamus" / "data0" / "subject-sessions.csv",
    index_col="sub",
)

In [3]:
# Load the three per-subject tables; each one is indexed by `subid`.
cp_vols = pd.read_csv(data_dir / "choroid_aschoplex_volumes.csv", index_col="subid")
tiv_data = pd.read_csv(data_dir / "tiv_data.csv", index_col="subid")
clinical_data = pd.read_csv(data_dir / "clinical_data_processed.csv", index_col="subid")

# Combine them column-wise on the shared index, starting from the choroid table.
df = cp_vols.join([tiv_data, clinical_data])

# Build a standardized copy: every numeric column is z-scored (NaNs are ignored
# when computing the mean/SD), non-numeric columns are carried over unchanged.
numeric_vars = [
    name for name, dtype in df.dtypes.items() if pd.api.types.is_numeric_dtype(dtype)
]
df_z = df.copy()
df_z[numeric_vars] = df[numeric_vars].apply(stats.zscore, nan_policy="omit")

In [4]:
# Load the HIPS-THOMAS volume table, indexed by `subid`. Each column is treated
# as one structure in the lookups built below.
df_thomas = pd.read_csv(data_dir / "hipsthomas_vols.csv", index_col="subid")
cols_orig = df_thomas.columns

# Rename "<index>-<NAME>" columns to "<NAME>_<index>" and replace any remaining
# hyphens with underscores, so the names can be written directly in formula
# strings (e.g. `THALAMUS_1`).
new_colnames = {
    col: re.sub(r"(\d+)-([\w-]+)", r"\2_\1", col).replace("-", "_")
    for col in df_thomas.columns
}
df_thomas = df_thomas.rename(columns=new_colnames)

# Divide every column by `THALAMUS_1`, row by row (presumably the whole-thalamus
# volume -- TODO confirm), then z-score both the raw and the normalized tables.
df_thomas_norm = df_thomas.div(df_thomas["THALAMUS_1"], axis=0)
df_thomas_z = df_thomas.apply(stats.zscore, nan_policy="omit")
df_thomas_norm_z = df_thomas_norm.apply(stats.zscore, nan_policy="omit")


# Recover the integer suffix of each renamed column ("AV_2" -> 2). Fail with an
# explicit message instead of an opaque "NoneType is not subscriptable" when a
# column does not follow the "<NAME>_<index>" pattern.
new_index = []
for col in df_thomas.columns:
    suffix_match = re.match(r".+_(\d+)$", col)
    if suffix_match is None:
        raise ValueError(
            f"Column {col!r} does not end in '_<index>'; cannot derive its structure index"
        )
    new_index.append(int(suffix_match[1]))

# Structure table plus index<->name lookups in both directions.
df_structs = pd.DataFrame({'struct': df_thomas.columns}, index=new_index)
ind_struct_lookup = dict(zip(new_index, df_thomas.columns))
struct_ind_lookup = dict(zip(df_thomas.columns, new_index))

# Columns at positions 1-10 of the table.
# NOTE(review): this depends on the CSV column order -- confirm these are the
# intended nuclei.
thalamic_nuclei = df_thomas.columns[1:11]
thalamic_nuclei_inds = [struct_ind_lookup[struct] for struct in thalamic_nuclei]

### MS vs !MS

In [26]:
# Logistic regression of MS vs !MS on the z-scored `THALAMUS_1` volume, with
# age, Female and tiv as covariates.
model_data = (
    df_z.join(df_thomas_z)
    # keep only the two groups being contrasted
    .loc[lambda d: d['dz_type2'].isin(["MS", "!MS"])]
    # append one 0/1 indicator column per group label; `MS` is the outcome
    .pipe(lambda d: pd.concat([d, pd.get_dummies(d['dz_type2'], dtype="int")], axis=1))
)

formula = "MS ~ THALAMUS_1 + age + Female + tiv"
res = sm.Logit.from_formula(formula, data=model_data).fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.383339
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                     MS   No. Observations:                  489
Model:                          Logit   Df Residuals:                      484
Method:                           MLE   Df Model:                            4
Date:                Thu, 15 May 2025   Pseudo R-squ.:                  0.1054
Time:                        14:44:43   Log-Likelihood:                -187.45
converged:                       True   LL-Null:                       -209.55
Covariance Type:            nonrobust   LLR p-value:                 5.873e-09
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      2.0559      0.168     12.263      0.000       1.727       2.384
THALAMUS_1    -1.2133      0.

### RMS vs PMS

In [None]:
# Logistic regression of PMS vs RMS on the z-scored `THALAMUS_1` volume, with
# age, Female and tiv as covariates.
model_data = (
    df_z.join(df_thomas_z)
    # keep only the two groups being contrasted
    .loc[lambda d: d['dz_type5'].isin(["RMS", "PMS"])]
    # append one 0/1 indicator column per group label; `PMS` is the outcome
    .pipe(lambda d: pd.concat([d, pd.get_dummies(d['dz_type5'], dtype="int")], axis=1))
)

formula = "PMS ~ THALAMUS_1 + age + Female + tiv"
res = sm.Logit.from_formula(formula, data=model_data).fit()
print(res.summary())

### PPMS vs SPMS

Thalamic volume predicts PPMS vs. SPMS: PPMS is associated with greater thalamic volume after controlling for age, sex, TIV, and disease duration.

In [24]:
# Logistic regression of PPMS vs SPMS on the z-scored `THALAMUS_1` volume,
# adjusting for age, Female, tiv and dzdur.
model_data = (
    df_z.join(df_thomas_z)
    # keep only the two subtypes being contrasted
    .loc[lambda d: d['ms_type'].isin(["PPMS", "SPMS"])]
    # append one 0/1 indicator column per subtype; `PPMS` is the outcome
    .pipe(lambda d: pd.concat([d, pd.get_dummies(d['ms_type'], dtype="int")], axis=1))
)

formula = "PPMS ~ THALAMUS_1 + age + Female + tiv + dzdur"
res = sm.Logit.from_formula(formula, data=model_data).fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.548368
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                   PPMS   No. Observations:                   78
Model:                          Logit   Df Residuals:                       72
Method:                           MLE   Df Model:                            5
Date:                Thu, 15 May 2025   Pseudo R-squ.:                  0.1839
Time:                        14:38:01   Log-Likelihood:                -42.773
converged:                       True   LL-Null:                       -52.413
Covariance Type:            nonrobust   LLR p-value:                  0.001704
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.1583      0.420     -0.377      0.706      -0.981       0.665
THALAMUS_1     0.6552      0.

In [46]:
# RRMS vs SPMS contrast. NOTE(review): this cell sits under the "PPMS vs SPMS"
# heading but fits a different comparison, and its predictor of interest is
# `choroid_volume`; the joined thalamic columns are not used in this formula.
model_data = (
    df_z.join(df_thomas_z)
    # keep only the two subtypes being contrasted
    .loc[lambda d: d['ms_type'].isin(["SPMS", "RRMS"])]
    # append one 0/1 indicator column per subtype; `RRMS` is the outcome
    .pipe(lambda d: pd.concat([d, pd.get_dummies(d['ms_type'], dtype="int")], axis=1))
)

formula = "RRMS ~ age + choroid_volume + Female + tiv + dzdur"
res = sm.Logit.from_formula(formula, data=model_data).fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.260491
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:                   RRMS   No. Observations:                  366
Model:                          Logit   Df Residuals:                      360
Method:                           MLE   Df Model:                            5
Date:                Thu, 15 May 2025   Pseudo R-squ.:                  0.3110
Time:                        18:24:49   Log-Likelihood:                -95.340
converged:                       True   LL-Null:                       -138.38
Covariance Type:            nonrobust   LLR p-value:                 4.453e-17
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          2.8356      0.288      9.861      0.000       2.272       3.399
age              