In [1]:
import os
import re
import subprocess
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from matplotlib import colormaps
from scipy import stats

from mri_data import file_manager as fm

sys.path.append("/home/srs-9/Projects/ms_mri/analysis/paper1")

import helpers

In [2]:
drive_root = fm.get_drive_root()
msmri_home = Path("/home/srs-9/Projects/ms_mri")
msmri_datadir = msmri_home / "data"
curr_dir = Path(os.getcwd())
data_dir = Path("/home/srs-9/Projects/ms_mri/data")
dataroot = Path("/media/smbshare/srs-9/thalamus_project/data")

showfigs = False
pd.options.display.precision = 3
subject_sessions = pd.read_csv(
    "/home/srs-9/Projects/ms_mri/analysis/thalamus/data0/subject-sessions.csv",
    index_col="sub",
)

figpath = Path("/home/srs-9/Projects/ms_mri/analysis/thalamus/figures_tables/thalamus_phenotype")

In [3]:
cp_vols = pd.read_csv(data_dir / "choroid_aschoplex_volumes.csv", index_col="subid")
tiv_data = pd.read_csv(data_dir / "tiv_data.csv", index_col="subid")
clinical_data = pd.read_csv(data_dir / "clinical_data_processed.csv", index_col="subid")
df = cp_vols.join([tiv_data, clinical_data])

df_z = df.copy()
numeric_vars = [col for col in df_z.columns if pd.api.types.is_numeric_dtype(df_z[col])]
df_z[numeric_vars] = df_z[numeric_vars].apply(stats.zscore, nan_policy="omit")

In [5]:
df_thomas = pd.read_csv(data_dir / "hipsthomas_vols.csv", index_col="subid")
cols_orig = df_thomas.columns
new_colnames = {}
for col in df_thomas.columns:
    new_col = re.sub(r"(\d+)-([\w-]+)", r"\2_\1", col)
    new_col = re.sub("-", "_", new_col)
    new_colnames[col] = new_col
df_thomas = df_thomas.rename(columns=new_colnames)
df_thomas_norm = df_thomas.apply(lambda col: col / df_thomas['THALAMUS_1'])
df_thomas_z = df_thomas.apply(stats.zscore, nan_policy="omit")
df_thomas_norm_z = df_thomas_norm.apply(stats.zscore, nan_policy="omit")

df_thomas_left = pd.read_csv(data_dir / "hipsthomas_left_vols.csv", index_col="subid")
df_thomas_left = df_thomas_left.rename(columns=new_colnames)
df_thomas_left_z = df_thomas_left.apply(stats.zscore, nan_policy="omit")

df_thomas_right = pd.read_csv(data_dir / "hipsthomas_right_vols.csv", index_col="subid")
df_thomas_right = df_thomas_right.rename(columns=new_colnames)
df_thomas_right_z = df_thomas_right.apply(stats.zscore, nan_policy="omit")


thalamic_nuclei = [2, 4, 5, 6, 7, 8, 9, 10, 11, 12]
thalamic_nuclei_str = [str(i) for i in thalamic_nuclei]

hips_thomas_ref = pd.read_csv(
    "/home/srs-9/Projects/ms_mri/data/hipsthomas_struct_index.csv", index_col="index"
)['struct']

def combine_nuclei(df):
    df2 = pd.DataFrame()
    df2['anterior'] = df['AV_2']
    df2['ventral'] = df['VA_4'] + df['VLa_5'] + df['VLP_6'] + df['VPL_7']
    df2['intralaminar'] = df['CM_11'] 
    df2['medial'] = df['MD_Pf_12']
    df2['posterior'] = df['Pul_8'] + df['LGN_9'] + df['MGN_10']
    df2['THALAMUS_1'] = df['THALAMUS_1']
    return df2

def combine_nuclei2(df):
    df2 = pd.DataFrame()
    df2["anterior"] = df["AV_2"]
    df2["ventral"] = df["VA_4"] + df["VLa_5"] + df["VLP_6"] + df["VPL_7"]
    df2["medial"] = df["MD_Pf_12"] + df["CM_11"]
    df2["posterior"] = df["Pul_8"] + df["LGN_9"] + df["MGN_10"]
    df2["THALAMUS_1"] = df["THALAMUS_1"]
    return df2

df_thomas2 = combine_nuclei2(df_thomas)
df_thomas2_z = df_thomas2.apply(stats.zscore, nan_policy="omit")

df_thomas2_left = combine_nuclei2(df_thomas_left)
df_thomas2_left_z = df_thomas2_left.apply(stats.zscore, nan_policy="omit")

df_thomas2_right = combine_nuclei2(df_thomas_right)
df_thomas2_right_z = df_thomas2_right.apply(stats.zscore, nan_policy="omit")
thomas2_structs = df_thomas2.columns[~df_thomas2.columns.isin(["THALAMUS_1"])]

  results[i] = self.func(v, *self.args, **self.kwargs)


### MS vs NIND

#### Left Side

In [75]:
model_data = df_z.join(df_thomas_left_z)
model_data = model_data[model_data['dz_type3'].isin(["MS", "NIND"])]
model_data = pd.concat((model_data, pd.get_dummies(model_data['dz_type3'], dtype="int")), axis=1)

covars = "age + Female + tiv"
outcome = "MS"

pvals = {}
coefs = {}
all_results = {}
# for struct in hips_thomas_ref[~hips_thomas_ref.isin(["THALAMUS_1", "Amy_34", "GP_33", "MTT_14", "Hb_13"])]:
for struct in hips_thomas_ref[thalamic_nuclei]:
    formula = f"{outcome} ~ {covars} + {struct}"
    res = sm.Logit.from_formula(formula, data=model_data).fit(disp=0)
    pvals[struct] = res.pvalues[struct]
    coefs[struct] = res.params[struct]
    all_results[struct] = res

regression_results_left = pd.DataFrame({"coef": coefs, "pvals": pvals})
regression_results_left['p_fdr'] = stats.false_discovery_control(regression_results_left['pvals'], method='bh')
# bins = np.linspace(regression_results_left['coef'].min(), regression_results_left.coef.max(), 10)
#! Hard coded min and max across both L/R cells
bins = np.linspace(-1.2, -0.05, 10)
regression_results_left['coef_bin'] = np.digitize(regression_results_left['coef'], bins)
regression_results_left = regression_results_left.sort_values(by="coef", ascending=True)
regression_results_left

Unnamed: 0,coef,pvals,p_fdr,coef_bin
Pul_8,-1.194,2.372e-06,2.372e-05,1
MD_Pf_12,-0.878,6.954e-05,0.0003477,3
CM_11,-0.67,0.0002267,0.0007557,5
AV_2,-0.632,0.0004719,0.00118,5
VPL_7,-0.528,0.008204,0.01367,6
LGN_9,-0.52,0.001508,0.003016,6
VLP_6,-0.449,0.02755,0.03936,6
MGN_10,-0.337,0.07158,0.08947,7
VA_4,-0.196,0.2785,0.3094,8
VLa_5,-0.045,0.8065,0.8065,10


#### Right Side

In [76]:
model_data = df_z.join(df_thomas_right_z)
model_data = model_data[model_data['dz_type3'].isin(["MS", "NIND"])]
model_data = pd.concat((model_data, pd.get_dummies(model_data['dz_type3'], dtype="int")), axis=1)

covars = "age + Female + tiv"
outcome = "MS"

pvals = {}
coefs = {}
all_results = {}
# for struct in hips_thomas_ref[~hips_thomas_ref.isin(["THALAMUS_1", "Amy_34", "GP_33", "MTT_14", "Hb_13"])]:
for struct in hips_thomas_ref[thalamic_nuclei]:
    formula = f"{outcome} ~ {covars} + {struct}"
    res = sm.Logit.from_formula(formula, data=model_data).fit(disp=0)
    pvals[struct] = res.pvalues[struct]
    coefs[struct] = res.params[struct]
    all_results[struct] = res

regression_results_right = pd.DataFrame({"coef": coefs, "pvals": pvals})
regression_results_right['p_fdr'] = stats.false_discovery_control(regression_results_right['pvals'], method='bh')
# bins = np.linspace(regression_results_right['coef'].min(), regression_results_right.coef.max(), 10)
#! Hard coded min and max across both L/R cells
bins = np.linspace(-1.23, -0.05, 10)
regression_results_right['coef_bin'] = np.digitize(regression_results_right['coef'], bins)
regression_results_right = regression_results_right.sort_values(by="coef", ascending=True)
regression_results_right

Unnamed: 0,coef,pvals,p_fdr,coef_bin
Pul_8,-1.222,1.201e-06,1.201e-05,1
MD_Pf_12,-0.916,4.81e-05,0.0001603,3
AV_2,-0.847,1.612e-05,8.062e-05,3
VLP_6,-0.546,0.0102,0.02039,6
VPL_7,-0.507,0.005921,0.0148,6
LGN_9,-0.365,0.02994,0.04991,7
VLa_5,-0.363,0.047,0.05875,7
MGN_10,-0.361,0.05436,0.0604,7
VA_4,-0.352,0.06502,0.06502,7
CM_11,-0.337,0.04676,0.05875,7


In [78]:
left_rename = {k: f"{k}_left" for k in regression_results_left.columns}
regression_results_left.rename(columns=left_rename, inplace=True)
right_rename = {k: f"{k}_right" for k in regression_results_right.columns}
regression_results_right.rename(columns=right_rename, inplace=True)

regression_results = pd.concat([regression_results_left, regression_results_right], axis=1)
regression_results.to_excel(figpath / "ms_vs_nind_left_right.xlsx")

### MS vs !MS

In [12]:
model_data = df_z.join(df_thomas_z)
model_data = model_data[model_data['dz_type2'].isin(["MS", "!MS"])]
model_data = pd.concat((model_data, pd.get_dummies(model_data['dz_type2'], dtype="int")), axis=1)

formula = "MS ~ THALAMUS_1 + age + Female + tiv"
res = sm.Logit.from_formula(formula, data=model_data).fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.405587
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                     MS   No. Observations:                  560
Model:                          Logit   Df Residuals:                      555
Method:                           MLE   Df Model:                            4
Date:                Wed, 22 Oct 2025   Pseudo R-squ.:                 0.09205
Time:                        20:53:49   Log-Likelihood:                -227.13
converged:                       True   LL-Null:                       -250.16
Covariance Type:            nonrobust   LLR p-value:                 2.397e-09
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.9098      0.145     13.132      0.000       1.625       2.195
THALAMUS_1    -1.1040      0.

In [9]:
model_data = df_z.join(df_thomas_z)
model_data = model_data[model_data['dz_type3'].isin(["MS", "NIND"])]
model_data = pd.concat((model_data, pd.get_dummies(model_data['dz_type3'], dtype="int")), axis=1)

covars = "age + Female + tiv"
outcome = "MS"

pvals = {}
coefs = {}
se = {}
all_results = {}
for struct in hips_thomas_ref[thalamic_nuclei]:
    formula = f"{outcome} ~ {covars} + {struct}"
    res = sm.Logit.from_formula(formula, data=model_data).fit(disp=0)
    pvals[struct] = res.pvalues[struct]
    coefs[struct] = res.params[struct]
    all_results[struct] = res

regression_results = pd.DataFrame({"coef": coefs, "pvals": pvals})
regression_results['p_fdr'] = stats.false_discovery_control(regression_results['pvals'], method='bh')
regression_results = regression_results.sort_values(by="coef", ascending=True)
regression_results

Unnamed: 0,coef,pvals,p_fdr
Pul_8,-1.276,6.712e-07,6.712e-06
MD_Pf_12,-0.985,2.291e-05,7.638e-05
AV_2,-0.844,1.985e-05,7.638e-05
VPL_7,-0.63,0.002576,0.005153
CM_11,-0.598,0.001312,0.00328
VLP_6,-0.546,0.01073,0.01533
LGN_9,-0.505,0.00334,0.005567
MGN_10,-0.407,0.03915,0.04893
VA_4,-0.312,0.1063,0.1181
VLa_5,-0.253,0.188,0.188


In [32]:
model_data = df_z.join(df_thomas2_z)
model_data = model_data[model_data['dz_type2'].isin(["MS", "!MS"])]
model_data = pd.concat((model_data, pd.get_dummies(model_data['dz_type3'], dtype="int")), axis=1)

covars = "age + Female + tiv"
outcome = "MS"

pvals = {}
coefs = {}
llci = {}
ulci = {}
ci_str = {}

all_results = {}
# for struct in df_thomas2.columns:
#for struct in df_thomas2.columns[~df_thomas2.columns.isin(["THALAMUS_1"])]:
for struct in df_thomas2.columns:
    formula = f"{outcome} ~ {struct} + {covars}"
    res = sm.Logit.from_formula(formula, data=model_data).fit(disp=0)
    pvals[struct] = res.pvalues[struct]
    coefs[struct] = res.params[struct]
    ci = res.conf_int()
    llci[struct] = ci.loc[struct, 0]
    ulci[struct] = ci.loc[struct, 1]
    ci_str[struct] = f"[{llci[struct]:.6f}, {ulci[struct]:.6f}]"
    all_results[struct] = res

regression_results = pd.DataFrame({"coef": coefs, "llci": llci, "ulci": ulci, "pvals": pvals, "ci": ci_str})
regression_results['p_fdr'] = stats.false_discovery_control(regression_results['pvals'], method='bh')

regression_results = regression_results.sort_values(by="coef", ascending=True)
# regression_results.to_csv(figpath / "ms_nind_combined_nuclei_thalamus_control.csv")
regression_results

Unnamed: 0,coef,llci,ulci,pvals,ci,p_fdr
posterior,-1.156,-1.536,-0.776,2.512e-09,"[-1.536424, -0.776046]",1.256e-08
THALAMUS_1,-1.104,-1.512,-0.696,1.129e-07,"[-1.511931, -0.696111]",2.821e-07
medial,-0.856,-1.197,-0.515,8.934e-07,"[-1.197363, -0.514536]",1.117e-06
anterior,-0.75,-1.044,-0.456,5.677e-07,"[-1.043818, -0.456090]",9.461e-07
ventral,-0.627,-0.974,-0.28,0.0003985,"[-0.974352, -0.280052]",0.0003985


In [17]:
struct

'posterior'

In [49]:
models = {}
models['posterior'] = sm.OLS.from_formula("posterior ~ age + Female + tiv", data=model_data).fit()
models['medial'] = sm.OLS.from_formula("medial ~ age + Female + tiv", data=model_data).fit()
models['anterior'] = sm.OLS.from_formula("anterior ~ age + Female + tiv", data=model_data).fit()
models['ventral'] = sm.OLS.from_formula("ventral ~ age + Female + tiv", data=model_data).fit()
models['MS'] = sm.OLS.from_formula("MS ~ age + Female + tiv", data=model_data).fit()

structs = ['posterior', 'medial', 'anterior', 'ventral']
working_structs = structs.copy()

R_cmd = ""
for i, struct1 in enumerate(structs):
    working_structs = working_structs[1:]   
    for struct2 in working_structs:
        p1 = stats.pearsonr(models[struct1].resid, models['MS'].resid).statistic
        p2 = stats.pearsonr(models[struct2].resid, models['MS'].resid).statistic
        p12 = stats.pearsonr(models[struct1].resid, models[struct2].resid).statistic
        R_cmd = R_cmd + f"'{struct1}, {struct2}'\ntest2r.t2({p1:0.3}, {p2:0.3}, {p12:0.3}, 468)$p_value\n\n"
#         test2r.t2(-0.2570, -0.225,
#           0.936, 468)

# print("posterior-medial", stats.pearsonr(model_posterior.resid, model_medial.resid).statistic)
# print("posterior-anterior", stats.pearsonr(model_posterior.resid, model_anterior.resid).statistic)
# print("anterior", stats.pearsonr(model2.resid, model3.resid).statistic)

# # data = pd.DataFrame({'posterior': model1.resid, 'medial': model2.resid, 'MS': model3.resid})
# # result = sm.Logit.from_formula('MS ~ posterior', data=data).fit()
# print(result.summary())
print(R_cmd)


'posterior, medial'
test2r.t2(-0.257, -0.209, 0.865, 468)$p_value

'posterior, anterior'
test2r.t2(-0.257, -0.215, 0.616, 468)$p_value

'posterior, ventral'
test2r.t2(-0.257, -0.151, 0.711, 468)$p_value

'medial, anterior'
test2r.t2(-0.209, -0.215, 0.579, 468)$p_value

'medial, ventral'
test2r.t2(-0.209, -0.151, 0.742, 468)$p_value

'anterior, ventral'
test2r.t2(-0.215, -0.151, 0.604, 468)$p_value




### RMS vs PMS

In [13]:
model_data = df_z.join(df_thomas_z)
model_data = model_data[model_data['dz_type5'].isin(["RMS", "PMS"])]
model_data = pd.concat((model_data, pd.get_dummies(model_data['dz_type5'], dtype="int")), axis=1)

formula = "PMS ~ THALAMUS_1 + age + Female + tiv"
res = sm.Logit.from_formula(formula, data=model_data).fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.360051
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                    PMS   No. Observations:                  468
Model:                          Logit   Df Residuals:                      463
Method:                           MLE   Df Model:                            4
Date:                Wed, 22 Oct 2025   Pseudo R-squ.:                  0.2944
Time:                        20:55:36   Log-Likelihood:                -168.50
converged:                       True   LL-Null:                       -238.83
Covariance Type:            nonrobust   LLR p-value:                 2.056e-29
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.0694      0.187    -11.078      0.000      -2.436      -1.703
THALAMUS_1    -0.7050      0.

In [69]:
covars = "age + Female + tiv"
outcome = "PMS"

pvals = {}
coefs = {}
se = {}
all_results = {}
for struct in hips_thomas_ref[thalamic_nuclei]:
    formula = f"{outcome} ~ {covars} + {struct}"
    res = sm.Logit.from_formula(formula, data=model_data).fit(disp=0)
    pvals[struct] = res.pvalues[struct]
    coefs[struct] = res.params[struct]
    # se[struct] = res.HC0_se[struct]
    all_results[struct] = res

regression_results = pd.DataFrame({"coef": coefs, "pvals": pvals, "se": se})
regression_results['p_fdr'] = stats.false_discovery_control(regression_results['pvals'], method='bh')
regression_results = regression_results.sort_values(by="coef", ascending=True)
regression_results

Unnamed: 0,coef,pvals,se,p_fdr
AV_2,-0.696,9.646e-05,,0.0002648
VLP_6,-0.681,1.655e-05,,0.0001655
Pul_8,-0.605,5.881e-05,,0.0002648
LGN_9,-0.572,0.0001059,,0.0002648
VPL_7,-0.557,0.0003258,,0.000543
MD_Pf_12,-0.556,0.0001418,,0.0002836
VA_4,-0.536,0.002414,,0.003155
VLa_5,-0.533,0.002524,,0.003155
CM_11,-0.432,0.006107,,0.006785
MGN_10,-0.4,0.01441,,0.01441


In [8]:
model_data = df_z.join(df_thomas2_z)
model_data = model_data[model_data['dz_type5'].isin(["RMS", "PMS"])]
model_data = pd.concat((model_data, pd.get_dummies(model_data['dz_type5'], dtype="int")), axis=1)

covars = "age + Female + tiv"
outcome = "PMS"

pvals = {}
coefs = {}
llci = {}
ulci = {}
ci_str = {}

all_results = {}
# for struct in df_thomas2.columns:
for struct in df_thomas2.columns[~df_thomas2.columns.isin(["THALAMUS_1"])]:
    formula = f"{outcome} ~ {struct}*dzdur + {covars}"
    res = sm.Logit.from_formula(formula, data=model_data).fit(disp=0)
    pvals[struct] = res.pvalues[struct]
    coefs[struct] = res.params[struct]
    ci = res.conf_int()
    llci[struct] = ci.loc[struct, 0]
    ulci[struct] = ci.loc[struct, 1]
    ci_str[struct] = f"[{llci[struct]:.6f}, {ulci[struct]:.6f}]"
    all_results[struct] = res

regression_results = pd.DataFrame({"coef": coefs, "llci": llci, "ulci": ulci, "pvals": pvals, "ci": ci_str})
regression_results['p_fdr'] = stats.false_discovery_control(regression_results['pvals'], method='bh')

regression_results = regression_results.sort_values(by="coef", ascending=True)
# regression_results.to_csv(figpath / "ms_nind_combined_nuclei_thalamus_control.csv")
regression_results

Unnamed: 0,coef,llci,ulci,pvals,ci,p_fdr
anterior,-0.635,-0.996,-0.274,0.0005651,"[-0.995955, -0.274020]",0.002
ventral,-0.589,-0.958,-0.221,0.001696,"[-0.957503, -0.221384]",0.003
posterior,-0.502,-0.824,-0.179,0.00228,"[-0.824118, -0.179454]",0.003
medial,-0.413,-0.744,-0.082,0.01456,"[-0.744026, -0.081659]",0.015


### PPMS vs SPMS

Thalamic volume predicts PPMS vs SPMS. PPMS associated with greater thalamic volume, after controlling for age, sex, tiv, and disease duration

In [36]:
model_data = df_z.join(df_thomas_z)
model_data = model_data[model_data['ms_type'].isin(["PPMS", "SPMS"])]
model_data = pd.concat((model_data, pd.get_dummies(model_data['ms_type'], dtype="int")), axis=1)

formula = "PPMS ~ THALAMUS_1 + age + Female + tiv + dzdur"
res = sm.Logit.from_formula(formula, data=model_data).fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.548368
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                   PPMS   No. Observations:                   78
Model:                          Logit   Df Residuals:                       72
Method:                           MLE   Df Model:                            5
Date:                Fri, 06 Jun 2025   Pseudo R-squ.:                  0.1839
Time:                        17:43:10   Log-Likelihood:                -42.773
converged:                       True   LL-Null:                       -52.413
Covariance Type:            nonrobust   LLR p-value:                  0.001704
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.1583      0.420     -0.377      0.706      -0.981       0.665
THALAMUS_1     0.6552      0.

In [37]:
covars = "age + Female + tiv"
outcome = "PPMS"

pvals = {}
coefs = {}
se = {}
all_results = {}
for struct in df_structs.loc[thalamic_nuclei_inds, 'struct']:
    formula = f"{outcome} ~ {covars} + {struct}"
    res = sm.Logit.from_formula(formula, data=model_data).fit()
    pvals[struct] = res.pvalues[struct]
    coefs[struct] = res.params[struct]
    # se[struct] = res.HC0_se[struct]
    all_results[struct] = res

regression_results = pd.DataFrame({"coef": coefs, "pvals": pvals, "se": se})
regression_results['p_fdr'] = stats.false_discovery_control(regression_results['pvals'], method='bh')
regression_results = regression_results.sort_values(by="coef", ascending=True)
regression_results

Optimization terminated successfully.
         Current function value: 0.635106
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.652023
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.655371
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.631714
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.589296
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.588183
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.642673
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.636726
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.655130
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.612379
  

Unnamed: 0,coef,pvals,se,p_fdr
VLa_5,0.017,0.952,,0.952
CM_11,0.052,0.839,,0.932
VA_4,0.229,0.473,,0.591
LGN_9,0.361,0.166,,0.238
MGN_10,0.442,0.101,,0.168
VLP_6,0.48,0.067,,0.167
AV_2,0.576,0.083,,0.167
MD_Pf_12,0.628,0.016,,0.052
Pul_8,0.868,0.003,,0.023
VPL_7,0.877,0.005,,0.023


In [31]:
model_data = df_z.join(df_thomas_z)
model_data = model_data[model_data['ms_type'].isin(["SPMS", "RRMS"])]
model_data = pd.concat((model_data, pd.get_dummies(model_data['ms_type'], dtype="int")), axis=1)

formula = "RRMS ~ age + THALAMUS_1 + Female + tiv + dzdur"
res = sm.Logit.from_formula(formula, data=model_data).fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.244788
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:                   RRMS   No. Observations:                  368
Model:                          Logit   Df Residuals:                      362
Method:                           MLE   Df Model:                            5
Date:                Fri, 06 Jun 2025   Pseudo R-squ.:                  0.3592
Time:                        17:41:15   Log-Likelihood:                -90.082
converged:                       True   LL-Null:                       -140.58
Covariance Type:            nonrobust   LLR p-value:                 3.244e-20
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      3.0153      0.310      9.712      0.000       2.407       3.624
age           -0.9106      0.

In [33]:
covars = "age + Female + tiv"
outcome = "RRMS"

pvals = {}
coefs = {}
se = {}
all_results = {}
for struct in df_structs.loc[thalamic_nuclei_inds, 'struct']:
    formula = f"{outcome} ~ {covars} + {struct}"
    res = sm.Logit.from_formula(formula, data=model_data).fit()
    pvals[struct] = res.pvalues[struct]
    coefs[struct] = res.params[struct]
    # se[struct] = res.HC0_se[struct]
    all_results[struct] = res

regression_results = pd.DataFrame({"coef": coefs, "pvals": pvals, "se": se})
regression_results['p_fdr'] = stats.false_discovery_control(regression_results['pvals'], method='bh')
regression_results = regression_results.sort_values(by="coef", ascending=True)
regression_results

Optimization terminated successfully.
         Current function value: 0.268260
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.276843
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.276132
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.258476
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.259288
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.257989
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.270770
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.275042
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.278471
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.262450
  

Unnamed: 0,coef,pvals,se,p_fdr
CM_11,0.473,0.02905,,0.02905
VA_4,0.576,0.01344,,0.01494
VLa_5,0.583,0.01165,,0.01456
MGN_10,0.584,0.005444,,0.007778
LGN_9,0.667,0.001391,,0.002318
MD_Pf_12,0.785,7.174e-05,,0.0001793
AV_2,0.832,0.0007468,,0.001494
VLP_6,0.894,1.743e-05,,6.433e-05
VPL_7,0.897,1.93e-05,,6.433e-05
Pul_8,0.927,1.847e-05,,6.433e-05
