In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import numpy as np
import statsmodels.api as sm
from scipy import stats
import os
from scipy import stats

from mri_data import file_manager as fm

In [None]:
# Resolve working locations: whatever fm.get_drive_root() returns, plus the
# "data0" folder underneath the current working directory.
drive_root = fm.get_drive_root()
curr_dir = Path(os.getcwd())
data_dir = curr_dir / "data0"

# NOTE(review): this table is read from a hardcoded absolute path instead of
# data_dir -- confirm whether that is intentional before sharing the notebook.
subject_sessions = pd.read_csv(
    "/home/srs-9/Projects/ms_mri/analysis/thalamus/data0/subject-sessions.csv",
    index_col="sub",
)

# Read the three per-subject tables (each indexed by "subid"), in the same
# order as the names on the left-hand side.
cp_vols, tiv_data, clinical_data = (
    pd.read_csv(data_dir / csv_name, index_col="subid")
    for csv_name in (
        "choroid_aschoplex_volumes.csv",
        "tiv_data.csv",
        "clinical_data_processed.csv",
    )
)

# Index-based join with cp_vols as the left table.
df = cp_vols.join([tiv_data, clinical_data])

# Standardised copy of the joined table: every numeric column is replaced by
# its z-score (NaNs are omitted from the computation); other columns are kept.
df_z = df.copy()
numeric_vars = [
    name for name in df_z.columns if pd.api.types.is_numeric_dtype(df_z[name])
]
df_z[numeric_vars] = df_z[numeric_vars].apply(stats.zscore, nan_policy="omit")

### MS vs !MS

In [None]:
# Keep only the rows of the z-scored table whose dz_type2 label is "MS" or
# "!MS", working on an independent copy so df_z itself is never modified.
model_data = df_z.loc[df_z["dz_type2"].isin(["MS", "!MS"])].copy()

# Append the integer indicator columns produced by get_dummies for dz_type2;
# the "MS" indicator is the binary outcome used by the formulas that follow.
model_data = pd.concat(
    [model_data, pd.get_dummies(model_data["dz_type2"], dtype="int")],
    axis=1,
)

#### Choroid

In [None]:
# Logistic regression of the MS indicator on choroid_volume, with age, Female
# and tiv as additional terms. Numeric predictors were z-scored when df_z was built.
formula = "MS ~ choroid_volume + age + Female + tiv"
logit_model = sm.Logit.from_formula(formula, data=model_data)
res = logit_model.fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.415989
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                     MS   No. Observations:                  486
Model:                          Logit   Df Residuals:                      481
Method:                           MLE   Df Model:                            4
Date:                Tue, 13 May 2025   Pseudo R-squ.:                 0.02491
Time:                        15:29:42   Log-Likelihood:                -202.17
converged:                       True   LL-Null:                       -207.33
Covariance Type:            nonrobust   LLR p-value:                   0.03524
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          1.7599      0.132     13.382      0.000       1.502       2.018
choroid_volume   

#### T2LV

In [25]:
# Logistic regression of the MS indicator on t2lv, with age, Female and tiv
# as additional terms. Numeric predictors were z-scored when df_z was built.
formula = "MS ~ t2lv + age + Female + tiv"
logit_model = sm.Logit.from_formula(formula, data=model_data)
res = logit_model.fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.398282
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                     MS   No. Observations:                  489
Model:                          Logit   Df Residuals:                      484
Method:                           MLE   Df Model:                            4
Date:                Tue, 13 May 2025   Pseudo R-squ.:                 0.07056
Time:                        15:30:17   Log-Likelihood:                -194.76
converged:                       True   LL-Null:                       -209.55
Covariance Type:            nonrobust   LLR p-value:                 5.987e-06
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.9486      0.160     12.213      0.000       1.636       2.261
t2lv           0.9814      0.

#### Thalamus

In [None]:
# Logistic regression of the MS indicator on thalamus, with age, Female and
# tiv as additional terms. Numeric predictors were z-scored when df_z was built.
formula = "MS ~ thalamus + age + Female + tiv"
logit_model = sm.Logit.from_formula(formula, data=model_data)
res = logit_model.fit()
print(res.summary())

### RMS vs PMS

In [27]:
# Keep only the rows of the z-scored table whose dz_type5 label is "RMS" or
# "PMS", working on an independent copy so df_z itself is never modified.
model_data = df_z.loc[df_z["dz_type5"].isin(["RMS", "PMS"])].copy()

# Append the integer indicator columns produced by get_dummies for dz_type5;
# the "PMS" indicator is the binary outcome used by the formulas that follow.
model_data = pd.concat(
    [model_data, pd.get_dummies(model_data["dz_type5"], dtype="int")],
    axis=1,
)

#### Choroid

In [28]:
# Logistic regression of the PMS indicator on choroid_volume, with age, Female
# and tiv as additional terms. Numeric predictors were z-scored when df_z was built.
formula = "PMS ~ choroid_volume + age + Female + tiv"
logit_model = sm.Logit.from_formula(formula, data=model_data)
res = logit_model.fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.377147
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                    PMS   No. Observations:                  412
Model:                          Logit   Df Residuals:                      407
Method:                           MLE   Df Model:                            4
Date:                Tue, 13 May 2025   Pseudo R-squ.:                  0.2543
Time:                        15:33:55   Log-Likelihood:                -155.38
converged:                       True   LL-Null:                       -208.36
Covariance Type:            nonrobust   LLR p-value:                 5.288e-22
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept         -1.9335      0.187    -10.364      0.000      -2.299      -1.568
choroid_volume   

#### T2LV

In [29]:
# Logistic regression of the PMS indicator on t2lv, with age, Female and tiv
# as additional terms. Numeric predictors were z-scored when df_z was built.
formula = "PMS ~ t2lv + age + Female + tiv"
logit_model = sm.Logit.from_formula(formula, data=model_data)
res = logit_model.fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.373781
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                    PMS   No. Observations:                  414
Model:                          Logit   Df Residuals:                      409
Method:                           MLE   Df Model:                            4
Date:                Tue, 13 May 2025   Pseudo R-squ.:                  0.2637
Time:                        15:34:01   Log-Likelihood:                -154.75
converged:                       True   LL-Null:                       -210.18
Covariance Type:            nonrobust   LLR p-value:                 4.748e-23
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.9486      0.188    -10.384      0.000      -2.316      -1.581
t2lv           0.3665      0.

#### Thalamus

In [30]:
# Logistic regression of the PMS indicator on thalamus, with age, Female and
# tiv as additional terms. Numeric predictors were z-scored when df_z was built.
formula = "PMS ~ thalamus + age + Female + tiv"
logit_model = sm.Logit.from_formula(formula, data=model_data)
res = logit_model.fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.362006
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                    PMS   No. Observations:                  414
Model:                          Logit   Df Residuals:                      409
Method:                           MLE   Df Model:                            4
Date:                Tue, 13 May 2025   Pseudo R-squ.:                  0.2869
Time:                        15:34:07   Log-Likelihood:                -149.87
converged:                       True   LL-Null:                       -210.18
Covariance Type:            nonrobust   LLR p-value:                 3.939e-25
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.0208      0.194    -10.425      0.000      -2.401      -1.641
thalamus      -0.6313      0.

#### PRL

In [33]:
# Logistic regression of the PMS indicator on PRL, with age, Female and tiv
# as additional terms. Numeric predictors were z-scored when df_z was built.
formula = "PMS ~ PRL + age + Female + tiv"
logit_model = sm.Logit.from_formula(formula, data=model_data)
res = logit_model.fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.379686
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                    PMS   No. Observations:                  414
Model:                          Logit   Df Residuals:                      409
Method:                           MLE   Df Model:                            4
Date:                Tue, 13 May 2025   Pseudo R-squ.:                  0.2521
Time:                        15:35:52   Log-Likelihood:                -157.19
converged:                       True   LL-Null:                       -210.18
Covariance Type:            nonrobust   LLR p-value:                 5.235e-22
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.9283      0.187    -10.302      0.000      -2.295      -1.561
PRL            0.1156      0.