In [87]:
import pandas as pd
from pathlib import Path
import numpy as np
import statsmodels.api as sm

from mri_data import file_manager as fm

In [90]:
# Drive root as reported by the project's file manager helper.
drive_root = fm.get_drive_root()

# Project home and the analysis folder that holds this notebook's inputs.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
msmri_home = Path("/home/srs-9/Projects/ms_mri")
curr_dir = msmri_home.joinpath("monai_analysis", "choroid_pineal_pituitary_T1-1")

## Data Cleaning

In [185]:
# Load the clinical table indexed by subject ID; `df_full` keeps every subject.
df_full = pd.read_csv(curr_dir / "clinical_data_full.csv").set_index("subid")

# Row mask: subjects with all three structure volumes present.
volume_cols = ['pineal_volume', 'choroid_volume', 'pituitary_volume']
not_nas = df_full[volume_cols].notna().all(axis=1)

# Take an explicit copy so that the column assignments made on `df` in later
# cells act on an independent frame instead of a slice of `df_full`
# (avoids SettingWithCopyWarning and leaves `df_full` untouched).
df = df_full.loc[not_nas, :].copy()

In [93]:
# Alternative kept for reference: an explicit ordered dtype over a fixed EDSS grid.
# edss_type = pd.CategoricalDtype(categories=np.linspace(1, 11, 21), ordered=True)
# df['numEDSS'] = df['numEDSS'].astype(edss_type)

# Treat EDSS as an ordered categorical built from the values observed in the data.
df['numEDSS'] = pd.Categorical(df['numEDSS'], ordered=True)

# Blank out the placeholder strings that appear in otherwise numeric columns.
for column, sentinel in (('edss_diff_abs', "."), ('dzdur', "#VALUE!")):
    df.loc[df[column] == sentinel, column] = None

Add columns for volumes normalized by TIV

In [183]:
# Divide each structure volume by the `tiv` column and store it as norm_<structure>_volume.
for structure in ('choroid', 'pineal', 'pituitary'):
    df.loc[:, f'norm_{structure}_volume'] = df[f'{structure}_volume'] / df['tiv']

### Categorize Disease Types

- RMS vs PMS vs !MS
- MS vs !MS
- MS vs OIND vs NIND

`dz_type` has relapsing MS (RMS) versus progressive MS (PMS) versus not MS (!MS)

In [188]:
# `dz_type`: relapsing MS (RMS) vs progressive MS (PMS) vs not MS (!MS).
# Any `ms_type` label that is not listed here is carried over unchanged.
dz_type_labels = {
    'CIS': 'RMS', 'RRMS': 'RMS',
    'PPMS': 'PMS', 'SPMS': 'PMS', 'RPMS': 'PMS', 'PRMS': 'PMS',
    'NIND': '!MS', 'OIND': '!MS', 'HC': '!MS',
}

# Create the column only when it is missing, so re-running the cell is safe
# without a blanket `except` that would also hide genuine errors.
# The position is clamped so a narrower frame appends instead of raising.
if 'dz_type' not in df.columns:
    df.insert(min(18, df.shape[1]), 'dz_type', df['ms_type'])

# Always recompute from `ms_type` so the result does not depend on earlier runs.
df.loc[:, 'dz_type'] = df['ms_type'].map(lambda label: dz_type_labels.get(label, label))
df['dz_type'].unique()

array(['RMS', 'PMS', '!MS', 'UNK', 'RIS'], dtype=object)

`dz_type2` has MS versus !MS; the remaining labels (UNK, RIS) are kept as-is

In [189]:
# `dz_type2`: RMS and PMS pooled into a single 'MS' group; every other
# `dz_type` label is carried over unchanged.

# Create the column only when it is missing, so re-running the cell is safe
# without a blanket `except` that would also hide genuine errors.
# The position is clamped so a narrower frame appends instead of raising.
if 'dz_type2' not in df.columns:
    df.insert(min(18, df.shape[1]), 'dz_type2', df['dz_type'])

# Reset from `dz_type` first so the result does not depend on earlier runs.
df.loc[:, 'dz_type2'] = df['dz_type']
df.loc[df['dz_type'].isin(['RMS', 'PMS']), 'dz_type2'] = 'MS'
df['dz_type2'].unique()

array(['MS', '!MS', 'UNK', 'RIS'], dtype=object)

`dz_type3` has MS v OIND v NIND

In [190]:
# `dz_type3`: every MS subtype (including CIS) pooled as 'MS'; all other
# `ms_type` labels (e.g. OIND, NIND) are carried over unchanged.
ms_subtypes = ['PPMS', 'SPMS', 'RPMS', 'PRMS', 'RRMS', 'CIS']

# Create the column only when it is missing, so re-running the cell is safe
# without a blanket `except` that would also hide genuine errors.
# The position is clamped so a narrower frame appends instead of raising.
if 'dz_type3' not in df.columns:
    df.insert(min(18, df.shape[1]), 'dz_type3', df['ms_type'])

# Reset from `ms_type` first so the result does not depend on earlier runs.
df.loc[:, 'dz_type3'] = df['ms_type']
df.loc[df['ms_type'].isin(ms_subtypes), 'dz_type3'] = 'MS'
df['dz_type3'].unique()

array(['MS', 'NIND', 'UNK', 'HC', 'OIND', 'RIS'], dtype=object)

## Descriptive Stats

In [98]:
# Cohort size and the number (and fraction) of subjects in each `dz_type` group.
n_patients = len(df)
print(n_patients, "patients in total")

nRMS = sum(df['dz_type'] == "RMS")
nPMS = sum(df['dz_type'] == "PMS")
nNotMS = sum(df['dz_type'] == "!MS")

for group_count, group_label in (
    (nRMS, "patients with RMS"),
    (nPMS, "patients with PMS"),
    (nNotMS, "patients w/o MS"),
):
    print(group_count, group_label, f"({round(group_count / n_patients, 2)})")

492 patients in total
327 patients with RMS (0.66)
85 patients with PMS (0.17)
75 patients w/o MS (0.15)


In [99]:
# Cohort size and the number (and fraction) of subjects in the MS / OIND / NIND groups.
print(len(df), "patients in total")
nMS = sum(df['dz_type3'] == "MS")
nOIND = sum(df['dz_type3'] == "OIND")
nNIND = sum(df['dz_type3'] == "NIND")
print(nMS, "patients with MS", f"({round(nMS / len(df), 2)})")
print(nOIND, "patients with OIND", f"({round(nOIND / len(df), 2)})")
# nNIND counts subjects whose dz_type3 IS "NIND", so the label reads "with", not "w/o".
print(nNIND, "patients with NIND", f"({round(nNIND / len(df), 2)})")

492 patients in total
412 patients with MS (0.84)
30 patients with OIND (0.06)
43 patients w/o NIND (0.09)


In [100]:
# Whole-cohort share of female subjects and mean of the `age` column.
female_fraction = sum(df['sex'] == "Female") / len(df)
mean_age = df['age'].mean()
print(f"Female subjects:     {female_fraction:0.2f}")
print(f"Average age at scan: {mean_age:0.2f}")

Female subjects:     0.78
Average age at scan: 47.77


In [101]:
# Per-group subsets by `dz_type`; the names are reused by later cells.
df_rrms = df[df["dz_type"] == "RMS"]
df_pms = df[df["dz_type"] == "PMS"]
df_notms = df[df["dz_type"] == "!MS"]

# Share of female subjects and mean age within each group.
for group_name, group_df in (("RMS", df_rrms), ("PMS", df_pms), ("!MS", df_notms)):
    group_female_fraction = sum(group_df['sex'] == "Female") / len(group_df)
    group_mean_age = group_df['age'].mean()
    print(f"{group_name} Patients:")
    print(f"\tFemale:  {group_female_fraction:0.2f}")
    print(f"\tAge:     {group_mean_age:0.2f}")

RMS Patients:
	Female:  0.82
	Age:     44.64
PMS Patients:
	Female:  0.58
	Age:     58.24
!MS Patients:
	Female:  0.80
	Age:     49.70


#### Volumes

In [102]:
# Mean raw volume of each structure within the RMS / PMS / not-MS subsets.
# `df_rrms` is the dz_type == "RMS" subset, so it is labelled "RMS" here
# (it was previously printed as "RRMS", which is only one of its subtypes).
for structure_title, volume_column in (
    ("Choroid", 'choroid_volume'),
    ("Pineal", 'pineal_volume'),
    ("Pituitary", 'pituitary_volume'),
):
    print(f"{structure_title} volumes:")
    print("\tRMS:    {:0.2f}".format(df_rrms[volume_column].mean()))
    print("\tPMS:    {:0.2f}".format(df_pms[volume_column].mean()))
    print("\tnot-MS: {:0.2f}".format(df_notms[volume_column].mean()))

Choroid volumes:
	RRMS:   1921.89
	PMS:    2464.00
	not-MS: 1973.78
Pineal volumes:
	RRMS:   241.80
	PMS:    228.35
	not-MS: 267.28
Pituitary volumes:
	RRMS:   694.03
	PMS:    651.02
	not-MS: 690.05


In [103]:
# Mean raw volumes for the pooled MS group versus the not-MS group (`dz_type2`).
is_ms = df['dz_type2'] == "MS"
is_not_ms = df['dz_type2'] == "!MS"

choroid_ms_mean = df.loc[is_ms, 'choroid_volume'].mean()
choroid_notms_mean = df.loc[is_not_ms, 'choroid_volume'].mean()
print("Choroid volumes:")
print(f"\tMS:     {choroid_ms_mean:0.2f}")
print(f"\tnot-MS: {choroid_notms_mean:0.2f}")

pineal_ms_mean = df.loc[is_ms, 'pineal_volume'].mean()
pineal_notms_mean = df.loc[is_not_ms, 'pineal_volume'].mean()
print("\nPineal volumes")
print(f"\tMS:     {pineal_ms_mean:0.2f}")
print(f"\tnot-MS: {pineal_notms_mean:0.2f}")

pituitary_ms_mean = df.loc[is_ms, 'pituitary_volume'].mean()
pituitary_notms_mean = df.loc[is_not_ms, 'pituitary_volume'].mean()
print("\nPituitary volumes")
print(f"\tMS:     {pituitary_ms_mean:0.2f}")
print(f"\tnot-MS: {pituitary_notms_mean:0.2f}")

Choroid volumes:
	MS:     2033.73
	not-MS: 1973.78

Pineal volumes
	MS:     239.02
	not-MS: 267.28

Pituitary volumes
	MS:     685.16
	not-MS: 690.05


In [104]:
# Mean raw volumes for the MS / OIND / NIND groups.
# All three masks come from `dz_type3`; the pineal and pituitary MS means
# previously filtered on `dz_type2`, which was inconsistent with the rest of the cell.
is_ms3 = df['dz_type3'] == "MS"
is_oind = df['dz_type3'] == "OIND"
is_nind = df['dz_type3'] == "NIND"

print("Choroid volumes:")
choroid_ms_mean = df.loc[is_ms3, 'choroid_volume'].mean()
choroid_oind_mean = df.loc[is_oind, 'choroid_volume'].mean()
choroid_nind_mean = df.loc[is_nind, 'choroid_volume'].mean()
print("\tMS:   {:0.2f}".format(choroid_ms_mean))
print("\tOIND: {:0.2f}".format(choroid_oind_mean))
print("\tNIND: {:0.2f}".format(choroid_nind_mean))


print("\nPineal volumes")
pineal_ms_mean = df.loc[is_ms3, 'pineal_volume'].mean()
pineal_oind_mean = df.loc[is_oind, 'pineal_volume'].mean()
pineal_nind_mean = df.loc[is_nind, 'pineal_volume'].mean()
print("\tMS:   {:0.2f}".format(pineal_ms_mean))
print("\tOIND: {:0.2f}".format(pineal_oind_mean))
print("\tNIND: {:0.2f}".format(pineal_nind_mean))


print("\nPituitary volumes")
pituitary_ms_mean = df.loc[is_ms3, 'pituitary_volume'].mean()
pituitary_oind_mean = df.loc[is_oind, 'pituitary_volume'].mean()
pituitary_nind_mean = df.loc[is_nind, 'pituitary_volume'].mean()
print("\tMS:   {:0.2f}".format(pituitary_ms_mean))
print("\tOIND: {:0.2f}".format(pituitary_oind_mean))
print("\tNIND: {:0.2f}".format(pituitary_nind_mean))

Choroid volumes:
	MS:   2033.73
	OIND: 2014.44
	NIND: 1980.16

Pineal volumes
	MS:   239.02
	OIND: 243.42
	NIND: 287.07

Pituitary volumes
	MS:   685.16
	OIND: 704.53
	NIND: 678.36


## Regression Models

Covariates: age, sex, TIV, contrast

### Linear Regression on Volume

Predict volume of structure with the following predictors:

- MS vs Not-MS
- MS vs OIND vs NIND
- RMS vs PMS

#### Choroid Plexus

##### MS vs not-MS

In [162]:
volume = 'choroid_volume'

# Keep only the modelling columns and expose the structure volume as 'volume'.
data = df[['dz_type2', 'age', 'sex', 'tiv', 'flair_contrast', 'dzdur', volume]].rename(
    columns={volume: 'volume'}
)

# Restrict to the MS / !MS groups and to rows whose contrast flag is WITH or WITHOUT.
keep_rows = data['dz_type2'].isin(['MS', '!MS']) & data['flair_contrast'].isin(['WITH', 'WITHOUT'])
data = data[keep_rows]

# Append indicator columns for group, contrast and sex, plus a constant term.
dummy_frames = [pd.get_dummies(data[col]) for col in ('dz_type2', 'flair_contrast', 'sex')]
data = pd.concat([data, *dummy_frames], axis=1)
data['intercept'] = 1

predictors = ['age', 'Male', 'tiv', 'WITH', 'dzdur', 'MS', 'intercept']

# Rows with a missing value in the response or any predictor are dropped by OLS.
endog = data['volume'].astype('float')
exog = data[predictors].astype('float')
res = sm.OLS(endog, exog, missing='drop').fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                 volume   R-squared:                       0.236
Model:                            OLS   Adj. R-squared:                  0.225
Method:                 Least Squares   F-statistic:                     21.85
Date:                Mon, 20 Jan 2025   Prob (F-statistic):           2.06e-22
Time:                        16:20:34   Log-Likelihood:                -3357.1
No. Observations:                 431   AIC:                             6728.
Df Residuals:                     424   BIC:                             6757.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
age           16.7541      2.834      5.912      0.0

In [158]:
# Number of rows in the current model frame whose `dzdur` is missing.
int(data['dzdur'].astype('float').isna().sum())

48

##### MS vs NIND vs OIND

In [163]:
volume = 'choroid_volume'

# Keep only the modelling columns and expose the structure volume as 'volume'.
data = df[['dz_type3', 'age', 'sex', 'tiv', 'flair_contrast', 'dzdur', volume]].rename(
    columns={volume: 'volume'}
)

# Restrict to the MS / OIND / NIND groups and to rows whose contrast flag is WITH or WITHOUT.
keep_rows = data['dz_type3'].isin(['MS', 'OIND', 'NIND']) & data['flair_contrast'].isin(['WITH', 'WITHOUT'])
data = data[keep_rows]

# Append indicator columns for group, contrast and sex, plus a constant term.
dummy_frames = [pd.get_dummies(data[col]) for col in ('dz_type3', 'flair_contrast', 'sex')]
data = pd.concat([data, *dummy_frames], axis=1)
data['intercept'] = 1

# NIND has no indicator among the predictors, so it acts as the reference group.
predictors = ['age', 'Female', 'tiv', 'WITH', 'dzdur', 'MS', 'OIND', 'intercept']

# Rows with a missing value in the response or any predictor are dropped by OLS.
endog = data['volume'].astype('float')
exog = data[predictors].astype('float')
res = sm.OLS(endog, exog, missing='drop').fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                 volume   R-squared:                       0.238
Model:                            OLS   Adj. R-squared:                  0.225
Method:                 Least Squares   F-statistic:                     18.80
Date:                Mon, 20 Jan 2025   Prob (F-statistic):           7.67e-22
Time:                        16:21:01   Log-Likelihood:                -3349.1
No. Observations:                 430   AIC:                             6714.
Df Residuals:                     422   BIC:                             6747.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
age           16.8208      2.839      5.924      0.0

##### PMS v RMS

In [164]:
volume = 'choroid_volume'

# Keep only the modelling columns and expose the structure volume as 'volume'.
data = df[['dz_type', 'age', 'sex', 'tiv', 'flair_contrast', 'dzdur', volume]].rename(
    columns={volume: 'volume'}
)

# Restrict to the RMS / PMS groups and to rows whose contrast flag is WITH or WITHOUT.
keep_rows = data['dz_type'].isin(['RMS', 'PMS']) & data['flair_contrast'].isin(['WITH', 'WITHOUT'])
data = data[keep_rows]

# Append indicator columns for group, contrast and sex, plus a constant term.
dummy_frames = [pd.get_dummies(data[col]) for col in ('dz_type', 'flair_contrast', 'sex')]
data = pd.concat([data, *dummy_frames], axis=1)
data['intercept'] = 1

# PMS has no indicator among the predictors, so it acts as the reference group.
predictors = ['age', 'Female', 'tiv', 'WITH', 'dzdur', 'RMS', 'intercept']

# Rows with a missing value in the response or any predictor are dropped by OLS.
endog = data['volume'].astype('float')
exog = data[predictors].astype('float')
res = sm.OLS(endog, exog, missing='drop').fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                 volume   R-squared:                       0.258
Model:                            OLS   Adj. R-squared:                  0.246
Method:                 Least Squares   F-statistic:                     23.02
Date:                Mon, 20 Jan 2025   Prob (F-statistic):           2.44e-23
Time:                        16:21:27   Log-Likelihood:                -3149.9
No. Observations:                 405   AIC:                             6314.
Df Residuals:                     398   BIC:                             6342.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
age           12.7335      3.058      4.164      0.0

#### Pineal

##### MS vs not-MS

Note: the MS vs not-MS effect is significant in the model below (which omits `dzdur`), but not when disease duration is added as a covariate

In [174]:
volume = 'pineal_volume'

# Keep only the modelling columns and expose the structure volume as 'volume'.
data = df[['dz_type2', 'age', 'sex', 'tiv', 'flair_contrast', 'dzdur', volume]].rename(
    columns={volume: 'volume'}
)

# Restrict to the MS / !MS groups and to rows whose contrast flag is WITH or WITHOUT.
keep_rows = data['dz_type2'].isin(['MS', '!MS']) & data['flair_contrast'].isin(['WITH', 'WITHOUT'])
data = data[keep_rows]

# Append indicator columns for group, contrast and sex, plus a constant term.
dummy_frames = [pd.get_dummies(data[col]) for col in ('dz_type2', 'flair_contrast', 'sex')]
data = pd.concat([data, *dummy_frames], axis=1)
data['intercept'] = 1

# `dzdur` is selected above but deliberately not part of this predictor list.
predictors = ['age', 'Male', 'tiv', 'WITH', 'MS', 'intercept']

# Rows with a missing value in the response or any predictor are dropped by OLS.
endog = data['volume'].astype('float')
exog = data[predictors].astype('float')
res = sm.OLS(endog, exog, missing='drop').fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                 volume   R-squared:                       0.017
Model:                            OLS   Adj. R-squared:                  0.007
Method:                 Least Squares   F-statistic:                     1.677
Date:                Mon, 20 Jan 2025   Prob (F-statistic):              0.139
Time:                        16:25:14   Log-Likelihood:                -2864.7
No. Observations:                 479   AIC:                             5741.
Df Residuals:                     473   BIC:                             5766.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
age           -0.3214      0.358     -0.897      0.3

##### MS vs NIND vs OIND

Note: this is significant for MS vs Not-MS, but not when adding disease duration as a covariate

In [170]:
volume = 'pineal_volume'

# Keep only the modelling columns and expose the structure volume as 'volume'.
data = df[['dz_type3', 'age', 'sex', 'tiv', 'flair_contrast', 'dzdur', volume]].rename(
    columns={volume: 'volume'}
)

# Restrict to the MS / OIND / NIND groups and to rows whose contrast flag is WITH or WITHOUT.
keep_rows = data['dz_type3'].isin(['MS', 'OIND', 'NIND']) & data['flair_contrast'].isin(['WITH', 'WITHOUT'])
data = data[keep_rows]

# Append indicator columns for group, contrast and sex, plus a constant term.
dummy_frames = [pd.get_dummies(data[col]) for col in ('dz_type3', 'flair_contrast', 'sex')]
data = pd.concat([data, *dummy_frames], axis=1)
data['intercept'] = 1

# NIND has no indicator among the predictors, so it acts as the reference group.
predictors = ['age', 'Female', 'tiv', 'WITH', 'MS', 'OIND', 'intercept']

# Rows with a missing value in the response or any predictor are dropped by OLS.
endog = data['volume'].astype('float')
exog = data[predictors].astype('float')
res = sm.OLS(endog, exog, missing='drop').fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                 volume   R-squared:                       0.027
Model:                            OLS   Adj. R-squared:                  0.014
Method:                 Least Squares   F-statistic:                     2.154
Date:                Mon, 20 Jan 2025   Prob (F-statistic):             0.0463
Time:                        16:24:13   Log-Likelihood:                -2851.2
No. Observations:                 477   AIC:                             5716.
Df Residuals:                     470   BIC:                             5746.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
age           -0.3337      0.360     -0.927      0.3

##### PMS v RMS

In [172]:
volume = 'pineal_volume'

# Keep only the modelling columns and expose the structure volume as 'volume'.
data = df[['dz_type', 'age', 'sex', 'tiv', 'flair_contrast', 'dzdur', volume]].rename(
    columns={volume: 'volume'}
)

# Restrict to the RMS / PMS groups and to rows whose contrast flag is WITH or WITHOUT.
keep_rows = data['dz_type'].isin(['RMS', 'PMS']) & data['flair_contrast'].isin(['WITH', 'WITHOUT'])
data = data[keep_rows]

# Append indicator columns for group, contrast and sex, plus a constant term.
dummy_frames = [pd.get_dummies(data[col]) for col in ('dz_type', 'flair_contrast', 'sex')]
data = pd.concat([data, *dummy_frames], axis=1)
data['intercept'] = 1

# PMS has no indicator among the predictors, so it acts as the reference group.
predictors = ['age', 'Female', 'tiv', 'WITH', 'RMS', 'intercept']

# Rows with a missing value in the response or any predictor are dropped by OLS.
endog = data['volume'].astype('float')
exog = data[predictors].astype('float')
res = sm.OLS(endog, exog, missing='drop').fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                 volume   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.007
Method:                 Least Squares   F-statistic:                     1.605
Date:                Mon, 20 Jan 2025   Prob (F-statistic):              0.158
Time:                        16:24:41   Log-Likelihood:                -2360.5
No. Observations:                 405   AIC:                             4733.
Df Residuals:                     399   BIC:                             4757.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
age           -0.4370      0.362     -1.207      0.2

#### Pituitary

##### MS vs not-MS

In [178]:
volume = 'pituitary_volume'

# Keep only the modelling columns and expose the structure volume as 'volume'.
data = df[['dz_type2', 'age', 'sex', 'tiv', 'flair_contrast', 'dzdur', volume]].rename(
    columns={volume: 'volume'}
)

# Restrict to the MS / !MS groups and to rows whose contrast flag is WITH or WITHOUT.
keep_rows = data['dz_type2'].isin(['MS', '!MS']) & data['flair_contrast'].isin(['WITH', 'WITHOUT'])
data = data[keep_rows]

# Append indicator columns for group, contrast and sex, plus a constant term.
dummy_frames = [pd.get_dummies(data[col]) for col in ('dz_type2', 'flair_contrast', 'sex')]
data = pd.concat([data, *dummy_frames], axis=1)
data['intercept'] = 1

# `dzdur` is selected above but not part of this predictor list.
predictors = ['age', 'Male', 'tiv', 'WITH', 'MS', 'intercept']

# Rows with a missing value in the response or any predictor are dropped by OLS.
endog = data['volume'].astype('float')
exog = data[predictors].astype('float')
res = sm.OLS(endog, exog, missing='drop').fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                 volume   R-squared:                       0.074
Model:                            OLS   Adj. R-squared:                  0.065
Method:                 Least Squares   F-statistic:                     7.593
Date:                Mon, 20 Jan 2025   Prob (F-statistic):           7.10e-07
Time:                        16:26:21   Log-Likelihood:                -2995.1
No. Observations:                 479   AIC:                             6002.
Df Residuals:                     473   BIC:                             6027.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
age           -1.4413      0.470     -3.064      0.0

##### MS vs NIND vs OIND

In [179]:
volume = 'pituitary_volume'

# Keep only the modelling columns and expose the structure volume as 'volume'.
data = df[['dz_type3', 'age', 'sex', 'tiv', 'flair_contrast', 'dzdur', volume]].rename(
    columns={volume: 'volume'}
)

# Restrict to the MS / OIND / NIND groups and to rows whose contrast flag is WITH or WITHOUT.
keep_rows = data['dz_type3'].isin(['MS', 'OIND', 'NIND']) & data['flair_contrast'].isin(['WITH', 'WITHOUT'])
data = data[keep_rows]

# Append indicator columns for group, contrast and sex, plus a constant term.
dummy_frames = [pd.get_dummies(data[col]) for col in ('dz_type3', 'flair_contrast', 'sex')]
data = pd.concat([data, *dummy_frames], axis=1)
data['intercept'] = 1

# NIND has no indicator among the predictors, so it acts as the reference group.
predictors = ['age', 'Female', 'tiv', 'WITH', 'MS', 'OIND', 'intercept']

# Rows with a missing value in the response or any predictor are dropped by OLS.
endog = data['volume'].astype('float')
exog = data[predictors].astype('float')
res = sm.OLS(endog, exog, missing='drop').fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                 volume   R-squared:                       0.078
Model:                            OLS   Adj. R-squared:                  0.066
Method:                 Least Squares   F-statistic:                     6.599
Date:                Mon, 20 Jan 2025   Prob (F-statistic):           1.04e-06
Time:                        16:26:38   Log-Likelihood:                -2982.6
No. Observations:                 477   AIC:                             5979.
Df Residuals:                     470   BIC:                             6008.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
age           -1.4815      0.474     -3.126      0.0

##### PMS v RMS

In [181]:
volume = 'pituitary_volume'

# Keep only the modelling columns and expose the structure volume as 'volume'.
data = df[['dz_type', 'age', 'sex', 'tiv', 'flair_contrast', 'dzdur', volume]].rename(
    columns={volume: 'volume'}
)

# Restrict to the RMS / PMS groups and to rows whose contrast flag is WITH or WITHOUT.
keep_rows = data['dz_type'].isin(['RMS', 'PMS']) & data['flair_contrast'].isin(['WITH', 'WITHOUT'])
data = data[keep_rows]

# Append indicator columns for group, contrast and sex, plus a constant term.
dummy_frames = [pd.get_dummies(data[col]) for col in ('dz_type', 'flair_contrast', 'sex')]
data = pd.concat([data, *dummy_frames], axis=1)
data['intercept'] = 1

# PMS has no indicator among the predictors, so it acts as the reference group.
predictors = ['age', 'Female', 'tiv', 'WITH', 'RMS', 'intercept']

# Rows with a missing value in the response or any predictor are dropped by OLS.
endog = data['volume'].astype('float')
exog = data[predictors].astype('float')
res = sm.OLS(endog, exog, missing='drop').fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                 volume   R-squared:                       0.074
Model:                            OLS   Adj. R-squared:                  0.062
Method:                 Least Squares   F-statistic:                     6.340
Date:                Mon, 20 Jan 2025   Prob (F-statistic):           1.11e-05
Time:                        16:27:11   Log-Likelihood:                -2524.6
No. Observations:                 405   AIC:                             5061.
Df Residuals:                     399   BIC:                             5085.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
age           -1.2256      0.543     -2.259      0.0

### Ordinal Regression on EDSS

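Predict EDSS score using volumes, with MS vs OIND vs NIND as an additional covariate alongside the others. The cell below currently fits an ordinary least-squares model on the numeric EDSS score rather than an ordinal model.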

In [151]:
# Model the EDSS score from choroid volume plus the usual covariates,
# with MS / OIND indicators (NIND has no indicator, so it is the reference group).
data = df[['dz_type3', 'age', 'sex', 'tiv', 'flair_contrast', 'numEDSS', 'choroid_volume', 'pineal_volume', 'pituitary_volume']]

# Work on explicit copies so the column assignments below never hit a slice of `df`.
data = data[data['dz_type3'].isin(['MS', 'OIND', 'NIND'])].copy()
# Integer code per diagnostic group, in order of first appearance.
data['dz_type3_factor'] = data['dz_type3'].factorize(sort=False)[0]

data = data[data['flair_contrast'].isin(['WITH', 'WITHOUT'])].copy()
data['numEDSS'] = pd.Categorical(data['numEDSS'], ordered=True)

# Append indicator columns for group, contrast and sex, plus a constant term.
data = pd.concat((
    data,
    pd.get_dummies(data['dz_type3']),
    pd.get_dummies(data['flair_contrast']),
    pd.get_dummies(data['sex'])), axis=1)
data['intercept'] = 1

predictors = ['age', 'Female', 'tiv', 'WITH', 'MS', 'OIND', 'choroid_volume', 'intercept']

# Drop every row with a missing value in any retained column before fitting.
data = data.dropna()

# TODO(review): ordinal model kept for reference; it refers to `data2` and
# `OrderedModel`, neither of which is defined in the cells shown here.
# mod = OrderedModel(data2['numEDSS'], 
#                    data2[predictors].astype('float'), distr='probit', disp=False)
# res = mod.fit(method='nm')

# `missing` is an argument of the OLS constructor; passed to .fit() it had no
# effect on row handling. The fit is unchanged because of the dropna() above.
res = sm.OLS(data['numEDSS'].astype('float'), 
             data[predictors].astype('float'), missing='drop').fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                numEDSS   R-squared:                       0.105
Model:                            OLS   Adj. R-squared:                  0.092
Method:                 Least Squares   F-statistic:                     7.880
Date:                Mon, 20 Jan 2025   Prob (F-statistic):           4.81e-09
Time:                        16:15:20   Log-Likelihood:                -1607.1
No. Observations:                 477   AIC:                             3230.
Df Residuals:                     469   BIC:                             3264.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
age                0.0663      0.029      2.