In [1]:
import pandas as pd
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
from pathlib import Path
import json
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
from datetime import datetime
import re
from scipy import stats
import numpy as np
import statsmodels.api as sm
import statsmodels
from matplotlib import colormaps
from tqdm.notebook import tqdm
import helpers
from collections import defaultdict
from tqdm.notebook import tqdm


from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.regression.linear_model import OLS

from mri_data import file_manager as fm

## Load Data

In [2]:
drive_root = fm.get_drive_root()
dataroot = drive_root / "3Tpioneer_bids"
data_dir = Path("/home/srs-9/Projects/ms_mri/data")
fig_path = Path("/home/srs-9/Projects/ms_mri/analysis/thalamus/figures_tables/edss_regressions")

choroid_volumes = pd.read_csv(
    "/home/srs-9/Projects/ms_mri/data/choroid_aschoplex_volumes.csv", index_col="subid"
).dropna()
tiv = pd.read_csv("/home/srs-9/Projects/ms_mri/data/tiv_data.csv", index_col="subid")

df = pd.read_csv(
    "/home/srs-9/Projects/ms_mri/data/clinical_data_processed.csv", index_col="subid"
)
sdmt = pd.read_csv("/home/srs-9/Projects/ms_mri/analysis/thalamus/SDMT_sheet.csv", index_col="subid")
df = df.join([choroid_volumes, tiv, sdmt['SDMT']])
df['SDMT'] = pd.to_numeric(df['SDMT'], errors='coerce')
df['thalamus_sqrt'] = np.sqrt(df['thalamus'])
df['thalamus_curt'] = np.sqrt(df['thalamus']**3)
df_z = df.copy()
numeric_cols = df.select_dtypes(include='number').columns
df_z[numeric_cols] = df_z[numeric_cols].apply(stats.zscore, nan_policy="omit")

df_ms = df[df['dz_type2'] == "MS"]
df_ms_z = df_ms.copy()
df_ms_z[numeric_cols] = df_ms_z[numeric_cols].apply(stats.zscore, nan_policy="omit")

# assert df.loc[1340, 'EDSS'] == 2.5


viridis = colormaps['viridis'].resampled(20)

colors = helpers.get_colors()

In [97]:
df_thomas = pd.read_csv(data_dir / "hipsthomas_vols.csv", index_col="subid")
cols_orig = df_thomas.columns
new_colnames = {}
for col in df_thomas.columns:
    new_col = re.sub(r"(\d+)-([\w-]+)", r"\2_\1", col)
    new_col = re.sub("-", "_", new_col)
    new_colnames[col] = new_col
df_thomas = df_thomas.rename(columns=new_colnames)
df_thomas_norm = df_thomas.apply(lambda col: col / df_thomas['THALAMUS_1'])
df_thomas_z = df_thomas.apply(stats.zscore, nan_policy="omit")
df_thomas_norm_z = df_thomas_norm.apply(stats.zscore, nan_policy="omit")

df_thomas_left = pd.read_csv(data_dir / "hipsthomas_left_vols.csv", index_col="subid")
df_thomas_left = df_thomas_left.rename(columns=new_colnames)
df_thomas_left_z = df_thomas_left.apply(stats.zscore, nan_policy="omit")

df_thomas_right = pd.read_csv(data_dir / "hipsthomas_right_vols.csv", index_col="subid")
df_thomas_right = df_thomas_right.rename(columns=new_colnames)
df_thomas_right_z = df_thomas_right.apply(stats.zscore, nan_policy="omit")


thalamic_nuclei = [2, 4, 5, 6, 7, 8, 9, 10, 11, 12]
thalamic_nuclei_str = [str(i) for i in thalamic_nuclei]

hips_thomas_ref = pd.read_csv(
    "/home/srs-9/Projects/ms_mri/data/hipsthomas_struct_index.csv", index_col="index"
)['struct']
# hips_thomas_ref.rename(columns={"struct": "struct_name"}, inplace=True)

choroid_dists = pd.read_csv(data_dir / "centroid-choroid_SDT.csv", index_col="subid")
ventricle_dists = pd.read_csv(
    data_dir / "centroid-ventricle_SDT.csv", index_col="subid"
)

mni_choroid_dists = pd.read_csv("/home/srs-9/Projects/ms_mri/data/mni-centroid-choroid_SDT2.csv")

def combine_nuclei(df):
    df2 = pd.DataFrame()
    df2['anterior'] = df['AV_2']
    df2['ventral'] = df['VA_4'] + df['VLa_5'] + df['VLP_6'] + df['VPL_7']
    df2['intralaminar'] = df['CM_11'] 
    df2['medial'] = df['MD_Pf_12']
    df2['posterior'] = df['Pul_8'] + df['LGN_9'] + df['MGN_10']
    df2['THALAMUS_1'] = df['THALAMUS_1']
    return df2

df_thomas2 = combine_nuclei(df_thomas)
df_thomas2_z = df_thomas2.apply(stats.zscore, nan_policy="omit")

df_thomas2_left = combine_nuclei(df_thomas_left)
df_thomas2_left_z = df_thomas2_left.apply(stats.zscore, nan_policy="omit")

df_thomas2_right = combine_nuclei(df_thomas_right)
df_thomas2_right_z = df_thomas2_right.apply(stats.zscore, nan_policy="omit")

## Functions

In [67]:
def compute_se_diff(se1, n1, se2, n2):
    return np.sqrt((se1**2/n1) + (se2**2/n2))

def plot_regression(
    data, predictor, outcome, covariates, xlabel=None, ylabel=None, title=None,
    color="blue1"
):
    plus_covariates = ""
    if len(covariates) > 0:
        plus_covariates = f"+ {' + '.join(covariates)}"
    if xlabel is None:
        xlabel = predictor
    if ylabel is None:
        ylabel = outcome
    if title is None:
        title = f"{outcome} vs {predictor}"
    
    light_color = colors[f"light {color}"]
    dark_color = colors[f"dark {color}"]

    formula = f"{outcome} ~ {predictor} {plus_covariates}"
    res = sm.OLS.from_formula(formula, data=data).fit()
    x, y_pred, y_lims = helpers.get_regression_y(data, res, predictor, outcome)

    fig, axs = plt.subplot_mosaic(
        [['histx', '.'], ['scatter', 'histy']],
        figsize=(8, 6),
        width_ratios=(4, 1),
        height_ratios=(1, 4),
        layout='constrained',
    )

    helpers.scatter_hist(
        data[predictor],
        data[outcome],
        axs['scatter'],
        axs['histx'],
        axs['histy'],
        light_color=light_color,
        dark_color=dark_color,
    )

    axs['scatter'].plot(x, y_pred, color="black")
    axs['scatter'].fill_between(
        x, y_lims[0], y_lims[1], alpha=0.4, color=light_color
    )
    axs['scatter'].set_ylabel(ylabel)
    axs['scatter'].set_xlabel(xlabel)
    fig.suptitle(title)
    return fig, axs

## Main MRI Features

### Regressions

#### MS Patients

In [60]:
model_data = df_ms_z.copy()
model_data = model_data.join([df_thomas_z])
covariates = "age + Female + tiv"

outcome = "EDSS_sqrt"
predictors = ["brain", "white", "grey", "cortical_thickness", "THALAMUS_1", "t2lv", "PRL", "choroid_volume"]

pvals = {}
coefs = {}
stderrs = {}
llci = {}
ulci = {}
ci_str = {}
r2 = {}

for x in predictors:
    formula = f"{outcome} ~ {x} + {covariates}"
    res = sm.OLS.from_formula(formula, data=model_data).fit()
    pvals[x] = res.pvalues[x]
    coefs[x] = res.params[x]
    stderrs[x] = res.HC0_se[x]
    ci = res.conf_int()
    llci[x] = ci.loc[x, 0]
    ulci[x] = ci.loc[x, 1]
    ci_str[x] = f"[{llci[x]:.6f}, {ulci[x]:.6f}]"
    r2[x] = res.rsquared_adj


regression_results = pd.DataFrame({"coef": coefs, "stderr": stderrs, "llci": llci, "ulci": ulci, "pvals": pvals, "ci": ci_str})
regression_results['p_fdr'] = stats.false_discovery_control(
    regression_results['pvals'], method='bh'
)
regression_results['R2'] = r2
regression_results.index.name = "structure"
regression_results.to_excel(fig_path / "EDSS_and_main_mri_features_MS.xlsx")
regression_results

Unnamed: 0_level_0,coef,stderr,llci,ulci,pvals,ci,p_fdr,R2
structure,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
brain,-0.081767,0.047614,-0.17598,0.012446,0.08876744,"[-0.175980, 0.012446]",0.1183566,0.217506
white,-0.033401,0.045745,-0.121893,0.055091,0.4586032,"[-0.121893, 0.055091]",0.4586032,0.213391
grey,-0.104938,0.050713,-0.203374,-0.006501,0.03672554,"[-0.203374, -0.006501]",0.05876086,0.220066
cortical_thickness,-0.220291,0.044493,-0.312,-0.128582,3.147701e-06,"[-0.312000, -0.128582]",1.25908e-05,0.249745
THALAMUS_1,-0.292028,0.045303,-0.384468,-0.199588,1.220738e-09,"[-0.384468, -0.199588]",9.765905e-09,0.274819
t2lv,0.111863,0.047572,0.028841,0.194885,0.00838322,"[0.028841, 0.194885]",0.02235525,0.224561
PRL,0.106407,0.039786,0.02233,0.190484,0.01323684,"[0.022330, 0.190484]",0.02647369,0.223152
choroid_volume,0.071059,0.043683,-0.020848,0.162966,0.129349,"[-0.020848, 0.162966]",0.1478274,0.216463


In [75]:
formula = "EDSS_sqrt ~ age + Female + tiv + THALAMUS_1"
res = sm.OLS.from_formula(formula, data=model_data).fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:              EDSS_sqrt   R-squared:                       0.276
Model:                            OLS   Adj. R-squared:                  0.269
Method:                 Least Squares   F-statistic:                     41.56
Date:                Wed, 11 Jun 2025   Prob (F-statistic):           1.59e-29
Time:                        14:50:51   Log-Likelihood:                -554.25
No. Observations:                 441   AIC:                             1118.
Df Residuals:                     436   BIC:                             1139.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0441      0.041     -1.073      0.2

#### NIND Group

In [59]:
model_data = df_z.join(df_thomas_z)
model_data = model_data[model_data['dz_type3'].isin(["NIND"])]
covariates = "age + Female + tiv"

outcome = "EDSS_sqrt"
predictors = ["brain", "white", "grey", "cortical_thickness", "THALAMUS_1", "t2lv", "PRL", "choroid_volume"]

pvals = {}
coefs = {}
stderrs = {}
llci = {}
ulci = {}
ci_str = {}
r2 = {}

for x in predictors:
    formula = f"{outcome} ~ {x} + {covariates}"
    res = sm.OLS.from_formula(formula, data=model_data).fit()
    pvals[x] = res.pvalues[x]
    coefs[x] = res.params[x]
    stderrs[x] = res.HC0_se[x]
    ci = res.conf_int()
    llci[x] = ci.loc[x, 0]
    ulci[x] = ci.loc[x, 1]
    ci_str[x] = f"[{llci[x]:.6f}, {ulci[x]:.6f}]"
    r2[x] = res.rsquared_adj


regression_results = pd.DataFrame({"coef": coefs, "stderr": stderrs, "llci": llci, "ulci": ulci, "pvals": pvals, "ci": ci_str})
regression_results['p_fdr'] = stats.false_discovery_control(
    regression_results['pvals'], method='bh'
)
regression_results['R2'] = r2

regression_results.index.name = "structure"

regression_results.to_excel(fig_path / "EDSS_and_main_mri_features_NIND.xlsx")
regression_results

Unnamed: 0_level_0,coef,stderr,llci,ulci,pvals,ci,p_fdr,R2
structure,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
brain,-0.179508,0.257239,-0.781819,0.422803,0.539081,"[-0.781819, 0.422803]",0.887287,-0.03254
white,0.088013,0.235531,-0.452799,0.628825,0.736379,"[-0.452799, 0.628825]",0.949229,-0.048222
grey,-0.381446,0.260129,-0.989191,0.226299,0.203827,"[-0.989191, 0.226299]",0.543538,0.037907
cortical_thickness,-0.622947,0.16734,-1.216941,-0.028953,0.040837,"[-1.216941, -0.028953]",0.326694,0.169074
thalamus,0.033765,0.462934,-1.064874,1.132404,0.949229,"[-1.064874, 1.132404]",0.949229,-0.054785
t2lv,0.034695,0.158398,-0.406233,0.475623,0.870541,"[-0.406233, 0.475623]",0.949229,-0.05343
PRL,2.598156,0.566916,-0.892707,6.089019,0.135308,"[-0.892707, 6.089019]",0.541231,0.071142
choroid_volume,0.146956,0.212451,-0.367336,0.661249,0.554554,"[-0.367336, 0.661249]",0.887287,-0.073471


In [84]:
formula = "EDSS_sqrt ~ age + Female + tiv + t2lv_logtrans + cortical_thickness"
res = sm.OLS.from_formula(formula, data=model_data).fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:              EDSS_sqrt   R-squared:                       0.256
Model:                            OLS   Adj. R-squared:                  0.247
Method:                 Least Squares   F-statistic:                     29.93
Date:                Wed, 11 Jun 2025   Prob (F-statistic):           3.79e-26
Time:                        15:47:01   Log-Likelihood:                -560.28
No. Observations:                 441   AIC:                             1133.
Df Residuals:                     435   BIC:                             1157.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
Intercept             -0.0173      0

### Compare Thalamus Associations for PMS vs RMS

Thalamus does not predict EDSS any better in PMS patients compared to RMS patients.

There are other methods to try in the GPT convo (not because I think this result will change, but to practice them here so I can apply them elsewhere if desired)

- Chow / SUest
- Compare partial correlations with Fisher's z

In [26]:
formula = "EDSS_sqrt ~ THALAMUS_1*PMS + age*PMS + Female + tiv"

model_data = df_z.join([df_thomas_z])
model_data = model_data[model_data.dz_type5.isin(["PMS", "RMS"])]
model_data = pd.concat([model_data, pd.get_dummies(model_data.dz_type5, dtype="int")], axis=1)
model_data.loc[model_data['EDSS_sqrt'].isna(), 'EDSS_sqrt'] = model_data['EDSS_sqrt'].median()

res = sm.OLS.from_formula(formula, data=model_data).fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:              EDSS_sqrt   R-squared:                       0.394
Model:                            OLS   Adj. R-squared:                  0.385
Method:                 Least Squares   F-statistic:                     42.80
Date:                Mon, 09 Jun 2025   Prob (F-statistic):           1.95e-46
Time:                        13:40:01   Log-Likelihood:                -544.05
No. Observations:                 468   AIC:                             1104.
Df Residuals:                     460   BIC:                             1137.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept         -0.2137      0.042     -5.

Something is wrong here. Not significant if I compute standard deviation from the stderrs and plug into a t test calculator.

Using a t-test here isn't the best course anyways (according to GPT, check convo "Comparing regression betas"), but still curious what went wrong

In [50]:
formula = "EDSS_sqrt ~ THALAMUS_1 + age + Female + tiv"

model_data = df_z.join([df_thomas_z])

model_data_RMS = model_data.copy()
model_data_RMS = model_data_RMS[model_data_RMS['dz_type5'].isin(["RMS"])]
res_RMS = sm.OLS.from_formula(formula, data=model_data_RMS).fit()


model_data_PMS = model_data.copy()
model_data_PMS = model_data_PMS[model_data_PMS['dz_type5'].isin(["PMS"])]
res_PMS = sm.OLS.from_formula(formula, data=model_data_PMS).fit()

param1 = res_RMS.params['THALAMUS_1']
param2 = res_PMS.params['THALAMUS_1']

ci1 = res_RMS.conf_int()
ci2 = res_PMS.conf_int()

se1 = res_RMS.HC0_se['THALAMUS_1']
dof1 = res_RMS.df_resid
se2 = res_PMS.HC0_se['THALAMUS_1']
dof2 = res_PMS.df_resid

se_diff = compute_se_diff(se1, dof1, se2, dof2)
mean_diff = param1 - param2
t_stat = mean_diff / se_diff

dof = dof1 + dof2 - 2
p_val = stats.t.sf(t_stat, dof)

print("RMS:")
print(f"{param1:.2} ± {se1:.2} [{ci1.loc['THALAMUS_1', 0]:.2}, {ci1.loc['THALAMUS_1', 1]:.2}]")
print("\n")
print("PMS:")
print(f"{param2:.2} ± {se2:.2} [{ci2.loc['THALAMUS_1', 0]:.2}, {ci2.loc['THALAMUS_1', 1]:.2}]")
print("\n")

print(f"p = {p_val:.2}")

RMS:
-0.19 ± 0.058 [-0.3, -0.089]


PMS:
-0.22 ± 0.072 [-0.38, -0.052]


p = 0.0027


## HIPS-THOMAS

Check these out:

- https://pmc.ncbi.nlm.nih.gov/articles/PMC11081814/
- https://pmc.ncbi.nlm.nih.gov/articles/PMC11087027/

### L/R for thalamus diagram

In [136]:
model_data = df_z.join(df_thomas_left_z)
model_data['whole_thalamus'] = df_thomas_z['THALAMUS_1']
model_data = model_data[model_data['dz_type5'].isin(["RMS", "PMS"])]
covariates = "age + Female + tiv + whole_thalamus"

pvals = {}
coefs = {}
stderrs = {}
llci = {}
ulci = {}
ci_str = {}
r2 = {}

outcome = "SDMT"

structs = thalamic_nuclei
all_results = {}
for x in structs:
    col = hips_thomas_ref[x]
    formula = f"{outcome} ~ {col}*dzdur + {covariates}"
    res = sm.OLS.from_formula(formula, data=model_data).fit()
    all_results[col] = res
    pvals[col] = res.pvalues[col]
    coefs[col] = res.params[col]
    stderrs[col] = res.HC0_se[col]
    ci = res.conf_int()
    llci[col] = ci.loc[col, 0]
    ulci[col] = ci.loc[col, 1]
    ci_str[col] = f"[{llci[col]:.6f}, {ulci[col]:.6f}]"
    r2[col] = res.rsquared_adj

regression_results = pd.DataFrame({"coef": coefs, "pvals": pvals})
regression_results['p_fdr'] = stats.false_discovery_control(regression_results['pvals'], method='bh')
bins = np.linspace(regression_results['coef'].min(), regression_results.coef.max(), 10)
regression_results['coef_bin'] = np.digitize(regression_results['coef'], bins)
regression_results.sort_values(by="coef_bin")

Unnamed: 0,coef,pvals,p_fdr,coef_bin
VA_4,-0.101688,0.102935,0.343052,1
MGN_10,-0.013086,0.838949,0.992107,3
VLa_5,-0.000577,0.992107,0.992107,4
VLP_6,0.013163,0.86765,0.992107,4
VPL_7,0.001127,0.986813,0.992107,4
LGN_9,0.059205,0.28365,0.472751,6
CM_11,0.11024,0.058184,0.290922,8
MD_Pf_12,0.105914,0.231104,0.462208,8
Pul_8,0.142073,0.137221,0.343052,9
AV_2,0.164519,0.005956,0.059559,10


In [135]:
model_data = df_z.join(df_thomas_right_z)
model_data['whole_thalamus'] = df_thomas_z['THALAMUS_1']

model_data = model_data[model_data['dz_type5'].isin(["RMS", "PMS"])]
covariates = "age + Female + tiv + whole_thalamus"

pvals = {}
coefs = {}
stderrs = {}
llci = {}
ulci = {}
ci_str = {}
r2 = {}

outcome = "SDMT"

structs = thalamic_nuclei
all_results = {}
for x in structs:
    col = hips_thomas_ref[x]
    formula = f"{outcome} ~ {col}*dzdur + {covariates}"
    res = sm.OLS.from_formula(formula, data=model_data).fit()
    all_results[col] = res
    pvals[col] = res.pvalues[col]
    coefs[col] = res.params[col]
    stderrs[col] = res.HC0_se[col]
    ci = res.conf_int()
    llci[col] = ci.loc[col, 0]
    ulci[col] = ci.loc[col, 1]
    ci_str[col] = f"[{llci[col]:.6f}, {ulci[col]:.6f}]"
    r2[col] = res.rsquared_adj

regression_results = pd.DataFrame({"coef": coefs, "pvals": pvals})
regression_results['p_fdr'] = stats.false_discovery_control(regression_results['pvals'], method='bh')
bins = np.linspace(regression_results['coef'].min(), regression_results.coef.max(), 10)
regression_results['coef_bin'] = np.digitize(regression_results['coef'], bins)
regression_results.sort_values(by="coef_bin")

Unnamed: 0,coef,pvals,p_fdr,coef_bin
MD_Pf_12,-0.136758,0.115708,0.578541,1
MGN_10,-0.073673,0.255085,0.850284,3
Pul_8,-0.055095,0.596611,0.986718,4
AV_2,-0.003957,0.946986,0.986718,6
VA_4,0.017358,0.788785,0.986718,6
VLa_5,0.02058,0.72347,0.986718,6
VPL_7,-0.003041,0.963638,0.986718,6
CM_11,0.000989,0.986718,0.986718,6
VLP_6,0.048392,0.539701,0.986718,8
LGN_9,0.100644,0.088978,0.578541,10


### Bilateral Thalamus

In [132]:
# data_to_sample = df_z.join(df_thomas_z).reset_index()
# inds = np.random.randint(0, len(data_to_sample), size=(300,))

# rng = np.random.default_rng()
# inds = rng.choice(len(data_to_sample), 200)
# model_data = data_to_sample.loc[inds, :].set_index("subid")

model_data = df_z.join(df_thomas_z)
model_data = model_data[model_data['dz_type5'].isin(["RMS", "PMS"])]
covariates = "age + Female + tiv"

pvals = {}
coefs = {}
stderrs = {}
llci = {}
ulci = {}
ci_str = {}
r2 = {}

outcome = "EDSS_sqrt"

structs = thalamic_nuclei + [1]
all_results = {}
for x in structs:
    col = hips_thomas_ref[x]
    formula = f"{outcome} ~ {col}*dzdur + {covariates}"
    res = sm.OLS.from_formula(formula, data=model_data).fit()
    all_results[col] = res
    pvals[col] = res.pvalues[col]
    coefs[col] = res.params[col]
    stderrs[col] = res.HC0_se[col]
    ci = res.conf_int()
    llci[col] = ci.loc[col, 0]
    ulci[col] = ci.loc[col, 1]
    ci_str[col] = f"[{llci[col]:.6f}, {ulci[col]:.6f}]"
    r2[col] = res.rsquared_adj



regression_results = pd.DataFrame({"coef": coefs, "stderr": stderrs, "llci": llci, "ulci": ulci, "pvals": pvals, "ci": ci_str})
regression_results['p_fdr'] = stats.false_discovery_control(
    regression_results['pvals'], method='bh'
)
regression_results['R2'] = r2
regression_results.sort_values(by="coef", inplace=True)
regression_results.to_excel(fig_path / "thalamic_nuclei.xlsx")
regression_results

Unnamed: 0,coef,stderr,llci,ulci,pvals,ci,p_fdr,R2
AV_2,-0.24615,0.050051,-0.340305,-0.151995,4.16212e-07,"[-0.340305, -0.151995]",4e-06,0.287471
THALAMUS_1,-0.238654,0.045351,-0.334926,-0.142382,1.538312e-06,"[-0.334926, -0.142382]",4e-06,0.297414
Pul_8,-0.224721,0.042781,-0.314127,-0.135316,1.107948e-06,"[-0.314127, -0.135316]",4e-06,0.298971
VLP_6,-0.223242,0.049036,-0.322426,-0.124059,1.221864e-05,"[-0.322426, -0.124059]",2.2e-05,0.284468
VPL_7,-0.219521,0.043713,-0.312219,-0.126823,4.297471e-06,"[-0.312219, -0.126823]",9e-06,0.293372
LGN_9,-0.212283,0.039497,-0.296814,-0.127752,1.131578e-06,"[-0.296814, -0.127752]",4e-06,0.296883
MGN_10,-0.193351,0.043738,-0.28882,-0.097881,8.035349e-05,"[-0.288820, -0.097881]",0.000113,0.27768
MD_Pf_12,-0.18484,0.043352,-0.276241,-0.09344,8.225939e-05,"[-0.276241, -0.093440]",0.000113,0.280289
VLa_5,-0.16746,0.049402,-0.271315,-0.063604,0.001635378,"[-0.271315, -0.063604]",0.001999,0.264186
CM_11,-0.132125,0.048073,-0.227993,-0.036256,0.007017786,"[-0.227993, -0.036256]",0.00772,0.262361


Correlate each of the thalamic nuclei to EDSS, including whole thalalmic volume as a covariate. None of the p values are significant, but we can still compare coefficients to see each nucleus's relative contribution to EDSS. Will bootstrap these regressions in the following cell to see if the rank is significant.  

In [None]:
model_data = df_z.join(df_thomas_z)
model_data['whole_thalamus'] = df_thomas_z['THALAMUS_1']
model_data = model_data[model_data['dz_type5'].isin(["RMS", "PMS"])]
covariates = "age + Female + tiv + whole_thalamus"

pvals = {}
coefs = {}
thal_coefs = {}
stderrs = {}
llci = {}
ulci = {}
ci_str = {}

outcome = "EDSS_sqrt"

structs = thalamic_nuclei
all_results = {}
for x in structs:
    col = hips_thomas_ref[x]
    formula = f"{outcome} ~ {col}*dzdur + {covariates}"
    res = sm.OLS.from_formula(formula, data=model_data).fit()
    pvals[col] = res.pvalues[col]
    coefs[col] = res.params[col]
    thal_coefs[col] = res.params['whole_thalamus']
    stderrs[col] = res.HC0_se[col]
    ci = res.conf_int()
    llci[col] = ci.loc[col, 0]
    ulci[col] = ci.loc[col, 1]
    ci_str[col] = f"[{llci[col]:.6f}, {ulci[col]:.6f}]"



regression_results = pd.DataFrame({"coef": coefs, "thal_coef": thal_coefs, "stderr": stderrs, "llci": llci, "ulci": ulci, "pvals": pvals, "ci": ci_str})
regression_results['p_fdr'] = stats.false_discovery_control(
    regression_results['pvals'], method='bh'
)
main_coefs = np.array([coefs[hips_thomas_ref[x]] for x in structs])
regression_results.sort_values(by="coef", inplace=True)
# regression_results.to_excel(fig_path / "thalamic_nuclei-thalamus_control.xlsx")
regression_results

Unnamed: 0,coef,thal_coef,stderr,llci,ulci,pvals,ci,p_fdr
Pul_8,-0.231755,0.140193,0.229634,-0.662783,0.199273,0.288247,"[-0.662783, 0.199273]",0.720619
VPL_7,-0.045152,-0.115548,0.138047,-0.348798,0.258493,0.768327,"[-0.348798, 0.258493]",0.919766
AV_2,-0.014706,-0.159832,0.132676,-0.303977,0.274565,0.919766,"[-0.303977, 0.274565]",0.919766
MGN_10,0.030353,-0.166037,0.157458,-0.287444,0.348149,0.849917,"[-0.287444, 0.348149]",0.919766
LGN_9,0.060382,-0.143471,0.110966,-0.210902,0.331666,0.659376,"[-0.210902, 0.331666]",0.919766
MD_Pf_12,0.089955,-0.253138,0.217808,-0.331654,0.511565,0.672628,"[-0.331654, 0.511565]",0.919766
VLP_6,0.107453,-0.250984,0.219252,-0.261772,0.476677,0.564552,"[-0.261772, 0.476677]",0.919766
CM_11,0.246302,-0.296324,0.140038,-0.041402,0.534005,0.092427,"[-0.041402, 0.534005]",0.323064
VA_4,0.271324,-0.332803,0.181552,-0.050022,0.592671,0.096919,"[-0.050022, 0.592671]",0.323064
VLa_5,0.275967,-0.286637,0.123941,-0.020823,0.572758,0.067987,"[-0.020823, 0.572758]",0.323064


Bootstrap the regressions

In [121]:
data_to_sample = df_z.join(df_thomas_right_z)
data_to_sample = data_to_sample[data_to_sample['dz_type5'].isin(["RMS", "PMS"])].reset_index()
data_to_sample.loc[data_to_sample['EDSS_sqrt'].isna(), 'EDSS_sqrt'] = model_data['EDSS_sqrt'].median()

covariates = "age + Female + tiv"

def get_zeros():
    return np.zeros((1000,))

coefs_boot = defaultdict(get_zeros)

outcome = "EDSS_sqrt"

for i in tqdm(range(1000)):
    inds = np.random.randint(0, data_to_sample.index.max(), len(data_to_sample))
    model_data = data_to_sample.loc[inds, :]
    for x in structs:
        col = hips_thomas_ref[x]
        formula = f"{outcome} ~ {col} + {covariates}"
        res = sm.OLS.from_formula(formula, data=model_data).fit()
        coefs_boot[col][i] = res.params[col]

  0%|          | 0/1000 [00:00<?, ?it/s]

Get the mean and 95% CI of the bootstrapped coefficients for each nucleus

In [126]:
coefs_copy = coefs_boot.copy()
llci = {}
ulci = {}
means = {}
for x in structs:
    col = hips_thomas_ref[x]
    coefs_copy[col] = np.sort(coefs_copy[col])
    llci[col] = coefs_copy[col][25]
    ulci[col] = coefs_copy[col][975]
    means[col] = np.mean(coefs_copy[col])
    print(f"{means[col]:.4f} [{llci[col]:2.2f}, {ulci[col]:2.2f}]")

coefs_bootmean = np.array([means[hips_thomas_ref[x]] for x in structs])
np.corrcoef(main_coefs, coefs_bootmean)[0,1]

-0.2111 [-0.31, -0.11]
-0.1685 [-0.26, -0.06]
-0.1914 [-0.29, -0.08]
-0.2630 [-0.35, -0.16]
-0.2197 [-0.31, -0.12]
-0.2362 [-0.33, -0.15]
-0.2459 [-0.33, -0.17]
-0.1943 [-0.29, -0.10]
-0.1371 [-0.23, -0.04]
-0.2011 [-0.28, -0.11]


0.8048577608658982

Correlate the coefficients of the nuclei for each boostrapped sample to the main regression to see if the relative strengths of the nuclei's coefficients are consistent across resamples

In [123]:
coefs_boot_arr = np.zeros((len(structs), 1000))
for i, x in enumerate(structs):
    col = hips_thomas_ref[x]
    coefs_boot_arr[i, :] = coefs_boot[col]

corr_coefs = np.zeros((1000,))
kendal_taus = np.zeros((1000,))
for i in range(1000):
    corr_coefs[i] = np.corrcoef(coefs_boot_arr[:,i], main_coefs)[0,1]
    kendal_taus[i] = stats.kendalltau(coefs_boot_arr[:,i], main_coefs)[0]
mean_corrcoef = np.mean(corr_coefs)
mean_tau = np.mean(kendal_taus)
corr_coefs = np.sort(corr_coefs)
kendal_taus = np.sort(kendal_taus)
print(f"{mean_corrcoef:.2f} [{corr_coefs[25]:.2f}, {corr_coefs[975]:.2f}]")
print(f"{mean_tau:.2f} [{kendal_taus[25]:.2f}, {kendal_taus[975]:.2f}]")

0.50 [0.08, 0.81]
0.32 [-0.02, 0.64]


In [42]:
win_counts = np.zeros((coefs_boot_arr.shape[0],))
for i in range(1000):
    win_counts[coefs_boot_arr[:,i].argmin()] += 1

win_count_df = pd.Series(win_counts/1000, index=hips_thomas_ref[thalamic_nuclei])
win_count_df.sort_values(ascending=False)

struct
LGN_9       0.234
AV_2        0.201
VLP_6       0.183
Pul_8       0.176
VPL_7       0.136
MGN_10      0.029
VLa_5       0.020
MD_Pf_12    0.017
CM_11       0.004
VA_4        0.000
dtype: float64

In [138]:
model_data = df_z.join([df_thomas2_z])
model_data = model_data[model_data.dz_type2 == "MS"]

covariates = "age + Female + tiv"

pvals = {}
coefs = {}
stderrs = {}
llci = {}
ulci = {}
ci_str = {}

for col in df_thomas2:
    formula = f"SDMT ~ {col}*dzdur + {covariates}"
    res = sm.OLS.from_formula(formula, data=model_data).fit()
    pvals[col] = res.pvalues[col]
    coefs[col] = res.params[col]
    stderrs[col] = res.HC0_se[col]
    ci = res.conf_int()
    llci[col] = ci.loc[col, 0]
    ulci[col] = ci.loc[col, 1]
    ci_str[col] = f"[{llci[col]:.6f}, {ulci[col]:.6f}]"

regression_results = pd.DataFrame({"coef": coefs, "stderr": stderrs, "llci": llci, "ulci": ulci, "pvals": pvals, "ci": ci_str})
regression_results['p_fdr'] = stats.false_discovery_control(
    regression_results['pvals'], method='bh'
)

regression_results.sort_values(by="coef")

Unnamed: 0,coef,stderr,llci,ulci,pvals,ci,p_fdr
intralaminar,0.286602,0.050193,0.188608,0.384595,1.675085e-08,"[0.188608, 0.384595]",1.675085e-08
medial,0.295248,0.051669,0.201886,0.388609,1.176907e-09,"[0.201886, 0.388609]",2.141751e-09
anterior,0.298352,0.047016,0.200579,0.396125,4.149486e-09,"[0.200579, 0.396125]",4.979384e-09
posterior,0.325328,0.052922,0.234183,0.416473,8.570286e-12,"[0.234183, 0.416473]",2.571086e-11
ventral,0.330423,0.058725,0.22538,0.435465,1.427834e-09,"[0.225380, 0.435465]",2.141751e-09
THALAMUS_1,0.349394,0.057068,0.251523,0.447264,8.503268e-12,"[0.251523, 0.447264]",2.571086e-11


In [117]:
model_data = df_z.join(df_thomas_z)

covariates = "age + Female + tiv"

pvals = {}
coefs = {}
stderrs = {}
llci = {}
ulci = {}
ci_str = {}

for x in thalamic_nuclei:
    col = hips_thomas_ref[x]
    res = sm.OLS.from_formula(f"{col} ~ THALAMUS_1 + tiv", data=model_data).fit()
    model_data[f"{col}_resid"] = res.resid

    col2 = f"{col}_resid"
    formula = f"EDSS_sqrt ~ {col2}*dzdur + {covariates}"
    res = sm.OLS.from_formula(formula, data=model_data).fit()
    pvals[col] = res.pvalues[col2]
    coefs[col] = res.params[col2]
    stderrs[col] = res.HC0_se[col2]
    ci = res.conf_int()
    llci[col] = ci.loc[col2, 0]
    ulci[col] = ci.loc[col2, 1]
    ci_str[col] = f"[{llci[col]:.6f}, {ulci[col]:.6f}]"

regression_results = pd.DataFrame({"coef": coefs, "stderr": stderrs, "llci": llci, "ulci": ulci, "pvals": pvals, "ci": ci_str})
regression_results['p_fdr'] = stats.false_discovery_control(
    regression_results['pvals'], method='bh'
)

regression_results.sort_values(by="coef")

Unnamed: 0,coef,stderr,llci,ulci,pvals,ci,p_fdr
LGN_9,-0.149686,0.051829,-0.264497,-0.034874,0.01072,"[-0.264497, -0.034874]",0.107202
VPL_7,-0.109161,0.077652,-0.265438,0.047115,0.170535,"[-0.265438, 0.047115]",0.426338
VLP_6,-0.106625,0.092477,-0.286748,0.073498,0.245336,"[-0.286748, 0.073498]",0.490673
AV_2,-0.088672,0.070059,-0.210556,0.033211,0.153502,"[-0.210556, 0.033211]",0.426338
Pul_8,-0.029199,0.124842,-0.269485,0.211087,0.811376,"[-0.269485, 0.211087]",0.99724
MGN_10,-0.007467,0.073632,-0.15023,0.135296,0.918183,"[-0.150230, 0.135296]",0.99724
VLa_5,0.000216,0.064464,-0.122552,0.122984,0.99724,"[-0.122552, 0.122984]",0.99724
CM_11,0.060011,0.06794,-0.06781,0.187832,0.356714,"[-0.067810, 0.187832]",0.514482
MD_Pf_12,0.100432,0.104229,-0.115016,0.315881,0.360137,"[-0.115016, 0.315881]",0.514482
VA_4,0.124176,0.062814,-0.004679,0.253031,0.058882,"[-0.004679, 0.253031]",0.294409


In [124]:
model_data = df_z.join([df_thomas2_z])
model_data = model_data[model_data.dz_type2 == "MS"]

covariates = "age + Female + tiv + THALAMUS_1"

pvals = {}
coefs = {}
stderrs = {}
llci = {}
ulci = {}
ci_str = {}

for col in df_thomas2:
    formula = f"EDSS_sqrt ~ {col}*dzdur + {covariates}"
    res = sm.OLS.from_formula(formula, data=model_data).fit()
    pvals[col] = res.pvalues[col]
    coefs[col] = res.params[col]
    stderrs[col] = res.HC0_se[col]
    ci = res.conf_int()
    llci[col] = ci.loc[col, 0]
    ulci[col] = ci.loc[col, 1]
    ci_str[col] = f"[{llci[col]:.6f}, {ulci[col]:.6f}]"

regression_results = pd.DataFrame({"coef": coefs, "stderr": stderrs, "llci": llci, "ulci": ulci, "pvals": pvals, "ci": ci_str})
regression_results['p_fdr'] = stats.false_discovery_control(
    regression_results['pvals'], method='bh'
)

regression_results.sort_values(by="coef")

Unnamed: 0,coef,stderr,llci,ulci,pvals,ci,p_fdr
THALAMUS_1,-0.238654,0.045351,-0.334926,-0.142382,2e-06,"[-0.334926, -0.142382]",9e-06
anterior,-0.132947,0.069969,-0.257915,-0.007978,0.037114,"[-0.257915, -0.007978]",0.111343
posterior,-0.130666,0.128086,-0.381456,0.120125,0.30641,"[-0.381456, 0.120125]",0.459615
ventral,-0.030548,0.104296,-0.242056,0.18096,0.776657,"[-0.242056, 0.180960]",0.776657
intralaminar,0.052855,0.069251,-0.074872,0.180583,0.416498,"[-0.074872, 0.180583]",0.499797
medial,0.126086,0.102518,-0.087665,0.339838,0.246961,"[-0.087665, 0.339838]",0.459615


In [15]:
model_data = df_z.join([df_thomas2_z])
model_data = model_data[model_data.dz_type2 == "MS"]

covariates = "age + Female + tiv + THALAMUS_1"

pvals = {}
coefs = {}
stderrs = {}
llci = {}
ulci = {}
ci_str = {}

for col in df_thomas2:
    formula = f"choroid_volume ~ {col} + {covariates}"
    res = sm.OLS.from_formula(formula, data=model_data).fit()
    pvals[col] = res.pvalues[col]
    coefs[col] = res.params[col]
    stderrs[col] = res.HC0_se[col]
    ci = res.conf_int()
    llci[col] = ci.loc[col, 0]
    ulci[col] = ci.loc[col, 1]
    ci_str[col] = f"[{llci[col]:.6f}, {ulci[col]:.6f}]"

regression_results = pd.DataFrame({"coef": coefs, "stderr": stderrs, "llci": llci, "ulci": ulci, "pvals": pvals, "ci": ci_str})
regression_results['p_fdr'] = stats.false_discovery_control(
    regression_results['pvals'], method='bh'
)

regression_results.sort_values(by="coef")

Unnamed: 0,coef,stderr,llci,ulci,pvals,ci,p_fdr
THALAMUS_1,-0.471241,0.047756,-0.557449,-0.385033,3.496056e-24,"[-0.557449, -0.385033]",2.0976330000000003e-23
medial,-0.326169,0.103968,-0.522535,-0.129804,0.0011796,"[-0.522535, -0.129804]",0.002359199
posterior,-0.295484,0.118422,-0.527846,-0.063121,0.01280286,"[-0.527846, -0.063121]",0.01536344
intralaminar,-0.174645,0.065383,-0.292919,-0.056371,0.00388849,"[-0.292919, -0.056371]",0.005832734
anterior,0.055964,0.064762,-0.058611,0.170539,0.3376318,"[-0.058611, 0.170539]",0.3376318
ventral,0.597348,0.098615,0.408056,0.786639,1.243761e-09,"[0.408056, 0.786639]",3.731284e-09


### Deep Grey Structures

In [136]:
model_data = df_z.join(df_thomas_z)
model_data = model_data[model_data['dz_type5'].isin(["RMS", "PMS"])]
covariates = "age + Female + tiv"

pvals = {}
coefs = {}
stderrs = {}
llci = {}
ulci = {}

outcome = "EDSS_sqrt"
structs = hips_thomas_ref.index[~hips_thomas_ref.index.isin(thalamic_nuclei)]

all_results = {}
for x in structs:
    col = hips_thomas_ref[x]
    formula = f"{outcome} ~ {col} + {covariates}"
    res = sm.OLS.from_formula(formula, data=model_data).fit()
    pvals[col] = res.pvalues[col]
    coefs[col] = res.params[col]
    stderrs[col] = res.HC0_se[col]
    ci = res.conf_int()
    llci[col] = ci.loc[col, 0]
    ulci[col] = ci.loc[col, 1]


regression_results = pd.DataFrame({"coef": coefs, "stderr": stderrs, "llci": llci, "ulci": ulci, "pvals": pvals})
regression_results['p_fdr'] = stats.false_discovery_control(
    regression_results['pvals'], method='bh'
)
regression_results.sort_values(by="coef", inplace=True)
regression_results.to_csv(fig_path / "deep_grey_regressions.csv")
regression_results

Unnamed: 0,coef,stderr,llci,ulci,pvals,p_fdr
THALAMUS_1,-0.294878,0.045745,-0.38822,-0.201536,1.220738e-09,1.464886e-08
Cla_28,-0.215097,0.049181,-0.313742,-0.116452,2.235316e-05,0.0001341189
Acc_26,-0.177858,0.048999,-0.280341,-0.075376,0.0007065838,0.002826335
RN_32,-0.148705,0.05409,-0.258899,-0.038512,0.00828267,0.01987841
Hb_13,-0.134733,0.043543,-0.219743,-0.049723,0.001959034,0.005877101
Cau_27,-0.133372,0.054594,-0.234685,-0.032058,0.009991938,0.01998388
Amy_34,-0.111904,0.045568,-0.200122,-0.023686,0.01302889,0.02233524
Put_31,-0.084238,0.059147,-0.192744,0.024269,0.1277856,0.1916784
GP_33,-0.073634,0.050006,-0.177461,0.030194,0.1640826,0.1968991
GPe_29,-0.073625,0.048188,-0.176024,0.028774,0.1583391,0.1968991


In [210]:
vif_data = df_thomas_z[hips_thomas_ref[thalamic_nuclei + [1]]]
vif = pd.DataFrame()
vif['struct'] = vif_data.columns
vif.set_index("struct", inplace=True)
vif['VIF'] = [variance_inflation_factor(vif_data.values, i, )
                          for i in range(len(vif_data.columns))]

In [225]:
model_data = df_z.join(df_thomas2_z)
model_data = model_data[model_data['dz_type5'].isin(["RMS", "PMS"])]
formula = f"EDSS_sqrt ~ age + Female + tiv + {" + ".join(df_thomas2_z.columns[~df_thomas2_z.columns.isin(["THALAMUS_1"])])}"
res = sm.OLS.from_formula(formula, data=model_data).fit()
print(res.summary())


                            OLS Regression Results                            
Dep. Variable:              EDSS_sqrt   R-squared:                       0.289
Model:                            OLS   Adj. R-squared:                  0.277
Method:                 Least Squares   F-statistic:                     22.61
Date:                Thu, 05 Jun 2025   Prob (F-statistic):           5.27e-29
Time:                        20:05:35   Log-Likelihood:                -569.77
No. Observations:                 453   AIC:                             1158.
Df Residuals:                     444   BIC:                             1195.
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
Intercept       -0.0257      0.041     -0.631   

---

### Elastic Net Trials

In [46]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
import numpy as np

In [50]:
model_data = df_z.join(df_thomas_z)
model_data = model_data[model_data['dz_type5'].isin(["RMS", "PMS"])]
model_data = model_data[~model_data['EDSS_sqrt'].isna()]

structs = hips_thomas_ref[thalamic_nuclei]


X = model_data[structs]
y = model_data['EDSS_sqrt']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Lasso Regression model with a specific alpha (regularization strength)
alpha = 0.1  # Adjust alpha as needed
lasso = Lasso(alpha=alpha)

# Train the model
lasso.fit(X_train, y_train)

# Make predictions on the test set
y_pred = lasso.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Print the coefficients (note that some may be zero)
print("Coefficients:", lasso.coef_)

Mean Squared Error: 1.0688643148091397
Coefficients: [-0.16628181 -0.         -0.         -0.         -0.         -0.02281968
 -0.02927025 -0.15345964 -0.         -0.        ]


In [51]:
check = pd.DataFrame({"coef": lasso.coef_}, index=X.columns)
check['abs'] = check['coef'].abs()
check = check.sort_values(by="abs", ascending=False)
check

Unnamed: 0,coef,abs
AV_2,-0.166282,0.166282
MGN_10,-0.15346,0.15346
LGN_9,-0.02927,0.02927
Pul_8,-0.02282,0.02282
VA_4,-0.0,0.0
VLa_5,-0.0,0.0
VLP_6,-0.0,0.0
VPL_7,-0.0,0.0
CM_11,-0.0,0.0
MD_Pf_12,-0.0,0.0


In [49]:
from sklearn.linear_model import ElasticNetCV


regr = ElasticNetCV(cv=10, random_state=0)
regr.fit(X_train, y_train)
print(regr.alpha_)
print(regr.intercept_)
y_pred = regr.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print(mse)

check = pd.DataFrame({"coef": regr.coef_}, index=X.columns)
check['abs'] = check['coef'].abs()
check = check.sort_values(by="abs", ascending=False)
check

0.04268818040991856
-0.005121767360380086
1.042679507226447


Unnamed: 0,coef,abs
AV_2,-0.203274,0.203274
MGN_10,-0.167571,0.167571
LGN_9,-0.058592,0.058592
Pul_8,-0.035116,0.035116
VLa_5,-0.013014,0.013014
VA_4,0.0,0.0
VLP_6,-0.0,0.0
VPL_7,0.0,0.0
CM_11,-0.0,0.0
MD_Pf_12,-0.0,0.0


In [245]:
formula = "EDSS_sqrt ~ age + Female + tiv + AV_2 + MGN_10"
res = sm.OLS.from_formula(formula, data=model_data).fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:              EDSS_sqrt   R-squared:                       0.275
Model:                            OLS   Adj. R-squared:                  0.267
Method:                 Least Squares   F-statistic:                     33.94
Date:                Thu, 05 Jun 2025   Prob (F-statistic):           2.15e-29
Time:                        20:52:20   Log-Likelihood:                -574.28
No. Observations:                 453   AIC:                             1161.
Df Residuals:                     447   BIC:                             1185.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0215      0.041     -0.524      0.6