In [1]:
import pprint
from warnings import simplefilter

import pandas as pd
from IPython.display import Markdown, display
from statsmodels.stats.multitest import multipletests

simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
import json
import re
import textwrap
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pyperclip
import statsmodels.api as sm
from IPython.display import clear_output
from matplotlib import colormaps
from scipy import stats
from statsmodels.genmod.families import Poisson
from pyprocessmacro import Process


from reload_recursive import reload_recursive
from statsmodels.stats.mediation import Mediation
from statsmodels.stats.outliers_influence import variance_inflation_factor
from tqdm.notebook import tqdm
import sys

sys.path.insert(0, "/home/srs-9/Projects/ms_mri/analysis/thalamus/helpers")
from mri_data import file_manager as fm
import helpers


import regression_utils

In [22]:
reload_recursive(regression_utils)
reload_recursive(helpers)
import helpers
from helpers import load_df, zscore, get_colors
from regression_utils import quick_regression, quick_regression2, residualize_vars, run_regressions

#### Clinical and Volumes

### Load Data

In [3]:
drive_root = fm.get_drive_root()
dataroot = drive_root / "3Tpioneer_bids"
data_dir = Path("/home/srs-9/Projects/ms_mri/data")
fig_path = Path(
    "/home/srs-9/Projects/ms_mri/analysis/thalamus/figures_tables/choroid_associations"
)

df = load_df()
df_thomas = helpers.load_hipsthomas(data_dir)

data = df.join(df_thomas)

# these corrections should ultimately be made to the csv file
for struct in ["brain", "white", "grey", "thalamus", "t2lv"]:
    data[struct] = data[struct] * 1000

data = helpers.composite_vars(data)

#! See suggestions from assumption_checks.ipynb
# TODO It would be helpful if the transformed variable name was general so I 
# TODO     wouldnt have to remember which transform was applied to each
transformations = {
    "LV": "log",
    "thirdV": "log",
    "fourthV": "log",
    "asegCSF": "log",
    "t2lv": "log",
    "PRL": "log1p",
    "CCF": "log",
    "CCF0": "log",
    "periCSF_ratio": "log",
    "periCSF_frac": "square"
}
data = helpers.transform_variables(data, transformations)
dataT = helpers.transform_variables(data, transformations, rename=False)


viridis = colormaps["viridis"].resampled(20)
colors = helpers.get_colors()

MS_patients = data["dz_type2"] == "MS"
NONMS_patients = data["dz_type2"] == "!MS"
NIND_patients = data["dz_type5"] == "NIND"
OIND_patients = data["dz_type5"] == "OIND"
RMS_patients = data["dz_type5"] == "RMS"
PMS_patients = data["dz_type5"] == "PMS"


thalamic_nuclei = [2, 4, 5, 6, 7, 8, 9, 10, 11, 12]
deep_grey = [13, 14, 26, 27, 28, 29, 30, 31, 32]

thalamic_nuclei_str = [str(i) for i in thalamic_nuclei]

hips_thomas_ref = pd.read_csv(
    "/home/srs-9/Projects/ms_mri/data/hipsthomas_struct_index.csv", index_col="index"
)["struct"]
hips_thomas_invref = pd.read_csv(
    "/home/srs-9/Projects/ms_mri/data/hipsthomas_struct_index.csv", index_col="struct"
)["index"]


model_data = data[MS_patients]
model_data_z = zscore(data[MS_patients], skip_vars=["PRL"])

## Analysis

### Cohort Statistics

Number of patients in each group, average demographic characteristics, etc

### Association of MRI features to disease status

In [23]:
model_data = dataT.copy()[(MS_patients) | (NONMS_patients)]
model_data_z = zscore(model_data)
model_data_z = model_data_z.join([pd.get_dummies(model_data['dz_type2'], dtype=int)])

outcome = "MS" # LV / allCSF
covariates = ["age", "Female", "tiv"]

predictors = ["CP", "LV", "t2lv", "THALAMUS_1", "brain", "white", "grey"]
    

results, _ = run_regressions(model_data_z, outcome, predictors, covariates, regression_model=sm.Logit)
results = results[outcome]
display(Markdown(f"`{outcome} ~ predictor + {" + ".join(covariates)}`"))
display_order = results["coef"].apply(np.abs).sort_values(ascending=False).index
# display_order = results.index
display_cols = results.columns[~results.columns.isin(["ci"])]
display(results.loc[display_order, display_cols])

Optimization terminated successfully.
         Current function value: 0.433903
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.433952
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.403504
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.405587
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.437331
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.436844
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.438151
         Iterations 6


`MS ~ predictor + age + Female + tiv`

Unnamed: 0_level_0,coef,pval,p_fdr,se,llci,ulci,R2,formula,coef_sig
predictor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
THALAMUS_1,-1.101103,1.128537e-07,3.949878e-07,0.207571,-1.507935,-0.694272,,MS ~ THALAMUS_1 + age + Female + tiv,-1.101103
t2lv,0.932914,2.124369e-08,1.487058e-07,0.166545,0.60649,1.259337,,MS ~ t2lv + age + Female + tiv,0.932914
LV,0.330156,0.01700351,0.03967485,0.138336,0.059023,0.601289,,MS ~ LV + age + Female + tiv,0.330156
CP,0.256962,0.04990488,0.08733354,0.131051,0.000107,0.513817,,MS ~ CP + age + Female + tiv,0.0
white,-0.17767,0.1453146,0.2034404,0.122002,-0.416791,0.06145,,MS ~ white + age + Female + tiv,0.0
brain,-0.164708,0.2087747,0.2435705,0.131039,-0.42154,0.092123,,MS ~ brain + age + Female + tiv,0.0
grey,-0.113817,0.4143598,0.4143598,0.139439,-0.387112,0.159479,,MS ~ grey + age + Female + tiv,0.0


In [17]:
model_data = dataT.copy()[(MS_patients)]
model_data = model_data.join([pd.get_dummies(model_data['dz_type5'], dtype=int)])
model_data = zscore(model_data)

outcome = "PMS" # LV / allCSF
covariates = ["age", "Female", "tiv"]

predictors = ["CP", "LV", "t2lv", "PRL", "THALAMUS_1", "brain", "white", "grey"]
    

results, _ = run_regressions(model_data, outcome, predictors, covariates)
results = results[outcome]
display(Markdown(f"`{outcome} ~ predictor + {" + ".join(covariates)}`"))
display_order = results["coef"].apply(np.abs).sort_values(ascending=False).index
# display_order = results.index
display_cols = results.columns[~results.columns.isin(["ci"])]
display(results.loc[display_order, display_cols])

`PMS ~ predictor + age + Female + tiv`

Unnamed: 0_level_0,coef,pval,p_fdr,se,llci,ulci,R2,formula,coef_sig
predictor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
THALAMUS_1,-0.233275,9e-06,7.5e-05,0.052042,-0.335543,-0.131007,0.255258,PMS ~ THALAMUS_1 + age + Female + tiv,-0.233275
LV,0.15984,0.003165,0.012658,0.053875,0.053969,0.26571,0.231935,PMS ~ LV + age + Female + tiv,0.15984
t2lv,0.121674,0.006877,0.018338,0.044817,0.033605,0.209744,0.229975,PMS ~ t2lv + age + Female + tiv,0.121674
grey,-0.091389,0.082652,0.132243,0.052544,-0.194644,0.011866,0.222287,PMS ~ grey + age + Female + tiv,0.0
CP,0.078287,0.079916,0.132243,0.044607,-0.009371,0.165944,0.221427,PMS ~ CP + age + Female + tiv,0.0
brain,-0.057036,0.243909,0.325212,0.048884,-0.153099,0.039026,0.219029,PMS ~ brain + age + Female + tiv,0.0
PRL,0.027926,0.492878,0.563289,0.040691,-0.052037,0.107889,0.217276,PMS ~ PRL + age + Female + tiv,0.0
white,-0.004195,0.925067,0.925067,0.044575,-0.091788,0.083399,0.216564,PMS ~ white + age + Female + tiv,0.0
