In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import numpy as np
import re
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import mannwhitneyu
from scipy import stats
import matplotlib.ticker as ticker
import os
from reload_recursive import reload_recursive
import patsy
from pyprocessmacro import Process
from statsmodels.stats.mediation import Mediation
from statsmodels.miscmodels.ordinal_model import OrderedModel
from pingouin import mediation_analysis

from mri_data import file_manager as fm
import helpers

In [2]:
# Re-import the local `helpers` module via the project's `reload_recursive`
# utility so that edits to it are picked up without restarting the kernel.
# NOTE(review): presumably this also reloads helpers' submodules - confirm.
reload_recursive(helpers)

## Setup

In [3]:
drive_root = fm.get_drive_root()

# Location of the ms_mri project checkout. The default is the original
# author's path; set the MSMRI_HOME environment variable to run this notebook
# on another machine without editing the cell.
msmri_home = Path(os.environ.get("MSMRI_HOME", "/home/srs-9/Projects/ms_mri"))
msmri_datadir = msmri_home / "data"

# Inputs for this notebook are read from a "data0" folder next to the
# directory the kernel was started in.
curr_dir = Path.cwd()
data_dir = curr_dir / "data0"

showfigs = False

# pd.set_option('display.float_format', lambda x: '%.3f' % x)
pd.options.display.precision = 3
colors = helpers.get_colors()

In [4]:
# Read the full T1 dataset from the notebook's data directory.
# NOTE(review): the next cell begins with this exact same read, so this cell
# is redundant on a top-to-bottom run and could be dropped.
df = pd.read_csv(data_dir / "t1_data_full.csv")

In [5]:
# Columns carried forward into the analysis frame, in display order.
keep_cols = [
    "subject", "age", "sex", "ms_type",
    "dz_type2", "dz_type3", "dz_type5",
    "dzdur", "EDSS", "MSSS", "gMSSS", "ARMSS",
    "DMT_score", "DMT_hx_all", "TER", "DMF", "NAT", "INF",
    "flair_contrast",
    "thalamus", "brain", "white", "grey", "cortical_thickness",
    "lesion_count", "lesion_vol_cubic", "PRL", "HAS_PRL",
    "tiv", "choroid_volume", "pineal_volume", "pituitary_volume",
]

# Load the CSV, index by subject id, run the project's helper steps in the
# same order as before, then restrict to the columns listed above.
df = (
    pd.read_csv(data_dir / "t1_data_full.csv")
    .set_index("subid")
    .pipe(helpers.set_dz_type5)
    .pipe(helpers.set_dz_type3)
    .pipe(helpers.set_dz_type2)
    .pipe(helpers.fix_edss)
    .pipe(helpers.clean_df)
    .pipe(helpers.set_has_prl)
    .loc[:, keep_cols]
)

# Append one integer indicator column per distinct value of "sex", plus a
# constant column named "Intercept".
sex_indicators = pd.get_dummies(df["sex"], dtype="int")
df = pd.concat((df, sex_indicators), axis=1)
df["Intercept"] = 1

In [6]:
# Natural log of the lesion-volume column.
df["lesion_vol_logtrans"] = np.log(df["lesion_vol_cubic"])

# Square-root transforms of the disability/severity scores. EDSS is cast to
# float first, exactly as before; the other scores are used as stored.
df["edss_sqrt"] = np.sqrt(df["EDSS"].astype("float"))
for score_name in ("MSSS", "ARMSS", "gMSSS"):
    df[f"{score_name.lower()}_sqrt"] = np.sqrt(df[score_name])

In [7]:
# Columns that must be numeric for the analyses below.
# NOTE(review): the name `vars` shadows the Python builtin; it is kept here
# because other cells may refer to it.
vars = [
    "age", "Female", "dzdur",
    "EDSS", "MSSS", "gMSSS", "ARMSS",
    "edss_sqrt", "msss_sqrt", "armss_sqrt", "gmsss_sqrt",
    "DMT_score", "DMT_hx_all", "TER", "DMF", "NAT", "INF",
    "thalamus", "brain", "white", "grey", "cortical_thickness",
    "lesion_count", "lesion_vol_cubic", "lesion_vol_logtrans",
    "PRL", "HAS_PRL",
    "tiv", "choroid_volume", "pineal_volume", "pituitary_volume",
]

# Convert every listed column with pd.to_numeric (raises on unparseable values).
df[vars] = df[vars].apply(pd.to_numeric)

In [8]:
def mean_sd(df, column, cond=None):
    """Return ``(mean, std)`` of ``df[column]`` over an optional row subset.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the data.
    column : str
        Name of the column to summarise.
    cond : optional
        Row selector handed straight to ``df.loc`` (for example a boolean
        mask, or a callable that takes the frame and returns one). When
        omitted, every row is used.

    Returns
    -------
    tuple
        The mean and the standard deviation (pandas default ``.std()``) of
        the selected values.
    """
    # An all-True mask selects the whole frame when no condition is given.
    row_selector = df.index.isin(df.index) if cond is None else cond
    selected_values = df.loc[row_selector, column]
    return selected_values.mean(), selected_values.std()

In [9]:
def percent_cat(df, column, cat, cond=None):
    """Return the fraction of selected rows whose ``column`` equals ``cat``.

    Despite the name, the result is a proportion in ``[0, 1]``, not a
    percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the data.
    column : str
        Name of the categorical column to inspect.
    cat : object
        Category value to count.
    cond : optional
        Row selector: either a callable that takes ``df`` and returns a
        boolean mask, or a boolean mask with one entry per row of ``df``.
        When omitted, every row is used.

    Returns
    -------
    float
        ``(# selected rows equal to cat) / (# selected rows)``, or NaN when
        the selection is empty.
    """
    # Normalise the selector to a plain boolean array. Previously the default
    # (None) was turned into an array and then *called*, raising TypeError.
    if cond is None:
        selected = np.ones(len(df), dtype=bool)
    elif callable(cond):
        selected = np.asarray(cond(df), dtype=bool)
    else:
        selected = np.asarray(cond, dtype=bool)

    n_selected = selected.sum()
    if n_selected == 0:
        # Empty subgroup: report NaN (as mean_sd does) rather than dividing by zero.
        return float("nan")

    is_cat = (df[column] == cat).to_numpy()
    return (selected & is_cat).sum() / n_selected

In [10]:
def rms_cond(df):
    """Boolean mask of the rows whose ``dz_type5`` value is ``"RMS"``."""
    return df["dz_type5"].eq("RMS")

def pms_cond(df):
    """Boolean mask of the rows whose ``dz_type5`` value is ``"PMS"``."""
    return df["dz_type5"].eq("PMS")

def oind_cond(df):
    """Boolean mask of the rows whose ``dz_type5`` value is ``"OIND"``."""
    return df["dz_type5"].eq("OIND")

def nind_cond(df):
    """Boolean mask of the rows whose ``dz_type5`` value is ``"NIND"``."""
    return df["dz_type5"].eq("NIND")

In [11]:
# Number of subjects in each dz_type5 group, printed as "<group>: <count>".
# The lookup uses single quotes inside the f-string: reusing the enclosing
# double quotes is only valid from Python 3.12 onward and is a SyntaxError on
# earlier interpreters.
for group_label in ("RMS", "PMS", "NIND", "OIND"):
    print(f"{group_label}: {(df['dz_type5'] == group_label).sum()}")

RMS: 327
PMS: 85
NIND: 43
OIND: 30


In [13]:
# Row label (already padded for alignment) and row selector for each group,
# in the order the rows are printed.
group_rows = (
    ("RMS:  ", rms_cond),
    ("PMS:  ", pms_cond),
    ("NIND: ", nind_cond),
    ("OIND: ", oind_cond),
)


def _mean_sd_text(column, template):
    """Build a renderer that formats mean and SD of ``column`` for one group."""

    def render(cond):
        return template.format(*mean_sd(df, column, cond=cond))

    return render


def _female_share_text(cond):
    """Format the fraction of rows with sex == "Female" for one group."""
    return "{:0.2f}".format(percent_cat(df, "sex", "Female", cond=cond))


# One entry per printed section: the heading (title plus underline) and the
# function that renders the value part of each group's row.
summary_sections = (
    ("Age\n---", _mean_sd_text("age", "{:0.2f} ± {:0.2f}")),
    ("Sex, Female\n-----------", _female_share_text),
    ("dzdur\n----", _mean_sd_text("dzdur", "{:0.1f} ± {:0.1f}")),
    ("EDSS\n----", _mean_sd_text("EDSS", "{:0.1f} ± {:0.1f}")),
    ("TIV\n----", _mean_sd_text("tiv", "{:0.2f} ± {:0.2f}")),
    (
        "lesion_vol_cubic\n----------------",
        _mean_sd_text("lesion_vol_cubic", "{:0.2f} ± {:0.2f}"),
    ),
    ("PRL\n----------------", _mean_sd_text("PRL", "{:0.2f} ± {:0.2f}")),
)

for section_number, (heading, render_value) in enumerate(summary_sections):
    # Sections are separated by print("\n"); nothing is printed before the
    # first section or after the last one.
    if section_number > 0:
        print("\n")
    print(heading)
    for row_label, group_cond in group_rows:
        print(row_label + render_value(group_cond))

Age
---
RMS:  44.64 ± 11.94
PMS:  58.24 ± 9.35
NIND: 48.93 ± 11.78
OIND: 51.60 ± 11.60


Sex, Female
-----------
RMS:  0.82
PMS:  0.58
NIND: 0.81
OIND: 0.77


dzdur
----
RMS:  10.2 ± 8.7
PMS:  19.8 ± 11.8
NIND: 10.4 ± 10.8
OIND: 8.7 ± 9.7


EDSS
----
RMS:  2.1 ± 1.5
PMS:  5.0 ± 1.9
NIND: 2.5 ± 1.8
OIND: 2.3 ± 1.5


TIV
----
RMS:  1493321.35 ± 130080.31
PMS:  1506403.56 ± 132494.95
NIND: 1536062.90 ± 151071.44
OIND: 1512073.80 ± 139567.19


lesion_vol_cubic
----------------
RMS:  1.54 ± 0.79
PMS:  1.99 ± 0.86
NIND: 1.37 ± 0.86
OIND: 1.24 ± 0.56


PRL
----------------
RMS:  1.02 ± 1.99
PMS:  0.71 ± 1.30
NIND: 0.02 ± 0.15
OIND: 0.07 ± 0.37


In [None]:
# Two independent, empty frames with one column per diagnostic group; they
# start with no rows.
imaging_means, imaging_std = (
    pd.DataFrame(columns=["RMS", "PMS", "NIND", "OIND"]) for _ in range(2)
)

