In [8]:
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import numpy as np
import re
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import mannwhitneyu
from scipy import stats
import matplotlib.ticker as ticker
import os
from reload_recursive import reload_recursive
import patsy
from pyprocessmacro import Process
from statsmodels.stats.mediation import Mediation
from statsmodels.miscmodels.ordinal_model import OrderedModel
from pingouin import mediation_analysis
import sys

from mri_data import file_manager as fm
import helpers

In [9]:
# Re-import the local `helpers` module so that edits made to it on disk take
# effect without restarting the kernel.
# NOTE(review): presumably this also reloads the modules `helpers` imports
# (hence "recursive") — confirm against reload_recursive's implementation.
reload_recursive(helpers)

## Setup

In [10]:
# --- Paths -----------------------------------------------------------------
drive_root = fm.get_drive_root()
# NOTE(review): absolute, machine-specific path; the notebook will not find
# this project directory on another machine or under another user account.
msmri_home = Path("/home/srs-9/Projects/ms_mri")
msmri_datadir = msmri_home.joinpath("data")
# Input tables are read from ./data0 relative to the kernel's working directory.
curr_dir = Path.cwd()
data_dir = curr_dir.joinpath("data0")

# --- Display / plotting options ---------------------------------------------
showfigs = False
# pd.set_option('display.float_format', lambda x: '%.3f' % x)
pd.set_option("display.precision", 3)
colors = helpers.get_colors()

In [11]:
# Prefix used for the names of the CSV files exported for JASP.
jasp_prefix = "t1_2"

# Read the full table, index it by subject id, then run it through the
# project's helper functions in the same order as before.
df = (
    pd.read_csv(data_dir / "t1_2_data_full.csv")
    .set_index("subid")
    .pipe(helpers.set_dz_type5)
    .pipe(helpers.set_dz_type3)
    .pipe(helpers.set_dz_type2)
    .pipe(helpers.fix_edss)
    .pipe(helpers.clean_df)
    .pipe(helpers.set_has_prl)
)

# Columns retained for the analysis; the order here is the column order of df.
keep_cols = [
    "subject", "age", "sex",
    "ms_type", "dz_type2", "dz_type3", "dz_type5",
    "dzdur", "EDSS", "MSSS", "gMSSS", "ARMSS",
    "DMT_score", "DMT_hx_all", "TER", "DMF", "NAT", "INF",
    "flair_contrast",
    "thalamus", "brain", "white", "grey", "cortical_thickness",
    "lesion_count", "lesion_vol_cubic", "PRL", "HAS_PRL",
    "tiv", "choroid_volume", "pineal_volume", "pituitary_volume",
]

df = df[keep_cols]
# Append one integer 0/1 indicator column per distinct value of "sex", then a
# constant column of ones named "Intercept".
sex_dummies = pd.get_dummies(df["sex"], dtype="int")
df = pd.concat([df, sex_dummies], axis=1).assign(Intercept=1)

In [12]:
# Natural log of the lesion volume column.
df["lesion_vol_logtrans"] = np.log(df["lesion_vol_cubic"])

# Square roots of the EDSS / MSSS / ARMSS / gMSSS columns. EDSS is cast to
# float first; the other three are used as they are.
df["edss_sqrt"] = np.sqrt(df["EDSS"].astype("float"))
for sqrt_col, source_col in (
    ("msss_sqrt", "MSSS"),
    ("armss_sqrt", "ARMSS"),
    ("gmsss_sqrt", "gMSSS"),
):
    df[sqrt_col] = np.sqrt(df[source_col])

# Rows whose dz_type5 label is RMS or PMS.
is_rms_or_pms = df["dz_type5"].isin(["RMS", "PMS"])
data_ms = df[is_rms_or_pms]

In [15]:
# Columns written to the CSV files that are exported for JASP.
columns = [
    "ms_type",
    "dz_type2",
    "dz_type3",
    "dz_type5",
    "age",
    "sex",
    "tiv",
    "flair_contrast",
    "EDSS",
    "ARMSS",
    "PRL",
    "lesion_vol_cubic",
    "MSSS",
    "choroid_volume",
    "pineal_volume",
    "pituitary_volume",
    "thalamus",
    "brain",
    "cortical_thickness",
    "dzdur",
]
df_jasp = df[columns]

# Append one 0/1 indicator column per distinct value of "sex". Requesting an
# integer dtype up front gives 0/1 values directly, so no row-by-row
# conversion of the indicators is needed afterwards.
df_jasp = pd.concat(
    (df_jasp, pd.get_dummies(df_jasp["sex"], dtype="int")), axis=1
)

# Drop rows whose disease-duration field holds the "." placeholder.
df_jasp = df_jasp.loc[df_jasp["dzdur"] != ".", :]


In [16]:
# Directory (under the current working directory) that receives the CSV
# exports for JASP.
jasp_dir = curr_dir / "jasp"
# DataFrame.to_csv does not create missing directories, so make sure the
# target exists; exist_ok keeps the cell safe to re-run.
jasp_dir.mkdir(parents=True, exist_ok=True)

# Export the complete JASP table.
df_jasp.to_csv(jasp_dir / f"{jasp_prefix}_dataframe.csv")

In [17]:
# Subsets of the JASP table, selected by diagnosis label.
dz5_labels = df_jasp["dz_type5"]

# Three-level grouping (dz_type3): MS, NIND, OIND.
df_jasp3 = df_jasp.loc[df_jasp["dz_type3"].isin(["MS", "NIND", "OIND"])]
# Grouping on dz_type5: RMS, PMS, NIND, OIND.
df_jasp5 = df_jasp.loc[dz5_labels.isin(["RMS", "PMS", "NIND", "OIND"])]
# RMS and PMS rows only.
df_jasp_ms = df_jasp.loc[dz5_labels.isin(["RMS", "PMS"])]

In [18]:
# Write each subset to "<jasp_prefix>_dataframe<suffix>.csv" inside jasp_dir.
for suffix, subset in (("3", df_jasp3), ("5", df_jasp5), ("MS", df_jasp_ms)):
    subset.to_csv(jasp_dir / f"{jasp_prefix}_dataframe{suffix}.csv")