# Basic Statistics

## Set Up

### Imports

In [2]:
from warnings import simplefilter

import pandas as pd

simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
simplefilter(action="ignore", category=DeprecationWarning)
simplefilter(action="ignore", category=FutureWarning)

import re
from pathlib import Path
from pyprocessmacro import Process

import numpy as np
import pyperclip
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
from IPython.display import Markdown, HTML, display
from io import StringIO
import matplotlib.pyplot as plt
from collections import defaultdict

from reload_recursive import reload_recursive
import sys
import os
from my_python_utils import smart_title

sys.path.insert(0, "/home/srs-9/Projects/ms_mri/analysis/thalamus/helpers")

import helpers
import utils
import regression_utils as regutils
import my_namespace


#### Reload modules

In [3]:
# Apply reload_recursive to each project-local module, in the same order as
# before. NOTE(review): presumably this refreshes the modules after on-disk
# edits without a kernel restart — confirm against reload_recursive.
for _module in (regutils, helpers, utils, my_namespace):
    reload_recursive(_module)

from utils import zscore
from my_namespace import presentation_cols, colors

#### Load Data

##### Clinical and Volumes

In [4]:
# Output directory for this notebook's figures and tables.
fig_path = Path(
    "/home/srs-9/Projects/ms_mri/analysis/thalamus/results/figures_tables/0-Basic_statistics"
)
# Create the directory and any missing parents in a single idempotent call.
# An exists() check followed by makedirs can fail if the directory appears
# between the two steps; exist_ok=True removes that window.
fig_path.mkdir(parents=True, exist_ok=True)

# Main analysis table, loaded through the project helper.
data = utils.load_data("/home/srs-9/Projects/ms_mri/analysis/thalamus/results/data.csv")


# Boolean row masks over `data`, one per diagnosis label.
# `dz_type2` separates "MS" from "!MS"; `dz_type5` carries the finer labels.
MS_patients = data["dz_type2"] == "MS"
NONMS_patients = data["dz_type2"] == "!MS"
NIND_patients = data["dz_type5"] == "NIND"
OIND_patients = data["dz_type5"] == "OIND"
RMS_patients = data["dz_type5"] == "RMS"
PMS_patients = data["dz_type5"] == "PMS"

# Opening <p> tag carrying the monospace styling for HTML text output.
# NOTE: `p` is a very short global name; rebinding it elsewhere (e.g. to a
# p-value) would silently break any cell that builds HTML from it.
p = "<p style='font-family: Mononoki Nerd Font Mono;line-height: 1.3;font-size: 14px;'>"

## Analysis

### Demographic and Clinical Data

### MRI Measures

In [5]:
# Render mean ± SD of the main MRI volumes for MS patients as styled HTML.
model_data = data[MS_patients]

# Named `volume_cols` rather than `vars` so the builtin vars() stays usable
# for the rest of the kernel session.
volume_cols = ["brain", "white", "grey", "THALAMUS_1"]
# Left-justify every column name to the longest one (+1 for spacing).
max_width = max(len(col) for col in volume_cols)
display_volumes = [
    f"{col:<{max_width + 1}}: {model_data[col].mean():0.2f} ± {model_data[col].std():0.2f}"
    for col in volume_cols
]
# Two paragraphs: an underlined heading, then one <br>-separated line per column.
display_text = f"{p}<u>MRI Volumes:</u></p>"
display_text += f"{p}{'<br>'.join(display_volumes)}</p>"
display(HTML(display_text))

In [28]:
# Display labels (with units) for each measured column, grouped by category.
# Keyword order is insertion order, so iteration order matches the listing.
mri_volumes = dict(
    brain="WBV (mm3)",
    white="WMV (mm3)",
    grey="GMV (mm3)",
    THALAMUS_1="WTV (mm3)",
    cortical_thickness="cortical_thickness (mm)",
)
inflammation_measures = dict(
    CP="choroid plexus (mm3)",
    t2lv="T2LV (mm3)",
    PRL="PRL count",
)
csf_compartments = dict(
    LV="LV (mm3)",
    interCSF="interthalamic CSF (mm3)",
    thirdV="third ventricle (mm3)",
    fourthV="fourth ventricle (mm3)",
    periCSF="subarachnoid CSF (mm3)",
)

# Cell output: the merged mapping, CSF compartments first, then MRI volumes,
# then inflammation measures.
{**csf_compartments, **mri_volumes, **inflammation_measures}

{'LV': 'LV (mm3)',
 'interCSF': 'interthalamic CSF (mm3)',
 'thirdV': 'third ventricle (mm3)',
 'fourthV': 'fourth ventricle (mm3)',
 'periCSF': 'subarachnoid CSF (mm3)',
 'brain': 'WBV (mm3)',
 'white': 'WMV (mm3)',
 'grey': 'GMV (mm3)',
 'THALAMUS_1': 'WTV (mm3)',
 'cortical_thickness': 'cortical_thickness (mm)',
 'CP': 'choroid plexus (mm3)',
 't2lv': 'T2LV (mm3)',
 'PRL': 'PRL count'}

In [37]:
# Display labels for each measured column, grouped by category.
# NOTE(review): the first group is printed under an "(mm3)" heading but also
# contains cortical thickness, whose label says mm — confirm the heading.
mri_volumes = {
    "brain": "whole brain volume",
    "white": "white matter volume",
    "grey": "grey matter volume",
    "THALAMUS_1": "whole thalamus volume",
    "cortical_thickness": "cortical_thickness (mm)",
}
inflammation_measures = {
    "CP": "choroid plexus (mm3)",
    "t2lv": "T2LV (mm3)",
    "PRL": "PRL count"
}
csf_compartments = {
    "LV": "LV",
    "interCSF": "interthalamic CSF",
    "thirdV": "third ventricle",
    "fourthV": "fourth ventricle",
    "periCSF": "subarachnoid CSF"
}
# One shared width across all three groups so the ':' separators line up.
max_width = max(
    len(var_disp)
    for var_disp in (csf_compartments | mri_volumes | inflammation_measures).values()
)


def _summary_lines(measures, frame, width):
    """Return one "label : mean ± SD" line per entry of ``measures``.

    Args:
        measures: mapping of column name in ``frame`` to its display label.
        frame: table indexed by column name; each selected column must
            provide ``.mean()`` and ``.std()``.
        width: length of the widest label; labels are left-justified to
            ``width + 1`` characters.

    Returns:
        A list of formatted strings in the iteration order of ``measures``.
    """
    return [
        f"{label:<{width + 1}}: {frame[column].mean():0.2f} ± {frame[column].std():0.2f}"
        for column, label in measures.items()
    ]


_rule = "-----------------------------------------------"
# The first heading has no leading blank line; the later ones do, which is
# what separates the groups in the printed output.
_sections = [
    ("MRI Volumes (mm3):", mri_volumes),  #* MRI Volumes
    ("\nMeasures of Inflammation:", inflammation_measures),  #* Measures of Inflammation
    ("\nCSF Compartments (mm3):", csf_compartments),  #* CSF Compartments
]
for _heading, _measures in _sections:
    # `display_volumes` ends up holding the last group's lines, as before.
    display_volumes = _summary_lines(_measures, model_data, max_width)
    print(f"{_heading}\n{_rule}")
    print("\n".join(display_volumes))

MRI Volumes (mm3):
-----------------------------------------------
whole brain volume      : 1406866.48 ± 81819.80
white matter volume     : 654415.28 ± 38816.98
grey matter volume      : 752451.21 ± 54406.74
whole thalamus volume   : 8956.35 ± 1642.24
cortical_thickness (mm) : 2.28 ± 0.13

Measures of Inflammation:
-----------------------------------------------
choroid plexus (mm3)    : 1718.35 ± 511.21
T2LV (mm3)              : 1662.32 ± 946.04
PRL count               : 0.89 ± 1.79

CSF Compartments (mm3):
-----------------------------------------------
LV                      : 19454.91 ± 14852.40
interthalamic CSF       : 963.13 ± 347.66
third ventricle         : 954.28 ± 483.20
fourth ventricle        : 1747.36 ± 511.06
subarachnoid CSF        : 344944.62 ± 34932.04


In [5]:
# Compare pairs of columns for MS patients: print mean ± SD of each column,
# then the Pearson correlation and the R2 of a simple linear regression.

# .copy() gives an independent frame, so the rescaling below does not write
# into a slice of `data` (which raises SettingWithCopyWarning).
test_data = data[MS_patients].copy()

# NOTE(review): presumably rescales mm3 to cm3 to match the cat12_* columns —
# confirm the units of both sources.
for var in ['brain', 'white', 'grey']:
    test_data[var] = test_data[var] / 1000

# (x column, y column, heading) for every comparison that is printed.
comparisons = [
    ("brain", "cat12_brain", "WBV"),
    ("white", "cat12_wm", "WMV"),
    ("grey", "cat12_gm", "GMV"),
    ("CP", "LV", "CPLV"),
]
# Pad to the longest column name that is actually printed.
max_width = max(len(col) for x_col, y_col, _ in comparisons for col in (x_col, y_col))

for x_col, y_col, title in comparisons:
    print(f"{title}\n-----------")
    print(f"{x_col:<{max_width+1}}: {test_data[x_col].mean():0.2f} ± {test_data[x_col].std():0.2f}")
    print(f"{y_col:<{max_width+1}}: {test_data[y_col].mean():0.2f} ± {test_data[y_col].std():0.2f}")

    # Keep only rows where both columns are present.
    paired = test_data[[x_col, y_col]].dropna()
    # Keep the regression result as one object instead of unpacking it into
    # slope/intercept/r/p/se: unpacking rebinds the global `p`, which holds
    # the HTML paragraph style used by other cells.
    fit = stats.linregress(paired[x_col], paired[y_col])
    corr = stats.pearsonr(paired[x_col], paired[y_col])
    print(f"{'compare':<{max_width+1}}: ρ={corr.statistic:0.2f}, p={corr.pvalue:0.2}, R2={fit.rvalue**2:0.2f}")
    print("\n")

WBV
-----------
brain       : 1406.87 ± 81.82
cat12_brain : 1097.16 ± 119.01
compare     : ρ=0.41, p=1.3e-20, R2=0.17


WMV
-----------
white       : 654.42 ± 38.82
cat12_wm    : 612.50 ± 64.42
compare     : ρ=0.35, p=7.1e-15, R2=0.12


GMV
-----------
grey        : 752.45 ± 54.41
cat12_gm    : 484.66 ± 61.96
compare     : ρ=0.14, p=0.0022, R2=0.02


CPLV
-----------
CP          : 1718.35 ± 511.21
LV          : 19454.91 ± 14852.40
compare     : ρ=0.55, p=6.2e-39, R2=0.31




In [6]:
# Repeat the volume comparison on the clinical table read directly from disk
# (no diagnosis filter is applied here).
test_data = pd.read_csv("/home/srs-9/Projects/ms_mri/data/Clinical_Data_All_updated.csv")

# (x column, y column, label) for each comparison; one loop replaces three
# copy-pasted stanzas.
for x_col, y_col, label in [
    ("brain", "cat12_brain", "WBV"),
    ("white", "cat12_wm", "WMV"),
    ("grey", "cat12_gm", "GMV"),
]:
    # Drop rows missing either value so one NaN cannot turn the statistics
    # into NaN; with complete data this changes nothing.
    paired = test_data[[x_col, y_col]].dropna()
    # Keep the regression result as one object instead of unpacking it into
    # slope/intercept/r/p/se: unpacking rebinds the global `p`, which holds
    # the HTML paragraph style used by other cells.
    fit = stats.linregress(paired[x_col], paired[y_col])
    corr = stats.pearsonr(paired[x_col], paired[y_col])
    print(f"{label}: ρ={corr.statistic:0.2f}, R2={fit.rvalue**2:0.2f}, p={corr.pvalue:0.2}")

WBV: ρ=0.40, R2=0.16, p=2.9e-23
WMV: ρ=0.36, R2=0.13, p=1.4e-18
GMV: ρ=0.11, R2=0.01, p=0.0084
