In [None]:
import numpy as np
import pandas as pd
import pingouin as pg
import seaborn as sns
import scipy.stats as sp_stats
import matplotlib.pyplot as plt
# Global plot styling for the whole notebook. `sns.set` is only an alias of
# `sns.set_theme`, which is the preferred (non-legacy) interface.
sns.set_theme(font_scale=1.25, style='ticks')

## Data loading

**Health data**

In [None]:
# Load the health table; the first three CSV columns form the row MultiIndex
health_csv = '../output/csv/df_health_mesa.csv'
df_health = pd.read_csv(health_csv, index_col=[0, 1, 2])

# Names of the health variables, reused later for column selection
cols_health = list(df_health.columns)

print(df_health.shape)
df_health.head()

In [None]:
# Age distribution in 5-year bins.
# `np.arange` excludes its stop value: with a stop of 90 the last bin edge is 85 and
# ages in the 85-90 range are silently left out of the histogram, although the
# x-axis runs to 90. A stop of 95 makes 90 the last edge.
df_health["age"].hist(bins=np.arange(40, 95, 5))
plt.xlim(40, 90)
plt.xlabel("Age")
plt.ylabel("Participants")
plt.title("Age distribution — MESA");

In [None]:
# Check data types: print a per-column summary of the health table
df_health.info()

In [None]:
# Number of missing values in each health column
df_health.isna().sum()

**Sleep stats**

In [None]:
# Load sleep stats and keep only the MESA rows.
# `.copy()` makes the filtered frame independent of the full table, so the `visit`
# assignment below does not raise pandas' SettingWithCopyWarning.
df_sstats = pd.read_csv('../output/csv/df_sleepstats.csv')
df_sstats = df_sstats[df_sstats['dataset'] == 'mesa'].copy()
# Keep the text after the last "visit" in the label and convert it to an integer
df_sstats['visit'] = df_sstats['visit'].str.split("visit").str.get(-1).astype(int)
df_sstats = df_sstats.set_index(['dataset', 'subj', 'visit']).sort_index()
cols_sstats = df_sstats.columns.tolist()
print(df_sstats.shape)
df_sstats.head()

**Coupling**

In [None]:
# Load coupling (NREM). Every row is assigned to visit 1 so that the table can be
# indexed by (dataset, subj, visit).
df_coupling = (
    pd.read_csv("../output/csv/df_mesa_coupling_NREM_inverted.csv")
    .assign(visit=1)
    .set_index(['dataset', 'subj', 'visit'])
    .sort_index()
)
cols_coupling = list(df_coupling.columns)
print(df_coupling.shape)
df_coupling.head()

In [None]:
# Mean of each coupling column, rounded to 3 decimals
df_coupling.mean().round(3)

**Spectral**

In [None]:
# Load spectral data and keep only the MESA rows.
# `.copy()` makes the filtered frame independent of the full table, so the `visit`
# assignment below does not raise pandas' SettingWithCopyWarning.
df_spectral = pd.read_csv("../output/csv/df_spectral.csv")
df_spectral = df_spectral[df_spectral['dataset'] == 'mesa'].copy()
# Keep the text after the last "visit" in the label and convert it to an integer
df_spectral['visit'] = df_spectral['visit'].str.split("visit").str.get(-1).astype(int)
df_spectral = df_spectral.set_index(['dataset', 'subj', 'visit']).sort_index()
# Keep the columns whose name contains "REM" (this also matches "NREM")
df_spectral = df_spectral.filter(like="REM")
cols_spectral = df_spectral.columns.tolist()
print(df_spectral.shape)
df_spectral.head()

In [None]:
# Create SWA/delta columns: sum of the `bp_sdelta_*` and `bp_fdelta_*` columns,
# separately for NREM and REM
df_spectral = df_spectral.assign(
    bp_delta_NREM=df_spectral["bp_sdelta_NREM"] + df_spectral["bp_fdelta_NREM"],
    bp_delta_REM=df_spectral["bp_sdelta_REM"] + df_spectral["bp_fdelta_REM"],
)
df_spectral.shape

**Merge**

In [None]:
# Join the tables on their shared row index.
# Inner joins: keep only rows present in health, coupling and sleep stats
df_core = df_health.join(df_coupling, how="inner").join(df_sstats, how="inner")
# Left join: rows without spectral data are kept (their spectral columns are NaN)
df = df_core.join(df_spectral, how="left")

print(df.shape)
df.round(3)

### Descriptive statistics

In [None]:
# Number of unique participants (distinct values of the second index level)
n_participants = df.index.get_level_values(1).nunique()
print(n_participants, 'unique participants')

In [None]:
# Number of nights per participant: count distinct visits for each subject,
# then tabulate how many subjects have each count
visits_long = df.reset_index()[['subj', 'visit']]
n_visit_per_subj = visits_long.groupby('subj')['visit'].nunique()
display(n_visit_per_subj.value_counts())

In [None]:
# Percent of missing values, for the 15 columns with the most missing data.
# The fraction is converted to percent *before* rounding: multiplying an already
# rounded fraction by 100 re-introduces floating-point noise in the displayed
# values (e.g. 0.7000000000000001 instead of 0.7).
pct_missing = 100 * df.isna().mean()
pct_missing.sort_values(ascending=False).head(15).round(1)

***

## Preprocessing

In [None]:
# Distribution of fasting glucose values per diabetes status
# NOTE(review): this plot is currently disabled (every line is commented out).
# If it is re-enabled, the y-axis label should describe a density, since the call
# uses stat="density"; "Diabetes status" is the hue variable, not the y-axis.
# sns.histplot(df, x="fasting_glucose", bins=15, hue="diabetes", stat="density", alpha=0.8, common_norm=False, legend=True);
# plt.xlabel("Fasting glucose (mg/dl)");
# plt.ylabel("Diabetes status");

In [None]:
# Mask (set to NaN) fasting glucose values outside the 60-250 range.
# `between` is False for NaN, so the printed count also includes values that were
# already missing.
glucose_in_range = df['fasting_glucose'].between(60, 250)
print((~glucose_in_range).sum())
# Assign the result back to the frame. Calling `.where(..., inplace=True)` on the
# selected column is a chained in-place operation: it raises a warning in recent
# pandas and leaves `df` unchanged once copy-on-write is enabled.
df['fasting_glucose'] = df['fasting_glucose'].where(glucose_in_range)

In [None]:
# Number of rows in each diabetes category (categories based on the ADA 2003 criteria)
df['diabetes'].value_counts()

### Data transformation

In [None]:
# Square-root transformation of fasting glucose.
# The column is overwritten, so running this cell again transforms it a second time.
df['fasting_glucose'] = df['fasting_glucose'].pipe(np.sqrt)

In [None]:
# Log-transform HRV metrics, except heart rate
cols_hrv = df.filter(like="hrv_").columns.tolist()
print(cols_hrv)
# Natural log, applied to these four metrics only; each column is overwritten
for hrv_col in ('hrv_rmssd', 'hrv_sdnn', 'hrv_hf', 'hrv_hfn'):
    df[hrv_col] = np.log(df[hrv_col])

#### Outlier removal

In [None]:
# Flag outliers in the coupling metrics, based on `ndPAC_thr_supzero` or `ndPAC_prop_supzero`.
# - nan_policy="omit" computes the mean and SD on the non-missing values only. With the
#   default ("propagate"), a single NaN turns every z-score into NaN, every comparison
#   below becomes False and no outlier is detected.
# - np.abs accepts both a pandas Series and a NumPy array, so the cell does not depend
#   on the return type of `zscore` (an ndarray has no `.abs()` method).
prop_zscore = sp_stats.zscore(df["sw_ndpac_prop_supzero"], nan_policy="omit")
thr_zscore = sp_stats.zscore(df["sw_ndpac_thr_supzero"], nan_policy="omit")
sns.displot(prop_zscore, rug=True);
sns.displot(thr_zscore, rug=True);
# A row is an outlier if either metric is more than 4 SD away from its mean
outliers = np.logical_or(np.abs(prop_zscore) > 4, np.abs(thr_zscore) > 4)
print(outliers.sum(), 'values with abs(z) > 4')

In [None]:
# Set every coupling metric to NaN on the rows flagged as outliers
coupling_cols_to_mask = (
    "sw_ndpac",
    "sw_ndpac_prop_supzero",
    "sw_ndpac_thr_supzero",
    "sw_pp",
    "sw_pp_thr_supzero",
)
for coupling_col in coupling_cols_to_mask:
    df[coupling_col] = np.where(outliers, np.nan, df[coupling_col])

In [None]:
# Same for HRV: mask values with abs(z) > 4, one column at a time
for c in cols_hrv:
    # nan_policy="omit": z-scores are computed on the non-missing values only. With the
    # default ("propagate"), a column containing any NaN yields all-NaN z-scores and
    # nothing would be masked. np.abs works for both Series and ndarray results.
    mask = np.abs(sp_stats.zscore(df[c], nan_policy="omit")) > 4
    print(c, mask.sum(), "values will be masked")
    df[c] = df[c].mask(mask)

### Distribution

In [None]:
# Histogram of coupling strength (0.01-wide bins starting at 0.25), saved as PNG
fig, ax = plt.subplots(figsize=(5, 5))
strength_bins = np.arange(0.25, 0.45, 0.01)
sns.histplot(df["sw_ndpac_thr_supzero"], bins=strength_bins, fill=True, lw=2, color="k", ax=ax)
ax.set_xlim(0.25, None)
ax.set_yticks([0, 50, 100, 150, 200])
ax.set_xlabel("SO—sigma coupling strength")
ax.set_ylabel("Participants")
fig.savefig("../output/plots/coupling_hist_mesa.png", bbox_inches="tight", dpi=300)

In [None]:
# Histogram of the proportion of SO with significant coupling (0.02-wide bins
# starting at 0.7), saved as PNG. The x-limits are left at their defaults.
fig, ax = plt.subplots(figsize=(5, 5))
prop_bins = np.arange(0.7, 1.02, 0.02)
sns.histplot(df["sw_ndpac_prop_supzero"], bins=prop_bins, fill=True, lw=2, color="k", ax=ax)
ax.set_yticks([0, 50, 100, 150, 200])
ax.set_xlabel("Prop. of SO with significant coupling")
ax.set_ylabel("Participants")
fig.savefig("../output/plots/coupling_prop_hist_mesa.png", bbox_inches="tight", dpi=300)

In [None]:
# Circular mean plot of the preferred phase, saved as PNG.
# `color` (last color of the "Spectral_r" palette) is also used by the regression
# plots further down in the notebook.
color = sns.color_palette("Spectral_r")[-1]
marker_style = dict(mec=color, alpha=0.4, mfc='none', ms=7)
arrow_style = dict(width=0.02, head_width=0.15, head_length=0.1, fc='k', ec='k')
pg.plot_circmean(
    df['sw_pp_thr_supzero'],
    figsize=(2.5, 2.5),
    kwargs_markers=marker_style,
    kwargs_arrow=arrow_style,
);
plt.savefig("../output/plots/circ_hist_mesa.png", bbox_inches="tight", dpi=300)

In [None]:
# Circular-linear correlation between preferred phase and fasting glucose,
# followed by a second-order polynomial fit of the same two variables
preferred_phase = df['sw_pp_thr_supzero']
glucose = df['fasting_glucose']
display(pg.circ_corrcl(preferred_phase, glucose))
sns.lmplot(data=df, x='sw_pp_thr_supzero', y='fasting_glucose', order=2, height=4.5);

In [None]:
# Distribution of health variables: one histogram per column on a 4 x 6 grid
# with a shared y-axis
health_values = df[cols_health]
health_values.hist(bins=15, layout=(4, 6), figsize=(12, 7), grid=False, sharey=True)
plt.tight_layout()

In [None]:
# Distribution of HRV NREM variables: one histogram per column whose name
# contains "hrv", on a 4 x 4 grid with a shared y-axis
hrv_values = df.filter(like="hrv")
hrv_values.hist(bins=15, layout=(4, 4), figsize=(9, 7), grid=False, sharey=True)
plt.tight_layout()

In [None]:
# Distribution of coupling variables: one histogram per column on a 3 x 5 grid
# with a shared y-axis
coupling_values = df[cols_coupling]
coupling_values.hist(bins=15, layout=(3, 5), figsize=(9, 6), grid=False, sharey=True)
plt.tight_layout()

### Correlation coupling x health

In [None]:
# Spearman correlation of the coupling proportion with every health variable.
# Only pairs with an uncorrected p-value < 0.05 are shown, sorted by p-value.
corr_prop_health = df.pairwise_corr(
    columns=[['sw_ndpac_prop_supzero'], df_health.columns.tolist()],
    method="spearman",
)
corr_prop_health = corr_prop_health[["X", "Y", "n", "r", "p-unc"]].set_index(["X", "Y"])
corr_prop_health.sort_values('p-unc').query("`p-unc` < 0.05").round(3)

In [None]:
# Spearman correlation matrix of all health and coupling variables, drawn as a
# heatmap on a fixed [-1, 1] color scale and saved as PNG
corr_matrix = df[cols_health + cols_coupling].corr(method="spearman")
colorbar_style = {
    "shrink": 0.5,
    "label": "Correlation coefficient",
    "ticks": np.arange(-1, 1.25, 0.25),
}

plt.figure(figsize=(12, 12))
sns.heatmap(
    corr_matrix,
    square=True,
    vmin=-1,
    vmax=1,
    linewidths=1,
    cmap=sns.color_palette("Spectral_r", n_colors=8),
    annot=False,
    fmt=".1f",
    cbar_kws=colorbar_style,
)

plt.savefig("../output/plots/corr_matrix_all_mesa.png", dpi=300, bbox_inches="tight")

### Correlation coupling x bandpower

In [None]:
# Coupling proportion: Spearman correlation with every spectral column.
# Only pairs with an uncorrected p-value < 0.05 are shown, sorted by p-value.
corr_prop_spectral = df.pairwise_corr(
    columns=[['sw_ndpac_prop_supzero'], df_spectral.columns.tolist()],
    method="spearman",
)
corr_prop_spectral = corr_prop_spectral[["X", "Y", "n", "r", "p-unc"]].set_index(["X", "Y"])
corr_prop_spectral.sort_values('p-unc').query("`p-unc` < 0.05").round(3)

In [None]:
# Same for strength: Spearman correlation of coupling strength with every spectral column.
# Only pairs with an uncorrected p-value < 0.05 are shown, sorted by p-value.
corr_thr_spectral = df.pairwise_corr(
    columns=[['sw_ndpac_thr_supzero'], df_spectral.columns.tolist()],
    method="spearman",
)
corr_thr_spectral = corr_thr_spectral[["X", "Y", "n", "r", "p-unc"]].set_index(["X", "Y"])
corr_thr_spectral.sort_values('p-unc').query("`p-unc` < 0.05").round(3)

*******

## Glucose ~ SO-spi coupling

In [None]:
def format_pval(x):
    """Format a p-value as a short label for plot annotations.

    Parameters
    ----------
    x : float
        The p-value.

    Returns
    -------
    str
        "p<0.001" when ``x`` is below 0.001, otherwise "p=" followed by the
        value printed with three decimals (e.g. "p=0.042").
    """
    return "p<0.001" if x < 0.001 else f"p={x:.3f}"

### Fasting glucose

In [None]:
# Fasting glucose: Pearson correlation with every other column, with age as covariate.
# `corr` is indexed by the second variable ("Y") and sorted by uncorrected p-value;
# the plotting cells below read r and p from it.
corr = df.pairwise_corr(columns='fasting_glucose', method="pearson", covar='age')
corr = corr[["Y", "n", "r", "p-unc"]].set_index(["Y"]).sort_values('p-unc')

# Show only the pairs with an uncorrected p-value < 0.05
corr.query("`p-unc` < 0.05").round(3)

In [None]:
# Regression plot of coupling proportion against fasting glucose; age is passed as
# the partial (confounding) variable for both axes.
plt.figure(figsize=(5, 5))
sns.regplot(
    x=df["fasting_glucose"], y=df["sw_ndpac_prop_supzero"],
    x_partial=df['age'], y_partial=df['age'],
    scatter_kws=dict(fc=color, ec="k", lw=1.25, alpha=0.5),
    line_kws=dict(color="k"))
# Raw string: `\s` is not a valid Python escape sequence, so a regular string literal
# raises an invalid-escape warning. The text passed to matplotlib is unchanged.
plt.xlabel(r"Fasting glucose $(\sqrt{mg/dl})$")
plt.ylabel("Prop. of SO with significant coupling")
plt.title("MESA")
plt.ylim(0.60, 1)
plt.yticks([0.6, 0.7, 0.8, 0.9, 1])

# Annotate r and p-values, read from the age-adjusted correlation table `corr`
r, pval = corr.loc["sw_ndpac_prop_supzero", ["r", "p-unc"]]
plt.annotate(f'r = {r:.2f}\n{format_pval(pval)}', xy=(0.95, 0.85),
             xycoords="axes fraction", ha="right", fontweight="bold")

plt.tight_layout()

plt.savefig("../output/plots/corr_fasting_prop_mesa.png", dpi=300, bbox_inches="tight")

In [None]:
# Regression plot of coupling strength against fasting glucose; age is passed as
# the partial (confounding) variable for both axes.
plt.figure(figsize=(5, 5))
sns.regplot(
    x=df["fasting_glucose"], y=df["sw_ndpac_thr_supzero"],
    x_partial=df['age'], y_partial=df['age'],
    scatter_kws=dict(fc=color, ec="k", lw=1.25, alpha=0.5),
    line_kws=dict(color="k"))
# Raw string: `\s` is not a valid Python escape sequence, so a regular string literal
# raises an invalid-escape warning. The text passed to matplotlib is unchanged.
plt.xlabel(r"Fasting glucose $(\sqrt{mg/dl})$")
plt.ylabel("SO—sigma coupling strength")
plt.ylim(0.24, 0.40)
plt.yticks([0.24, 0.28, 0.32, 0.36, 0.40]);

# Annotate r and p-values, read from the age-adjusted correlation table `corr`
r, pval = corr.loc["sw_ndpac_thr_supzero", ["r", "p-unc"]]
plt.annotate(f'r = {r:.2f}\n{format_pval(pval)}', xy=(0.95, 0.85),
             xycoords="axes fraction", ha="right", fontweight="bold")

plt.tight_layout()

plt.savefig("../output/plots/corr_fasting_ndpac_mesa.png", dpi=300, bbox_inches="tight")

*******

## Export

In [None]:
# Export the merged, preprocessed table (including its row index) to CSV
export_csv = "../output/csv/df_concat_R_1sec_MESA.csv"
df.to_csv(export_csv)