In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Configure seaborn's plot styling for the whole notebook (font scale factor 1).
sns.set(font_scale=1)

In [None]:
# Lookup table from raw metric keys to the display labels used in tables and
# figures. The keys are expected to match the index of the predictor table
# exactly (a later cell prints the size of the difference in both directions).
feat = {
    'ai_all': "Arousal index",
    'ahi': "AHI",
    'sp_density': "Spindles density",
    'sp_power': "Spindles power",
    'sp_freq': "Spindles frequency",
    'sw_density': "SO density",
    'sw_ptp': "SO amplitude",
    'sw_freq': "SO frequency",
    'sw_ndpac_thr_supzero': "SO-spi coupling strength",
    'sw_ndpac_prop_supzero': "Prop. of coupled SO",
    'sstats_SPT': "SPT",
    'sstats_WASO': "WASO",
    'sstats_TST': "TST",
    'sstats_TIB': "TIB",
    'sstats_N1': "N1",
    'sstats_N2': "N2",
    'sstats_N3': "N3",
    'sstats_REM': "REM",
    'sstats_NREM': "NREM",
    "sstats_SOL": "SOL",
    'sstats_Lat_N1': "N1 latency",
    'sstats_Lat_N2': "N2 latency",
    'sstats_Lat_N3': "N3 latency",
    'sstats_Lat_REM': "REM latency",
    'sstats_pN1': "%N1",
    'sstats_pN2': "%N2",
    'sstats_pN3': "%N3",
    'sstats_pREM': "%REM",
    'sstats_pNREM': "%NREM",
    'sstats_SE': "SE",
    'sstats_SME': "SME",
    'bp_alpha_NREM': "NREM alpha",
    'bp_alpha_REM': "REM alpha",
    'bp_beta_NREM': "NREM beta",
    'bp_beta_REM': "REM beta",
    'bp_fdelta_NREM': "NREM fast delta",
    'bp_fdelta_REM': "REM fast delta",
    'bp_sdelta_NREM': "NREM slow delta",
    'bp_sdelta_REM': "REM slow delta",
    'bp_delta_NREM': "NREM delta (SWA)",
    'bp_delta_REM': "REM delta",
    'bp_sigma_NREM': "NREM sigma",
    'bp_sigma_REM': "REM sigma",
    'bp_theta_NREM': "NREM theta",
    'bp_theta_REM': "REM theta",
    'bp_total_pow_NREM': "NREM power",
    'bp_total_pow_REM': "REM power",
}

In [None]:
# Read the predictor table; the first CSV column becomes the index, named "metric".
# Alternative input file: "../output/csv/rank_sleep_homa_CFS.csv"
df = pd.read_csv("../output/csv/rank_sleep_CFS.csv", index_col=0).rename_axis("metric")
print(len(df), "sleep predictors")
df.head().round(3)

In [None]:
# Show every row in which all columns are missing (an empty frame means none).
all_missing = df.isna().all(axis=1)
df.loc[all_missing]

In [None]:
# Consistency check between the table index and the label mapping.
# Both printed counts should be zero.
known_keys = list(feat)
unlabelled = np.setdiff1d(df.index, known_keys)  # in the table, not in `feat`
unused = np.setdiff1d(known_keys, df.index)      # in `feat`, not in the table
print(unlabelled.size)
print(unused.size)

In [None]:
# Replace the raw metric keys in the index with readable names.
# `rename` leaves any label that is not a key of `feat` untouched, so a metric
# without an entry keeps its raw key instead of becoming NaN, and re-running
# this cell after the index has already been relabelled changes nothing
# (the previous `df.index.map(feat)` turned every label into NaN on a re-run).
df = df.rename(index=feat)
df.head().round(3)

In [None]:
# R^2 values cannot be compared directly when sample sizes differ, because the
# models are then fit on different data. When the table has an "n_valid"
# column, show how many predictors share each sample size.
n_valid = df.get("n_valid")
if n_valid is not None:
    display(n_valid.value_counts())

In [None]:
# Horizontal bar chart of -log10(p) per predictor, with the p = 0.05 threshold
# drawn as a dotted vertical line and labelled next to the last bar.
df["neglog_p"] = -np.log10(df["p"])
p_cutoff = -np.log10(0.05)
reds = sns.color_palette("Reds")

fig, ax = plt.subplots(1, 1, figsize=(3, 10))
sns.barplot(data=df, x="neglog_p", y=df.index, palette=reds, lw=0, alpha=1, ec="k", ax=ax)
ax.axvline(p_cutoff, color="k", ls=":", lw=1.5)
ax.annotate(
    "p = 0.05",
    xy=(p_cutoff + 0.1, df.shape[0] - 1),  # just right of the line, at the last bar
    ha="left",
    color="k",
    rotation=0,
    fontstyle="italic",
)
ax.set_xlabel("Negative log p-value")  # i.e. $-\\log_{10}(p)$
ax.set_ylabel(None)
fig.savefig("../output/plots/neglogpval_CFS.png", dpi=300, bbox_inches="tight")