In [None]:
import numpy as np
import pandas as pd
import pingouin as pg
import seaborn as sns
import scipy.stats as sp_stats
import matplotlib.pyplot as plt
# Global seaborn theme: "ticks" style with fonts scaled by 1.25.
sns.set(font_scale=1.25, style='ticks')

In [None]:
def mean_std(x, show_range=True):
    """Format the mean and standard deviation of ``x`` as a string.

    Parameters
    ----------
    x : object exposing ``mean()``, ``std()``, ``min()`` and ``max()``
        Values to summarise (this notebook passes pandas Series).
    show_range : bool, default True
        If true, append the minimum and maximum, rounded to whole numbers.

    Returns
    -------
    str
        ``"<mean> ± <std>"`` with two decimals, followed by
        ``" (range = <min>-<max>)"`` when ``show_range`` is true.
        Nothing is printed; the caller decides how to display the string.
    """
    summary = f"{x.mean():.2f} ± {x.std():.2f}"
    if not show_range:
        return summary
    return f"{summary} (range = {x.min():.0f}-{x.max():.0f})"

## CFS

In [None]:
# Read the CFS table and multiply `sw_ndpac_prop_supzero` by 100
# (presumably turning a 0-1 proportion into a percentage — TODO confirm).
df = pd.read_csv('../output/csv/df_concat_R_1sec_CFS.csv').assign(
    sw_ndpac_prop_supzero=lambda tbl: tbl["sw_ndpac_prop_supzero"] * 100
)
# Quick sanity check: (rows, columns) followed by the first rows.
print(df.shape)
df.head()

In [None]:
# Columns summarised with `mean_std`, assembled from groups of related names.
cols_health = (
    ['age', 'bmi']
    # `ahi*` columns
    + ['ahi', 'ahi_nrem', 'ahi_rem']
    # `sp_*` / `sw_*` columns
    + ['sp_density', 'sw_density', 'sw_ndpac_thr_supzero', 'sw_ndpac_prop_supzero']
    # `sstats_*` columns
    + ['sstats_SPT', 'sstats_TST', 'sstats_SOL', 'sstats_pN1', 'sstats_pN2',
       'sstats_pN3', 'sstats_pREM', 'sstats_SME']
    # `hrv_*` columns
    + ['hrv_rmssd']
)

# Author's annotation: sqrt transform (the transform is not applied in the cells shown here).
cols_glucose = [
    'fasting_glucose',
    'ogtt',
    'insulin_fast',
    'insulin_ogtt',
]

# Author's annotation: log transform (the transform is not applied in the cells shown here).
cols_homa = [
    'homa',
    'homab',
]

### All participants

In [None]:
# Number of distinct values in the `subj` column (NaN is ignored by default).
df["subj"].nunique()

In [None]:
# Number of distinct values in the `family` column (NaN is ignored by default).
df["family"].nunique()

In [None]:
# Row count per value of `male`; missing values are not counted (value_counts default).
df['male'].value_counts()

In [None]:
# Row count per value of `race`; missing values are not counted (value_counts default).
df['race'].value_counts()

In [None]:
# Row count per value of `diabetes_cfs`; missing values are not counted (value_counts default).
df['diabetes_cfs'].value_counts()

In [None]:
# Row count per value of `hypertension`; missing values are not counted (value_counts default).
df['hypertension'].value_counts()

In [None]:
# Row count per value of `smoking_cfs`; dropna=False keeps missing values as their own row.
df["smoking_cfs"].value_counts(dropna=False)

In [None]:
# One "mean ± SD (range = min-max)" string per column listed in `cols_health`.
df[cols_health].apply(mean_std)

In [None]:
# Summarise the squared values of the `cols_glucose` columns, one string per column.
# NOTE(review): `cols_glucose` is annotated "sqrt transform" where it is defined, so
# squaring presumably restores the original scale — confirm. To summarise the values
# as stored instead, drop the np.square: df[cols_glucose].apply(mean_std)
np.square(df[cols_glucose]).apply(mean_std)

In [None]:
# One "mean ± SD (range = min-max)" string per column listed in `cols_homa`.
# NOTE(review): `cols_homa` is annotated "log transform" where it is defined, and no
# back-transform is applied here (unlike the squared glucose summary), so these
# figures are presumably on the log scale — confirm that is intended.
df[cols_homa].apply(mean_std)

In [None]:
# Circular mean, then circular standard deviation, of `sw_pp`. Values are treated
# as angles on [-pi, pi] (radians), NaN entries are skipped, and each result is
# converted to degrees before printing.
sw_phase = df["sw_pp"]
for circ_stat in (sp_stats.circmean, sp_stats.circstd):
    in_radians = circ_stat(sw_phase, low=-np.pi, high=np.pi, nan_policy="omit")
    print(in_radians * 180 / np.pi)

In [None]:
# Percentage of rows whose AHI exceeds each cut-off; > 5 is the level treated as
# clinically significant here.
# NOTE(review): the denominator is every row of `df`, so a row with missing AHI
# counts as "not above the cut-off" — confirm that is intended.
n_rows = df.shape[0]
for ahi_cutoff in (5, 15, 30):
    pct_above = 100 * (df['ahi'] > ahi_cutoff).sum() / n_rows
    print(f"{pct_above:.1f}% of participants have an AHI > {ahi_cutoff}")

***********

## MESA

In [None]:
# Read the MESA table and multiply `sw_ndpac_prop_supzero` by 100
# (presumably turning a 0-1 proportion into a percentage — TODO confirm).
# NOTE: this rebinds `df`; every cell below works on the MESA table, and the CFS
# cells above need their own load cell re-run before being executed again.
df = pd.read_csv('../output/csv/df_concat_R_1sec_MESA.csv').assign(
    sw_ndpac_prop_supzero=lambda tbl: tbl["sw_ndpac_prop_supzero"] * 100
)
# Quick sanity check: (rows, columns) followed by the first rows.
print(df.shape)
df.head()

In [None]:
# Columns used for the MESA summary, assembled from groups of related names.
# This rebinds `cols_health` (the CFS section defines its own list under the same name).
cols_health = (
    ['age', 'race', 'male', 'diabetes', 'hypertension', 'bmi',
     'diabetes_medication', 'hba1c', 'income', 'smoking']
    + ['ahi']
    # `sp_*` / `sw_*` columns
    + ['sp_density', 'sw_density', 'sw_ndpac_thr_supzero', 'sw_ndpac_prop_supzero']
    # `sstats_*` columns
    + ['sstats_SPT', 'sstats_TST', 'sstats_SOL', 'sstats_pN1', 'sstats_pN2',
       'sstats_pN3', 'sstats_pREM', 'sstats_SME']
    # `hrv_*` columns
    + ['hrv_rmssd', 'hrv_ihr']
    + ['days_exam5_to_psg']
)

In [None]:
# Row count per value of `diabetes`; missing values are not counted (value_counts default).
df['diabetes'].value_counts()

In [None]:
# Row count per value of `race`; missing values are not counted (value_counts default).
df['race'].value_counts()

In [None]:
# Row count per value of `hypertension`; missing values are not counted (value_counts default).
df['hypertension'].value_counts()

In [None]:
# Row count per value of `diabetes_medication`; missing values are not counted (value_counts default).
df['diabetes_medication'].value_counts()

In [None]:
# Row count per value of `smoking`; missing values are not counted (value_counts default).
df['smoking'].value_counts()

In [None]:
# "mean ± SD (range = min-max)" for the numeric columns among `cols_health`;
# non-numeric columns are dropped first because `mean_std` formats values as floats.
# `select_dtypes` is the public pandas API and replaces the private
# `_get_numeric_data()`. "bool" is listed alongside "number" so that the same
# NumPy-typed columns are kept as before.
df[cols_health].select_dtypes(include=["number", "bool"]).apply(mean_std)

In [None]:
# Summarise the squared `fasting_glucose` values as "mean ± SD (range = min-max)".
# `mean_std` is called directly on the Series: `Series.agg(callable)` may first try
# the callable element-wise and only reach the whole-Series call through an
# exception fallback, and that behaviour differs between pandas versions.
# NOTE(review): squaring presumably undoes a sqrt transform applied upstream — confirm.
mean_std(np.square(df["fasting_glucose"]))

In [None]:
# Percentage of rows whose AHI exceeds each cut-off; > 5 is the level treated as
# clinically significant here.
# NOTE(review): the denominator is every row of `df`, so a row with missing AHI
# counts as "not above the cut-off" — confirm that is intended.
n_rows = df.shape[0]
for ahi_cutoff in (5, 15, 30):
    pct_above = 100 * (df['ahi'] > ahi_cutoff).sum() / n_rows
    print(f"{pct_above:.1f}% of participants have an AHI > {ahi_cutoff}")