# NSRR Sleep Statistics

In [None]:
import glob
import os
import warnings

import numpy as np
import pandas as pd
import yasa
from tqdm.notebook import tqdm

# NOTE(review): get_all_edfs is called in the main loop but was never imported;
# assumed to live next to get_sub_visit_hyp in helper_functions -- confirm.
from helper_functions import get_all_edfs, get_sub_visit_hyp

# Define paths
# Root folder that is handed to get_all_edfs() for every dataset below.
# NOTE(review): looks like a machine-specific mount point -- adjust locally.
root_dir = "/Volumes/NSRR/"

In [None]:
# Accumulates one single-row DataFrame of sleep statistics per recording.
df = []

for dataset in ['mesa', 'cfs']:

    # Extract all EDF files
    all_edfs, hypno_dir = get_all_edfs(dataset, root_dir)

    for eeg_file in tqdm(all_edfs):

        # Extract subject, visit and hypno_file from fname
        sub, visit, hypno_file = get_sub_visit_hyp(eeg_file, dataset, hypno_dir)

        # Both the recording and its hypnogram must exist on disk; warn about
        # the first missing one (EDF checked first) and skip the recording.
        missing = [f for f in (eeg_file, hypno_file) if not os.path.isfile(f)]
        if missing:
            warnings.warn(f"File not found {missing[0]}")
            continue

        # LOAD HYPNOGRAM
        # Keep the sampling frequency returned by the loader instead of
        # discarding it and hard-coding 1/30 Hz further down.
        hypno, sf_hyp = yasa.load_profusion_hypno(hypno_file)
        if not (hypno > 0).any():
            # No epoch is scored above 0, so there is nothing to summarise.
            # Warn (like the missing-file cases) so the drop is traceable.
            warnings.warn(
                f"No epochs with stage > 0 in hypnogram, skipping {hypno_file}"
            )
            continue

        # Calculate sleep statistics
        sstats = yasa.sleep_statistics(hypno, sf_hyp)

        # One row per recording, indexed by subject.
        sstats = pd.DataFrame(sstats, index=[sub])
        sstats.index.name = "subj"
        # Prefix first so that only the statistics columns get "sstats_";
        # the identifier columns inserted afterwards stay unprefixed.
        sstats = sstats.add_prefix("sstats_")
        sstats.insert(0, "dataset", dataset)
        sstats.insert(1, "visit", visit)
        # Replace the literal "%" in column names with "p" (no regex involved).
        sstats.columns = sstats.columns.str.replace("%", "p", regex=False)

        # Append to main dataframe
        df.append(sstats)

In [None]:
# Stack the per-recording rows into one table keyed by
# (dataset, subj, visit), then display it rounded to 3 decimals.
df = (
    pd.concat(df)
    .reset_index()
    .set_index(keys=["dataset", "subj", "visit"])
)
df.round(decimals=3)

In [None]:
# Number of missing (NaN) entries in each column
df.isnull().sum(axis=0)

In [None]:
# Mean of every column per dataset (first index level), rounded to one
# decimal and transposed so that each dataset becomes a column.
df.groupby(level="dataset").mean().round(decimals=1).transpose()

In [None]:
# Export the table. to_csv does not create missing parent folders, so make
# sure the destination exists rather than failing after the whole run.
out_csv = "output/csv/df_sleepstats.csv"
os.makedirs(os.path.dirname(out_csv), exist_ok=True)
df.to_csv(out_csv)