In [1]:
import numpy as np
import pandas as pd

from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from biopsykit.utils.dataframe_handling import multi_xs

In [3]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
from fau_colors import register_cmaps

# Register the fau_colors colormaps/palettes.
# NOTE(review): presumably this is what makes the "faculties_light" palette
# name used below resolvable — confirm against fau_colors.
register_cmaps()

# Global seaborn theme applied to every figure created in this notebook.
sns.set_theme(context="talk", style="white", palette="faculties_light", font_scale=1.2)

# Interactive matplotlib backend (provided by the ipympl package).
%matplotlib widget

In [5]:
from sleep_analysis.datasets.d04_main_dataset_control import D04MainStudy

In [6]:
# Instantiate the study dataset with its default arguments; the bare name on
# the last line renders it as the cell output.
dataset = D04MainStudy()
dataset

Unnamed: 0,subj_id
0,2
1,3
2,4
3,5
4,6
5,7
6,10
7,11
8,12
9,14


In [7]:
# Spreadsheet with per-participant demographics and the columns read from it.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
demographics_file = "/Users/danielkrauss/code/Empkins/Data/empkins_contactless_sleep_lab/demographics.xlsx"
demographics_columns = ["VP", "Age", "Weight", "Height", "Gender", "TSD", "Exclude"]

# index_col=0 makes the first selected column ("VP") the row index.
demographcs = pd.read_excel(
    demographics_file,
    index_col=0,
    usecols=demographics_columns,
)
demographcs

Unnamed: 0_level_0,Age,Weight,Height,Gender,TSD,Exclude
VP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,28,57.0,172,0,-,True
2,27,97.0,197,1,407.5,False
3,32,65.0,178,1,474.5,False
4,26,64.0,169,0,312.5,False
5,25,57.0,164,0,178.5,False
6,27,75.0,178,1,408.5,False
7,26,82.0,193,1,415,False
8,55,99.0,185,1,-,True
9,23,78.0,177,1,-,True
10,21,63.0,165,0,438,False


In [8]:
def calculate_total_sleep_time(df: pd.DataFrame) -> float:
    """
    Calculate the sleep duration in minutes from a dataframe containing sleep phases.

    The returned value is the time elapsed between sleep onset (earliest row
    whose 'Sleep Phase' is not 'Wach') and sleep offset (latest such row).
    Only row timestamps are compared, so:

    * wake rows lying between onset and offset are NOT subtracted, and
    * the duration of the final sleep row itself is not added.

    Rows with a missing phase label compare unequal to 'Wach' and therefore
    count as sleep.

    Parameters:
        df (pd.DataFrame): Dataframe with an index convertible by
            ``pd.to_datetime`` and a 'Sleep Phase' column. Rows do not need to
            be in chronological order. The dataframe is not modified.

    Returns:
        float: Minutes between sleep onset and sleep offset, or 0.0 if the
        dataframe contains no non-wake row.
    """
    # Convert only the index; copying the whole dataframe is unnecessary.
    timestamps = pd.to_datetime(df.index)

    # Keep the timestamps of every row that is not scored as wake.
    is_asleep = (df['Sleep Phase'] != 'Wach').to_numpy()
    sleep_timestamps = timestamps[is_asleep]

    if len(sleep_timestamps) == 0:
        # Return a float to honour the annotated return type.
        return 0.0

    # min/max instead of first/last so an unsorted index cannot yield a
    # negative or otherwise wrong duration.
    sleep_onset = sleep_timestamps.min()
    sleep_offset = sleep_timestamps.max()

    return (sleep_offset - sleep_onset).total_seconds() / 60


In [9]:
#for participant in dataset:
    #print("VP",participant.index["subj_id"][0], "total sleep duration:" ,calculate_total_sleep_time(participant.psg_labels))

In [10]:
import pandas as pd

def _summarize_demographics(df: pd.DataFrame) -> dict:
    """Return TSD/age mean and standard deviation plus gender counts for the rows of ``df``."""
    # TSD can contain non-numeric placeholders such as "-"; coerce them to NaN
    # so they are skipped by mean()/std() instead of raising.
    tsd = pd.to_numeric(df["TSD"], errors="coerce")
    return {
        "TSD_mean": tsd.mean(),
        "TSD_std": tsd.std(),
        "Age_mean": df["Age"].mean(),
        "Age_std": df["Age"].std(),
        # Gender coding used here: 1 is counted as male, 0 as female.
        "Male_count": (df["Gender"] == 1).sum(),
        "Female_count": (df["Gender"] == 0).sum(),
    }


def filter_and_analyze_sleep(df: pd.DataFrame, subject_list: list) -> dict:
    """
    Summarize TSD, age, and gender distribution for all included participants
    and for a chosen subset of them.

    Participants whose 'Exclude' value is not False are dropped first. The
    subset consists of the remaining participants whose index value appears in
    ``subject_list``; IDs in ``subject_list`` that are excluded or absent from
    ``df`` are silently ignored.

    Parameters:
        df (pd.DataFrame): Demographics indexed by subject number, with the
            columns 'Exclude', 'TSD', 'Age' and 'Gender'. Not modified.
        subject_list (list): Subject numbers that make up the subset.

    Returns:
        dict: ``{"overall": stats, "train_test": stats}`` where ``overall``
        covers every non-excluded participant and ``train_test`` covers the
        subset. Each ``stats`` dict has the keys 'TSD_mean', 'TSD_std',
        'Age_mean', 'Age_std', 'Male_count' and 'Female_count'. Non-numeric
        TSD entries are treated as missing.
    """
    # Element-wise comparison: keeps only rows explicitly marked as not excluded.
    included = df[df['Exclude'] == False]

    # Restrict to the requested subjects.
    subset = included.loc[included.index.isin(subject_list)]

    return {
        "overall": _summarize_demographics(included),
        "train_test": _summarize_demographics(subset),
    }

In [11]:
# Demographic summary for the 8-subject split.
# NOTE(review): presumably the held-out test subjects — confirm.
filter_and_analyze_sleep(
    df=demographcs,
    subject_list=[44, 19, 34, 38, 22, 40, 28, 18],
)

{'overall': {'TSD_mean': 428.55555555555554,
  'TSD_std': 78.27969794979482,
  'Age_mean': 38.666666666666664,
  'Age_std': 16.6870351727664,
  'Male_count': 14,
  'Female_count': 22},
 'train_test': {'TSD_mean': 430.3125,
  'TSD_std': 73.7742393580384,
  'Age_mean': 44.375,
  'Age_std': 18.63895689907872,
  'Male_count': 2,
  'Female_count': 6}}

In [12]:
# Demographic summary for the 28-subject split.
# NOTE(review): presumably the training subjects — confirm.
filter_and_analyze_sleep(
    df=demographcs,
    subject_list=[
        31, 33, 3, 4, 5, 24, 2,
        6, 14, 41, 10, 43, 20, 27,
        35, 36, 30, 21, 16, 42, 15,
        37, 7, 11, 26, 25, 12, 32,
    ],
)

{'overall': {'TSD_mean': 428.55555555555554,
  'TSD_std': 78.27969794979482,
  'Age_mean': 38.666666666666664,
  'Age_std': 16.6870351727664,
  'Male_count': 14,
  'Female_count': 22},
 'train_test': {'TSD_mean': 428.05357142857144,
  'TSD_std': 80.8152143823264,
  'Age_mean': 37.035714285714285,
  'Age_std': 16.07731814483245,
  'Male_count': 12,
  'Female_count': 16}}