In [1]:
%matplotlib widget
import matplotlib.pyplot as plt
import pandas as pd
from pandas import Series, MultiIndex
import os 
import copy
import numpy as np
from tqdm import tqdm
os.chdir('/home/kkotzen/research/PPG_sleepstaging_orion2 /')
from src.parsing.MESAParser import MESAParser

In [2]:
dl = MESAParser()
patients = dl.get_patient_IDs_from_file()

In [3]:
WAKE, LIGHT, DEEP, REM = 0, 1, 2, 3
SLEEP_STRING_ENCODING = {0: 'wake', 1: 'n1', 2: 'n2', 3: 'n3', 4: 'rem'}
SLEEP_ENCODING = {'wake': WAKE, 'n1': LIGHT, 'n2': LIGHT, 'n3': DEEP, 'rem': REM}
SLEEP_DECODING = {WAKE: 'Wake', LIGHT: 'Light', DEEP: 'Deep', REM: 'REM'}
N_CLASSES = len(list(SLEEP_DECODING.keys()))

def sleep_transitions(sleep):
    "Counts from-to sleep stages "
    total_transitions = 0
    count = x = {i:{j:0 for j in range(N_CLASSES) if not i==j} for i in range(N_CLASSES)}
    for i in range(sleep.shape[0] - 1):
        if not sleep[i]==sleep[i+1]:
            count[sleep[i]][sleep[i+1]]+=1 
            total_transitions+=1
    count_flat = {f"{SLEEP_DECODING[i]}-{SLEEP_DECODING[j]}":count[i][j] for i in count.keys() for j in count[i].keys()}
    count_flat['Total-Transitions']: total_transitions
    return count_flat

def longest(sleep, stage):
    """Finds the longest number of uninterupted epochs for a given sleep stage."""
    this_length, max_length = 0, 0
    
    for i in range(len(sleep)-1):
        if sleep[i] == sleep[i+1] and sleep[i] == stage:
            this_length += 1
        else:
            max_length = max(max_length, this_length)
            if sleep[i]==stage or sleep[i+1] == stage:
                this_length = 1
            else:
                this_length = 0
    max_length = max(max_length, this_length)
    return max_length

def sleep_metrics(sleep):
    
    invalid = np.sum(sleep>9)
    sleep[sleep>9] ==1
    total = sleep.shape[0]
    wake = np.sum(sleep==WAKE)/total
    light = np.sum(sleep==LIGHT)/total
    deep = np.sum(sleep==DEEP)/total
    rem = np.sum(sleep==REM)/total
    
    time_in_stage_dict = {'Wake': wake, "Light": light, 'Deep': deep, "REM": rem}
    
    longest_time_in_stage_dict = {'Longest-Wake': longest(sleep, WAKE), "Longest-Light": longest(sleep, LIGHT), 
                             'Longest-Deep': longest(sleep, DEEP), "Longest-REM": longest(sleep, REM)}
    
    counts_dict = sleep_transitions(sleep)
    
    return {**time_in_stage_dict, **counts_dict, **longest_time_in_stage_dict}

    
def get_sleep(patient):
    sleep = dl.load_sleep(patient)
    sleep[sleep > 5] = 0
    sleep = [SLEEP_STRING_ENCODING[s] for s in sleep]
    sleep = np.array([SLEEP_ENCODING[s] for s in sleep])
    return sleep

In [9]:
d = np.array([0,0,0,0,0,1,0,1,0,1,2,3,3,3,3,0,3,0,3,0])
sleep_metrics(d)

{'Wake': 0.5,
 'Light': 0.15,
 'Deep': 0.05,
 'REM': 0.3,
 'Wake-Light': 3,
 'Wake-Deep': 0,
 'Wake-REM': 2,
 'Light-Wake': 2,
 'Light-Deep': 1,
 'Light-REM': 0,
 'Deep-Wake': 0,
 'Deep-Light': 0,
 'Deep-REM': 1,
 'REM-Wake': 3,
 'REM-Light': 0,
 'REM-Deep': 0,
 'Longest-Wake': 4,
 'Longest-Light': 1,
 'Longest-Deep': 1,
 'Longest-REM': 4}

In [4]:
sleep_metrics_df = pd.DataFrame()
for i, patient in tqdm(enumerate(patients)):
    sleep = get_sleep(patient)
    patient_sleep_metrics = sleep_metrics(sleep)
    sleep_metrics_df = sleep_metrics_df.append({**{'PatientID': patient}, **patient_sleep_metrics}, ignore_index=True)
sleep_metrics_df = sleep_metrics_df.set_index('PatientID')
sleep_metrics_df

1969it [00:34, 57.39it/s]


Unnamed: 0_level_0,Wake,Light,Deep,REM,Wake-Light,Wake-Deep,Wake-REM,Light-Wake,Light-Deep,Light-REM,Deep-Wake,Deep-Light,Deep-REM,REM-Wake,REM-Light,REM-Deep,Longest-Wake,Longest-Light,Longest-Deep,Longest-REM
PatientID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0001,0.522585,0.410007,0.013204,0.054204,50.0,0.0,10.0,41.0,10.0,9.0,1.0,9.0,0.0,18.0,1.0,0.0,172.0,42.0,5.0,24.0
0002,0.432904,0.311600,0.118271,0.137225,26.0,0.0,2.0,19.0,31.0,3.0,3.0,26.0,2.0,6.0,1.0,0.0,306.0,51.0,37.0,68.0
0006,0.337349,0.482854,0.069509,0.110287,39.0,0.0,18.0,36.0,11.0,3.0,1.0,10.0,0.0,20.0,1.0,0.0,115.0,64.0,32.0,27.0
0012,0.618080,0.323055,0.011212,0.047652,42.0,0.0,1.0,36.0,16.0,7.0,0.0,16.0,0.0,7.0,1.0,0.0,303.0,51.0,1.0,42.0
0014,0.497915,0.240024,0.156045,0.106015,19.0,0.0,1.0,11.0,32.0,7.0,4.0,27.0,1.0,5.0,4.0,0.0,428.0,41.0,61.0,56.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6804,0.203503,0.632193,0.011676,0.152627,31.0,0.0,0.0,28.0,14.0,10.0,0.0,14.0,0.0,3.0,7.0,0.0,148.0,56.0,1.0,62.0
6807,0.336113,0.550459,0.028357,0.085071,63.0,0.0,1.0,62.0,17.0,4.0,0.0,17.0,0.0,3.0,2.0,0.0,196.0,39.0,6.0,50.0
6810,0.486732,0.407127,0.000000,0.106141,32.0,0.0,19.0,25.0,0.0,7.0,0.0,0.0,0.0,26.0,0.0,0.0,279.0,70.0,0.0,14.0
6811,0.387500,0.518333,0.005000,0.089167,74.0,0.0,2.0,69.0,5.0,5.0,0.0,5.0,0.0,7.0,0.0,0.0,124.0,41.0,2.0,25.0


In [5]:
sleep_metrics_df.describe()

Unnamed: 0,Wake,Light,Deep,REM,Wake-Light,Wake-Deep,Wake-REM,Light-Wake,Light-Deep,Light-REM,Deep-Wake,Deep-Light,Deep-REM,REM-Wake,REM-Light,REM-Deep,Longest-Wake,Longest-Light,Longest-Deep,Longest-REM
count,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0,1969.0
mean,0.423404,0.412793,0.058213,0.10559,37.628746,0.027425,3.746064,32.405282,18.464195,7.859319,1.083291,17.311833,0.136618,7.906552,3.789233,0.026409,275.206196,66.543423,21.395124,38.471305
std,0.132487,0.105801,0.054822,0.049124,21.915229,0.197184,4.335817,21.267444,14.135662,5.494825,1.511867,13.409978,0.845315,6.253812,3.948187,0.779119,170.591663,29.523128,22.686295,21.310693
min,0.050046,0.050876,0.0,0.0,4.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,6.0,0.0,0.0
25%,0.330553,0.347464,0.010842,0.070892,23.0,0.0,1.0,19.0,7.0,4.0,0.0,7.0,0.0,4.0,1.0,0.0,177.0,47.0,3.0,23.0
50%,0.415566,0.413678,0.045872,0.104727,33.0,0.0,2.0,27.0,17.0,7.0,1.0,16.0,0.0,6.0,3.0,0.0,245.0,61.0,14.0,36.0
75%,0.503244,0.483333,0.091084,0.13829,45.0,0.0,5.0,39.0,27.0,10.0,2.0,25.0,0.0,11.0,5.0,0.0,345.0,79.0,34.0,51.0
max,0.948575,0.769444,0.332748,0.322421,202.0,3.0,35.0,202.0,131.0,46.0,10.0,124.0,29.0,45.0,34.0,30.0,2968.0,354.0,153.0,146.0


In [None]:
high_limit =stages.quantile(0.99)
low_limit = stages.quantile(0.01)

interesting_stages = copy.deepcopy(stages)
for index, row in interesting_stages.iterrows():
    if np.sum((row > high_limit).values) > 0 or np.sum((row < low_limit).values) > 0:
        interesting_stages.at[index, 'Interesting'] = 'Yes'

In [None]:
interesting_stages[interesting_stages.Interesting =='Yes']

In [None]:
pd.set_option('display.max_columns', 100)


In [None]:
patient = '0070'
z = np.array(['-' for c in stages.columns])
z[(stages.loc[patient] > high_limit).values] = '>'
z[(stages.loc[patient] < low_limit).values] = '<'
df = pd.DataFrame()
df = df.append({k:v for k,v in zip(stages.columns,z)}, ignore_index=True)
display(df)
plt.close('all')
plt.plot(get_sleep(patient))