# Sleepmeter non-24 circadian rhythm disorder single-case analysis

Notebook to analyze the sleep log data of a single case of non-24 circadian rhythm disorder, acquired with Sleepmeter on Android.

By Stephen Karl Larroque (2020)

Licensed under MIT

Still a work in progress. Runs under Python 3

In [None]:
# Forcefully autoreload all python modules
%load_ext autoreload
%autoreload 2

In [None]:
from io import StringIO
import pandas as pd
import datetime
import matplotlib.pyplot as plt

In [None]:
# PARAMETERS - EDIT ME
csvpath = r'sleep_history_2020-05-04.1.csv'  # path to the exported sleep log data (from Sleepmeter menu, select Manage Databases and then send the csv to your email)


## Loading and cleaning sleep log data

In [None]:
def load_csv_multi_tables(filepath):
    """Load a csv containing multiple tables into multiple pandas DataFrames.

    The file is expected to hold several comma-separated tables, separated
    from one another by one or more blank (whitespace-only) lines.

    Parameters
    ----------
    filepath : str or path-like
        Path of the csv file to read.

    Returns
    -------
    list of pandas.DataFrame
        One DataFrame per non-empty table, in the order found in the file.
        Leading, trailing or repeated blank lines do not produce any table.
    """
    # From: https://stackoverflow.com/questions/36904691/pd-read-csv-multiple-tables-and-parse-data-frames-using-index-0

    # Read the file line by line and start a new chunk at every blank line
    chunks = [[]]
    with open(filepath) as bigfile:
        for line in bigfile:
            if line.strip() == "":  # blank line, start a new table
                chunks.append([])
            else:  # continuation of the current table
                chunks[-1].append(line)

    # Parse each chunk as its own in-memory csv file. Empty chunks (caused by
    # consecutive, leading or trailing blank lines) are skipped, because
    # pd.read_csv raises EmptyDataError on an empty buffer.
    return [
        pd.read_csv(StringIO("".join(chunk)), sep=',')
        for chunk in chunks
        if chunk
    ]

In [None]:
tables = load_csv_multi_tables(csvpath)

In [None]:
# Pick the main sleep log table and parse its timestamp columns into UTC datetime objects, which eases time difference calculations
sleeplog = tables[3] #.set_index(tables[3].columns[0])
#sleeplog.index = pd.to_datetime(sleeplog.index)
for timecol in ('wake', 'sleep', 'bedtime'):
    sleeplog[timecol] = pd.to_datetime(sleeplog[timecol], utc=True)
sleeplog

In [None]:
# Sleep duration = time elapsed between falling asleep and waking up
sleeplog['duration'] = sleeplog['wake'] - sleeplog['sleep']
# Put the timing columns first, then every remaining column (all those after the first three, except the duration column that was just appended last)
timing_columns = ['bedtime', 'sleep', 'wake', 'duration']
remaining_columns = list(sleeplog.columns[3:-1])
sleeplog = sleeplog[timing_columns + remaining_columns]
sleeplog

In [None]:
# Detect naps that were wrongly typed as night sleeps

nightsleepmin = {'hours': 4, 'minutes': 35}  # minimum duration in hours to consider a sleep night, any shorter sleep will be considered a nap
napdiff_threshold = {'hours': 7}  # in datetime.timedelta() format

def detect_naps(sleeplog_in, nightsleepmin=None, napdiff_threshold=None):
    """Detect naps based on timing and duration.

    This is very important to remove outliers that will mess up the calculation
    of longitudinal wake up time difference (because if a nap is wrongly typed,
    then the wake up time of the nap will interfere between 2 real night sleeps!)

    Parameters
    ----------
    sleeplog_in : pandas.DataFrame
        Sleep log with at least the columns 'type', 'sleep', 'wake' and
        'duration'. It is not modified. Consecutive rows are compared by
        position, so any index (not only a default 0..n-1 one) is accepted.
        NOTE(review): the gap is computed as this row's 'sleep' minus the next
        row's 'wake', which presumes rows are sorted most recent first.
    nightsleepmin : dict or None
        Keyword arguments for datetime.timedelta(); any sleep strictly shorter
        than this duration is relabeled as a nap. None disables this rule.
    napdiff_threshold : dict or None
        Keyword arguments for datetime.timedelta(); when two consecutive
        non-nap sleeps are separated by strictly less than this gap, the
        shorter one (or both, if equally long) is relabeled as a nap.
        None disables this rule.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with an additional 'type_cleaned' column.
    """
    sleeplog = sleeplog_in.copy()
    # Copy the type column
    sleeplog['type_cleaned'] = sleeplog['type']
    # NAP DETECTED BY SHORT DURATION
    if nightsleepmin is not None:
        sleeplog.loc[sleeplog['duration'] < datetime.timedelta(**nightsleepmin), 'type_cleaned'] = 'NAP'
    # NAP DETECTED BY TIME DIFFERENCE
    if napdiff_threshold is not None:
        max_gap = datetime.timedelta(**napdiff_threshold)  # loop invariant, built once
        cleaned_col = sleeplog.columns.get_loc('type_cleaned')
        # Plain lists allow purely positional access, independent of the index labels
        types = list(sleeplog['type'])
        sleeps = list(sleeplog['sleep'])
        wakes = list(sleeplog['wake'])
        durations = list(sleeplog['duration'])
        for pos in range(len(sleeplog) - 1):
            nxt = pos + 1
            # Only compare two entries that were both originally typed as non-naps
            if types[pos] == 'NAP' or types[nxt] == 'NAP':
                continue
            # Only a few hours between the wake up of one sleep and the start of the other: one of them is likely a nap
            if not (sleeps[pos] - wakes[nxt] < max_gap):
                continue
            # We now have to select which one is a nap, we select the one with the smallest duration (TODO: we could find another more precise way to infer?)
            if durations[pos] < durations[nxt]:
                sleeplog.iloc[pos, cleaned_col] = 'NAP'
            elif durations[pos] > durations[nxt]:
                sleeplog.iloc[nxt, cleaned_col] = 'NAP'
            else:
                # Same duration (or not comparable): cannot decide, flag both
                sleeplog.iloc[pos, cleaned_col] = 'NAP'
                sleeplog.iloc[nxt, cleaned_col] = 'NAP'
    return sleeplog

sleeplog_napsfixed = detect_naps(sleeplog, nightsleepmin, napdiff_threshold)
sleeplog_napsfixed

In [None]:
# Show sleep log where nap was fixed
sleeplog_napsfixed[sleeplog_napsfixed['type'] != sleeplog_napsfixed['type_cleaned']]

## Wake-up time variability analysis

Analyze how much the wake-up time (which is a reliable enough predictor of the circadian rhythm) varies over the whole sleep log.

In [None]:
# Extract night sleeps only
sleeplognight = sleeplog_napsfixed[sleeplog_napsfixed['type_cleaned'] == 'NIGHT_SLEEP']
sleeplognight

In [None]:
# Extract only wake up times as a pandas Series
wakeuptimes = pd.Series(sleeplognight['wake'])
wakeuptimes

In [None]:
# Calculate the longitudinal difference of wake-up time, ie, how much time difference there is in the wake up time between each consecutive night sleeps

def calc_timediff(wakeuptimes):
    """Calculates the framewise displacement of wake up times, in other words the difference of time between the wake up time at one day compared to the previous one.

    Expects a Series in pd.to_datetime format, with the latest date at the top and earliest at the bottom.
    It is important to trim naps and consider only night sleeps here.
    Also note that the date is NOT accounted for, which has the advantage of allowing for gaps between multiple days, or considering sleeps that happen in the same day (eg, sleep at midnight one day, then at 23h later the same day).
    The goal here is to evaluate the stability of the wake up time, hence we only care about the wake up time difference irrespective of what day it happened.

    Returns a list of differences in minutes, one per pair of consecutive entries, in reversed (chronological) order.
    Each difference is wrapped into [-720, +720] minutes (ie, +-12h): positive means waking up later than the previous time, negative means earlier.
    An input with fewer than two entries yields an empty list."""

    def _time_of_day(ts):
        # Keep only the clock time of a timestamp, as a timedelta since midnight
        return datetime.timedelta(hours=ts.hour, minutes=ts.minute, seconds=ts.second)

    # Materialize the values so pairs are taken by position, whatever the index labels are
    times = list(wakeuptimes)
    timediff = []
    for later, earlier in zip(times, times[1:]):
        # Remove the date and subtract only the time of day
        # Naps can still mess up things but that's not something to fix here but in preprocessing (try to detect naps, but it's difficult)
        # Note that with this calculation we don't care if there is a gap, ie, when pulling an all nighter and skipping sleep altogether for a day, we only care about what time we wake up compared to last time, even if several days ago
        diff = (_time_of_day(later) - _time_of_day(earlier)).total_seconds() / 60 / 60

        # Since a day is cyclic (24h), keep the shortest signed distance on the clock:
        # is it shorter to consider that I woke up earlier, or later? Differences beyond
        # half a day in either direction are wrapped around to the other side.
        if diff > 12:
            diff -= 24
        elif diff < -12:
            diff += 24

        # Add in the list of timediffs, converted from hours to minutes
        timediff.append(diff * 60)

    return timediff[::-1]  # reverse order and return

# Label each difference (in minutes) with a wake-up time taken in chronological order, skipping the earliest one
chronological_wakeups = wakeuptimes[::-1]
timediff = pd.Series(calc_timediff(wakeuptimes), index=chronological_wakeups[1:])
# Same series expressed in hours
timediffhours = timediff / 60
timediffhours

In [None]:
# Plot the raw result!
timediffhours.plot()

In [None]:
# Plot rolling standard deviation (over 3 days)
timediffhours.rolling(3).std().plot()

In [None]:
# Plot rolling standard deviation (over 15 days)
timediffhours.rolling(15).std().plot()

In [None]:
# Plot rolling standard deviation (over 31 days)
timediffhours.rolling(31).std().plot()

In [None]:
# Plot rolling median (over 3 days)
timediffhours.rolling(3).median().plot()

In [None]:
# Plot rolling median (over 15 days)
timediffhours.rolling(15).median().plot()

In [None]:
# Plot rolling median (over 31 days)
timediffhours.rolling(31).median().plot()

In [None]:
# Plot smoothed signal using a savgol filter
from scipy.signal import savgol_filter
yhat = savgol_filter(timediffhours, 51, 3) # window size 51, polynomial order 3
plt.plot(yhat)

In [None]:
# Plot with a median filter (from scipy)
from scipy.signal import medfilt
yhat = medfilt(timediffhours, 9)
plt.plot(yhat)

In [None]:
# Outliers? Hidden Naps (positive sign = sleep way later)?
timediffhours[timediffhours > 10]

In [None]:
# Outliers? Hidden naps again (negative sign = sleep way earlier)?
timediffhours[timediffhours < -10]

In [None]:
# Show descriptive stats, in hour format
print("The daily phase delay is %g hours at median and %g (varying +-%g) hours on average." % (timediffhours.median(), timediffhours.mean(), timediffhours.std()))  # median daily phase delay, in hours
timediffhours.describe()

In [None]:
# Show descriptive stats, in minutes format
print("The daily phase delay is %g minutes at median and %g (varying +-%g) minutes on average." % (timediff.median(), timediff.mean(), timediff.std()))  # median daily phase delay, in minutes
timediff.describe()

## Naps and sleep disruption analysis

Analyze the evolution of naps and sleep disruptions (holes), which are good indicators of sleep quality (the fewer holes and naps, the better the sleep quality).

In [None]:
# Extract naps only
sleeplognaps = sleeplog_napsfixed[sleeplog_napsfixed['type_cleaned'] == 'NAP']
sleeplognaps

In [None]:
# Naps count evolution (number of naps per week)
# Work on an explicit copy: assigning into a slice of sleeplognaps triggers pandas' SettingWithCopyWarning
sleeplognaps2 = sleeplognaps[['wake', 'type_cleaned']].copy()
sleeplognaps2['wake'] = pd.to_datetime(sleeplognaps2['wake'], utc=True)
napcount = sleeplognaps2.groupby([pd.Grouper(freq='W', key='wake')]).count()
# Remove first and last weeks because they have incomplete data
#napcount = napcount[1:-1]
napcount.plot()

In [None]:
napcount

In [None]:
# Count, week by week, the number of nights with holes (sleep disruption)
nights_with_holes = sleeplognight.dropna(subset=['holes'])  # keep only nights where a hole was recorded
weekly_bins = pd.Grouper(freq='W', key='wake')
holescount = nights_with_holes[['wake', 'type_cleaned']].groupby([weekly_bins]).count()
# Drop the first and last weekly bins
holescount = holescount[1:-1]
holescount.plot()

In [None]:
holescount

In [None]:
# Plot evolution of both naps and sleep disruptions (holes)
(napcount + holescount).plot()

In [None]:
(napcount + holescount)

In [None]:
# TODO: Split into periods given a list of start dates for the next period, and name them on graph with
# vertical lines to separate
