In [2]:
%matplotlib inline

import io
from collections import OrderedDict
import datetime
import pytz

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# Apply the 'fivethirtyeight' matplotlib style sheet to every figure drawn
# below ('ggplot' is the alternative that was tried and left disabled).
plt.style.use('fivethirtyeight')

In [3]:
from IPython.display import Markdown, display, HTML

def printhtml(string):
    """Show ``string`` in the cell output, rendered as raw HTML."""
    html_object = HTML(string)
    display(html_object)

def printmd(string):
    """Show ``string`` in the cell output, rendered as Markdown."""
    markdown_object = Markdown(string)
    display(markdown_object)

# Parse the file into DataFrames

In [4]:
# Names of the tables in the export. While parsing, a line that is exactly one
# of these names is treated as the heading that starts that table's rows.
# (A shorter list such as ['ZSLEEPSESSION', 'ZSLEEPSTAGEDATAPOINT'] can be
# substituted when only those tables are of interest.)
tables = [
    'ZPILLOWUSER',
    'ZSLEEPNOTE',
    'Z_2SLEEPSESSION',
    'ZSLEEPSESSION',
    'ZSLEEPSTAGEDATAPOINT',
    'ZSNOOZELAB',
    'ZSOUNDDATAPOINT',
    'Z_PRIMARYKEY',
    'Z_METADATA',
    'Z_MODELCACHE',
    'Y_UBMETA',
    'Y_UBRANGE',
    'Y_UBKVS',
]

In [5]:
def getDictFromString(string):
    """Parse one ``KEY -> value KEY -> value ...`` line into an OrderedDict.

    The line is split on whitespace and consumed from its right-hand end,
    because a value may span several words while a key is always exactly one
    word: words are collected until the ``->`` separator is met, and the word
    just before the separator is the key.

    Keys are therefore inserted last-to-first (the rightmost pair of the line
    is the first entry of the result). A line that does not follow the
    pattern raises IndexError when the word list runs out.
    """
    remaining = string.split()
    row = OrderedDict()

    while len(remaining) > 0:
        # Collect the words of the value, rightmost word first. The very first
        # word taken is always part of the value, whatever it is.
        words = []
        word = remaining.pop()
        while True:
            words.append(word)
            word = remaining.pop()
            if word == '->':
                break

        # The single word left of the separator is the key.
        key = remaining.pop()
        row[key] = ' '.join(reversed(words))

    return row

In [6]:
# Parse the export into one DataFrame per table.
#
# The file is a sequence of table headings (a line equal to one of `tables`),
# each followed by that table's rows, one ``KEY -> value ...`` row per line.
filename = 'PillowData.txt'
reading_table = None   # heading of the table whose rows are being collected
dataframes = {}        # table heading -> DataFrame of its rows
rows = []              # row dicts collected for `reading_table`

with open(filename) as file:
    for line in file:
        line = line.strip()

        # A new table heading: store the table read so far and start afresh.
        if line in tables:
            if reading_table:
                dataframes[reading_table] = pd.DataFrame(rows)
            reading_table = line
            rows = []
            continue

        # The export ends with a blank line; nothing after it is parsed.
        if line == '':
            break

        rows.append(getDictFromString(line))

# Store the final table. This is done after the loop, not inside the
# blank-line branch, so the last table is kept even when the file has no
# trailing blank line, and nothing is stored if no heading was ever seen.
if reading_table is not None:
    dataframes[reading_table] = pd.DataFrame(rows)

# getDictFromString produces the keys of each line last-to-first, so reverse
# the columns to restore the order used in the file. The copy gives each frame
# its own data, so the column assignments below do not write into a slice.
for k, v in dataframes.items():
    dataframes[k] = v.iloc[:, ::-1].copy()

# Every value was read as a string; convert each column to a numeric dtype
# where the whole column parses as numbers.
for df in dataframes.values():
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            # Not a numeric column (e.g. free text): keep it as strings.
            pass

In [7]:
# Short names for the tables the rest of the notebook works with.
df_sessions, df_stages, df_audio, df_session_notes, df_notes = (
    dataframes[table_name]
    for table_name in (
        'ZSLEEPSESSION',         # sessions: Z_PK, start/end times, summary values
        'ZSLEEPSTAGEDATAPOINT',  # sleep-stage samples, keyed by ZSLEEPSESSION
        'ZSOUNDDATAPOINT',       # audio samples, keyed by ZSLEEPSESSION
        'Z_2SLEEPSESSION',       # links a session (Z_3SLEEPSESSION) to a note (Z_2SLEEPNOTE)
        'ZSLEEPNOTE',            # note texts (Z_PK, ZCONTENTTEXT)
    )
)

# Deal with timestamps

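The timestamps encode the correct date and time of day, but reading them as ordinary Unix timestamps gives the wrong year (the export appears to count seconds from a different starting date), so the year is corrected during conversion.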

In [8]:
# Seconds from the Unix epoch (1970-01-01 00:00:00 UTC) to 2001-01-01 00:00:00 UTC
# (11323 days). The Z-prefixed tables (Z_PK, Z_PRIMARYKEY, Z_METADATA, ...)
# look like a Core Data store, which counts dates from 2001 rather than 1970;
# that is why decoding the raw values as Unix timestamps lands 31 years early.
# NOTE(review): confirm the export really uses the 2001 reference date.
CORE_DATA_EPOCH_OFFSET = 978307200


def makeDateTime(timestamp, year=None):
    """Convert an exported timestamp to a timezone-aware US/Eastern datetime.

    Parameters
    ----------
    timestamp : number
        Seconds since 2001-01-01 00:00:00 UTC, as stored in the export.
    year : int, optional
        When given, the year of the result is overwritten with this value
        (the previous way of repairing the year). Left as None, the true year
        is used. Note that overriding the year raises ValueError for Feb 29
        when `year` is not a leap year.

    Returns
    -------
    datetime.datetime
        The moment in the 'US/Eastern' timezone.

    Shifting to the real epoch before converting (instead of decoding as Unix
    time and then forcing the year) keeps the calendar day and the
    daylight-saving offset correct for every date, not only for 2018.
    """
    moment = datetime.datetime.fromtimestamp(
        timestamp + CORE_DATA_EPOCH_OFFSET, pytz.timezone('US/Eastern'))
    if year is not None:
        moment = moment.replace(year=year)
    return moment

# Stage and audio samples: put the rows in chronological order (in place),
# then replace the raw timestamps with datetimes.
for samples in (df_stages, df_audio):
    samples.sort_values('ZTIMESTAMP', inplace=True)
    samples['ZTIMESTAMP'] = samples['ZTIMESTAMP'].apply(makeDateTime)

# Sessions: convert both endpoints; the row order is left as it is.
for endpoint in ('ZSTARTTIME', 'ZENDTIME'):
    df_sessions[endpoint] = df_sessions[endpoint].apply(makeDateTime)

# Plotting

In [20]:
# Colour of the bars that mark audio recordings in the session plot.
audio_color = 'purple'

# One colour per sleep stage, indexed by the stage's numeric code.
colors = [
    'dodgerblue',        # 0: deep sleep
    'mediumaquamarine',  # 1: light sleep
    'deeppink',          # 2: REM
    'darkorange',        # 3: awake
]

def plotSession(session):
    """Draw the sleep-stage timeline of one session and return it as SVG markup.

    Parameters
    ----------
    session
        Value matched against the ``ZSLEEPSESSION`` column of the module-level
        ``df_stages`` and ``df_audio`` frames.

    Returns
    -------
    str
        The rendered figure as SVG text, starting at the ``<svg`` tag (the XML
        preamble is dropped so the markup can be embedded in HTML) and without
        the trailing newline.

    A session with no stage data points yields a plot with only the audio
    bars (or an empty plot) instead of raising IndexError.
    """
    stages = df_stages[df_stages['ZSLEEPSESSION'] == session]
    audio = df_audio[df_audio['ZSLEEPSESSION'] == session]

    x = stages['ZTIMESTAMP'].values
    y = stages['ZSLEEPSTAGE'].values

    # Start from a clean slate so figures do not pile up across sessions.
    plt.close('all')
    fig = plt.figure(figsize=(10, 4))
    ax = fig.add_subplot(111)

    # Bar widths on a date axis are given as fractions of a day, so every
    # duration below is divided by one day.
    one_day = np.timedelta64(1, 'D')

    # First plot the audio intervals as full-height bars behind the stages.
    # NOTE(review): the factor 5 makes each bar five times wider than the
    # recorded duration, presumably for visibility — confirm the intent.
    for _, sound in audio.iterrows():
        ax.bar(sound['ZTIMESTAMP'], 4, 5*np.timedelta64(sound['ZDURATION'], 's')/one_day, 0,
               align='edge', color=audio_color)

    # Now plot one box per run of consecutive samples with the same stage.
    # The stage code is both the box's bottom edge and its colour index.
    if len(y) > 0:
        height = 1
        left = x[0]
        bottom = y[0]
        width = 0
        for i, val in enumerate(y):
            if val != bottom:
                # The stage changed: draw the finished run, start a new one.
                ax.bar(left, height, width, bottom, align='edge', color=colors[int(bottom)])
                left = x[i]
                bottom = val
            width = (x[i] - left)/one_day

        # Finish the last box.
        ax.bar(left, height, width, bottom, align='edge', color=colors[int(bottom)])

    ax.set_yticks([0, 1, 2, 3, 4])
    ax.set_yticklabels(['', 'Deep', 'Light', 'REM', 'Awake'])

    # NOTE(review): with axis='y' the bottom/top/labelbottom arguments name
    # x-axis sides, so this call looks like it has no visible effect. It is
    # kept unchanged; confirm whether axis='x' was intended.
    plt.tick_params(
        axis='y',
        which='minor',
        bottom=False,
        top=False,
        labelbottom=False)

    # Format the xticklabels as 12-hour clock times in US/Eastern.
    ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%I:%M %p', tz=pytz.timezone('US/Eastern')))

    fig.tight_layout()
    output = io.StringIO()
    plt.savefig(output, format="svg")
    plt.close()

    # Drop the XML/DOCTYPE preamble. Its length depends on the matplotlib
    # version, so locate the <svg tag instead of cutting a fixed number of
    # characters.
    svg = output.getvalue()
    start = svg.find('<svg')
    if start < 0:
        start = 0
    return svg[start:].rstrip('\n')

In [21]:
def plotStackedPercentages(*args):
    """Draw a single horizontal stacked bar and return it as SVG markup.

    Parameters
    ----------
    *args : numbers
        Segment widths, one per entry of the module-level ``colors`` list,
        given in the reverse of that list's order (the first argument gets the
        last colour). Fewer arguments than colours raises IndexError; extra
        arguments are ignored.

    Returns
    -------
    str
        The rendered figure (transparent background, axes hidden) as SVG
        text, starting at the ``<svg`` tag and without the trailing newline.
    """
    # Start from a clean slate so figures do not pile up across calls.
    plt.close('all')
    fig = plt.figure(figsize=(10, 1))
    ax = fig.add_subplot(111)

    height = 1
    left = 0
    bottom = 0

    # Lay the segments end to end, each starting where the previous one ended.
    for i, c in enumerate(reversed(colors)):
        width = args[i]
        ax.bar(left, height, width, bottom, align='edge', color=c)
        left += width

    plt.axis('off')
    ax.set_facecolor(None)

    fig.tight_layout()
    output = io.StringIO()
    plt.savefig(output, format="svg", transparent=True)
    plt.close()

    # Drop the XML/DOCTYPE preamble. Its length depends on the matplotlib
    # version, so locate the <svg tag instead of cutting a fixed number of
    # characters.
    svg = output.getvalue()
    start = svg.find('<svg')
    if start < 0:
        start = 0
    return svg[start:].rstrip('\n')

# Summary Data Table

In [12]:
# Wake-up mood scale, worst to best. Each pair is (emoji, description); the
# pairs are split into the two parallel lists used when a session is shown.
mood_emojis, moods = (
    list(column)
    for column in zip(
        ('😧', 'Bad'),
        ('🙁', 'Not so good'),
        ('😐', 'OK'),
        ('🙂', 'Good'),
        ('😀', 'Great'),
    )
)

In [13]:
def roundTimedelta(t):
    """Return ``t`` as a ``datetime.timedelta`` rounded to whole seconds.

    ``t`` is either an object with a ``total_seconds()`` method (such as a
    ``datetime.timedelta``) or a plain number, which is taken to be a count of
    seconds. Rounding is done with the built-in ``round``.
    """
    seconds = t.total_seconds() if hasattr(t, 'total_seconds') else t
    return datetime.timedelta(seconds=round(seconds))

In [18]:
def _formatSessionStart(start):
    """Format a datetime like 'Thursday, April 26, 2018 at 11:11 p.m.'.

    The day and hour are formatted by hand because the unpadded strftime
    directives '%-d' and '%-I' are platform extensions that are not available
    everywhere.
    """
    hour = start.hour % 12 or 12
    meridiem = 'a.m.' if start.hour < 12 else 'p.m.'
    return '{weekday}, {month} {day}, {year} at {hour}:{minute:02d} {meridiem}'.format(
        weekday=start.strftime('%A'),
        month=start.strftime('%B'),
        day=start.day,
        year=start.year,
        hour=hour,
        minute=start.minute,
        meridiem=meridiem)


def _sessionNoteTexts(session):
    """Return the texts of all notes linked to `session`, in link-table order."""
    links = df_session_notes[df_session_notes['Z_3SLEEPSESSION'] == session]
    texts = []
    for note_pk in links['Z_2SLEEPNOTE']:
        matches = df_notes.loc[df_notes['Z_PK'] == note_pk, 'ZCONTENTTEXT']
        if matches.empty:
            # The link points at a note that is not in the notes table.
            continue
        texts.append(str(matches.iloc[0]))
    return texts


def showSession(row):
    """Display the report for one sleep session in the notebook output.

    Parameters
    ----------
    row : pandas.Series
        One row of ``df_sessions``. The fields read are Z_PK, ZSTARTTIME,
        ZENDTIME, ZTIMEAWAKE, ZSLEEPQUALITY, ZWAKEUPMOOD, ZTIMEINREMSLEEP,
        ZTIMEINLIGHTSLEEP and ZTIMEINDEEPSLEEP.

    Output, in order: a heading with the start time, the sleep-stage graph, a
    summary table, the stacked percentage bar, and a table of percentages.
    Nothing is returned.
    """
    session = row['Z_PK']
    when = _formatSessionStart(row['ZSTARTTIME'])
    total_time = row['ZENDTIME'] - row['ZSTARTTIME']
    duration = str(roundTimedelta(total_time))
    time_to_sleep = roundTimedelta(row['ZTIMEAWAKE'])
    asleep = roundTimedelta(total_time.total_seconds() - row['ZTIMEAWAKE'])
    quality = row['ZSLEEPQUALITY']

    # Mood codes 1..5 index the moods lists. Any other value (presumably "no
    # mood recorded" — TODO confirm) is shown as blank rather than letting a
    # negative index silently pick an entry from the end of the list.
    mood_index = row['ZWAKEUPMOOD'] - 1
    if 0 <= mood_index < len(moods):
        mood_index = int(mood_index)
        mood_string = '{emoji} {description}'.format(emoji=mood_emojis[mood_index],
                                                     description=moods[mood_index])
    else:
        mood_string = ''

    # Stage shares of the whole session, in percent; "awake" is the remainder.
    rem = round(roundTimedelta(row['ZTIMEINREMSLEEP'])/total_time*100)
    light = round(roundTimedelta(row['ZTIMEINLIGHTSLEEP'])/total_time*100)
    deep = round(roundTimedelta(row['ZTIMEINDEEPSLEEP'])/total_time*100)
    awake = 100-(rem+light+deep)

    # Notes go into a single table cell: separate them with <br> (a newline
    # would end the table row) and escape pipes (they would split the cell).
    notes = [text.replace('|', '\\|') for text in _sessionNoteTexts(session)]

    # Display.
    # Date
    printmd('# {when}'.format(when=when))

    # Sleep stages graph
    stages_graph = plotSession(session)
    printhtml(stages_graph)

    # Summary Information Table
    table = """|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | {duration} | Sleep quality |{quality:.0%} |
| Time asleep | {asleep} | Mood | {mood} |
| Time to sleep | {time_to_sleep} | Notes | {notes}&nbsp; |"""
    printmd(table.format(duration=duration,
                         asleep=asleep,
                         time_to_sleep=time_to_sleep,
                         quality=quality,
                         mood=mood_string,
                         notes='<br>'.join(notes)
                        ))

    # Stacked percentages plot.
    printhtml(plotStackedPercentages(awake, rem, light, deep))

    # Table of percentages, each followed by a dot in that stage's colour.
    colored_dots = ['<font style="color:{color}">\u2B24</font>'.format(color=c) for c in colors]
    table_percents = """| | | | |
|-----------:|-----:|----:|----:|
| Awake | {awake}%{dota} | REM | {rem}%{dotr} |
| Light Sleep | {light}%{dotl} | Deep Sleep | {deep}%{dotd} |"""
    printmd(table_percents.format(awake=awake,
                                  rem=rem,
                                  light=light,
                                  deep=deep,
                                  dotd=colored_dots[0],
                                  dotl=colored_dots[1],
                                  dotr=colored_dots[2],
                                  dota=colored_dots[3]
                                 ))

In [22]:
# Render the report for every session, in the order of the sessions table.
for session_index, session_row in df_sessions.iterrows():
    showSession(session_row)

# Thursday, April 26, 2018 at 11:11 p.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 6:49:34 | Sleep quality |58% |
| Time asleep | 6:34:34 | Mood | 🙁 Not so good |
| Time to sleep | 0:15:00 | Notes | &nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 17%<font style="color:darkorange">⬤</font> | REM | 9%<font style="color:deeppink">⬤</font> |
| Light Sleep | 23%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 51%<font style="color:dodgerblue">⬤</font> |

# Friday, April 27, 2018 at 11:54 p.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 9:20:43 | Sleep quality |70% |
| Time asleep | 8:05:43 | Mood | 🙁 Not so good |
| Time to sleep | 1:15:00 | Notes | Read a book&nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 13%<font style="color:darkorange">⬤</font> | REM | 19%<font style="color:deeppink">⬤</font> |
| Light Sleep | 30%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 38%<font style="color:dodgerblue">⬤</font> |

# Saturday, April 28, 2018 at 11:24 p.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 9:24:35 | Sleep quality |68% |
| Time asleep | 7:54:35 | Mood | 😐 OK |
| Time to sleep | 1:30:00 | Notes | &nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 16%<font style="color:darkorange">⬤</font> | REM | 31%<font style="color:deeppink">⬤</font> |
| Light Sleep | 27%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 26%<font style="color:dodgerblue">⬤</font> |

# Sunday, April 29, 2018 at 10:43 p.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 7:47:04 | Sleep quality |72% |
| Time asleep | 7:07:04 | Mood | 😐 OK |
| Time to sleep | 0:40:00 | Notes | Read a book&nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 9%<font style="color:darkorange">⬤</font> | REM | 17%<font style="color:deeppink">⬤</font> |
| Light Sleep | 30%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 44%<font style="color:dodgerblue">⬤</font> |

# Monday, April 30, 2018 at 11:26 p.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 6:38:07 | Sleep quality |75% |
| Time asleep | 6:23:07 | Mood | 🙂 Good |
| Time to sleep | 0:15:00 | Notes | &nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 5%<font style="color:darkorange">⬤</font> | REM | 14%<font style="color:deeppink">⬤</font> |
| Light Sleep | 36%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 45%<font style="color:dodgerblue">⬤</font> |

# Tuesday, May 1, 2018 at 10:49 p.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 7:46:25 | Sleep quality |73% |
| Time asleep | 7:11:25 | Mood | 😐 OK |
| Time to sleep | 0:35:00 | Notes | &nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 8%<font style="color:darkorange">⬤</font> | REM | 20%<font style="color:deeppink">⬤</font> |
| Light Sleep | 25%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 47%<font style="color:dodgerblue">⬤</font> |

# Thursday, May 3, 2018 at 12:01 a.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 7:43:24 | Sleep quality |63% |
| Time asleep | 6:23:24 | Mood | 😐 OK |
| Time to sleep | 1:20:00 | Notes | Read a book&nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 17%<font style="color:darkorange">⬤</font> | REM | 14%<font style="color:deeppink">⬤</font> |
| Light Sleep | 26%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 43%<font style="color:dodgerblue">⬤</font> |

# Thursday, May 3, 2018 at 11:31 p.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 6:53:34 | Sleep quality |63% |
| Time asleep | 5:51:35 | Mood | 😐 OK |
| Time to sleep | 1:01:59 | Notes | Read a book&nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 17%<font style="color:darkorange">⬤</font> | REM | 19%<font style="color:deeppink">⬤</font> |
| Light Sleep | 28%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 36%<font style="color:dodgerblue">⬤</font> |

# Friday, May 4, 2018 at 11:13 p.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 11:03:21 | Sleep quality |62% |
| Time asleep | 7:28:21 | Mood | 😐 OK |
| Time to sleep | 3:35:00 | Notes | Read a book&nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 32%<font style="color:darkorange">⬤</font> | REM | 17%<font style="color:deeppink">⬤</font> |
| Light Sleep | 22%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 29%<font style="color:dodgerblue">⬤</font> |

# Sunday, May 6, 2018 at 12:37 a.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 9:30:49 | Sleep quality |75% |
| Time asleep | 8:55:47 | Mood | 😀 Great |
| Time to sleep | 0:35:01 | Notes | &nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 6%<font style="color:darkorange">⬤</font> | REM | 16%<font style="color:deeppink">⬤</font> |
| Light Sleep | 26%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 52%<font style="color:dodgerblue">⬤</font> |

# Sunday, May 6, 2018 at 10:57 p.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 7:21:12 | Sleep quality |78% |
| Time asleep | 6:53:52 | Mood | 🙁 Not so good |
| Time to sleep | 0:27:20 | Notes | Read a book&nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 5%<font style="color:darkorange">⬤</font> | REM | 22%<font style="color:deeppink">⬤</font> |
| Light Sleep | 25%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 48%<font style="color:dodgerblue">⬤</font> |

# Monday, May 7, 2018 at 10:42 p.m.

|  | | | |
|------------:|:-----|----:|:----|
| Time in bed | 7:18:20 | Sleep quality |85% |
| Time asleep | 7:08:20 | Mood | 🙂 Good |
| Time to sleep | 0:10:00 | Notes | Read a book&nbsp; |

| | | | |
|-----------:|-----:|----:|----:|
| Awake | 3%<font style="color:darkorange">⬤</font> | REM | 19%<font style="color:deeppink">⬤</font> |
| Light Sleep | 31%<font style="color:mediumaquamarine">⬤</font> | Deep Sleep | 47%<font style="color:dodgerblue">⬤</font> |