# Extract Oura Data

In [59]:
import pandas as pd
import json
import os
import datetime

## The following functions are used within this notebook


def sec2hours(secs):
    """
    Format a duration given in seconds as a string.

    The value is wrapped in a datetime.timedelta and the str() form of that
    timedelta is returned (for example 3661 -> '1:01:01'). The result is a
    plain string, not a datetime/timedelta object. Exists mainly to save
    typing at the call sites.
    """
    duration = datetime.timedelta(seconds=secs)
    return str(duration)


def sleepData(rawdata):
    """
    Build a per-night sleep summary table from the raw Oura JSON data.

    Every record in rawdata['sleep'] contributes one row holding the summary
    date, sleep score, average breathing rate (rounded to one decimal), the
    awake / rem / deep / light / total durations (stored in seconds, rendered
    as strings by sec2hours), the average heart rate (converted with int()),
    the lowest heart rate, the rmssd value and the largest rmssd_5min entry.

    Returns a pandas DataFrame indexed by 'Date', parsed with pd.to_datetime.
    """
    duration_keys = ('awake', 'rem', 'deep', 'light', 'total')   # all in seconds

    rows = []
    for night in rawdata['sleep']:
        # Read every field first, in a fixed order, before doing any formatting
        day = night['summary_date']
        score = night['score']
        breaths = round(night['breath_average'], 1)   # breaths/minute
        awake_s, rem_s, deep_s, light_s, total_s = (night[key] for key in duration_keys)
        hr_mean = int(night['hr_average'])
        hr_min = night['hr_lowest']   # bpm
        hrv_mean = night['rmssd']
        hrv_peak = max(night['rmssd_5min'])

        durations = [sec2hours(s) for s in (awake_s, rem_s, deep_s, light_s, total_s)]
        rows.append([day, score, breaths, *durations, hr_mean, hr_min, hrv_mean, hrv_peak])

    headers = ['Date', 'Sleep Score', 'Respiratory Rate', 'Awake', 'Rem Sleep', 'Deep Sleep',
               'Light Sleep', 'Total Sleep', 'HR avg', 'HR low', 'HRV avg', 'HRV high']
    table = pd.DataFrame(rows, columns=headers)
    table['Date'] = pd.to_datetime(table['Date'])
    return table.set_index('Date')



def readinessData(rawdata):
    """
    Build a readiness-score table from the raw Oura JSON data.

    Each record in rawdata['readiness'] contributes its 'summary_date' and
    'score'. The result is a pandas DataFrame with a single 'Readiness Score'
    column, indexed by 'Date' (parsed with pd.to_datetime). Repeated dates
    are returned as-is; no de-duplication happens here.
    """
    rows = [[entry['summary_date'], entry['score']] for entry in rawdata['readiness']]
    table = pd.DataFrame(rows, columns=['Date', 'Readiness Score'])
    table['Date'] = pd.to_datetime(table['Date'])
    return table.set_index('Date')



Open the Oura JSON file and load the data. Run the `sleepData()` and `readinessData()` functions to create pandas DataFrames for the sleep and readiness data. The readiness data contained a duplicate entry for 2019-06-08 (it is unclear why that date was duplicated), so a line was added that keeps only the last entry for each date and discards the unwanted duplicate. There was also a date that had a readiness value but no sleep data; the 'inner' join used when concatenating the DataFrames drops that date.

In [85]:
# Read the exported Oura JSON file into plain Python objects
with open('data_oura.json') as file:
    data = json.load(file)

# Convert the raw data into one DataFrame per data type (both indexed by date)
sleepDF = sleepData(data)
readyDF = readinessData(data)

# Where a date appears more than once in the readiness data, keep only its last row
readyDF = readyDF.loc[~readyDF.index.duplicated(keep='last')]

# Column-wise concatenation; join='inner' keeps only the dates present in both
# frames. The combined table is then written out as CSV.
ouraDF = pd.concat([readyDF, sleepDF], axis=1, join='inner')
ouraDF.to_csv('clean_oura_data.csv')

In [82]:
# Diagnostic check: list the readiness dates that have no matching sleep record.
# Those dates are dropped by the 'inner' join during concatenation.

#readyDF[~readyDF.index.isin(sleepDF.index)]