In [1]:
from src.utils import data_load
import pandas as pd
import matplotlib.pyplot as plt
from src.s3_utils import pandas_from_csv_s3
import re
import datetime
import seaborn as sns
import numpy as np
from collections import defaultdict
import os
import pickle
import json
import math

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-zexhyzxo because the default path (/home/ubuntu/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [25]:
# Names of the tables to request from the project loader (wave 5 only).
keys = {'birth', 'oura_activity', 'oura_sleep'}
data = data_load(wave=5, data_keys=keys)

In [26]:
# Columns kept from each source table (in addition to the user_id/date join keys
# for the two Oura tables).
oura_sleep_list = [
    'hr_5min',
    'rmssd_5min',
    'hypnogram_5min',
    'bedtime_start',
]
oura_activity_list = [
    'class_5min',
    'met_1min',
]
birth_list = [
    'user_id',
    'birth_date',
    'birth_scheduled',
    'birth_gestage',
]

In [27]:
# Build the analysis frames from the loaded tables.
#
# oura_df: sleep columns joined with activity columns on (user_id, date). The inner
# join keeps only the user/date pairs that appear in BOTH tables.
oura_activity = data['oura_activity'][['user_id', 'date'] + oura_activity_list]
oura_df = pd.merge(
    data['oura_sleep'][['user_id', 'date'] + oura_sleep_list],
    oura_activity,
    on=['user_id', 'date'],
    how='inner',
)
oura_df['date'] = pd.to_datetime(oura_df['date'])

# birth_df: take an explicit copy of the column selection before assigning to it.
# Without .copy() the assignment below targets a selection of data['birth'], which
# triggers pandas' SettingWithCopyWarning and leaves it ambiguous whether the
# loaded table is modified as well.
birth_df = data['birth'][birth_list].copy()
birth_df['birth_date'] = pd.to_datetime(birth_df['birth_date'])

In [76]:
# Inspect which columns are available in data['oura_sleep'].
data['oura_sleep'].columns

Index(['id', 'user_id', 'identity_id', 'created_at', 'updated_at',
       'retrieved_at', 'subsource', 'event_date', 'awake', 'bedtime_end',
       'bedtime_end_delta', 'bedtime_start', 'bedtime_start_delta',
       'breath_average', 'deep', 'duration', 'efficiency', 'hr_5min',
       'hr_average', 'hr_lowest', 'hypnogram_5min', 'is_longest', 'light',
       'midpoint_at_delta', 'midpoint_time', 'onset_latency', 'period_id',
       'rem', 'restless', 'rmssd', 'rmssd_5min', 'score', 'score_alignment',
       'score_deep', 'score_disturbances', 'score_efficiency', 'score_latency',
       'score_rem', 'score_total', 'temperature_delta',
       'temperature_deviation', 'temperature_trend_deviation', 'timezone',
       'total', 'date'],
      dtype='object')

In [75]:
def process_str(series):
    """
    Convert one serialized cell value into a Python list.

    Despite the parameter name, this receives a single cell value (it is meant to be
    used with ``Series.apply``), not a whole Series.

    Accepted inputs, after trimming surrounding whitespace and any literal wrapping
    quote characters:

    * a JSON array, e.g. ``'[32, 43, 21]'`` or ``'"[32, 43, 21]"'`` -> ``[32, 43, 21]``
    * a run of digits, e.g. ``'453'`` or ``'"453"'`` -> ``[4, 5, 3]``
      (non-digit characters inside the run are skipped)
    * a value that is already a ``list`` is returned unchanged, so applying the
      function twice does not wipe previously parsed data.

    :param series: (Type - str or list) The value held in one row of the Pandas Series.
    :return: (Type - list or None) The parsed list, or ``None`` when the value is not
        a string/list, is empty, or matches neither format.
    :raises json.JSONDecodeError: if the value starts with ``[`` but is not valid JSON.
    """
    if isinstance(series, list):
        return series
    if not isinstance(series, str):
        # NaN / None / other scalars carry no minute-level data.
        return None

    # Drop whitespace and literal quote characters on either end so that both
    # '"453"' and '453' (and both quoted and unquoted JSON arrays) are handled.
    text = series.strip().strip('"\'').strip()
    if not text:
        return None

    if text[0] == '[':
        # Parse the whole array, including its closing bracket.
        return json.loads(text)
    if text[0].isdecimal():
        return [int(ch) for ch in text if ch.isdecimal()]
    return None

def process_min_data(df, field_names):
    """
    Parse the minute-level columns (5min, 1min, ...) of a DataFrame in place.

    Each listed column is replaced by the result of applying ``process_str`` to
    every one of its values. The frame is modified directly; nothing is returned.

    :param df: (Type - Pandas DataFrame) The DataFrame whose columns are converted.
    :param field_names: (Type - List) Names of the columns to convert.
    """
    for column_name in field_names:
        parsed_column = df[column_name].apply(process_str)
        df[column_name] = parsed_column


In [72]:
# For each user with a birth record, select the Oura rows that fall inside the
# `before_days`-day window ending on (and including) the birth date, and parse the
# minute-level columns of users that have enough rows in that window.
processed_data = defaultdict(list, {k: [] for k in oura_sleep_list + ['user_id']})
before_days = 60
# A user is kept only with at least 80% as many rows as there are days in the window.
min_records = round(before_days * 0.8)

for uid in birth_df['user_id'].unique():
    df = oura_df.loc[oura_df['user_id'] == uid].sort_values(by='date')
    if df.empty:
        continue

    birth_date = birth_df.loc[birth_df['user_id'] == uid, 'birth_date'].iloc[0]
    start_date = birth_date + pd.to_timedelta(-before_days + 1, unit='d')

    # Both bounds are inclusive. Copy the selection because process_min_data assigns
    # columns on the frame it receives; without the copy pandas emits
    # SettingWithCopyWarning.
    df = df.loc[df['date'].between(start_date, birth_date)].copy()
    if len(df) < min_records:
        continue

    processed_data['user_id'].append(uid)
    # NOTE(review): oura_sleep_list includes 'bedtime_start', so that column is also
    # run through process_str here - confirm this is intended.
    process_min_data(df, oura_sleep_list + oura_activity_list)
    # NOTE(review): the loop stops after the first qualifying user, leaving `df`
    # available for inspection; the per-column accumulation below is still disabled.
    # Presumably both are temporary - confirm before relying on processed_data.
    break
    # for col in oura_sleep_list:
    #     processed_data[col].append(df[col].tolist())

In [22]:
# Display oura_df for a visual check (pandas truncates the rendered rows).
oura_df

Unnamed: 0,user_id,date,hr_5min,rmssd_5min,hypnogram_5min,class_5min,met_1min
0,992,2021-10-22,"[60, 62, 62, 64, 65, 66, 66, 64, 63, 63, 61, 6...","[46, 43, 41, 32, 24, 20, 25, 26, 18, 19, 27, 2...","""421111122112232222211111111143332222222222322...","""111111111111111111111111111111111111200000000...","[1.1, 0.9, 0.9, 0.9, 1.2, 0.9, 0.9, 0.9, 0.9, ..."
1,992,2021-10-21,"[0, 69, 68, 68, 68, 69, 69, 70, 70, 70, 70, 71...","[0, 33, 36, 39, 37, 35, 31, 29, 31, 36, 17, 17...","""422111111111122333222112222112232222222111122...","""112211111111111111111111111111121112000000000...","[0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, ..."
2,992,2021-10-20,"[0, 61, 58, 58, 59, 61, 62, 61, 63, 62, 63, 65...","[0, 39, 50, 51, 40, 31, 36, 34, 30, 29, 27, 28...","""422111111111222222111122222222222112221222222...","""111111111123111111111111111111112000000000000...","[0.9, 0.9, 0.9, 0.9, 0.9, 1.1, 1.0, 1.0, 1.0, ..."
3,992,2021-11-10,"[64, 64, 64, 65, 64, 65, 65, 65, 65, 63, 63, 6...","[42, 40, 45, 33, 35, 28, 23, 22, 39, 31, 31, 3...","""421111111222222222211122111223342112221223332...","""111111111111111111111111111110000000000000000...","[1.1, 0.9, 0.9, 0.9, 1.1, 0.9, 0.9, 0.9, 0.9, ..."
4,992,2021-11-09,"[0, 66, 65, 70, 67, 67, 69, 71, 69, 69, 70, 71...","[0, 29, 27, 24, 24, 19, 19, 20, 26, 15, 13, 12...","""422222112111122112332222222222222222222234322...","""111111111111111111111111111111111200000000000...","[0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, ..."
...,...,...,...,...,...,...,...
54163,2340,2022-10-09,"[64, 63, 61, 61, 60, 63, 65, 64, 62, 65, 64, 6...","[55, 45, 50, 60, 45, 32, 36, 39, 44, 28, 27, 4...","""422221112211233333333322222222122114222222111...","""111111111111111111111111111111111111111112122...","[1.3, 0.9, 0.9, 0.9, 1.2, 0.9, 0.9, 0.9, 1.0, ..."
54164,2340,2022-10-08,"[70, 67, 66, 65, 68, 69, 69, 68, 69, 69, 72, 7...","[35, 37, 35, 36, 20, 15, 17, 28, 28, 21, 10, 1...","""422221322222222223333322111144222222223332233...","""111111111111111111111111111111111111111111121...","[1.2, 0.9, 0.9, 0.9, 0.9, 1.0, 0.9, 1.0, 0.9, ..."
54165,2340,2022-10-07,"[0, 71, 69, 68, 68, 70, 71, 71, 73, 69, 67, 68...","[0, 30, 18, 18, 16, 15, 15, 15, 13, 23, 15, 14...","""422211111221111234334222222222442222222222113...","""111111111111111111111111112111120000000000000...","[1.2, 1.3, 0.9, 0.9, 0.9, 0.9, 0.9, 1.1, 0.9, ..."
54166,2340,2022-10-06,"[0, 73, 72, 72, 70, 70, 70, 68, 69, 69, 77, 76...","[0, 23, 23, 21, 32, 25, 19, 24, 18, 15, 11, 11...","""421112222233332211112122212223334422333442333...","""111111111111111111111111111111111111222200000...","[0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, ..."


In [12]:
# NOTE(review): `test` is not defined in any cell visible in this notebook, so this
# cell presumably relies on leftover kernel state - confirm or remove.
print(*test)

5 4 3


In [9]:
# Display oura_df again for a visual check (pandas truncates the rendered rows).
oura_df

Unnamed: 0,user_id,date,hr_5min,rmssd_5min,hypnogram_5min,class_5min,met_1min
0,992,2021-10-22,"[60, 62, 62, 64, 65, 66, 66, 64, 63, 63, 61, 6...","[46, 43, 41, 32, 24, 20, 25, 26, 18, 19, 27, 2...","""421111122112232222211111111143332222222222322...","""111111111111111111111111111111111111200000000...","[1.1, 0.9, 0.9, 0.9, 1.2, 0.9, 0.9, 0.9, 0.9, ..."
1,992,2021-10-21,"[0, 69, 68, 68, 68, 69, 69, 70, 70, 70, 70, 71...","[0, 33, 36, 39, 37, 35, 31, 29, 31, 36, 17, 17...","""422111111111122333222112222112232222222111122...","""112211111111111111111111111111121112000000000...","[0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, ..."
2,992,2021-10-20,"[0, 61, 58, 58, 59, 61, 62, 61, 63, 62, 63, 65...","[0, 39, 50, 51, 40, 31, 36, 34, 30, 29, 27, 28...","""422111111111222222111122222222222112221222222...","""111111111123111111111111111111112000000000000...","[0.9, 0.9, 0.9, 0.9, 0.9, 1.1, 1.0, 1.0, 1.0, ..."
3,992,2021-11-10,"[64, 64, 64, 65, 64, 65, 65, 65, 65, 63, 63, 6...","[42, 40, 45, 33, 35, 28, 23, 22, 39, 31, 31, 3...","""421111111222222222211122111223342112221223332...","""111111111111111111111111111110000000000000000...","[1.1, 0.9, 0.9, 0.9, 1.1, 0.9, 0.9, 0.9, 0.9, ..."
4,992,2021-11-09,"[0, 66, 65, 70, 67, 67, 69, 71, 69, 69, 70, 71...","[0, 29, 27, 24, 24, 19, 19, 20, 26, 15, 13, 12...","""422222112111122112332222222222222222222234322...","""111111111111111111111111111111111200000000000...","[0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, ..."
...,...,...,...,...,...,...,...
54163,2340,2022-10-09,"[64, 63, 61, 61, 60, 63, 65, 64, 62, 65, 64, 6...","[55, 45, 50, 60, 45, 32, 36, 39, 44, 28, 27, 4...","""422221112211233333333322222222122114222222111...","""111111111111111111111111111111111111111112122...","[1.3, 0.9, 0.9, 0.9, 1.2, 0.9, 0.9, 0.9, 1.0, ..."
54164,2340,2022-10-08,"[70, 67, 66, 65, 68, 69, 69, 68, 69, 69, 72, 7...","[35, 37, 35, 36, 20, 15, 17, 28, 28, 21, 10, 1...","""422221322222222223333322111144222222223332233...","""111111111111111111111111111111111111111111121...","[1.2, 0.9, 0.9, 0.9, 0.9, 1.0, 0.9, 1.0, 0.9, ..."
54165,2340,2022-10-07,"[0, 71, 69, 68, 68, 70, 71, 71, 73, 69, 67, 68...","[0, 30, 18, 18, 16, 15, 15, 15, 13, 23, 15, 14...","""422211111221111234334222222222442222222222113...","""111111111111111111111111112111120000000000000...","[1.2, 1.3, 0.9, 0.9, 0.9, 0.9, 0.9, 1.1, 0.9, ..."
54166,2340,2022-10-06,"[0, 73, 72, 72, 70, 70, 70, 68, 69, 69, 77, 76...","[0, 23, 23, 21, 32, 25, 19, 24, 18, 15, 11, 11...","""421112222233332211112122212223334422333442333...","""111111111111111111111111111111111111222200000...","[0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, ..."
