## Imports

In [1]:
import pandas as pd
import json
from datetime import *

## Loading the data

In [2]:
# Oura sleep export (JSON), downloadable from https://cloud.ouraring.com/profile
# NOTE(review): absolute, machine-specific path - consider a path relative to the notebook.
oura_sleep_path = '/Users/user/Desktop/Msc AI RU/Internship/Code/oura_json/oura_sleep_2022-12-06T10-07-33.json'
with open(oura_sleep_path) as f:
    data1 = json.load(f)

# Flatten the records stored under the 'sleep' key, expanding nested objects up to two levels.
sleep_df = pd.json_normalize(data1['sleep'], max_level=2)
print('sleep_df', sleep_df.shape)

sleep_df (176, 56)


In [3]:
# Raw wake-up questionnaire answers (Google Form csv export)
wakeup_df = pd.read_csv('questionnaire_data/raw/Wakeup_3_v1_raw.csv')
print('wakeup_df', wakeup_df.shape)

# Raw bedtime questionnaire answers (Google Form csv export).
# Its 'Timestamp' column is renamed so it differs from the wake-up form's 'Timestamp' column.
bedtime_df = pd.read_csv('questionnaire_data/raw/Bedtime_3_v1_raw.csv')
bedtime_df = bedtime_df.rename(columns={'Timestamp': 'Timestamp_bedtime'})
print('bedtime_df', bedtime_df.shape)

wakeup_df (151, 9)
bedtime_df (149, 10)


## Combining the three dataframes

In [4]:
# First and last recorded value of each source's date/timestamp column (in row order).
sleep_start_date = sleep_df['day'].iloc[0]
sleep_end_date = sleep_df['day'].iloc[-1]

wakeup_start_date = wakeup_df['Timestamp'].iloc[0]
wakeup_end_date = wakeup_df['Timestamp'].iloc[-1]

bedtime_start_date = bedtime_df['Timestamp_bedtime'].iloc[0]
bedtime_end_date = bedtime_df['Timestamp_bedtime'].iloc[-1]

def remove_hour(string_date):
    """Return the text of ``string_date`` that precedes its first space.

    For a 'date time' string this is the date part; a string without any
    space is returned unchanged.
    """
    date_part, _, _ = string_date.partition(" ")
    return date_part

def convert_sleep_date(sleep_date):
    """Parse a 'year-month-day' string into a ``date``.

    The string is split on '-' and must yield exactly three integer parts.
    """
    year, month, day = (int(part) for part in sleep_date.split('-'))
    return date(year, month, day)

def convert_questionnaire_date(quest_date):
    """Parse a questionnaire timestamp into a ``date``.

    Everything after the first space is dropped via ``remove_hour``; the
    remaining text is split on '/' and read as month/day/year.
    """
    date_text = remove_hour(quest_date)
    month, day, year = (int(part) for part in date_text.split('/'))
    return date(year, month, day)

def compare_two_dates(date1, date2):
    """Order two dates.

    Returns:
        Tuple ``(earliest_date, latest_date)``. When both arguments compare
        equal, ``date1`` is returned in both positions.
    """
    if date1 > date2:
        return date2, date1
    if date1 == date2:
        return date1, date1
    return date1, date2

def compare_three_dates(sleep_date, wakeup_date, bedtime_date, earliest):
    """Return the earliest or the latest of three source dates.

    Args:
        sleep_date: Date string parsed with ``convert_sleep_date``.
        wakeup_date: Timestamp string parsed with ``convert_questionnaire_date``.
        bedtime_date: Timestamp string parsed with ``convert_questionnaire_date``.
        earliest: If truthy, return the earliest of the three dates;
            otherwise return the latest.

    Returns:
        The selected ``date``.
    """
    sleep_day = convert_sleep_date(sleep_date)
    wakeup_day = convert_questionnaire_date(wakeup_date)
    bedtime_day = convert_questionnaire_date(bedtime_date)

    # Order the first two, then widen the bounds with the third.
    lower_bound, upper_bound = compare_two_dates(sleep_day, wakeup_day)
    if bedtime_day < lower_bound:
        lower_bound = bedtime_day
    if bedtime_day > upper_bound:
        upper_bound = bedtime_day

    return lower_bound if earliest else upper_bound

# Window covered by all three sources: it opens at the latest of the three
# first dates and closes at the earliest of the three last dates.
latest_start = compare_three_dates(
    sleep_start_date, wakeup_start_date, bedtime_start_date, earliest=False
)
earliest_end = compare_three_dates(
    sleep_end_date, wakeup_end_date, bedtime_end_date, earliest=True
)
print('latest date', latest_start, "\nearliest date", earliest_end)

def find_missing_days(df, start_date, end_date, date_col=None):
    """Flag each calendar day between two dates that has no data in ``df``.

    Args:
        df: Frame to inspect.
        start_date: First day of the window (inclusive); any value accepted
            by ``pd.date_range``.
        end_date: Last day of the window (inclusive).
        date_col: Optional name of the column holding each row's date or
            timestamp (anything ``pd.to_datetime`` can parse). When given,
            days are matched against this column, so ``df`` does not need
            a date index and may hold several rows for the same day. Rows
            whose other columns are all null do not count as data.
            When omitted, ``df`` is reindexed on the daily range as-is:
            this is only meaningful if ``df`` is already indexed by date,
            otherwise no label matches and every day is reported missing.

    Returns:
        Boolean Series indexed by the daily range; True marks a day with
        no data.
    """
    calendar = pd.date_range(start_date, end_date)

    if date_col is None:
        # Original behaviour: rely on df's own index lining up with the calendar.
        return df.reindex(calendar).isnull().all(axis=1)

    # Keep only rows that carry at least one value besides the date itself.
    has_data = df.drop(columns=[date_col]).notnull().any(axis=1)
    # Drop the time of day so timestamps compare equal to calendar days.
    present_days = pd.to_datetime(df.loc[has_data, date_col]).dt.normalize()
    return pd.Series(~calendar.isin(present_days), index=calendar)

# Per-source flags for the days of the shared window.
# NOTE(review): the frames are passed without a date index here and the printed
# result flags every day as missing - confirm whether they should be indexed by date first.
missing_sleep_days, missing_wakeup_days, missing_bedtime_days = (
    find_missing_days(source_frame, latest_start, earliest_end)
    for source_frame in (sleep_df, wakeup_df, bedtime_df)
)

latest date 2022-06-19 
earliest date 2022-11-19


In [5]:
# Show the per-day "missing" flags computed for the sleep data.
# NOTE(review): the printed output flags every day as True - presumably because
# sleep_df is not indexed by date when find_missing_days reindexes it; verify.
print(missing_sleep_days)

2022-06-19    True
2022-06-20    True
2022-06-21    True
2022-06-22    True
2022-06-23    True
              ... 
2022-11-15    True
2022-11-16    True
2022-11-17    True
2022-11-18    True
2022-11-19    True
Freq: D, Length: 154, dtype: bool


In [6]:
# Flag of the first day in the window. The Series is indexed by date, so the
# first element is taken by position with .iloc instead of an integer key.
print(missing_sleep_days.iloc[0])

True


In [18]:
# Collect the sleep records from latest_start up to and including earliest_end.
# Rows are gathered in a list and the frame is built once at the end:
# DataFrame.append no longer exists in pandas 2.x, and growing a frame row by
# row copies it on every iteration.
sleep_rows = []
sleep_labels = []
counter = 0
for index, row in sleep_df.iterrows():
    row_date = convert_sleep_date(row['day'])
    if row_date < latest_start:
        continue  # record predates the shared window

    # NOTE(review): `counter` only advances when a placeholder row is inserted,
    # so the flag consulted is not tied to the date of the current row -
    # confirm that this is the intended alignment.
    if missing_sleep_days.iloc[counter]:
        sleep_rows.append(row)
        sleep_labels.append(index)
    else:
        # Empty placeholder row; as with ignore_index=True, everything
        # collected so far is renumbered from 0.
        sleep_rows.append(pd.Series(dtype=object))
        sleep_labels = list(range(len(sleep_rows)))
        counter += 1

    latest_date = row_date
    if row_date == earliest_end:
        break

complete_sleep_df = pd.DataFrame(sleep_rows, index=sleep_labels)
print(complete_sleep_df)


     average_breath  average_breath_variation  average_heart_rate  \
10           16.500                     2.625               56.29   
11              NaN                       NaN                 NaN   
12           16.500                     2.625               56.05   
13           16.750                     2.625               56.07   
14           16.500                     2.625               57.22   
..              ...                       ...                 ...   
154          15.750                     2.625               55.67   
155          15.250                     2.250               57.62   
156          15.750                     2.500               63.72   
157          15.875                     2.750               55.93   
158          15.625                     2.875               60.12   

     average_hrv  awake_time                bedtime_end  bedtime_end_delta  \
10          78.0      2730.0  2022-06-19T07:41:27+02:00            27687.0   
11           Na

In [17]:
# Collect the wake-up answers from latest_start up to and including earliest_end.
# Rows are gathered in a list and the frame is built once at the end:
# DataFrame.append no longer exists in pandas 2.x, and growing a frame row by
# row copies it on every iteration.
wakeup_rows = []
wakeup_labels = []
counter = 0
for index, row in wakeup_df.iterrows():
    row_date = convert_questionnaire_date(row['Timestamp'])
    if row_date < latest_start:
        continue  # answer predates the shared window

    # NOTE(review): `counter` only advances when a placeholder row is inserted,
    # so the flag consulted is not tied to the date of the current row -
    # confirm that this is the intended alignment.
    if missing_wakeup_days.iloc[counter]:
        wakeup_rows.append(row)
        wakeup_labels.append(index)
    else:
        # Empty placeholder row; as with ignore_index=True, everything
        # collected so far is renumbered from 0.
        wakeup_rows.append(pd.Series(dtype=object))
        wakeup_labels = list(range(len(wakeup_rows)))
        counter += 1

    latest_date = row_date
    if row_date == earliest_end:
        break

complete_wakeup_df = pd.DataFrame(wakeup_rows, index=wakeup_labels)
print(complete_wakeup_df)

    Did anything in particular prevent you from sleeping better?  \
0                                              Nothing             
1                                              Nothing             
2                                              Nothing             
3                                              Nothing             
4                                              Nothing             
..                                                 ...             
145                                            Nothing             
146                                            Nothing             
147                                            Nothing             
148                                            Nothing             
149                                            Nothing             

    Did anything in particular prevent you from sleeping earlier last night?  \
0                                                  Yes                         
1                      

In [19]:
# Collect the bedtime answers from latest_start up to and including earliest_end.
# Rows are gathered in a list and the frame is built once at the end:
# DataFrame.append no longer exists in pandas 2.x, and growing a frame row by
# row copies it on every iteration.
bedtime_rows = []
bedtime_labels = []
counter = 0
for index, row in bedtime_df.iterrows():
    row_date = convert_questionnaire_date(row['Timestamp_bedtime'])
    if row_date < latest_start:
        continue  # answer predates the shared window

    # NOTE(review): `counter` only advances when a placeholder row is inserted,
    # so the flag consulted is not tied to the date of the current row -
    # confirm that this is the intended alignment.
    if missing_bedtime_days.iloc[counter]:
        bedtime_rows.append(row)
        bedtime_labels.append(index)
    else:
        # Empty placeholder row; as with ignore_index=True, everything
        # collected so far is renumbered from 0.
        bedtime_rows.append(pd.Series(dtype=object))
        bedtime_labels = list(range(len(bedtime_rows)))
        counter += 1

    latest_date = row_date
    if row_date == earliest_end:
        break

complete_bedtime_df = pd.DataFrame(bedtime_rows, index=bedtime_labels)
print(complete_bedtime_df)

    Did you take medication that might affect your sleep?  \
1                                                   No      
2                                                   No      
3                                                   No      
4                                                   No      
5                                                   No      
..                                                 ...      
144                                                 No      
145                                                 No      
146                                                 No      
147                                                 No      
148                                                 No      

     How much did you eat in the last 3 hours before going to bed?  \
1                                                  0.0               
2                                                  0.0               
3                                                  0.0   

In [None]:
# Stack the three raw frames on top of each other (row-wise concatenation).
# NOTE(review): this uses the raw frames rather than the complete_* frames built
# above and appends rows instead of aligning the sources per day - confirm intent.
df = pd.concat([sleep_df, wakeup_df, bedtime_df], axis=0)

df