In [1]:
import numpy as np
import pandas as pd
import os
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
import sys
import os

In [2]:
%matplotlib inline

In [67]:
def process_user_scanwatch_activity(watch_u):
    sub_daily = []
    for watch_u_sub in watch_u:
        hr = False
        for c in watch_u_sub.columns:
            if "date" in c:
                watch_u_sub[c] = pd.to_datetime(watch_u_sub[c], utc=True).dt.date
                date_col = c
            if "Heart Rate" in c or "HR" in c:
                hr = True
        if hr:
            sub_daily.append(watch_u_sub.groupby(date_col).agg({"Heart Rate":['mean','min','max']}))
            sub_daily[-1].columns = ['_'.join(col).strip() for col in sub_daily[-1].columns.values]
        else:
            sub_daily.append(watch_u_sub.groupby(date_col).sum())
    return pd.concat(sub_daily,axis=1)
    

In [95]:
def process_user_sleep_state(sleep_u):
    for c in sleep_u.columns:
        if "date" in c:
            sleep_u[c] = pd.to_datetime(sleep_u[c], utc=True)

    sleep_u['startdate'] = sleep_u['startdate'] - pd.to_timedelta(12, unit='h')
    sleep_u['enddate'] = sleep_u['enddate'] - pd.to_timedelta(12, unit='h')
    sleep_u['sleep_state_duration'] = (sleep_u['enddate'] - sleep_u['startdate']).astype('timedelta64[m]')
    sleep_u['date'] = sleep_u['startdate'].dt.date

    one_hot = pd.get_dummies(sleep_u['Sleep state'])
    one_hot = one_hot.rename({0:"wakeup",1:"light",2:"deep",3:"REM",4:"manual",5:"unspecified"},axis=1)
    one_hot[one_hot.columns] = one_hot.values * sleep_u['sleep_state_duration'].values.reshape(-1,1)

    sleep_daily = pd.concat([sleep_u['date'],one_hot],axis=1)
    sleep_daily = sleep_daily.groupby('date').sum()
    return sleep_daily

In [85]:
def process_user_sleep_physio(sleep_u):
    for c in sleep_u.columns:
        if "date" in c:
            sleep_u[c] = pd.to_datetime(sleep_u[c], utc=True)        
    ## the timestamps of physio features are the same
    sleep_u['date'] = sleep_u['Heart Rate date'].dt.date
    
    sleep_u = sleep_u.groupby('date').agg({"Heart Rate":['mean','min','max'],"Respiration rate":['mean','min','max'],"Snoring":'sum',"sdnn_1":['mean','min','max']})
    sleep_u.columns = ['_'.join(col).strip() for col in sleep_u.columns.values]
    return sleep_u

In [100]:
## read raw sleep data of each participant
path = "../data/Sleepmat_Watch_Data/"
files = os.listdir(path)
all_users_daily = pd.DataFrame()
for i,uid in enumerate(files):
    print('user id: ',uid)

    fpath = os.path.join(path,uid)
    if not os.path.isdir(fpath):
        print('not dir: ',fpath)
        continue
    try:
        watch_u_hr = pd.read_csv(os.path.join(fpath,"ScanWatch_HR.csv"))
        watch_u_stp = pd.read_csv(os.path.join(fpath,"ScanWatch_Steps.csv"))
        watch_u_ca = pd.read_csv(os.path.join(fpath,"ScanWatch_Calories.csv"))
        watch_u = [watch_u_hr,watch_u_stp,watch_u_ca]
        
        sleep_u_state = pd.read_csv(os.path.join(fpath,"Sleep_state.csv"))
        sleep_u_physio = pd.read_csv(os.path.join(fpath,"Sleep_physio.csv"))
    except FileNotFoundError:
        print(fpath+'not exist')

    
    watch_u_daily = process_user_scanwatch_activity(watch_u)
    watch_u_daily = watch_u_daily.rename(columns={c:'Watch_'+c for c in watch_u_daily.columns})
    
    sleep_u_state_daily = process_user_sleep_state(sleep_u_state)
    sleep_u_phsio_daily = process_user_sleep_physio(sleep_u_physio)
    
    user_daily = pd.concat([watch_u_daily,sleep_u_state_daily,sleep_u_phsio_daily],axis=1)
    user_daily["user_id"] = uid
    
    
    all_users_daily = pd.concat([all_users_daily,user_daily],axis=0)
    

    
# all_users_daily['date']=pd.to_datetime(all_users_daily['date'],utc=True)
# all_users_daily=all_users_daily.reset_index()
# all_users_daily=all_users_daily.drop('index',axis=1)

user id:  7a61b537
user id:  31489056
user id:  f276ebe4
user id:  f5ca38f7
user id:  4ec9599f
user id:  9400f1b2
user id:  2c624232
user id:  811786ad
user id:  2858dcd1
user id:  bf6dbcba
user id:  .DS_Store
not dir:  ../data/Sleepmat_Watch_Data/.DS_Store
user id:  b7a56873
user id:  eb1e33e8
user id:  3d914f93
user id:  6b51d431
user id:  8527a891
user id:  c6f3ac57
user id:  ddd9cd98
user id:  73475cb4
user id:  4a44dc15
user id:  44cb730c
user id:  4b227777
user id:  1a656259
user id:  5e86cb4b
user id:  25fc0e70
user id:  ef2d127d
user id:  4e074085
user id:  4523540f
user id:  3ff1feb9
user id:  76a50887
user id:  44d88da8
user id:  2fca346d
user id:  535fa30d
user id:  6b86b273
user id:  86e50149
user id:  41cfc0d1
user id:  c2356069
user id:  0e17daca
user id:  35135aaa
user id:  e629fa65
user id:  71ee45a3
user id:  0b918943
user id:  5f9c4ab0
user id:  c6d2115a
user id:  4fc82b26
user id:  41d8fe4e
user id:  7077831c
user id:  b17ef6d1
user id:  98010bd9
user id:  e7f6c011
u

In [101]:
all_users_daily.columns

Index(['Watch_Heart Rate_mean', 'Watch_Heart Rate_min', 'Watch_Heart Rate_max',
       'Watch_Steps', 'Watch_Calories', 'wakeup', 'light', 'deep', 'REM',
       'Heart Rate_mean', 'Heart Rate_min', 'Heart Rate_max',
       'Respiration rate_mean', 'Respiration rate_min', 'Respiration rate_max',
       'Snoring_sum', 'sdnn_1_mean', 'sdnn_1_min', 'sdnn_1_max', 'user_id'],
      dtype='object')

In [104]:
all_users_daily.user_id.nunique()

65