In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from datenspende.utils import query_ch_df, query_pg_df
import datetime
from datetime import date
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## functions

In [2]:
# get all epoch data for one or more users
def get_epoch(user_ids):
    """Return every row of ``rocs.vital_data_epoch`` for the given customers.

    Parameters
    ----------
    user_ids : int, numpy integer, or iterable of those
        Customer id(s) to select.

    Returns
    -------
    Whatever ``query_ch_df`` returns for the generated SQL.

    Raises
    ------
    ValueError
        If ``user_ids`` is empty (``IN ()`` is not valid SQL).
    """
    # A bare scalar is accepted the same way get_demo / get_info accept one.
    if isinstance(user_ids, (int, np.integer)):
        user_ids = [user_ids]

    # Coerce to plain ints before rendering.  Formatting a tuple directly
    # (the previous approach) uses repr(), which yields "(7,)" for a single
    # id and "(np.int64(7),)" for NumPy scalars on NumPy >= 2.  The int()
    # cast also guarantees nothing but digits reaches the SQL text.
    ids = [int(u) for u in user_ids]
    if not ids:
        raise ValueError("user_ids must contain at least one id")
    id_list = ", ".join(str(u) for u in ids)

    return query_ch_df(
        f"""SELECT * FROM rocs.vital_data_epoch WHERE vital_data_epoch.customer IN ({id_list})"""
    )

In [3]:
# get demographics (incl. derived age)
def get_demo(user_ids, reference_year=2023 + 1 / 12):
    """Load demographic columns for the given users and derive an ``age``.

    Parameters
    ----------
    user_ids : int, numpy integer, or iterable of those
        User id(s) to select from ``rocs.datenspende.users``.
    reference_year : float, optional
        Fractional calendar year that ages are computed against.  Defaults
        to the value that used to be hard-coded (``2023 + 1/12``).

    Returns
    -------
    The frame returned by ``query_pg_df`` with ``creation_timestamp``
    converted from epoch milliseconds to ``datetime.date`` and an added
    ``age`` column.

    Raises
    ------
    ValueError
        If ``user_ids`` is empty.
    """
    if isinstance(user_ids, (int, np.integer)):
        user_ids = [user_ids]

    # Render the IN-list from plain ints: repr() of a tuple produces "(7,)"
    # for one element and "np.int64(7)" for NumPy scalars on NumPy >= 2.
    ids = [int(u) for u in user_ids]
    if not ids:
        raise ValueError("user_ids must contain at least one id")
    formatter = '(' + ', '.join(str(u) for u in ids) + ')'

    query = f"""
    SELECT 
        user_id, salutation, birth_date, weight, height, creation_timestamp
    FROM 
        rocs.datenspende.users
    WHERE 
        users.user_id IN {formatter} 
   
    """ 

    users = query_pg_df(query)
    users['creation_timestamp'] = pd.to_datetime(users['creation_timestamp'], unit='ms').dt.date

    # NOTE(review): the +2.5 suggests birth_date is the lower edge of a
    # 5-year birth-year bin and the age is taken at the bin centre - confirm.
    users['age'] = np.floor(reference_year - users['birth_date'] + 2.5)

    return users

In [4]:
# get the users' answers to question 127 (the export loop reads sex from `element`)
def get_info(user_ids):
    """Load the answers to question 127 for the given users.

    Parameters
    ----------
    user_ids : int, numpy integer, or iterable of those
        User id(s) to select from ``rocs.datenspende.answers``.

    Returns
    -------
    The frame returned by ``query_pg_df`` (columns ``user_id``,
    ``created_at``, ``question``, ``element``) with ``created_at`` converted
    from epoch milliseconds to ``datetime.date``.

    Raises
    ------
    ValueError
        If ``user_ids`` is empty.
    """
    # Make sure that the IN-condition for the SQL query always takes the form
    # '(userid)' or '(userid1, userid2, ..., useridN)'.  The ids are cast to
    # plain ints first: formatting a tuple directly would emit "(7,)" for a
    # single id and "np.int64(7)" for NumPy scalars on NumPy >= 2.
    if isinstance(user_ids, (int, np.integer)):
        user_ids = [user_ids]
    ids = [int(u) for u in user_ids]
    if not ids:
        raise ValueError("user_ids must contain at least one id")
    formatter = '(' + ', '.join(str(u) for u in ids) + ')'

    qu = f"""
    select
        
        a.user_id,
        a.created_at,
        a.question,
        a.element
        
    from 
        rocs.datenspende.answers a
    where 
        a.user_id IN {formatter}
    AND
        a.question = 127
        

    """

    users = query_pg_df(qu)
    users['created_at'] = pd.to_datetime(users['created_at'], unit='ms').dt.date

    return users

In [5]:
# Lookup table of epoch value types; its `id` column is exposed as `type`
# so it can be joined onto the epoch rows in modify_df.
value_types = (
    pd.read_csv('epoch_value_types.csv')
    .rename(columns={"id": "type"})
)

In [6]:
def add_s(st, end):
    """Return ``end``, moved one second later when it equals ``st``.

    Used so that a record never has a zero-length time span.
    """
    one_second = pd.Timedelta(seconds=1)
    return end + one_second if st == end else end

In [7]:
def clean_endv(st, end):
    """Return ``st`` when ``end`` is exactly 1970-01-01 00:00:00, else ``end``."""
    epoch_marker = pd.Timestamp('1970-01-01 00:00:00')
    return st if end == epoch_marker else end

In [8]:
# modify epoch dataframe to get time info and bin in 15 minute intervals
def modify_df(user_df):
    """Add time columns and 15-minute bin labels to a raw epoch frame.

    Parameters
    ----------
    user_df : pandas.DataFrame
        Needs ``startTimestamp`` / ``endTimestamp`` in epoch milliseconds and
        a ``type`` column that is joined against the module-level
        ``value_types`` table.

    Returns
    -------
    pandas.DataFrame
        A new frame (the argument is not modified) with the timestamps
        converted to naive local datetimes plus the columns ``date``,
        ``tdelta``, ``tdelta_min``, ``tdelta_sec``, ``Interval``,
        ``Time Bin 1`` and ``Time Bin 2`` (bin of start / end, numbered
        0..95 within the day).
    """
    # Copy the *argument*.  This used to copy the global `udf`, so the
    # function silently ignored whatever frame it was called with.
    user_df = user_df.copy()

    # ms -> s -> naive datetime in the machine's local timezone
    for col in ('startTimestamp', 'endTimestamp'):
        user_df[col] = (user_df[col] // 1000).apply(lambda x: datetime.datetime.fromtimestamp(x))
    user_df['date'] = user_df.startTimestamp.apply(lambda x: x.date())

    user_df = user_df.merge(value_types, how='left', on='type')

    # Durations are taken from the timestamps as delivered, i.e. before the
    # end-timestamp repairs below (unchanged from the original ordering).
    user_df['tdelta'] = user_df['endTimestamp'] - user_df['startTimestamp']
    user_df['tdelta_min'] = user_df['tdelta'].apply(lambda x: x.total_seconds()//60)
    user_df['tdelta_sec'] = user_df['tdelta'].apply(lambda x: x.total_seconds())
    user_df['date'] = pd.to_datetime(user_df['date'])

    bins = list(range(0, 97))

    # An end timestamp of epoch 0 means "no end recorded": use the start.
    # fromtimestamp() yields local time, so the marker is epoch 0 *as seen in
    # local time*; comparing against a literal 1970-01-01 00:00:00 (what
    # clean_endv does) only matches when the machine runs in UTC.
    epoch_local = pd.Timestamp(datetime.datetime.fromtimestamp(0))
    end_unset = user_df['endTimestamp'] == epoch_local
    user_df['endTimestamp'] = user_df['endTimestamp'].mask(end_unset, user_df['startTimestamp'])

    # Zero-length records get one second so every Interval has a length
    # (vectorised equivalent of add_s).
    zero_length = user_df['endTimestamp'] == user_df['startTimestamp']
    user_df['endTimestamp'] = user_df['endTimestamp'].mask(
        zero_length, user_df['endTimestamp'] + pd.Timedelta(seconds=1)
    )

    user_df['Interval'] = user_df.apply(
        lambda x: pd.Interval(x['startTimestamp'], x['endTimestamp'], closed='right'), axis=1
    )

    # quarter-hour index within the day: 4 * hour + minute // 15
    user_df['Time Bin 1'] = pd.cut((user_df.startTimestamp.dt.minute//15) + (user_df.startTimestamp.dt.hour * 4), bins, right=False)
    user_df['Time Bin 2'] = pd.cut((user_df.endTimestamp.dt.minute//15) + (user_df.endTimestamp.dt.hour * 4), bins, right=False)

    return user_df

In [9]:
def combine_overlaps(pdt, vt):
    """Merge records of one day whose time intervals overlap.

    For every record, the records that start in the same ``Time Bin 1`` and
    whose ``Interval`` overlaps it form a group.  A group of two or more is
    replaced by one row spanning earliest start to latest end.  The combined
    value is built from two members only: the earliest-starting record
    ("first") and the latest-ending record ("last"):

    * ``doubleValue``  - the share of "first" recorded before "last" starts,
      plus all of "last";
    * ``longValue``    - average of "first" and "last", weighted by duration;
    * ``booleanValue`` - the value of "first".

    Parameters
    ----------
    pdt : pandas.DataFrame
        Needs ``Interval``, ``Time Bin 1``, ``Time Bin 2``,
        ``startTimestamp``, ``endTimestamp`` and the column named by ``vt``.
    vt : {'doubleValue', 'longValue', 'booleanValue'}
        Name of the value column to combine.

    Returns
    -------
    pandas.DataFrame
        Copy of ``pdt`` indexed 1..n, with an added all-False ``ov`` column,
        the grouped rows removed and one merged row per group appended.

    Raises
    ------
    ValueError
        If ``vt`` is not one of the three supported column names.

    Notes
    -----
    Chains (A overlaps B, B overlaps C, A does not overlap C) still yield one
    merged row per distinct group, as before; only exact repeats are removed.
    """
    if vt not in ('doubleValue', 'longValue', 'booleanValue'):
        raise ValueError(f"unsupported value column: {vt!r}")

    pdt = pdt.copy()
    pdt.index = np.arange(1, len(pdt) + 1)
    pdt['ov'] = False

    drop_labels = set()
    merged_rows = []
    seen_groups = set()

    for label, iv in zip(pdt.index, pdt['Interval']):
        same_bin = pdt[pdt['Time Bin 1'] == pdt.at[label, 'Time Bin 1']]
        hits = same_bin['Interval'].apply(lambda x: x.overlaps(iv))
        ov = same_bin[hits]
        if len(ov) <= 1:
            continue

        # Every member of a group finds the same group again.  Emitting it
        # once per member appended identical merged rows, which the
        # per-bin sum of steps then counted several times.
        group = frozenset(ov.index)
        if group in seen_groups:
            continue
        seen_groups.add(group)
        drop_labels.update(group)

        first = ov['startTimestamp'].idxmin()   # earliest start (first on ties)
        last = ov['endTimestamp'].idxmax()      # latest end (first on ties)
        ts = (ov.at[first, 'endTimestamp'] - ov.at[first, 'startTimestamp']).total_seconds()

        if vt == 'doubleValue':
            td = (ov.at[last, 'startTimestamp'] - ov.at[first, 'startTimestamp']).total_seconds()
            comb_val = (td / ts) * ov.at[first, vt] + ov.at[last, vt]
        elif vt == 'longValue':
            te = (ov.at[last, 'endTimestamp'] - ov.at[last, 'startTimestamp']).total_seconds()
            tt = (ov.at[last, 'endTimestamp'] - ov.at[first, 'startTimestamp']).total_seconds()
            comb_val = np.average([ov.at[first, vt], ov.at[last, vt]], weights=[ts/tt, te/tt])
        else:  # 'booleanValue'
            comb_val = ov.at[first, vt]

        cp = ov.iloc[0].copy()
        cp[vt] = comb_val
        cp['ov'] = False
        cp['startTimestamp'] = pd.Timestamp(ov.at[first, 'startTimestamp'])
        cp['endTimestamp'] = pd.Timestamp(ov.at[last, 'endTimestamp'])
        cp['Interval'] = pd.Interval(cp['startTimestamp'], cp['endTimestamp'], closed='neither')
        cp['Time Bin 1'] = ov.at[first, 'Time Bin 1']
        cp['Time Bin 2'] = ov.at[last, 'Time Bin 2']
        merged_rows.append(cp)

    pdt = pdt.drop(sorted(drop_labels), axis=0)
    # Renumber before appending: after the drop the labels have gaps, so
    # `len(pdt) + 1` could name a surviving row and overwrite it.
    pdt.index = np.arange(1, len(pdt) + 1)
    for row in merged_rows:
        pdt.loc[len(pdt) + 1] = row

    return pdt

In [10]:
# for measurements which fall into multiple time bins, calculate fraction of measured steps for each one of the time bins
# for every unique time bin create a new row and drop the old one
def multiple_bins(i_multiple_bins, dn, time_bins, bin_s, bin_e,vt):
    """Split records that span several 15-minute bins into one row per bin.

    Parameters
    ----------
    i_multiple_bins : iterable
        Index labels in ``dn`` of the records to split.
    dn : pandas.DataFrame
        Needs ``startTimestamp``, ``endTimestamp``, ``date``, ``Time Bin 1``,
        ``Time Bin 2`` and the column named by ``vt``.
    time_bins : sequence
        Ordered bin labels.  Positions in this sequence are also used to
        index ``bin_s`` / ``bin_e``, so it must list *every* bin of the day
        in order, not only the bins that occur in the data.
    bin_s, bin_e : sequence of str
        ``'HH:MM:SS'`` first / last second of each bin.
    vt : {'doubleValue', 'longValue', 'booleanValue'}
        Value column.  ``doubleValue`` is distributed in proportion to the
        time spent in each bin; the other two are copied to every bin.

    Returns
    -------
    pandas.DataFrame
        ``dn`` without the split records, re-indexed 1..n, followed by the
        per-bin rows.

    Raises
    ------
    ValueError
        If ``vt`` is not one of the three supported column names.
    """
    if vt not in ('doubleValue', 'longValue', 'booleanValue'):
        raise ValueError(f"unsupported value column: {vt!r}")
    proportional = vt == 'doubleValue'

    day0 = datetime.date.min
    no_pad = datetime.timedelta(seconds=0)
    one_second = datetime.timedelta(seconds=1)

    def _on_day0(clock):
        # put a time-of-day on a dummy date so two of them can be subtracted
        return datetime.datetime.combine(day0, clock)

    def _parse(text):
        return datetime.datetime.strptime(text, '%H:%M:%S').time()

    cps = []
    for mbi in i_multiple_bins:
        mult_bin = dn.loc[mbi].copy()
        dn = dn.drop(mbi,axis=0)
        # positions of the first and last bin touched; everything in between is covered too
        first_bini = [t for t in range(len(time_bins)) if time_bins[t] == mult_bin['Time Bin 1']][0]
        last_bini = [t for t in range(len(time_bins)) if time_bins[t] == mult_bin['Time Bin 2']][0]
        n_bins = len(range(first_bini, last_bini+1))

        t = mult_bin['endTimestamp']-mult_bin['startTimestamp'] # duration of the measurement
        for b in range(first_bini, last_bini+1):
            cp = mult_bin.copy()

            if b == first_bini:
                # from the record's start to the last second of its first bin
                dur = _on_day0(_parse(bin_e[b])) - _on_day0(mult_bin['startTimestamp'].time())
                a = no_pad
                cp['startTimestamp'] = mult_bin['startTimestamp']
                cp['endTimestamp'] = cp['startTimestamp'] + dur

            elif b == last_bini:
                # Bins end at :59 and the next starts at :00; with exactly two
                # bins that one-second gap is credited here.
                # NOTE(review): with more than two bins the interior bins get
                # the padding instead, so the shares add up to one second
                # less than the record's duration - confirm this is intended.
                a = no_pad if n_bins > 2 else one_second
                dur = _on_day0(mult_bin['endTimestamp'].time()) - _on_day0(_parse(bin_s[b]))
                cp['startTimestamp'] = mult_bin['endTimestamp'] - dur
                cp['endTimestamp'] = mult_bin['endTimestamp']

            else:
                # interior bin: the whole bin (14:59) plus the one-second gap to the next bin
                a = one_second
                bin_st = _parse(bin_s[b])
                bin_et = _parse(bin_e[b])
                dur = _on_day0(bin_et) - _on_day0(bin_st)
                cp['startTimestamp'] = mult_bin['date'] + datetime.timedelta(hours=bin_st.hour, minutes=bin_st.minute, seconds=bin_st.second)
                cp['endTimestamp'] = mult_bin['date'] + datetime.timedelta(hours=bin_et.hour, minutes=bin_et.minute, seconds=bin_et.second)

            # One assignment for all three branches.  The interior branch
            # used to assign to a misspelt name (`al`), so interior bins were
            # written with the value computed for the previous bin.
            if proportional:
                val = ((dur + a) / t) * mult_bin[vt]
            else:
                val = mult_bin[vt]

            cp[vt] = val
            cp['Time Bin 1'] = time_bins[b]
            cp['Time Bin 2'] = time_bins[b]
            cp['Interval'] = pd.Interval(cp['startTimestamp'],cp['endTimestamp'],closed='neither')
            cps.append(cp)

    # append new rows to dataframe
    dn.index = np.arange(1, len(dn) + 1)
    for c in cps:
        dn.loc[len(dn)+1] = c
    return dn

In [11]:
# Clock-time edges of the 96 quarter-hour bins of a day:
# bin_s[k] is the first second of bin k, bin_e[k] its last second ('HH:MM:SS').
bin_s, bin_e = [], []
for quarter in range(24 * 4):
    h, m = divmod(quarter, 4)
    bin_s.append(f'{h:02d}:{m * 15:02d}:00')
    bin_e.append(f'{h:02d}:{m * 15 + 14:02d}:59')

In [32]:
# create df for steps, hr and sleepstatebinary
# one entry per day and time bin
def get_df(udf, time_bins=None):
    """Aggregate one user's epoch records into a (date x 15-minute bin) table.

    For each of the epoch types 1000, 3000 and 2000 the records are
    de-overlapped per day (``combine_overlaps``), split at bin boundaries
    (``multiple_bins``), aggregated per (date, bin) and outer-merged onto a
    grid with one row per date and bin.  The output column and aggregation
    are chosen from the type's ``valueType``:

    ====== ============== ======= ===========
    value  source column  output  aggregation
    ====== ============== ======= ===========
    0      doubleValue    steps   sum
    1      longValue      hr      mean
    2      booleanValue   sleep   mean
    ====== ============== ======= ===========

    Parameters
    ----------
    udf : pandas.DataFrame
        Output of ``modify_df`` for one user.
    time_bins : sequence, optional
        Ordered labels of all bins of a day.  When omitted they are taken
        from the categories of ``udf['Time Bin 1']`` (every bin, whether
        observed or not), falling back to the sorted observed labels if the
        column is not categorical.  Previously the function read a global
        ``time_bins`` that had to be set by the caller beforehand.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``Time Bin 1`` and one column per type that had
        data.  Types without any record are skipped with a warning, so their
        column is absent.
    """
    if time_bins is None:
        bin_col = udf['Time Bin 1']
        if isinstance(bin_col.dtype, pd.CategoricalDtype):
            time_bins = list(bin_col.cat.categories)
        else:
            time_bins = sorted(bin_col.dropna().unique())

    # valueType -> (source column, output column, aggregation)
    specs = {
        0: ('doubleValue', 'steps', 'sum'),
        1: ('longValue', 'hr', 'mean'),
        2: ('booleanValue', 'sleep', 'mean'),
    }

    # grid with one row per (date, bin)
    dates = []
    tb = []
    for d in udf['date'].unique():
        for t in time_bins:
            dates.append(d)
            tb.append(t)
    df_comb = pd.DataFrame(data={'date': dates, 'Time Bin 1': tb})

    for v in [1000, 3000, 2000]:
        type_rows = udf[udf['type'] == v]
        if type_rows.empty:
            # used to die with IndexError on .iloc[0]
            warnings.warn(f"no epoch records of type {v}; skipping it")
            continue

        vt_i = type_rows['valueType'].iloc[0]
        if vt_i not in specs:
            # used to fall through with `vt` unset or left over from the previous type
            warnings.warn(f"type {v} has unsupported valueType {vt_i!r}; skipping it")
            continue
        vt, out_name, how = specs[vt_i]

        u_val_df = type_rows[[vt, 'date', 'code', 'Time Bin 1', 'Time Bin 2', 'Interval', 'startTimestamp', 'endTimestamp']]

        per_day = [
            combine_overlaps(u_val_df[u_val_df['date'] == day], vt)
            for day in u_val_df['date'].unique()
        ]
        dn = pd.concat(per_day, axis=0)
        # re-index
        dn.index = np.arange(1, len(dn) + 1)

        crosses_bins = dn['Time Bin 1'] != dn['Time Bin 2']
        if vt_i == 1:
            # longValue records that are exactly 60 s long are left unsplit
            dn['s'] = dn['Interval'].apply(lambda x: x.length.total_seconds())
            crosses_bins = crosses_bins & (dn['s'] != 60)
        dn = multiple_bins(dn[crosses_bins].index, dn, time_bins, bin_s, bin_e, vt)

        grouped = dn[['date', 'Time Bin 1', vt]].groupby(['date', 'Time Bin 1'])
        dng = grouped.sum() if how == 'sum' else grouped.mean()
        dng = dng.reset_index(level=[0, 1]).rename(columns={vt: out_name})

        df_comb = pd.merge(df_comb, dng, how='outer', on=['date', 'Time Bin 1'])

    return df_comb

In [13]:
def phases(week):
    """Map a week offset relative to the test date to a phase code.

    ======== =====
    week     phase
    ======== =====
    < 0      0
    0 .. 4   1
    > 4..12  2
    > 12     3
    ======== =====

    The plotting cells label the codes 0..3 as 'pre', 'acute', 'sub-acute'
    and 'post'.

    The ranges are contiguous, so fractional weeks such as 4.5 get a phase
    as well (previously the gaps between 4 and 5 left the result unassigned
    and raised ``UnboundLocalError``).  Integer inputs map exactly as before.

    Raises
    ------
    ValueError
        If ``week`` compares false against every bound (e.g. NaN).
    """
    if week < 0:
        return 0
    if week <= 4:
        return 1
    if week <= 12:
        return 2
    if week > 12:
        return 3
    raise ValueError(f"cannot assign a phase to week {week!r}")

In [14]:
# Ids the export loop below iterates over (one `us_id` per element),
# loaded from a local .npy file.
lc_u = np.load('uid_per_shb_fatigue.npy')

In [15]:
# Table read with its first CSV column as index; the export loop below looks
# up column `dt` by `user_id` and uses it as the reference for `week_totest`.
pos_test = pd.read_csv('pos_testdate.csv',index_col=0)

In [None]:
# Build and export one table per user: 15-minute bins of steps / heart rate /
# sleep, annotated with weekday, weeks relative to the user's test date, phase
# and heart rate as a percentage of an age-based maximum.
FEMALE_ELEMENT = 773     # `element` value of question 127 that the loop treats as 'female'
FIRST_WEEKEND_DAY = 5    # pandas dayofweek: Monday=0 ... Saturday=5, Sunday=6

for us_id in lc_u:
    sex = 'female' if get_info([us_id])['element'].values[0] == FEMALE_ELEMENT else 'male'
    age = get_demo([us_id])['age'].values[0]

    udf = get_epoch([us_id])
    udf = udf.rename(columns={"customer": "user_id"})
    udf = modify_df(udf)

    # All 96 bins of the day, in order.  multiple_bins() uses a bin's position
    # in this list to index bin_s / bin_e, so the list must not skip bins the
    # user happens to have no data in (the previous `sorted(...unique())`
    # did, shifting every later position).
    time_bins = list(udf['Time Bin 1'].cat.categories)

    df_comb = get_df(udf)

    df_comb['day_of_week'] = pd.to_datetime(df_comb['date']).dt.dayofweek
    # Saturday and Sunday only; the previous threshold (>= 4) also flagged Friday.
    df_comb['weekend'] = df_comb['day_of_week'] >= FIRST_WEEKEND_DAY

    # The lookup raises IndexError when the user has no row in pos_test.
    df_comb['dt'] = pd.to_datetime(pos_test.loc[pos_test['user_id'] == us_id, 'dt'].iloc[0])
    td = pd.to_datetime(df_comb['date']) - df_comb['dt']
    # ceil(days / 7): the test day itself is week 0, the following 7 days week 1, ...
    df_comb['week_totest'] = td.apply(lambda x: -(x.days // -7))

    # position of the bin within the day (0..95)
    df_comb['time'] = df_comb['Time Bin 1'].map({b: k for k, b in enumerate(time_bins)})
    df_comb['phase'] = df_comb['week_totest'].apply(phases)

    # NOTE(review): these look like the published age-predicted maximum heart
    # rate formulas (206 - 0.88*age for women, 208 - 0.7*age) - confirm source.
    if sex == 'female':
        MAX_HR = 206 - (0.88 * age)
    else:
        MAX_HR = 208 - (0.7 * age)

    df_comb['d to max hr [%]'] = (df_comb['hr'])/ MAX_HR * 100

    # The target directory must already exist.
    df_comb.to_csv('user_df/'+sex+str(age)+str(us_id)+'.csv')

  return pd.read_sql(query, conn)
  return pd.read_sql(query, conn)
  return pd.read_sql(query, conn)
  return pd.read_sql(query, conn)
  return pd.read_sql(query, conn)
  return pd.read_sql(query, conn)


In [23]:
    
    # Scratch cell: the body of get_df() replayed statement for statement at
    # top level (no `def`, no `return`).  It reads `udf`, `time_bins`, `bin_s`
    # and `bin_e` from the kernel namespace and leaves its intermediates
    # (`df_comb`, `dn`, `dng`, `vt`, ...) there, which the following cells
    # display.  Any change to get_df() has to be mirrored here by hand.
    dates = []
    tb = []

    # grid with one row per (date, time bin)
    for d in udf['date'].unique():
        for t in time_bins:
            dates.append(d)
            tb.append(t)
            
    df_comb = pd.DataFrame(data={'date':dates ,'Time Bin 1':tb})
    
    for v in [1000, 3000, 2000]:
        # the valueType of the first record of this type picks the value column
        vt_i = udf[udf['type'] == v]['valueType'].iloc[0]
        if vt_i == 0:
            vt = 'doubleValue'
        elif vt_i == 1:
            vt = 'longValue'
        elif vt_i == 2:
            vt = 'booleanValue'

        u_val_df = udf[udf['type'] == v][[vt, 'date', 'code', 'Time Bin 1', 'Time Bin 2', 'Interval','startTimestamp', 'endTimestamp']]
        val_code = u_val_df['code'].iloc[0]

        # merge overlapping records day by day
        pdt_df = []
        for date in u_val_df['date'].unique():
            pdt_df.append(combine_overlaps(u_val_df[u_val_df['date']==date],vt)) 
        dn = pd.concat(pdt_df, axis=0)
        # re-index
        dn.index = np.arange(1, len(dn) + 1)
        

        if vt_i == 0:
            # doubleValue: split across bins, then sum per (date, bin) -> 'steps'
            i_multiple_bins = dn[dn['Time Bin 1']!= dn['Time Bin 2']].index
            dn = multiple_bins(i_multiple_bins, dn, time_bins, bin_s, bin_e,vt) 
            dng = dn[['date', 'Time Bin 1',vt]].groupby(['date', 'Time Bin 1']).sum( ).reset_index(level=[0,1])
            dng = dng.rename(columns={vt:'steps'})
                    
            df_comb = pd.merge(df_comb,dng, how='outer', on=['date', 'Time Bin 1'])
        
        elif vt_i == 1:
            # longValue: records of exactly 60 s are not split; mean per (date, bin) -> 'hr'
            
            dn['s'] = dn['Interval'].apply(lambda x: x.length.total_seconds())
            # the next assignment is immediately replaced by the one after it
            i_multiple_bins = dn[dn['Time Bin 1']!= dn['Time Bin 2']].index
            i_multiple_bins = dn[(dn['Time Bin 1']!= dn['Time Bin 2'])&(dn['s'] != 60)].index
            dn = multiple_bins(i_multiple_bins, dn, time_bins, bin_s, bin_e,vt) 
    
            dng = dn[['date', 'Time Bin 1',vt]].groupby(['date', 'Time Bin 1']).mean( ).reset_index(level=[0,1])
            dng = dng.rename(columns = {vt:'hr'})         
                       
            #merge w df above
            df_comb = pd.merge(df_comb, dng, how='outer', on=['date', 'Time Bin 1'])
            
        elif vt_i == 2:
            # booleanValue: split across bins, mean per (date, bin) -> 'sleep'
            i_multiple_bins = dn[(dn['Time Bin 1']!= dn['Time Bin 2'])].index
            dn = multiple_bins(i_multiple_bins, dn, time_bins, bin_s, bin_e,vt) 
            dng = dn[['date', 'Time Bin 1',vt]].groupby(['date', 'Time Bin 1']).mean( ).reset_index(level=[0,1])
            dng = dng.rename(columns = {vt:'sleep'})    
            #merge w df above
            df_comb = pd.merge(df_comb, dng, how='outer', on=['date', 'Time Bin 1'])

KeyError: 'longValue'

In [26]:
# Inspect the `dn` frame currently bound in the kernel namespace
# (it is assigned by the scratch cell that replays the get_df body).
dn

Unnamed: 0,longValue,date,code,Time Bin 1,Time Bin 2,Interval,startTimestamp,endTimestamp,ov,s
1,96.0,2020-12-29,HeartRate,"[48, 49)","[48, 49)","(2020-12-29 12:04:14, 2020-12-29 12:14:51]",2020-12-29 12:04:14,2020-12-29 12:14:51,False,637.0
2,123.0,2021-05-06,HeartRate,"[45, 46)","[45, 46)","(2021-05-06 11:16:31, 2021-05-06 11:22:04]",2021-05-06 11:16:31,2021-05-06 11:22:04,False,333.0
3,104.0,2021-09-22,HeartRate,"[11, 12)","[11, 12)","(2021-09-22 02:46:47, 2021-09-22 02:58:18]",2021-09-22 02:46:47,2021-09-22 02:58:18,False,691.0
4,101.0,2021-12-14,HeartRate,"[14, 15)","[14, 15)","(2021-12-14 03:30:48, 2021-12-14 03:37:52]",2021-12-14 03:30:48,2021-12-14 03:37:52,False,424.0
5,98.0,2022-04-04,HeartRate,"[11, 12)","[11, 12)","(2022-04-04 02:46:26, 2022-04-04 02:57:06]",2022-04-04 02:46:26,2022-04-04 02:57:06,False,640.0
...,...,...,...,...,...,...,...,...,...,...
1956,103.0,2022-12-09,HeartRate,"[14, 15)","[14, 15)","(2022-12-09 03:44:15, 2022-12-09 03:44:59)",2022-12-09 03:44:15,2022-12-09 03:44:59,False,1027.0
1957,103.0,2022-12-09,HeartRate,"[15, 16)","[15, 16)","(2022-12-09 03:45:00, 2022-12-09 03:59:59)",2022-12-09 03:45:00,2022-12-09 03:59:59,False,1027.0
1958,103.0,2022-12-09,HeartRate,"[16, 17)","[16, 17)","(2022-12-09 04:00:00, 2022-12-09 04:01:22)",2022-12-09 04:00:00,2022-12-09 04:01:22,False,1027.0
1959,127.0,2022-12-12,HeartRate,"[14, 15)","[14, 15)","(2022-12-12 03:30:56, 2022-12-12 03:44:59)",2022-12-12 03:30:56,2022-12-12 03:44:59,False,917.0


In [31]:
# Per (date, time bin) mean of the current value column `vt` of `dn`:
# the same aggregation the longValue and booleanValue branches apply.
dn[['date', 'Time Bin 1',vt]].groupby(['date', 'Time Bin 1']).mean( ).reset_index([0,1])

Unnamed: 0,date,Time Bin 1,longValue
0,2020-08-03,"[60, 61)",102.0
1,2020-08-03,"[61, 62)",102.0
2,2020-08-03,"[62, 63)",102.0
3,2020-08-04,"[57, 58)",118.0
4,2020-08-04,"[58, 59)",118.0
...,...,...,...
1927,2022-12-09,"[14, 15)",103.0
1928,2022-12-09,"[15, 16)",103.0
1929,2022-12-09,"[16, 17)",103.0
1930,2022-12-12,"[14, 15)",127.0


In [None]:
# Steps per time bin for dfs[1]: one line for each of the weeks -10, 0 and 10
# relative to the test. The x tick labels end up being those of the last
# week drawn, because every xticks call replaces the previous one.
plt.figure(figsize=(25,10))
df = dfs[1]
for w in (-10, 0, 10):
    week_rows = df[df['week_totest']==w]
    positions = range(len(week_rows))
    plt.plot(positions, week_rows['steps'].values)
    plt.xticks(positions, week_rows['Time Bin 1'], rotation=90)

plt.legend(['-10 w', '0 w', '10 w']);

In [None]:
# Heart rate per time bin for dfs[1]: one line for each of the weeks -10, 0
# and 10 relative to the test. The x tick labels end up being those of the
# last week drawn, because every xticks call replaces the previous one.
plt.figure(figsize=(25,10))
df = dfs[1]
for w in (-10, 0, 10):
    week_rows = df[df['week_totest']==w]
    positions = range(len(week_rows))
    plt.plot(positions, week_rows['hr'].values)
    plt.xticks(positions, week_rows['Time Bin 1'], rotation=90)

plt.legend(['-10 w', '0 w', '10 w']);

In [None]:
# Display the frame currently bound to `df` (the plotting cells rebind it to an entry of `dfs`).
df

In [None]:
# Mean heart rate per time-of-day bin, one line per phase.
# Columns: the two frames in `dfs`; top row: rows flagged weekend, bottom row: the rest.
fig, ax = plt.subplots(2,2, figsize=(16,8))
u = ['negative user', 'persistent symtpoms user']
for i in range(2):
    df = dfs[i]
    for ph in [0,1,2,3]:
        dfph = df[df['phase'] == ph]
        dfph_we = dfph[dfph['weekend'] == True]
        dfph_wd = dfph[dfph['weekend'] == False]
        # Select 'hr' before aggregating: a frame-wide .mean() raises
        # TypeError on pandas >= 2 as soon as a non-numeric column (dates,
        # bin labels) is present, and averaged every column for nothing before.
        hr_we = dfph_we.groupby('time')['hr'].mean()
        hr_wd = dfph_wd.groupby('time')['hr'].mean()
        ax[0,i].errorbar(hr_we.index, hr_we, yerr=0)
        ax[1,i].errorbar(hr_wd.index, hr_wd, yerr=0)
    ax[0,i].set_ylabel('heart rate weekend')
    ax[1,i].set_ylabel('heart rate week')
    ax[1,i].set_xlabel('time bins')
    ax[0,i].set_title(u[i])
# attaches to the current axes only (the last subplot created)
plt.legend(['pre', 'acute', 'sub-acute', 'post']);

In [None]:
# Mean +/- standard deviation of 'rhr' per time-of-day bin, one line per
# phase, one panel per frame in `dfs`.
fig, ax = plt.subplots(1,2, figsize=(16,8))
u = ['negative user', 'persistent symtpoms user']
for i in range(2):
    df = dfs[i]
    for ph in [0,1,2,3]:
        dfph = df[df['phase'] == ph]
        # Aggregate the plotted column only: frame-wide .mean()/.std() raise
        # TypeError on pandas >= 2 when non-numeric columns are present.
        rhr_stats = dfph.groupby('time')['rhr'].agg(['mean', 'std'])
        ax[i].errorbar(rhr_stats.index, rhr_stats['mean'], yerr=rhr_stats['std'])
    ax[i].set_title(u[i])
# attaches to the current axes only (the last subplot created)
plt.legend(['pre', 'acute', 'sub-acute', 'post']);

In [None]:
# Mean +/- standard deviation of 'steps_per_s' per time-of-day bin, one line
# per phase, one panel per frame in `dfs`.
fig, ax = plt.subplots(1,2, figsize=(16,8))
u = ['negative user', 'persistent symtpoms user']
for i in range(2):
    df = dfs[i]
    for ph in [0,1,2,3]:
        dfph = df[df['phase'] == ph]
        # Aggregate the plotted column only: frame-wide .mean()/.std() raise
        # TypeError on pandas >= 2 when non-numeric columns are present.
        step_stats = dfph.groupby('time')['steps_per_s'].agg(['mean', 'std'])
        ax[i].errorbar(step_stats.index, step_stats['mean'], yerr=step_stats['std'])
    ax[i].set_title(u[i])
# attaches to the current axes only (the last subplot created)
plt.legend(['pre', 'acute', 'sub-acute', 'post']);

In [None]:
# Heart-rate distribution of dfs[1] per phase: rows with sleep != 1 ('day')
# against rows with sleep == 1 ('night').
fig, ax = plt.subplots(1,4, figsize=(16,8))
# Not named `phases`: that would replace the phases() function in the kernel
# and break the export loop on its next run.
phase_labels = ['pre', 'acute', 'sub-acute', 'post']
df = dfs[1]
for ph in [0,1,2,3]:
    dfph = df[df['phase'] == ph]
    ax[ph].hist(dfph[dfph['sleep'] != 1]['hr'], density = True, histtype='step')
    ax[ph].hist(dfph[dfph['sleep'] == 1]['hr'], density = True, histtype='step')
    ax[ph].set_title(phase_labels[ph])
# attaches to the current axes only (the last subplot created)
plt.legend(['day', 'night']);