In [2]:
import pandas as pd
import datetime
import numpy as np

import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
import plotly.express as px
import jupyter_dash
from dash.dependencies import Input, Output
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from IPython.display import display

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

In [3]:
# pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', None)

### Steps

In [79]:
df1 = pd.read_csv('~/qs/fitbit/data/steps_2020.csv')
df1

Unnamed: 0,time,Step Count
0,2020-01-01,1041
1,2020-01-02,321
2,2020-01-03,1659
3,2020-01-04,8407
4,2020-01-05,4520
...,...,...
361,2020-12-27,4143
362,2020-12-28,5340
363,2020-12-29,3503
364,2020-12-30,6311


In [80]:
df2 = pd.read_csv('~/qs/fitbit/data/steps_2021.csv')
df2

Unnamed: 0,time,Step Count
0,2021-01-01,3218
1,2021-01-02,7788
2,2021-01-03,5569
3,2021-01-04,4095
4,2021-01-05,2091
...,...,...
360,2021-12-27,8837
361,2021-12-28,8085
362,2021-12-29,3452
363,2021-12-30,4071


In [81]:
df3 = pd.read_csv('~/qs/fitbit/data/steps_2022.csv')
df3

Unnamed: 0,time,Step Count
0,2022-01-01,3605
1,2022-01-02,6357
2,2022-01-03,8505
3,2022-01-04,8315
4,2022-01-05,2808
...,...,...
360,2022-12-27,519
361,2022-12-28,4961
362,2022-12-29,3453
363,2022-12-30,9216


In [83]:
df4 = pd.read_csv('~/qs/fitbit/data/steps_2023.csv')
df4 = df4.drop(df4.index[-1])
df4

Unnamed: 0,time,Step Count
0,2023-01-01,3091
1,2023-01-02,3921
2,2023-01-03,9329
3,2023-01-04,7558
4,2023-01-05,3988
...,...,...
65,2023-03-07,4960
66,2023-03-08,358
67,2023-03-09,749
68,2023-03-10,3203


In [84]:
df = pd.concat([df1, df2, df3, df4])
df

Unnamed: 0,time,Step Count
0,2020-01-01,1041
1,2020-01-02,321
2,2020-01-03,1659
3,2020-01-04,8407
4,2020-01-05,4520
...,...,...
65,2023-03-07,4960
66,2023-03-08,358
67,2023-03-09,749
68,2023-03-10,3203


In [85]:
df = df.rename(columns={'time':'Date'})
df

Unnamed: 0,Date,Step Count
0,2020-01-01,1041
1,2020-01-02,321
2,2020-01-03,1659
3,2020-01-04,8407
4,2020-01-05,4520
...,...,...
65,2023-03-07,4960
66,2023-03-08,358
67,2023-03-09,749
68,2023-03-10,3203


In [86]:
# any dates missing?
start = '2020-01-01'
end = '2023-03-12'
pd.date_range(start=start, end=end).difference(df['Date'])

DatetimeIndex(['2023-03-12'], dtype='datetime64[ns]', freq=None)

In [87]:
# df.to_csv('data/steps.csv', index=None, encoding='utf-8')

### Resting HR

In [88]:
df1 = pd.read_csv('~/qs/fitbit/data/resting_hr_2020.csv')
df1 = df1.rename(columns={'time':'Date'})
df1['Date'] = pd.to_datetime(df1['Date'])
df1 = df1.set_index('Date')
df1 = df1.resample('D', convention='start').asfreq() # 'T' = seconds, 'D' = days
df1 = df1.reset_index()
df1

Unnamed: 0,Date,resting_HR
0,2020-01-01,56.0
1,2020-01-02,58.0
2,2020-01-03,57.0
3,2020-01-04,58.0
4,2020-01-05,57.0
...,...,...
361,2020-12-27,56.0
362,2020-12-28,56.0
363,2020-12-29,57.0
364,2020-12-30,58.0


In [89]:
# https://stackoverflow.com/questions/51656065/pandas-resampling-typeerror-only-valid-with-datetimeindex-timedeltaindex-or-p
# https://stackoverflow.com/questions/54592536/find-gaps-in-pandas-time-series-dataframe-sampled-at-1-minute-intervals-and-na
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html#pandas.DataFrame.asfreq

df2 = pd.read_csv('~/qs/fitbit/data/resting_hr_2021.csv')
df2 = df2.rename(columns={'time':'Date'})
df2['Date'] = pd.to_datetime(df2['Date'])
df2 = df2.set_index('Date')
df2 = df2.resample('D', convention='start').asfreq() # 'T' = seconds, 'D' = days
df2 = df2.reset_index()
df2

Unnamed: 0,Date,resting_HR
0,2021-01-01,56.0
1,2021-01-02,57.0
2,2021-01-03,55.0
3,2021-01-04,56.0
4,2021-01-05,55.0
...,...,...
360,2021-12-27,51.0
361,2021-12-28,51.0
362,2021-12-29,52.0
363,2021-12-30,51.0


In [90]:
# https://stackoverflow.com/questions/51656065/pandas-resampling-typeerror-only-valid-with-datetimeindex-timedeltaindex-or-p
# https://stackoverflow.com/questions/54592536/find-gaps-in-pandas-time-series-dataframe-sampled-at-1-minute-intervals-and-fill
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html#pandas.DataFrame.asfreq

df3 = pd.read_csv('~/qs/fitbit/data/resting_hr_2022.csv')
df3 = df3.rename(columns={'time':'Date'})
df3['Date'] = pd.to_datetime(df3['Date'])
df3 = df3.set_index('Date')
df3 = df3.resample('D', convention='start').asfreq() # 'T' = seconds, 'D' = days
df3 = df3.reset_index()
df3

Unnamed: 0,Date,resting_HR
0,2022-01-01,53.0
1,2022-01-02,54.0
2,2022-01-03,53.0
3,2022-01-04,53.0
4,2022-01-05,54.0
...,...,...
360,2022-12-27,50.0
361,2022-12-28,50.0
362,2022-12-29,51.0
363,2022-12-30,51.0


In [91]:
# https://stackoverflow.com/questions/51656065/pandas-resampling-typeerror-only-valid-with-datetimeindex-timedeltaindex-or-p
# https://stackoverflow.com/questions/54592536/find-gaps-in-pandas-time-series-dataframe-sampled-at-1-minute-intervals-and-fill
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html#pandas.DataFrame.asfreq

df4 = pd.read_csv('~/qs/fitbit/data/resting_hr_2023.csv')
df4 = df4.rename(columns={'time':'Date'})
df4['Date'] = pd.to_datetime(df4['Date'])
df4 = df4.set_index('Date')
df4 = df4.resample('D', convention='start').asfreq() # 'T' = seconds, 'D' = days
df4 = df4.reset_index()
df4

Unnamed: 0,Date,resting_HR
0,2023-01-01,51
1,2023-01-02,51
2,2023-01-03,52
3,2023-01-04,52
4,2023-01-05,51
...,...,...
65,2023-03-07,55
66,2023-03-08,53
67,2023-03-09,54
68,2023-03-10,53


In [92]:
df = pd.concat([df1, df2, df3, df4])
df

Unnamed: 0,Date,resting_HR
0,2020-01-01,56.0
1,2020-01-02,58.0
2,2020-01-03,57.0
3,2020-01-04,58.0
4,2020-01-05,57.0
...,...,...
65,2023-03-07,55.0
66,2023-03-08,53.0
67,2023-03-09,54.0
68,2023-03-10,53.0


In [93]:
# any dates missing?
start = '2020-01-01'
end = '2023-03-12'
pd.date_range(start=start, end=end).difference(df['Date'])

DatetimeIndex(['2023-03-12'], dtype='datetime64[ns]', freq=None)

In [94]:
# df.to_csv('data/resting_hr.csv', index=None, encoding='utf-8')

### Floors

In [95]:
df1 = pd.read_csv('~/qs/fitbit/data/floors_2020.csv')
df1 = df1.rename(columns={'time':'Date'})
df1['Date'] = pd.to_datetime(df1['Date'])
df1 = df1.set_index('Date')
df1 = df1.resample('D', convention='start').asfreq() # 'T' = seconds, 'D' = days
df1 = df1.reset_index()
df1

Unnamed: 0,Date,Number of Floors
0,2020-01-01,0
1,2020-01-02,0
2,2020-01-03,1
3,2020-01-04,8
4,2020-01-05,2
...,...,...
361,2020-12-27,3
362,2020-12-28,3
363,2020-12-29,1
364,2020-12-30,2


In [96]:
df2 = pd.read_csv('~/qs/fitbit/data/floors_2021.csv')
df2 = df2.rename(columns={'time':'Date'})
df2['Date'] = pd.to_datetime(df2['Date'])
df2 = df2.set_index('Date')
df2 = df2.resample('D', convention='start').asfreq() # 'T' = seconds, 'D' = days
df2 = df2.reset_index()
df2

Unnamed: 0,Date,Number of Floors
0,2021-01-01,2
1,2021-01-02,4
2,2021-01-03,4
3,2021-01-04,3
4,2021-01-05,4
...,...,...
360,2021-12-27,5
361,2021-12-28,5
362,2021-12-29,0
363,2021-12-30,1


In [97]:
df3 = pd.read_csv('~/qs/fitbit/data/floors_2022.csv')
df3 = df3.rename(columns={'time':'Date'})
df3['Date'] = pd.to_datetime(df3['Date'])
df3 = df3.set_index('Date')
df3 = df3.resample('D', convention='start').asfreq() # 'T' = seconds, 'D' = days
df3 = df3.reset_index()
df3

Unnamed: 0,Date,Number of Floors
0,2022-01-01,3
1,2022-01-02,5
2,2022-01-03,3
3,2022-01-04,4
4,2022-01-05,5
...,...,...
360,2022-12-27,0
361,2022-12-28,4
362,2022-12-29,2
363,2022-12-30,4


In [100]:
df4 = pd.read_csv('~/qs/fitbit/data/floors_2023.csv')
df4 = df4.rename(columns={'time':'Date'})
df4['Date'] = pd.to_datetime(df4['Date'])
df4 = df4.set_index('Date')
df4 = df4.resample('D', convention='start').asfreq() # 'T' = seconds, 'D' = days
df4 = df4.reset_index()
df4 = df4.drop(df4.index[-1])
df4

Unnamed: 0,Date,Number of Floors
0,2023-01-01,2
1,2023-01-02,1
2,2023-01-03,9
3,2023-01-04,1
4,2023-01-05,2
...,...,...
65,2023-03-07,8
66,2023-03-08,0
67,2023-03-09,0
68,2023-03-10,1


In [101]:
df = pd.concat([df1, df2, df3, df4])
df

Unnamed: 0,Date,Number of Floors
0,2020-01-01,0
1,2020-01-02,0
2,2020-01-03,1
3,2020-01-04,8
4,2020-01-05,2
...,...,...
65,2023-03-07,8
66,2023-03-08,0
67,2023-03-09,0
68,2023-03-10,1


In [102]:
# any dates missing?
start = '2020-01-01'
end = '2023-03-12'
pd.date_range(start=start, end=end).difference(df['Date'])

DatetimeIndex(['2023-03-12'], dtype='datetime64[ns]', freq=None)

In [103]:
# df.to_csv('data/floors.csv', index=None, encoding='utf-8')

# Intra-minute HR

In [219]:
df1 = pd.read_csv('~/qs/fitbit/data/hr1.csv')
df1['time'] = pd.to_datetime(df1['time'])
df1 = df1.set_index('time')
df1

Unnamed: 0_level_0,value
time,Unnamed: 1_level_1
2023-01-01 00:00:00,102
2023-01-01 00:01:00,102
2023-01-01 00:02:00,103
2023-01-01 00:03:00,99
2023-01-01 00:04:00,100
...,...
2023-03-31 23:55:00,67
2023-03-31 23:56:00,63
2023-03-31 23:57:00,68
2023-03-31 23:58:00,61


In [220]:
# df1[df1.index.duplicated()]

In [221]:
# # https://stackoverflow.com/questions/51656065/pandas-resampling-typeerror-only-valid-with-datetimeindex-timedeltaindex-or-p
# # https://stackoverflow.com/questions/54592536/find-gaps-in-pandas-time-series-dataframe-sampled-at-1-minute-intervals-and-fill
# # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html
# # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html#pandas.DataFrame.asfreq
df1 = df1.resample('T', convention='start').asfreq() # 'T' = seconds, 'D' = days
df1

Unnamed: 0_level_0,value
time,Unnamed: 1_level_1
2023-01-01 00:00:00,102.0
2023-01-01 00:01:00,102.0
2023-01-01 00:02:00,103.0
2023-01-01 00:03:00,99.0
2023-01-01 00:04:00,100.0
...,...
2023-03-31 23:55:00,67.0
2023-03-31 23:56:00,63.0
2023-03-31 23:57:00,68.0
2023-03-31 23:58:00,61.0


In [222]:
df1 = df1.reset_index()
df1.to_csv('data/hr1.csv', index=None, encoding='utf-8')

In [223]:
df1 = df1.set_index('time')
agg1 = df1.resample('D')['value'].agg('describe')
df1 = df1.reset_index()
agg1 = agg1.reset_index()
agg1

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2023-01-01,1412.0,63.032578,14.127507,44.0,51.0,57.0,77.0,107.0
1,2023-01-02,1408.0,61.527699,14.902119,44.0,51.0,56.0,66.0,126.0
2,2023-01-03,1414.0,67.765205,16.522434,47.0,56.0,62.0,74.0,147.0
3,2023-01-04,1440.0,62.602778,15.863709,44.0,52.0,57.0,67.0,141.0
4,2023-01-05,1412.0,62.929887,13.344375,44.0,53.0,58.0,69.0,123.0
...,...,...,...,...,...,...,...,...,...
85,2023-03-27,1404.0,66.323362,14.589249,46.0,55.0,62.5,75.0,121.0
86,2023-03-28,1390.0,64.802878,13.133254,45.0,53.0,62.0,75.0,112.0
87,2023-03-29,1404.0,59.169516,8.418392,43.0,52.0,58.0,65.0,96.0
88,2023-03-30,1410.0,62.871631,13.200312,44.0,52.0,58.0,73.0,101.0


In [224]:
agg1.to_csv('data/hr1_describe.csv', index=None, encoding='utf-8')

In [194]:
df2 = pd.read_csv('~/qs/fitbit/data/hr2.csv')
df2['time'] = pd.to_datetime(df2['time'])
df2 = df2.set_index('time')
df2

Unnamed: 0_level_0,value
time,Unnamed: 1_level_1
2022-05-01 00:00:00,71
2022-05-01 00:01:00,76
2022-05-01 00:02:00,69
2022-05-01 00:03:00,77
2022-05-01 00:04:00,86
...,...
2022-08-31 23:55:00,69
2022-08-31 23:56:00,63
2022-08-31 23:57:00,61
2022-08-31 23:58:00,66


In [195]:
# # https://stackoverflow.com/questions/51656065/pandas-resampling-typeerror-only-valid-with-datetimeindex-timedeltaindex-or-p
# # https://stackoverflow.com/questions/54592536/find-gaps-in-pandas-time-series-dataframe-sampled-at-1-minute-intervals-and-fill
# # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html
# # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html#pandas.DataFrame.asfreq
df2 = df2.resample('T', convention='start').asfreq() # 'T' = seconds, 'D' = days
df2

Unnamed: 0_level_0,value
time,Unnamed: 1_level_1
2022-05-01 00:00:00,71.0
2022-05-01 00:01:00,76.0
2022-05-01 00:02:00,69.0
2022-05-01 00:03:00,77.0
2022-05-01 00:04:00,86.0
...,...
2022-08-31 23:55:00,69.0
2022-08-31 23:56:00,63.0
2022-08-31 23:57:00,61.0
2022-08-31 23:58:00,66.0


In [196]:
df2 = df2.reset_index()
df2.to_csv('data/hr2.csv', index=None, encoding='utf-8')

In [197]:
df2 = df2.set_index('time')
agg2 = df2.resample('D')['value'].agg('describe')
df2 = df2.reset_index()
agg2 = agg2.reset_index()
agg2

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2022-05-01,1375.0,62.360727,16.895024,43.0,52.0,56.0,64.0,137.0
1,2022-05-02,1413.0,64.735315,12.903955,46.0,56.0,62.0,69.0,146.0
2,2022-05-03,1406.0,61.901849,13.002339,46.0,53.0,59.0,66.0,140.0
3,2022-05-04,1408.0,64.596591,14.643862,40.0,54.0,61.0,71.0,139.0
4,2022-05-05,1410.0,66.063830,14.635439,46.0,56.0,63.0,70.0,141.0
...,...,...,...,...,...,...,...,...,...
118,2022-08-27,1360.0,68.941912,13.960166,49.0,59.0,64.0,77.0,132.0
119,2022-08-28,1414.0,65.741867,15.033880,48.0,56.0,59.0,72.0,145.0
120,2022-08-29,1438.0,63.484701,12.082849,47.0,56.0,60.0,67.0,198.0
121,2022-08-30,1336.0,78.591317,17.058104,53.0,65.0,75.0,90.0,144.0


In [198]:
agg2.to_csv('data/hr2_describe.csv', index=None, encoding='utf-8')

In [199]:
df3 = pd.read_csv('~/qs/fitbit/data/hr3.csv')
df3['time'] = pd.to_datetime(df3['time'])
df3 = df3.set_index('time')
df3

Unnamed: 0_level_0,value
time,Unnamed: 1_level_1
2022-09-01 00:00:00,68
2022-09-01 00:01:00,69
2022-09-01 00:02:00,76
2022-09-01 00:03:00,77
2022-09-01 00:04:00,72
...,...
2022-12-31 23:55:00,104
2022-12-31 23:56:00,102
2022-12-31 23:57:00,102
2022-12-31 23:58:00,103


In [200]:
# # https://stackoverflow.com/questions/51656065/pandas-resampling-typeerror-only-valid-with-datetimeindex-timedeltaindex-or-p
# # https://stackoverflow.com/questions/54592536/find-gaps-in-pandas-time-series-dataframe-sampled-at-1-minute-intervals-and-fill
# # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html
# # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html#pandas.DataFrame.asfreq
df3 = df3.resample('T', convention='start').asfreq() # 'T' = seconds, 'D' = days
df3

Unnamed: 0_level_0,value
time,Unnamed: 1_level_1
2022-09-01 00:00:00,68.0
2022-09-01 00:01:00,69.0
2022-09-01 00:02:00,76.0
2022-09-01 00:03:00,77.0
2022-09-01 00:04:00,72.0
...,...
2022-12-31 23:55:00,104.0
2022-12-31 23:56:00,102.0
2022-12-31 23:57:00,102.0
2022-12-31 23:58:00,103.0


In [201]:
df3 = df3.reset_index()
df3.to_csv('data/hr3.csv', index=None, encoding='utf-8')

In [202]:
df3 = df3.set_index('time')
agg3 = df3.resample('D')['value'].agg('describe')
df3 = df3.reset_index()
agg3 = agg3.reset_index()
agg3

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2022-09-01,1349.0,72.206079,19.713239,47.0,55.0,67.0,89.0,127.0
1,2022-09-02,1364.0,58.771261,11.365037,45.0,52.0,56.0,61.0,118.0
2,2022-09-03,1409.0,65.398155,14.453531,47.0,55.0,60.0,72.0,129.0
3,2022-09-04,1378.0,63.947025,16.053042,48.0,54.0,58.0,66.0,146.0
4,2022-09-05,1416.0,66.006356,16.234608,39.0,54.0,61.0,73.0,142.0
...,...,...,...,...,...,...,...,...,...
117,2022-12-27,1440.0,58.090972,8.264271,43.0,52.0,56.0,63.0,100.0
118,2022-12-28,1400.0,65.501429,17.540355,45.0,53.0,61.0,72.0,152.0
119,2022-12-29,1397.0,63.011453,14.249160,46.0,54.0,58.0,65.0,130.0
120,2022-12-30,1401.0,61.516774,11.711711,45.0,53.0,57.0,66.0,101.0


In [203]:
agg3.to_csv('data/hr3_describe.csv', index=None, encoding='utf-8')

# Concatenating HR within each year

In [225]:
df1

Unnamed: 0,time,value
0,2023-01-01 00:00:00,102.0
1,2023-01-01 00:01:00,102.0
2,2023-01-01 00:02:00,103.0
3,2023-01-01 00:03:00,99.0
4,2023-01-01 00:04:00,100.0
...,...,...
129595,2023-03-31 23:55:00,67.0
129596,2023-03-31 23:56:00,63.0
129597,2023-03-31 23:57:00,68.0
129598,2023-03-31 23:58:00,61.0


In [226]:
# df2

In [227]:
# df3

In [207]:
df = pd.concat([df1, df2, df3])
df

Unnamed: 0,time,value
0,2023-01-01 00:00:00,102.0
1,2023-01-01 00:01:00,102.0
2,2023-01-01 00:02:00,103.0
3,2023-01-01 00:03:00,99.0
4,2023-01-01 00:04:00,100.0
...,...,...
129595,2023-03-31 23:55:00,67.0
129596,2023-03-31 23:56:00,63.0
129597,2023-03-31 23:57:00,68.0
129598,2023-03-31 23:58:00,61.0


Unnamed: 0,time,value
0,2022-01-01 00:00:00,68.0
1,2022-01-01 00:01:00,64.0
2,2022-01-01 00:02:00,62.0
3,2022-01-01 00:03:00,63.0
4,2022-01-01 00:04:00,64.0
...,...,...
175675,2022-12-31 23:55:00,104.0
175676,2022-12-31 23:56:00,102.0
175677,2022-12-31 23:57:00,102.0
175678,2022-12-31 23:58:00,103.0


In [None]:
df.to_csv('data/hr_2023.csv', index=None, encoding='utf-8')

In [230]:
agg1

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2023-01-01,1412.0,63.032578,14.127507,44.0,51.0,57.0,77.0,107.0
1,2023-01-02,1408.0,61.527699,14.902119,44.0,51.0,56.0,66.0,126.0
2,2023-01-03,1414.0,67.765205,16.522434,47.0,56.0,62.0,74.0,147.0
3,2023-01-04,1440.0,62.602778,15.863709,44.0,52.0,57.0,67.0,141.0
4,2023-01-05,1412.0,62.929887,13.344375,44.0,53.0,58.0,69.0,123.0
...,...,...,...,...,...,...,...,...,...
85,2023-03-27,1404.0,66.323362,14.589249,46.0,55.0,62.5,75.0,121.0
86,2023-03-28,1390.0,64.802878,13.133254,45.0,53.0,62.0,75.0,112.0
87,2023-03-29,1404.0,59.169516,8.418392,43.0,52.0,58.0,65.0,96.0
88,2023-03-30,1410.0,62.871631,13.200312,44.0,52.0,58.0,73.0,101.0


In [212]:
agg2

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2022-05-01,1375.0,62.360727,16.895024,43.0,52.0,56.0,64.0,137.0
1,2022-05-02,1413.0,64.735315,12.903955,46.0,56.0,62.0,69.0,146.0
2,2022-05-03,1406.0,61.901849,13.002339,46.0,53.0,59.0,66.0,140.0
3,2022-05-04,1408.0,64.596591,14.643862,40.0,54.0,61.0,71.0,139.0
4,2022-05-05,1410.0,66.063830,14.635439,46.0,56.0,63.0,70.0,141.0
...,...,...,...,...,...,...,...,...,...
118,2022-08-27,1360.0,68.941912,13.960166,49.0,59.0,64.0,77.0,132.0
119,2022-08-28,1414.0,65.741867,15.033880,48.0,56.0,59.0,72.0,145.0
120,2022-08-29,1438.0,63.484701,12.082849,47.0,56.0,60.0,67.0,198.0
121,2022-08-30,1336.0,78.591317,17.058104,53.0,65.0,75.0,90.0,144.0


In [213]:
agg3

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2022-09-01,1349.0,72.206079,19.713239,47.0,55.0,67.0,89.0,127.0
1,2022-09-02,1364.0,58.771261,11.365037,45.0,52.0,56.0,61.0,118.0
2,2022-09-03,1409.0,65.398155,14.453531,47.0,55.0,60.0,72.0,129.0
3,2022-09-04,1378.0,63.947025,16.053042,48.0,54.0,58.0,66.0,146.0
4,2022-09-05,1416.0,66.006356,16.234608,39.0,54.0,61.0,73.0,142.0
...,...,...,...,...,...,...,...,...,...
117,2022-12-27,1440.0,58.090972,8.264271,43.0,52.0,56.0,63.0,100.0
118,2022-12-28,1400.0,65.501429,17.540355,45.0,53.0,61.0,72.0,152.0
119,2022-12-29,1397.0,63.011453,14.249160,46.0,54.0,58.0,65.0,130.0
120,2022-12-30,1401.0,61.516774,11.711711,45.0,53.0,57.0,66.0,101.0


In [231]:
agg = pd.concat([agg1, agg2, agg3])
agg

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2023-01-01,1412.0,63.032578,14.127507,44.0,51.0,57.0,77.0,107.0
1,2023-01-02,1408.0,61.527699,14.902119,44.0,51.0,56.0,66.0,126.0
2,2023-01-03,1414.0,67.765205,16.522434,47.0,56.0,62.0,74.0,147.0
3,2023-01-04,1440.0,62.602778,15.863709,44.0,52.0,57.0,67.0,141.0
4,2023-01-05,1412.0,62.929887,13.344375,44.0,53.0,58.0,69.0,123.0
...,...,...,...,...,...,...,...,...,...
85,2023-03-27,1404.0,66.323362,14.589249,46.0,55.0,62.5,75.0,121.0
86,2023-03-28,1390.0,64.802878,13.133254,45.0,53.0,62.0,75.0,112.0
87,2023-03-29,1404.0,59.169516,8.418392,43.0,52.0,58.0,65.0,96.0
88,2023-03-30,1410.0,62.871631,13.200312,44.0,52.0,58.0,73.0,101.0


In [232]:
# any dates missing?
start = '2023-01-01'
end = '2023-04-01'
pd.date_range(start=start, end=end).difference(agg['time'])

DatetimeIndex(['2023-04-01'], dtype='datetime64[ns]', freq=None)

In [233]:
agg.to_csv('data/hr_describe_2023.csv', index=None, encoding='utf-8')

# Concatenating HR for each year

In [234]:
df1 = pd.read_csv('data/hr_2020.csv')
df1

Unnamed: 0,time,value
0,2020-01-01 00:00:00,57.0
1,2020-01-01 00:01:00,57.0
2,2020-01-01 00:02:00,56.0
3,2020-01-01 00:03:00,56.0
4,2020-01-01 00:04:00,57.0
...,...,...
525595,2020-12-31 23:55:00,57.0
525596,2020-12-31 23:56:00,60.0
525597,2020-12-31 23:57:00,59.0
525598,2020-12-31 23:58:00,58.0


In [235]:
df2 = pd.read_csv('data/hr_2021.csv')
df2

Unnamed: 0,time,value
0,2021-01-01 00:00:00,59.0
1,2021-01-01 00:01:00,62.0
2,2021-01-01 00:02:00,59.0
3,2021-01-01 00:03:00,60.0
4,2021-01-01 00:04:00,59.0
...,...,...
525595,2021-12-31 23:55:00,64.0
525596,2021-12-31 23:56:00,65.0
525597,2021-12-31 23:57:00,66.0
525598,2021-12-31 23:58:00,68.0


In [236]:
df3 = pd.read_csv('data/hr_2022.csv')
df3

Unnamed: 0,time,value
0,2022-01-01 00:00:00,68.0
1,2022-01-01 00:01:00,64.0
2,2022-01-01 00:02:00,62.0
3,2022-01-01 00:03:00,63.0
4,2022-01-01 00:04:00,64.0
...,...,...
525595,2022-12-31 23:55:00,104.0
525596,2022-12-31 23:56:00,102.0
525597,2022-12-31 23:57:00,102.0
525598,2022-12-31 23:58:00,103.0


In [237]:
df4 = pd.read_csv('data/hr_2023.csv')
df4

Unnamed: 0,time,value
0,2023-01-01 00:00:00,102.0
1,2023-01-01 00:01:00,102.0
2,2023-01-01 00:02:00,103.0
3,2023-01-01 00:03:00,99.0
4,2023-01-01 00:04:00,100.0
...,...,...
129595,2023-03-31 23:55:00,67.0
129596,2023-03-31 23:56:00,63.0
129597,2023-03-31 23:57:00,68.0
129598,2023-03-31 23:58:00,61.0


In [238]:
df = pd.concat([df1, df2, df3, df4])
df

Unnamed: 0,time,value
0,2020-01-01 00:00:00,57.0
1,2020-01-01 00:01:00,57.0
2,2020-01-01 00:02:00,56.0
3,2020-01-01 00:03:00,56.0
4,2020-01-01 00:04:00,57.0
...,...,...
129595,2023-03-31 23:55:00,67.0
129596,2023-03-31 23:56:00,63.0
129597,2023-03-31 23:57:00,68.0
129598,2023-03-31 23:58:00,61.0


In [239]:
df.to_csv('data/hr_2020-present.csv', index=None, encoding='utf-8')

# Concatenating aggregates for each year

In [240]:
df1 = pd.read_csv('data/hr_describe_2020.csv')
df1

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2020-01-01,1349.0,61.689400,7.496629,46.0,57.0,60.0,65.0,98.0
1,2020-01-02,1421.0,61.500352,7.566385,39.0,56.0,61.0,66.0,102.0
2,2020-01-03,1298.0,63.006934,10.599273,48.0,54.0,62.0,70.0,135.0
3,2020-01-04,1390.0,66.987050,10.906427,53.0,60.0,64.0,71.0,126.0
4,2020-01-05,1407.0,64.260128,9.231062,51.0,58.0,62.0,68.0,118.0
...,...,...,...,...,...,...,...,...,...
361,2020-12-27,1386.0,62.660895,11.370321,47.0,55.0,60.0,66.0,123.0
362,2020-12-28,1399.0,65.039314,11.082886,36.0,58.0,62.0,68.0,116.0
363,2020-12-29,1433.0,70.011165,13.201273,52.0,60.0,65.0,80.0,186.0
364,2020-12-30,1390.0,76.064029,14.014836,52.0,64.0,76.0,83.0,134.0


In [241]:
df2 = pd.read_csv('data/hr_describe_2021.csv')
df2

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2021-01-01,1386.0,62.406926,10.030585,40.0,57.0,60.0,64.00,127.0
1,2021-01-02,1392.0,70.492098,13.562750,52.0,60.0,66.0,80.00,129.0
2,2021-01-03,1395.0,60.852330,10.857945,49.0,55.0,58.0,62.00,122.0
3,2021-01-04,1429.0,67.034990,13.543587,46.0,57.0,64.0,74.00,156.0
4,2021-01-05,1400.0,67.942857,10.301232,49.0,61.0,67.0,73.25,117.0
...,...,...,...,...,...,...,...,...,...
360,2021-12-27,1439.0,62.424600,16.062499,42.0,52.0,58.0,67.00,142.0
361,2021-12-28,1414.0,64.285714,14.734608,44.0,54.0,59.0,71.00,145.0
362,2021-12-29,1407.0,57.329780,13.420992,43.0,49.0,53.0,60.00,123.0
363,2021-12-30,1418.0,64.264457,9.900629,42.0,58.0,63.0,68.00,121.0


In [242]:
df3 = pd.read_csv('data/hr_describe_2022.csv')
df3

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2022-01-01,1282.0,60.951638,9.454315,46.0,55.0,58.0,65.0,112.0
1,2022-01-02,1408.0,60.167614,10.964906,46.0,53.0,57.0,64.0,129.0
2,2022-01-03,1440.0,60.441667,14.198853,46.0,52.0,56.0,62.0,137.0
3,2022-01-04,1414.0,65.193777,14.264733,45.0,54.0,60.0,73.0,126.0
4,2022-01-05,1394.0,58.221664,8.651442,46.0,53.0,56.0,60.0,114.0
...,...,...,...,...,...,...,...,...,...
360,2022-12-27,1440.0,58.090972,8.264271,43.0,52.0,56.0,63.0,100.0
361,2022-12-28,1400.0,65.501429,17.540355,45.0,53.0,61.0,72.0,152.0
362,2022-12-29,1397.0,63.011453,14.249160,46.0,54.0,58.0,65.0,130.0
363,2022-12-30,1401.0,61.516774,11.711711,45.0,53.0,57.0,66.0,101.0


In [243]:
df4 = pd.read_csv('data/hr_describe_2023.csv')
df4

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2023-01-01,1412.0,63.032578,14.127507,44.0,51.0,57.0,77.0,107.0
1,2023-01-02,1408.0,61.527699,14.902119,44.0,51.0,56.0,66.0,126.0
2,2023-01-03,1414.0,67.765205,16.522434,47.0,56.0,62.0,74.0,147.0
3,2023-01-04,1440.0,62.602778,15.863709,44.0,52.0,57.0,67.0,141.0
4,2023-01-05,1412.0,62.929887,13.344375,44.0,53.0,58.0,69.0,123.0
...,...,...,...,...,...,...,...,...,...
85,2023-03-27,1404.0,66.323362,14.589249,46.0,55.0,62.5,75.0,121.0
86,2023-03-28,1390.0,64.802878,13.133254,45.0,53.0,62.0,75.0,112.0
87,2023-03-29,1404.0,59.169516,8.418392,43.0,52.0,58.0,65.0,96.0
88,2023-03-30,1410.0,62.871631,13.200312,44.0,52.0,58.0,73.0,101.0


In [247]:
agg = pd.concat([df1, df2, df3, df4])
agg

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2020-01-01,1349.0,61.689400,7.496629,46.0,57.0,60.0,65.0,98.0
1,2020-01-02,1421.0,61.500352,7.566385,39.0,56.0,61.0,66.0,102.0
2,2020-01-03,1298.0,63.006934,10.599273,48.0,54.0,62.0,70.0,135.0
3,2020-01-04,1390.0,66.987050,10.906427,53.0,60.0,64.0,71.0,126.0
4,2020-01-05,1407.0,64.260128,9.231062,51.0,58.0,62.0,68.0,118.0
...,...,...,...,...,...,...,...,...,...
85,2023-03-27,1404.0,66.323362,14.589249,46.0,55.0,62.5,75.0,121.0
86,2023-03-28,1390.0,64.802878,13.133254,45.0,53.0,62.0,75.0,112.0
87,2023-03-29,1404.0,59.169516,8.418392,43.0,52.0,58.0,65.0,96.0
88,2023-03-30,1410.0,62.871631,13.200312,44.0,52.0,58.0,73.0,101.0


In [249]:
agg

Unnamed: 0,time,count,mean,std,min,25%,50%,75%,max
0,2020-01-01,1349.0,61.689400,7.496629,46.0,57.0,60.0,65.0,98.0
1,2020-01-02,1421.0,61.500352,7.566385,39.0,56.0,61.0,66.0,102.0
2,2020-01-03,1298.0,63.006934,10.599273,48.0,54.0,62.0,70.0,135.0
3,2020-01-04,1390.0,66.987050,10.906427,53.0,60.0,64.0,71.0,126.0
4,2020-01-05,1407.0,64.260128,9.231062,51.0,58.0,62.0,68.0,118.0
...,...,...,...,...,...,...,...,...,...
85,2023-03-27,1404.0,66.323362,14.589249,46.0,55.0,62.5,75.0,121.0
86,2023-03-28,1390.0,64.802878,13.133254,45.0,53.0,62.0,75.0,112.0
87,2023-03-29,1404.0,59.169516,8.418392,43.0,52.0,58.0,65.0,96.0
88,2023-03-30,1410.0,62.871631,13.200312,44.0,52.0,58.0,73.0,101.0


In [255]:
agg = agg.rename(columns={'time':'Date', 
                          'count':'Heart Rate Count', 
                          'mean':'Heart Rate Mean', 
                          'std':'Heart Rate STD', 
                          'min':'Heart Rate Minimum', 
                          '25%':'Heart Rate 25%', 
                          '50%':'Heart Rate Median', 
                          '75%':'Heart Rate 75%', 
                          'max':'Heart Rate Maximum'})
agg

Unnamed: 0,Date,Heart Rate Count,Heart Rate Mean,Heart Rate STD,Heart Rate Minimum,Heart Rate 25%,Heart Rate Median,Heart Rate 75%,Heart Rate Maximum
0,2020-01-01,1349.0,61.689400,7.496629,46.0,57.0,60.0,65.0,98.0
1,2020-01-02,1421.0,61.500352,7.566385,39.0,56.0,61.0,66.0,102.0
2,2020-01-03,1298.0,63.006934,10.599273,48.0,54.0,62.0,70.0,135.0
3,2020-01-04,1390.0,66.987050,10.906427,53.0,60.0,64.0,71.0,126.0
4,2020-01-05,1407.0,64.260128,9.231062,51.0,58.0,62.0,68.0,118.0
...,...,...,...,...,...,...,...,...,...
85,2023-03-27,1404.0,66.323362,14.589249,46.0,55.0,62.5,75.0,121.0
86,2023-03-28,1390.0,64.802878,13.133254,45.0,53.0,62.0,75.0,112.0
87,2023-03-29,1404.0,59.169516,8.418392,43.0,52.0,58.0,65.0,96.0
88,2023-03-30,1410.0,62.871631,13.200312,44.0,52.0,58.0,73.0,101.0


In [256]:
agg.to_csv('data/hr_describe_2020-present.csv', index=None, encoding='utf-8')