In [None]:
import pandas as pd
import numpy as np
import os
import re
from datetime import datetime
from pyspark.sql import SparkSession

# Vis. modules.
import altair as alt


-  [Sleep](#sleep)
-  [Body](#body)
-  [Steps](#steps)
-  [Distance](#distance)
-  [Activity](#activity)
-  [RHR](#rhr)

In [None]:
# Fitbit "Global Export Data" folder inside the Google Takeout archive.
# The leading `~` is expanded once here; later cells reuse the expanded value.
global_export_data_path = os.path.expanduser(
    r'~/Downloads/takeout-20240330T183803Z-001/Takeout/Fitbit/Global Export Data'
)
# Bare file names (no directory part) of everything in the export folder.
file_list = os.listdir(global_export_data_path)

In [None]:
def fitbit_json_to_df(category:str):
    """Load every ``<category>-YYYY-MM-DD.json`` export file into one DataFrame.

    File names come from the notebook-level ``file_list`` and are resolved
    against ``global_export_data_path``. The files are read with Spark in
    multiline mode, converted to pandas, and passed through
    ``pd.json_normalize``.

    Args:
        category: File-name prefix, e.g. ``'steps'``. It is matched literally.

    Returns:
        A pandas DataFrame built from the rows Spark read.

    Raises:
        FileNotFoundError: If no name in ``file_list`` matches ``category``.
    """
    # TODO: Spark handles json differently than pd.read_json().
    # Revisit after deciding how to unpack nested .json. OK for some categories for now.
    # re.escape stops regex metacharacters in `category` from acting as pattern syntax.
    pattern = re.compile(rf'{re.escape(category)}-\d{{4}}-\d{{2}}-\d{{2}}\.json$')
    files = sorted(file for file in file_list if pattern.match(file))
    if not files:
        # Fail before a Spark session is started for nothing.
        raise FileNotFoundError(
            f"No '{category}-YYYY-MM-DD.json' files found in {global_export_data_path}"
        )
    print(files[:5]) # Check first 5 are as expected.
    files = [os.path.join(global_export_data_path, file) for file in files]

    spark = SparkSession.builder.appName("Read JSON files").getOrCreate()
    try:
        spark_df = spark.read.option("multiline", "true").json(files)
        df = spark_df.toPandas()
    finally:
        # Release the session even when reading or conversion fails.
        spark.stop()
    return pd.json_normalize(df.to_dict(orient='records'))


<a id='sleep'></a>
#### Sleep


In [None]:
# Per-day sleep export files, named sleep-YYYY-MM-DD.json.
sleep_files = sorted(
    file for file in file_list if re.match(r'sleep-\d{4}-\d{2}-\d{2}\.json$', file)
)
print(sleep_files[:5])  # Check first 5 are as expected.

# os.path.join inserts the separator itself, so none is appended by hand.
dfs = [pd.read_json(os.path.join(global_export_data_path, file)) for file in sleep_files]

# One frame for all days, re-indexed 0..n-1 so later index-based merges line up.
df = pd.concat(dfs, ignore_index=True)
df

In [None]:
# Flatten each row's nested `levels` object into its own columns;
# nested keys become dotted column names such as 'summary.wake.count'.
unnested = pd.json_normalize(df['levels'])

In [None]:
# Pair the top-level sleep fields with the flattened `levels` summary metrics,
# matching rows by index.
# The summary columns are selected by name: a positional slice would depend on
# the column order json_normalize happens to produce for these files.
df = pd.merge(
    left=df[['startTime', 'endTime', 'minutesAsleep', 'minutesAwake', 'timeInBed']],
    right=unnested.filter(regex=r'^summary\.'),
    left_index=True,
    right_index=True
)

In [None]:
# Labels for the flattened summary fields that are kept in the final table.
summary_labels = {
    'summary.wake.count': 'Number of Awakenings',
    'summary.rem.minutes': 'Minutes REM Sleep',
    'summary.light.minutes': 'Minutes Light Sleep',
    'summary.deep.minutes': 'Minutes Deep Sleep',
}
df = df.rename(columns=summary_labels)

In [None]:
# Move 'Number of Awakenings' to position 4 (the fifth column).
# pop() removes the column and returns it; it runs before insert() because
# call arguments are evaluated first.
df.insert(4, 'Number of Awakenings', df.pop('Number of Awakenings'))

In [None]:
# Final column set and order for the sleep table.
sleep_columns = [
    'startTime',
    'endTime',
    'minutesAsleep',
    'minutesAwake',
    'Number of Awakenings',
    'timeInBed',
    'Minutes REM Sleep',
    'Minutes Light Sleep',
    'Minutes Deep Sleep',
]
# Both timestamp columns are cast to datetimes so they can be reformatted later.
timestamp_dtypes = dict.fromkeys(['startTime', 'endTime'], 'datetime64[ns]')
df = df.loc[:, sleep_columns].astype(timestamp_dtypes)

In [None]:
def convert_to_fitbit_time(cols):
    """Format datetime columns of the notebook-level ``df`` as 12-hour strings.

    Each named column is overwritten in place with text such as
    ``2024-03-30 7:05AM``: ``%Y-%m-%d %I:%M%p`` with the zero padding of the
    hour removed. Missing timestamps stay missing instead of raising.

    Args:
        cols: Names of datetime-typed columns in ``df`` to reformat.
    """
    for col in cols:
        # Vectorized formatting; unlike a per-element strftime call it yields NaN for NaT.
        formatted = df[col].dt.strftime('%Y-%m-%d %I:%M%p')
        # The only space sits between date and time, so ' 0' can only be the padded hour.
        df[col] = formatted.str.replace(' 0', ' ', regex=False)


convert_to_fitbit_time(['startTime', 'endTime'])

In [None]:
# Preview the first rows of the sleep table after reformatting.
df.head()

In [None]:
# Save .csv: rows only (no header line, no index column); missing values are written as 'N/A'.
df.to_csv(
    './fitbit_sleep_data_agg.csv',
    index=False,
    header=False,
    na_rep='N/A',
)


<a id='body'></a>
#### Body

-  Bodyweight
-  BMI
-  Fat

In [None]:
# Per-day weight export files, named weight-YYYY-MM-DD.json.
weight_files = sorted(
    file for file in file_list if re.match(r'weight-\d{4}-\d{2}-\d{2}\.json$', file)
)
print(weight_files[:5]) # Check first 5 are as expected.

# os.path.join inserts the separator itself, so none is appended by hand.
dfs = [pd.read_json(os.path.join(global_export_data_path, file)) for file in weight_files]

df = pd.concat(dfs, ignore_index=True)
# reindex keeps only the listed columns, so logId/source disappear without a
# separate drop (which raises KeyError when either is absent); a listed column
# that is missing from the data is created and filled with NaN.
df = df.rename(
    columns={'weight':'Bodyweight', 'bmi':'BMI', 'fat':'Fat'}
).reindex(
    columns=['date', 'time', 'Bodyweight', 'BMI', 'Fat']
)

df


<a id='steps'></a>
#### Steps

In [None]:
df = fitbit_json_to_df('steps')
df['dateTime'] = pd.to_datetime(df['dateTime'], format='%m/%d/%y %H:%M:%S')
df['value'] = df['value'].astype('int32')
# Sum per month-end bucket; grouping moves dateTime out of the columns and into the index.
df = df.groupby(pd.Grouper(key='dateTime', freq='ME')).agg('sum')
# index=True writes the bucket date next to each total; with index=False the
# file would contain the sums only, with nothing to say which period they belong to.
df.to_csv('./fitbit_steps_data_agg.csv', index=True, header=False, na_rep='N/A') # Save .csv.
df


<a id='distance'></a>
#### Distance


In [None]:
df = fitbit_json_to_df('distance')
df['dateTime'] = pd.to_datetime(df['dateTime'], format='%m/%d/%y %H:%M:%S')
df['value'] = df['value'].astype('int32')
# Sum per calendar day; grouping moves dateTime out of the columns and into the index.
df = df.groupby(pd.Grouper(key='dateTime', freq='D')).agg('sum')
# index=True writes the day next to each total; with index=False the file
# would contain the sums only, with nothing to say which day they belong to.
df.to_csv('./fitbit_distance_data_agg.csv', index=True, header=False, na_rep='N/A') # Save .csv
df


<a id='activity'></a>
#### Activity



Metrics needed for the daily activity table:
-  cal_burn
-  steps
-  distance
-  floors
-  mins_sedentary
-  mins_lightactive
-  mins_fairlyactive
-  mins_veryactive
-  cal_activity


In [None]:
# Load the calorie records, type the two columns, then total them per calendar day.
df = fitbit_json_to_df('calories')
df = df.assign(
    dateTime=pd.to_datetime(df['dateTime'], format='%m/%d/%y %H:%M:%S'),
    value=df['value'].astype('float'),
)
daily_buckets = pd.Grouper(key='dateTime', freq='D')
df = df.groupby(daily_buckets).sum()
df


<a id='rhr'></a>
#### Resting Heart Rate (RHR)


In [None]:
# Per-day resting-heart-rate export files, named resting_heart_rate-YYYY-MM-DD.json.
rhr_files = sorted(
    file for file in file_list if re.match(r'resting_heart_rate-\d{4}-\d{2}-\d{2}\.json$', file)
)
print(rhr_files[:5])

# os.path.join inserts the separator itself, so none is appended by hand.
dfs = [pd.read_json(os.path.join(global_export_data_path, file)) for file in rhr_files]

df = pd.concat(dfs, ignore_index=True)
# Each row's `value` is a nested object; flatten it into columns.
unnested = pd.json_normalize(df['value'])
# to_datetime turns missing dates (None) into NaT on its own, so no manual
# None -> NaN replacement is needed before parsing.
unnested['date'] = pd.to_datetime(unnested['date'], format='%m/%d/%y')
unnested = unnested.dropna(subset=['date'])
# Fixed seed keeps the preview stable across re-runs; the size is capped so a
# table with fewer than 5 rows does not raise.
unnested.sample(min(5, len(unnested)), random_state=0)

In [None]:
# RHR chart over time.
# Pad the y-range by 3 on both sides so the line does not touch the frame.
rhr_low = unnested['value'].min() - 3
rhr_high = unnested['value'].max() + 3

date_axis = alt.X(
    'date',
    axis=alt.Axis(domainOpacity=0, format='%b %y', grid=False),
)
rhr_axis = alt.Y(
    'value',
    axis=alt.Axis(title='RHR'),
    scale=alt.Scale(domain=[rhr_low, rhr_high]),
)

alt.Chart(unnested).mark_line().encode(
    x=date_axis,
    y=rhr_axis,
).properties(width=800, title='Resting Heart Rate: Daily')