In [30]:
import pandas as pd
import os

# Define file paths
path1 = '../mturkfitbit_export_3.12.16-4.11.16/Fitabase Data 3.12.16-4.11.16/'
path2 = '../mturkfitbit_export_4.12.16-5.12.16/Fitabase Data 4.12.16-5.12.16/'

# List of all unique filenames across both directories
all_files = [
    'dailyActivity_merged.csv',
    'heartrate_seconds_merged.csv',
    'hourlyCalories_merged.csv',
    'hourlyIntensities_merged.csv',
    'hourlySteps_merged.csv',
    'minuteCaloriesNarrow_merged.csv',
    'minuteCaloriesWide_merged.csv',
    'minuteIntensitiesNarrow_merged.csv',
    'minuteIntensitiesWide_merged.csv',
    'minuteMETsNarrow_merged.csv',
    'minuteSleep_merged.csv',
    'minuteStepsNarrow_merged.csv',
    'minuteStepsWide_merged.csv',
    'sleepDay_merged.csv',
    'weightLogInfo_merged.csv'
]

# Dictionary to store clean dataframes
data = {}

for filename in all_files:
    file_path1 = os.path.join(path1, filename)
    file_path2 = os.path.join(path2, filename)
    
    df_list = []
    
    # Check and load from first directory
    if os.path.exists(file_path1):
        df_list.append(pd.read_csv(file_path1))
        
    # Check and load from second directory
    if os.path.exists(file_path2):
        df_list.append(pd.read_csv(file_path2))
    
    # Merge if data exists
    if df_list:
        combined_df = pd.concat(df_list, ignore_index=True)
        
        # Create clean name (remove _merged.csv)
        clean_name = filename.replace('_merged.csv', '')
        data[clean_name] = combined_df
        
        print(f"Loaded {clean_name}: {combined_df.shape}")
    else:
        print(f"Warning: {filename} not found in either directory.")

# Clean up keys for easier access (optional: create individual variables)
# locals().update(data) # Uncomment to create variables like dailyActivity, heartrate_seconds etc.

print("\nAll files loaded successfully into 'data' dictionary.")

Loaded dailyActivity: (1397, 15)
Loaded heartrate_seconds: (3638339, 3)
Loaded hourlyCalories: (46183, 3)
Loaded hourlyIntensities: (46183, 4)
Loaded hourlySteps: (46183, 3)
Loaded minuteCaloriesNarrow: (2770620, 3)
Loaded minuteCaloriesWide: (21645, 62)
Loaded minuteIntensitiesNarrow: (2770620, 3)
Loaded minuteIntensitiesWide: (21645, 62)
Loaded minuteMETsNarrow: (2770620, 3)
Loaded minuteSleep: (387080, 4)
Loaded minuteStepsNarrow: (2770620, 3)
Loaded minuteStepsWide: (21645, 62)
Loaded sleepDay: (413, 5)
Loaded weightLogInfo: (100, 8)

All files loaded successfully into 'data' dictionary.


In [31]:
daily_activity = data['dailyActivity']
daily_activity.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1397 entries, 0 to 1396
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Id                        1397 non-null   int64  
 1   ActivityDate              1397 non-null   object 
 2   TotalSteps                1397 non-null   int64  
 3   TotalDistance             1397 non-null   float64
 4   TrackerDistance           1397 non-null   float64
 5   LoggedActivitiesDistance  1397 non-null   float64
 6   VeryActiveDistance        1397 non-null   float64
 7   ModeratelyActiveDistance  1397 non-null   float64
 8   LightActiveDistance       1397 non-null   float64
 9   SedentaryActiveDistance   1397 non-null   float64
 10  VeryActiveMinutes         1397 non-null   int64  
 11  FairlyActiveMinutes       1397 non-null   int64  
 12  LightlyActiveMinutes      1397 non-null   int64  
 13  SedentaryMinutes          1397 non-null   int64  
 14  Calories

In [32]:
import re
import pandas as pd

def to_snake_case(name):
    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()


for col in daily_activity.columns:
    daily_activity.rename(columns={col: to_snake_case(col)}, inplace=True)



In [33]:
daily_activity.describe

<bound method NDFrame.describe of               id activity_date  total_steps  total_distance  tracker_distance  \
0     1503960366     3/25/2016        11004        7.110000          7.110000   
1     1503960366     3/26/2016        17609       11.550000         11.550000   
2     1503960366     3/27/2016        12736        8.530000          8.530000   
3     1503960366     3/28/2016        13231        8.930000          8.930000   
4     1503960366     3/29/2016        12041        7.850000          7.850000   
...          ...           ...          ...             ...               ...   
1392  8877689391      5/8/2016        10686        8.110000          8.110000   
1393  8877689391      5/9/2016        20226       18.250000         18.250000   
1394  8877689391     5/10/2016        10733        8.150000          8.150000   
1395  8877689391     5/11/2016        21420       19.559999         19.559999   
1396  8877689391     5/12/2016         8064        6.120000          6.1200

In [35]:
import dtale 

In [36]:
d = dtale.show(daily_activity)
d.open_browser()

In [41]:
heart_rate_seconds= data['heartrate_seconds']
heart_rate_seconds.head()

Unnamed: 0,Id,Time,Value
0,2022484408,4/1/2016 7:54:00 AM,93
1,2022484408,4/1/2016 7:54:05 AM,91
2,2022484408,4/1/2016 7:54:10 AM,96
3,2022484408,4/1/2016 7:54:15 AM,98
4,2022484408,4/1/2016 7:54:20 AM,100


In [43]:
for x in data.keys():
    print(x)
    print("\n")
    print(data[x].head())

dailyActivity


           id activity_date  total_steps  total_distance  tracker_distance  \
0  1503960366     3/25/2016        11004            7.11              7.11   
1  1503960366     3/26/2016        17609           11.55             11.55   
2  1503960366     3/27/2016        12736            8.53              8.53   
3  1503960366     3/28/2016        13231            8.93              8.93   
4  1503960366     3/29/2016        12041            7.85              7.85   

   logged_activities_distance  very_active_distance  \
0                         0.0                  2.57   
1                         0.0                  6.92   
2                         0.0                  4.66   
3                         0.0                  3.19   
4                         0.0                  2.16   

   moderately_active_distance  light_active_distance  \
0                        0.46                   4.07   
1                        0.73                   3.91   
2               

In [44]:
daily_activity = data['dailyActivity']
daily_calories = data['hourlyCalories']
daily_intensity = data['hourlyIntensities']
daily_weight = data['weightLogInfo']

2026-01-08 15:14:48,628 - INFO     - Executing shutdown due to inactivity...
2026-01-08 15:14:48,894 - INFO     - Executing shutdown...
2026-01-08 15:14:48,894 - INFO     - Not running with the Werkzeug Server, exiting by searching gc for BaseWSGIServer
