# Apple Watch Data - All Users Quick View

Simple notebook to load and view raw workout data from all 3 users.

In [1]:
import sys
from pathlib import Path
import pandas as pd

# Add parser to path
sys.path.append(str(Path.cwd()))
from parse_apple_health import AppleHealthParser

## User 1 Data

In [None]:
# Load User1 workouts
base_dir_1 = Path("../../DATA/CUSTOM_DATA/apple_health_export_User1")
parser_1 = AppleHealthParser(str(base_dir_1 / "export.xml"), str(base_dir_1))
workouts_1 = parser_1.parse_workouts()

# Convert to DataFrame
df_user1 = pd.DataFrame([{
    'workout_id': w.workout_id,
    'date': w.start_date,
    'duration_min': w.duration_min,
    'distance_km': w.total_distance_km,
    'energy_kcal': w.total_energy_kcal,
    'elevation_m': w.elevation_ascended_m,
    'hr_avg': w.hr_avg,
    'hr_min': w.hr_min,
    'hr_max': w.hr_max,
    'has_gpx': w.gpx_file_path is not None
} for w in workouts_1])

print(f"User 1: {len(df_user1)} workouts")

In [4]:
df_user1.head(50)

Unnamed: 0,workout_id,date,duration_min,distance_km,energy_kcal,elevation_m,hr_avg,hr_min,hr_max,has_gpx
0,workout_20190814_135800,2019-08-14 13:58:00,12.92782,2.16432,123.912,9.56,,,,True
1,workout_20190814_141203,2019-08-14 14:12:03,18.649622,5.04219,260.433,,,,,True
2,workout_20190814_143651,2019-08-14 14:36:51,15.524642,3.45875,182.661,3.11,,,,True
3,workout_20190814_150622,2019-08-14 15:06:22,11.450354,2.44507,137.074,4.81,,,,True
4,workout_20190817_103430,2019-08-17 10:34:30,35.009897,7.55926,434.789,4.01,,,,True
5,workout_20190818_185610,2019-08-18 18:56:10,10.314462,2.5089,126.669,2.0,,,,True
6,workout_20190821_092234,2019-08-21 09:22:34,12.710597,2.19068,135.575,3.44,,,,True
7,workout_20190821_094014,2019-08-21 09:40:14,4.125151,1.24385,61.9912,,,,,True
8,workout_20190821_094832,2019-08-21 09:48:32,7.632395,1.81391,92.1529,,,,,True
9,workout_20190821_100019,2019-08-21 10:00:19,3.202936,0.88449,44.0846,,,,,True


In [3]:
# User 1 summary
df_user1.describe()

Unnamed: 0,date,duration_min,distance_km,energy_kcal,elevation_m,hr_avg,hr_min,hr_max
count,285,285.0,285.0,263.0,213.0,150.0,150.0,150.0
mean,2023-02-23 02:17:55.631578880,79.841568,8.51084,571.016505,95.439202,161.356473,113.206667,179.853333
min,2019-08-14 13:58:00,0.141609,0.0,0.36017,0.97,137.242,48.0,150.0
25%,2020-06-27 09:47:24,24.766692,5.03066,292.256,16.0,154.1015,99.0,174.0
50%,2024-11-26 17:45:11,49.634722,8.27908,570.513,63.93,162.8985,111.0,181.0
75%,2025-05-15 11:18:50,65.619146,11.0061,749.6075,148.68,168.7745,127.0,188.0
max,2025-11-23 10:37:25,4619.266667,41.9186,3182.85,387.53,185.546,161.0,193.0
std,,329.055168,5.172021,383.45272,94.158257,9.999852,20.55482,9.264486


## User 2 Data

In [5]:
# Load User2 workouts
base_dir_2 = Path("../../DATA/CUSTOM_DATA/apple_health_export_User2")
parser_2 = AppleHealthParser(str(base_dir_2 / "export.xml"), str(base_dir_2))
workouts_2 = parser_2.parse_workouts()

# Convert to DataFrame
df_user2 = pd.DataFrame([{
    'workout_id': w.workout_id,
    'date': w.start_date,
    'duration_min': w.duration_min,
    'distance_km': w.total_distance_km,
    'energy_kcal': w.total_energy_kcal,
    'elevation_m': w.elevation_ascended_m,
    'hr_avg': w.hr_avg,
    'hr_min': w.hr_min,
    'hr_max': w.hr_max,
    'has_gpx': w.gpx_file_path is not None
} for w in workouts_2])

print(f"User 2: {len(df_user2)} workouts")

Parsing workouts from ../../DATA/CUSTOM_DATA/apple_health_export_User2/export.xml...
  Parsed 50 running workouts...
  Parsed 100 running workouts...
  Parsed 150 running workouts...
  Parsed 200 running workouts...
✓ Found 209 running workouts
User 2: 209 workouts


In [6]:
df_user2.head(10)

Unnamed: 0,workout_id,date,duration_min,distance_km,energy_kcal,elevation_m,hr_avg,hr_min,hr_max,has_gpx
0,workout_20240716_170749,2024-07-16 17:07:49,25.187642,3.53417,184.311,,172.909,116.0,196.0,True
1,workout_20240719_083608,2024-07-19 08:36:08,28.983677,3.87763,202.48,3.84,170.887,133.0,196.0,True
2,workout_20240720_091140,2024-07-20 09:11:40,26.957881,4.21128,220.856,,181.267,151.0,191.0,True
3,workout_20240722_154523,2024-07-22 15:45:23,26.79535,4.39092,219.914,5.59,182.747,148.0,195.0,True
4,workout_20240723_171429,2024-07-23 17:14:29,26.104116,4.23607,208.777,17.19,175.521,127.0,192.0,True
5,workout_20240724_093257,2024-07-24 09:32:57,24.008016,3.80784,191.192,16.77,179.199,150.0,191.0,True
6,workout_20240813_060628,2024-08-13 06:06:28,23.136562,3.78911,174.623,2.56,184.563,145.0,196.0,True
7,workout_20240818_164037,2024-08-18 16:40:37,20.240308,3.37869,166.174,,183.091,105.0,195.0,False
8,workout_20240819_091113,2024-08-19 09:11:13,20.367644,3.51269,169.09,16.55,181.568,127.0,192.0,True
9,workout_20240821_085526,2024-08-21 08:55:26,3.279108,0.541638,25.4352,8.59,,,,True


In [7]:
# User 2 summary
df_user2.describe()

Unnamed: 0,date,duration_min,distance_km,energy_kcal,elevation_m,hr_avg,hr_min,hr_max
count,209,209.0,209.0,201.0,128.0,199.0,199.0,199.0
mean,2025-04-24 02:09:56.387559424,57.451273,6.709595,331.987197,23.495859,166.367071,112.683417,181.718593
min,2024-07-16 17:07:49,1.445512,0.004965,1.08521,2.0,75.0,50.0,75.0
25%,2024-12-20 14:37:18,26.975963,4.55493,216.884,7.265,154.056,90.0,167.0
50%,2025-05-23 14:32:05,40.183339,6.06506,293.883,14.57,169.673,115.0,189.0
75%,2025-08-31 17:59:40,54.85,8.66707,424.63,27.185,180.159,133.0,195.0
max,2025-11-25 10:24:06,3053.333333,19.8413,996.494,294.55,191.775,175.0,206.0
std,,209.49101,3.447216,176.276991,36.932699,18.172597,25.982031,17.403158


## User 3 Data

In [8]:
# Load User3 workouts
base_dir_3 = Path("../../DATA/CUSTOM_DATA/apple_health_export_User3")
parser_3 = AppleHealthParser(str(base_dir_3 / "export.xml"), str(base_dir_3))
workouts_3 = parser_3.parse_workouts()

# Convert to DataFrame
df_user3 = pd.DataFrame([{
    'workout_id': w.workout_id,
    'date': w.start_date,
    'duration_min': w.duration_min,
    'distance_km': w.total_distance_km,
    'energy_kcal': w.total_energy_kcal,
    'elevation_m': w.elevation_ascended_m,
    'hr_avg': w.hr_avg,
    'hr_min': w.hr_min,
    'hr_max': w.hr_max,
    'has_gpx': w.gpx_file_path is not None
} for w in workouts_3])

print(f"User 3: {len(df_user3)} workouts")

Parsing workouts from ../../DATA/CUSTOM_DATA/apple_health_export_User3/export.xml...
  Parsed 50 running workouts...
  Parsed 100 running workouts...
✓ Found 120 running workouts
User 3: 120 workouts


In [9]:
df_user3.head(10)

Unnamed: 0,workout_id,date,duration_min,distance_km,energy_kcal,elevation_m,hr_avg,hr_min,hr_max,has_gpx
0,workout_20230219_181423,2023-02-19 18:14:23,33.416667,4.9813,,,,,,True
1,workout_20230220_180957,2023-02-20 18:09:57,29.116667,5.2586,,,,,,True
2,workout_20230228_180702,2023-02-28 18:07:02,35.366667,6.45986,,,,,,True
3,workout_20230303_170708,2023-03-03 17:07:08,39.55,5.4713,,,,,,True
4,workout_20230305_183943,2023-03-05 18:39:43,56.966667,9.98882,,,,,,False
5,workout_20230306_181328,2023-03-06 18:13:28,60.716667,11.5353,,,,,,True
6,workout_20230313_170856,2023-03-13 17:08:56,28.06126,5.15341,334.554,37.6,,,,True
7,workout_20230317_180229,2023-03-17 18:02:29,38.0,7.0,,,,,,False
8,workout_20230319_090236,2023-03-19 09:02:36,54.630465,9.41845,602.033,28.18,,,,True
9,workout_20230321_181510,2023-03-21 18:15:10,29.639532,5.69542,357.254,7.64,,,,True


In [10]:
# User 3 summary
df_user3.describe()

Unnamed: 0,date,duration_min,distance_km,energy_kcal,elevation_m
count,120,120.0,120.0,113.0,103.0
mean,2024-07-26 04:25:29.575000320,42.765822,6.876092,443.505865,20.43466
min,2023-02-19 18:14:23,1.636899,0.074835,6.141,0.76
25%,2023-11-01 21:10:30.750000128,29.331756,5.034572,320.594,8.295
50%,2024-10-20 12:17:27,36.93109,6.077855,383.406,13.47
75%,2025-04-12 06:43:55,52.631598,8.301388,545.365,19.105
max,2025-09-07 16:16:42,251.078676,31.3421,2209.18,229.44
std,,28.740554,4.101309,284.553331,30.349198


## Combined Summary

In [11]:
# Add user column and combine
df_user1['user'] = 'User1'
df_user2['user'] = 'User2'
df_user3['user'] = 'User3'

df_all = pd.concat([df_user1, df_user2, df_user3], ignore_index=True)

print(f"\nTotal workouts across all users: {len(df_all)}")
print(f"\nWorkouts per user:")
print(df_all['user'].value_counts())

df_all.head(20)


Total workouts across all users: 614

Workouts per user:
user
User1    285
User2    209
User3    120
Name: count, dtype: int64


Unnamed: 0,workout_id,date,duration_min,distance_km,energy_kcal,elevation_m,hr_avg,hr_min,hr_max,has_gpx,user
0,workout_20190814_135800,2019-08-14 13:58:00,12.92782,2.16432,123.912,9.56,,,,True,User1
1,workout_20190814_141203,2019-08-14 14:12:03,18.649622,5.04219,260.433,,,,,True,User1
2,workout_20190814_143651,2019-08-14 14:36:51,15.524642,3.45875,182.661,3.11,,,,True,User1
3,workout_20190814_150622,2019-08-14 15:06:22,11.450354,2.44507,137.074,4.81,,,,True,User1
4,workout_20190817_103430,2019-08-17 10:34:30,35.009897,7.55926,434.789,4.01,,,,True,User1
5,workout_20190818_185610,2019-08-18 18:56:10,10.314462,2.5089,126.669,2.0,,,,True,User1
6,workout_20190821_092234,2019-08-21 09:22:34,12.710597,2.19068,135.575,3.44,,,,True,User1
7,workout_20190821_094014,2019-08-21 09:40:14,4.125151,1.24385,61.9912,,,,,True,User1
8,workout_20190821_094832,2019-08-21 09:48:32,7.632395,1.81391,92.1529,,,,,True,User1
9,workout_20190821_100019,2019-08-21 10:00:19,3.202936,0.88449,44.0846,,,,,True,User1


In [12]:
# Summary by user
df_all.groupby('user').agg({
    'workout_id': 'count',
    'duration_min': ['mean', 'sum'],
    'distance_km': ['mean', 'sum'],
    'hr_avg': 'mean',
    'has_gpx': 'sum'
}).round(2)

Unnamed: 0_level_0,workout_id,duration_min,duration_min,distance_km,distance_km,hr_avg,has_gpx
Unnamed: 0_level_1,count,mean,sum,mean,sum,mean,sum
user,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
User1,285,79.84,22754.85,8.51,2425.59,161.36,271
User2,209,57.45,12007.32,6.71,1402.31,166.37,191
User3,120,42.77,5131.9,6.88,825.13,,118


## Export DataFrames

Save to CSV for data wrangler or other tools

In [None]:
# Save individual user data
output_dir = Path("outputs/quick_view")
output_dir.mkdir(parents=True, exist_ok=True)

df_user1.to_csv(output_dir / "user1_workouts.csv", index=False)
df_user2.to_csv(output_dir / "user2_workouts.csv", index=False)
df_user3.to_csv(output_dir / "user3_workouts.csv", index=False)
df_all.to_csv(output_dir / "all_users_workouts.csv", index=False)

print(f"✓ Saved CSVs to: {output_dir}")