In [1]:
import json
import os,glob,shutil
import re
import datetime, time
import math

# 3rd party classes
import numpy as np
import pandas as pd



# Read in workout data and load to data frames

In [2]:
def get_workout_data(curr_dir):
    """
    Parses files and directories in the passed curr_dir till finds a file
    that ends in rungap.json. Then loads that json data to a Dictionary.
    Returns the Dictionary storing the json data.
    """
    data = ''
    jsonFileRegex = re.compile(r'(rungap.json)$')
    jsonExtRegex = re.compile(r'(.json)$')

    for filename in os.listdir(curr_dir):
        if jsonFileRegex.search(filename):
            with open(os.path.join(curr_dir, filename)) as data_file:
                data = json.load(data_file)
                break
    return data


In [3]:
workout_dir = '/Users/mike/Library/Mobile Documents/iCloud~com~rungap~RunGap/Documents/Export_test/2023-09-24_11-14-10_hk_1695572050'
data = get_workout_data(workout_dir)

In [4]:
activityPts = data['laps'][0]['points']
eventTyps = data['events']

# Initialize Pandas DataFrames
df_events = pd.DataFrame(eventTyps)
df_activity = pd.DataFrame(activityPts)



In [5]:
df_events['start_dttm'] = pd.to_datetime(df_events['start'], unit='s')
df_events['end_dttm'] = pd.to_datetime(df_events['end'], unit='s')

In [6]:
df_activity['dttm'] = pd.to_datetime(df_activity['time'], unit='s')


In [7]:
print(df_events.head())
print(df_activity.head())

      type       start         end          start_dttm            end_dttm
0  segment  1695572051  1695572238 2023-09-24 16:14:11 2023-09-24 16:17:18
1  segment  1695572051  1695572344 2023-09-24 16:14:11 2023-09-24 16:19:04
2  segment  1695572051  1695573369 2023-09-24 16:14:11 2023-09-24 16:36:09
3  segment  1695572238  1695572407 2023-09-24 16:17:18 2023-09-24 16:20:07
4  segment  1695572344  1695572600 2023-09-24 16:19:04 2023-09-24 16:23:20
         va        lat        ha        lon        time         ele      dist  \
0  1.246552  40.608390  1.572949 -89.442004  1695572054  211.038954       NaN   
1  1.112878  40.608406  1.413646 -89.442014  1695572055  211.073297       NaN   
2  0.979557  40.608427  1.321864 -89.442029  1695572056  211.113711  2.468179   
3  0.874906  40.608451  1.220179 -89.442048  1695572057  211.153140  5.545458   
4  0.797835  40.608476  1.121695 -89.442073  1695572058  211.190907  9.011913   

     hr  kcal                dttm  
0   NaN   NaN 2023-09-24 16

# Cleanup values
- Forward fill distance and elevation

In [8]:
activity = df_activity.copy()

In [9]:
activity['dist'].ffill(inplace=True)
activity['ele'].ffill(inplace=True)

In [10]:
activity.tail()

Unnamed: 0,va,lat,ha,lon,time,ele,dist,hr,kcal,dttm
2266,1.065806,40.608591,1.749599,-89.442267,1695574356,212.426673,13317.180777,,0.182,2023-09-24 16:52:36
2267,1.165122,40.608547,1.855758,-89.442224,1695574357,212.422056,13323.17618,,,2023-09-24 16:52:37
2268,1.288762,40.608506,2.033962,-89.442177,1695574358,212.418334,13329.50501,93.0,,2023-09-24 16:52:38
2269,1.437751,40.608468,2.34785,-89.442127,1695574359,212.415016,13335.497124,,,2023-09-24 16:52:39
2270,,,,,1695574360,212.415016,13335.497124,,0.276,2023-09-24 16:52:40


# Merge Pause and Lap splits from df_events into df_activity

Get start time as int and date/time
This is needed for setting first marker

In [11]:
wrkt_start = activity['time'].min()
wrkt_start_dttm = activity['dttm'].min()
print(wrkt_start)
print(wrkt_start_dttm)

1695572054
2023-09-24 16:14:14


## Mark records in Activity when paused

In [12]:
event_pause_resume = df_events.loc[df_events['type'].isin(['pause','resume'])].copy()
event_pause_resume.drop(['end','end_dttm'], axis=1, inplace=True)

Merge pause row with resume row below it to get the start to end time of pause

In [13]:
event_pause_resume['end'] = event_pause_resume['start'].shift(-1)
event_pause_resume.dropna(subset=['end'],inplace=True)
event_pause_resume['end'] = event_pause_resume['end'].astype('int64')
event_pause_resume['end_dttm'] = pd.to_datetime(event_pause_resume['end'], unit='s')
event_pause_resume['dur'] = event_pause_resume['end'] - event_pause_resume['start']

In [14]:
event_pause_times = event_pause_resume = event_pause_resume.loc[event_pause_resume['type'].isin(['pause'])].copy()
event_pause_times = event_pause_times.reset_index(drop=True)
event_pause_times.index = event_pause_times.index+1

In [15]:
# event_pause_times.to_csv('/Users/mike/Downloads/event_pause_times.csv')

In [16]:
pause_range = event_pause_times[['start','end']].values.tolist()
print(pause_range)
pause_choices = event_pause_times.index.values.tolist()
print(pause_choices)

[[1695573390, 1695573402], [1695573722, 1695573749], [1695573996, 1695573997], [1695573998, 1695574007]]
[1, 2, 3, 4]


In [17]:
activity_pause_conditions = []
for i in range(len(pause_range)):
    condition = activity['time'].ge(pause_range[i][0]) & activity['time'].le(pause_range[i][1])
    activity_pause_conditions.append(condition)
activity['pause'] = np.select(activity_pause_conditions, pause_choices)

In [18]:
activity['pause'].value_counts()

pause
0    2254
2       9
1       4
4       3
3       1
Name: count, dtype: int64

## Merge Resume Splits into Activity

In [19]:
event_pause = df_events.loc[df_events['type'].isin(['pause'])].copy()
event_pause = event_pause.reset_index(drop=True)
event_pause.head()

Unnamed: 0,type,start,end,start_dttm,end_dttm
0,pause,1695573390,1695573390,2023-09-24 16:36:30,2023-09-24 16:36:30
1,pause,1695573722,1695573722,2023-09-24 16:42:02,2023-09-24 16:42:02
2,pause,1695573996,1695573996,2023-09-24 16:46:36,2023-09-24 16:46:36
3,pause,1695573998,1695573998,2023-09-24 16:46:38,2023-09-24 16:46:38
4,pause,1695574360,1695574360,2023-09-24 16:52:40,2023-09-24 16:52:40


In [20]:
activity.head()

Unnamed: 0,va,lat,ha,lon,time,ele,dist,hr,kcal,dttm,pause
0,1.246552,40.60839,1.572949,-89.442004,1695572054,211.038954,,,,2023-09-24 16:14:14,0
1,1.112878,40.608406,1.413646,-89.442014,1695572055,211.073297,,,,2023-09-24 16:14:15,0
2,0.979557,40.608427,1.321864,-89.442029,1695572056,211.113711,2.468179,65.0,,2023-09-24 16:14:16,0
3,0.874906,40.608451,1.220179,-89.442048,1695572057,211.15314,5.545458,,,2023-09-24 16:14:17,0
4,0.797835,40.608476,1.121695,-89.442073,1695572058,211.190907,9.011913,,,2023-09-24 16:14:18,0


In [21]:
pause_conditions = event_pause['start'].tolist()
print(pause_conditions)
pause_choices = event_pause.index.values.tolist()
print(pause_choices)

[1695573390, 1695573722, 1695573996, 1695573998, 1695574360]
[0, 1, 2, 3, 4]


In [22]:
activity_pause_conditions = []
for i in range(len(pause_conditions)-1):
    condition = activity['time'].ge(pause_conditions[i]) & activity['time'].lt(pause_conditions[i+1])
    activity_pause_conditions.append(condition)
activity_pause_conditions.append(activity['time'].ge(pause_conditions[-1]))
activity['resume'] = np.select(activity_pause_conditions, pause_choices)

In [23]:
activity.head()

Unnamed: 0,va,lat,ha,lon,time,ele,dist,hr,kcal,dttm,pause,resume
0,1.246552,40.60839,1.572949,-89.442004,1695572054,211.038954,,,,2023-09-24 16:14:14,0,0
1,1.112878,40.608406,1.413646,-89.442014,1695572055,211.073297,,,,2023-09-24 16:14:15,0,0
2,0.979557,40.608427,1.321864,-89.442029,1695572056,211.113711,2.468179,65.0,,2023-09-24 16:14:16,0,0
3,0.874906,40.608451,1.220179,-89.442048,1695572057,211.15314,5.545458,,,2023-09-24 16:14:17,0,0
4,0.797835,40.608476,1.121695,-89.442073,1695572058,211.190907,9.011913,,,2023-09-24 16:14:18,0,0


## Merge marker/lap into activity

In [24]:
df_laps = df_events.loc[df_events['type'] =='marker'].reset_index(drop=True)

In [25]:
df_laps.loc[-1] = ['marker', wrkt_start, wrkt_start, wrkt_start_dttm, wrkt_start_dttm] # add first marker at start of workout
df_laps.index = df_laps.index +1 #shifting index
df_laps = df_laps.sort_index() # sorting on index

In [26]:
df_laps.head()

Unnamed: 0,type,start,end,start_dttm,end_dttm
0,marker,1695572054,1695572054,2023-09-24 16:14:14,2023-09-24 16:14:14
1,marker,1695573129,1695573129,2023-09-24 16:32:09,2023-09-24 16:32:09


In [27]:
lap_conditions = df_laps['start'].tolist()
print(lap_conditions)
lap_choices = df_laps.index.values.tolist()
print(lap_choices)

[1695572054, 1695573129]
[0, 1]


In [28]:
activity_lap_conditions = []
for i in range(len(lap_conditions)-1):
    condition = activity['time'].ge(lap_conditions[i]) & activity['time'].lt(lap_conditions[i+1])
    activity_lap_conditions.append(condition)
activity_lap_conditions.append(activity['time'].ge(lap_conditions[-1]))
activity['lap'] = np.select(activity_lap_conditions, lap_choices)

In [29]:
activity.tail()

Unnamed: 0,va,lat,ha,lon,time,ele,dist,hr,kcal,dttm,pause,resume,lap
2266,1.065806,40.608591,1.749599,-89.442267,1695574356,212.426673,13317.180777,,0.182,2023-09-24 16:52:36,0,3,1
2267,1.165122,40.608547,1.855758,-89.442224,1695574357,212.422056,13323.17618,,,2023-09-24 16:52:37,0,3,1
2268,1.288762,40.608506,2.033962,-89.442177,1695574358,212.418334,13329.50501,93.0,,2023-09-24 16:52:38,0,3,1
2269,1.437751,40.608468,2.34785,-89.442127,1695574359,212.415016,13335.497124,,,2023-09-24 16:52:39,0,3,1
2270,,,,,1695574360,212.415016,13335.497124,,0.276,2023-09-24 16:52:40,0,4,1


# Get mile/kilometer splits

In [30]:
MILES_IN_KILOMETERS = 0.621371
METERS_IN_KILOMETERS = 1000
METERS_TO_FEET = 3.28084

In [31]:
activity_df = activity.copy()

In [32]:
activity = activity_df.copy()

In [33]:
activity.head()

Unnamed: 0,va,lat,ha,lon,time,ele,dist,hr,kcal,dttm,pause,resume,lap
0,1.246552,40.60839,1.572949,-89.442004,1695572054,211.038954,,,,2023-09-24 16:14:14,0,0,0
1,1.112878,40.608406,1.413646,-89.442014,1695572055,211.073297,,,,2023-09-24 16:14:15,0,0,0
2,0.979557,40.608427,1.321864,-89.442029,1695572056,211.113711,2.468179,65.0,,2023-09-24 16:14:16,0,0,0
3,0.874906,40.608451,1.220179,-89.442048,1695572057,211.15314,5.545458,,,2023-09-24 16:14:17,0,0,0
4,0.797835,40.608476,1.121695,-89.442073,1695572058,211.190907,9.011913,,,2023-09-24 16:14:18,0,0,0


In [34]:
print(2.468179 / METERS_IN_KILOMETERS * MILES_IN_KILOMETERS)

0.0015336548534090001


In [35]:
activity.rename(columns={'dist':'dist_m'},inplace=True)
activity['dist_m'].fillna(0, inplace=True)
activity['dist_km'] = activity['dist_m'] / METERS_IN_KILOMETERS
activity['dist_mi'] = activity['dist_km'] * MILES_IN_KILOMETERS

In [36]:
activity['delta_dist_mi'] = activity['dist_mi']-activity['dist_mi'].shift(+1)
activity['delta_dist_km'] = activity['dist_km']-activity['dist_km'].shift(+1)
activity['delta_dist_mi'].fillna(0, inplace=True)
activity['delta_dist_km'].fillna(0, inplace=True)

In [37]:
activity[['lat','lon','time','dttm','ele','dist_m','dist_km','dist_mi','resume','lap']].tail()

Unnamed: 0,lat,lon,time,dttm,ele,dist_m,dist_km,dist_mi,resume,lap
2266,40.608591,-89.442267,1695574356,2023-09-24 16:52:36,212.426673,13317.180777,13.317181,8.27491,3,1
2267,40.608547,-89.442224,1695574357,2023-09-24 16:52:37,212.422056,13323.17618,13.323176,8.278635,3,1
2268,40.608506,-89.442177,1695574358,2023-09-24 16:52:38,212.418334,13329.50501,13.329505,8.282568,3,1
2269,40.608468,-89.442127,1695574359,2023-09-24 16:52:39,212.415016,13335.497124,13.335497,8.286291,3,1
2270,,,1695574360,2023-09-24 16:52:40,212.415016,13335.497124,13.335497,8.286291,4,1


In [38]:
# Get mile number
i = 1
conditions = [activity['dist_mi'].lt(i)]
choices = [i]
while i <= math.ceil(activity['dist_mi'].max()):
    conditions.append(activity['dist_mi'].ge(i) & activity['dist_mi'].lt(i+1))
    choices.append(i+1)
    i=i+1
activity['mile'] = np.select(conditions, choices, default=0)

# Get Kilometer number
i = 1
conditions = [activity['dist_km'].lt(i)]
choices = [i]
while i <= math.ceil(activity['dist_km'].max()):
    conditions.append(activity['dist_km'].ge(i) & activity['dist_km'].lt(i+1))
    choices.append(i+1)
    i=i+1
activity['kilometer'] = np.select(conditions, choices, default=0)


In [39]:
activity[['lat','lon','dttm','dist_m','dist_km','dist_mi','resume','lap', 'mile','kilometer']].head()

Unnamed: 0,lat,lon,dttm,dist_m,dist_km,dist_mi,resume,lap,mile,kilometer
0,40.60839,-89.442004,2023-09-24 16:14:14,0.0,0.0,0.0,0,0,1,1
1,40.608406,-89.442014,2023-09-24 16:14:15,0.0,0.0,0.0,0,0,1,1
2,40.608427,-89.442029,2023-09-24 16:14:16,2.468179,0.002468,0.001534,0,0,1,1
3,40.608451,-89.442048,2023-09-24 16:14:17,5.545458,0.005545,0.003446,0,0,1,1
4,40.608476,-89.442073,2023-09-24 16:14:18,9.011913,0.009012,0.0056,0,0,1,1


# Get elevation changes

In [40]:
activity.rename(columns={'ele':'altitude_m'},inplace=True)
activity['altitude_m'].fillna(0, inplace=True)
activity['altitude_ft'] = activity['altitude_m'] * METERS_TO_FEET

activity['delta_ele_ft'] = activity['altitude_ft']-activity['altitude_ft'].shift(+1)
activity['delta_ele_ft'].fillna(0, inplace=True)

activity['ele_up'] = activity[activity['delta_ele_ft']>0]['delta_ele_ft']
activity['ele_down'] = activity[activity['delta_ele_ft']<0]['delta_ele_ft']


In [41]:
activity[['lat','lon','dttm','altitude_m','altitude_ft','delta_ele_ft','ele_up','ele_down']].head()

Unnamed: 0,lat,lon,dttm,altitude_m,altitude_ft,delta_ele_ft,ele_up,ele_down
0,40.60839,-89.442004,2023-09-24 16:14:14,211.038954,692.385043,0.0,,
1,40.608406,-89.442014,2023-09-24 16:14:15,211.073297,692.497715,0.112671,0.112671,
2,40.608427,-89.442029,2023-09-24 16:14:16,211.113711,692.630306,0.132591,0.132591,
3,40.608451,-89.442048,2023-09-24 16:14:17,211.15314,692.759669,0.129363,0.129363,
4,40.608476,-89.442073,2023-09-24 16:14:18,211.190907,692.883576,0.123907,0.123907,


# Drop records that are part of pause, then remove pause column

In [42]:
remove_rows_index = activity[activity['pause'] != 0].index
activity.drop(remove_rows_index, inplace=True)
activity.drop(columns=['pause'], inplace=True)

# Get duration of each record

In [43]:
activity.reset_index(drop=True, inplace=True)
activity['dur_sec'] = activity.index.values

# Normalize field names

In [44]:
activity.rename(columns={'lat':'latitude','lon':'longitude','dttm':'timestamp'},inplace=True)

In [45]:
activity.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2254 entries, 0 to 2253
Data columns (total 23 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   va             2253 non-null   float64       
 1   latitude       2253 non-null   float64       
 2   ha             2253 non-null   float64       
 3   longitude      2253 non-null   float64       
 4   time           2254 non-null   int64         
 5   altitude_m     2254 non-null   float64       
 6   dist_m         2254 non-null   float64       
 7   hr             451 non-null    float64       
 8   kcal           874 non-null    float64       
 9   timestamp      2254 non-null   datetime64[ns]
 10  resume         2254 non-null   int64         
 11  lap            2254 non-null   int64         
 12  dist_km        2254 non-null   float64       
 13  dist_mi        2254 non-null   float64       
 14  delta_dist_mi  2254 non-null   float64       
 15  delta_dist_km  2254 n

In [46]:
activity[['latitude','longitude','timestamp','altitude_m','altitude_ft','delta_ele_ft','ele_up','ele_down']].tail()

Unnamed: 0,latitude,longitude,timestamp,altitude_m,altitude_ft,delta_ele_ft,ele_up,ele_down
2249,40.608591,-89.442267,2023-09-24 16:52:36,212.426673,696.937925,-0.020255,,-0.020255
2250,40.608547,-89.442224,2023-09-24 16:52:37,212.422056,696.922777,-0.015148,,-0.015148
2251,40.608506,-89.442177,2023-09-24 16:52:38,212.418334,696.910566,-0.012211,,-0.012211
2252,40.608468,-89.442127,2023-09-24 16:52:39,212.415016,696.899682,-0.010884,,-0.010884
2253,,,2023-09-24 16:52:40,212.415016,696.899682,0.0,,


# Export data

In [47]:
activity.to_csv('/Users/mike/Downloads/activity_hk.csv')
activity.to_pickle('/Users/mike/Downloads/activity_hk.pickle')