In [1]:
import pandas as pd
import json
import os
import xml.etree.ElementTree as ET
import requests
from zipfile import ZipFile
from dateutil import parser

## Get Data

In [2]:
# Raw inputs live in a `data` folder under the current working directory,
# with one sub-folder per source: `hr` (heart rate) and `gps`
data_loc = os.path.join(os.getcwd(), 'data')
hr_data, gps_data = (os.path.join(data_loc, sub_dir) for sub_dir in ('hr', 'gps'))

## Read Heart Rate Data

In [3]:
# Load every heart-rate JSON file found in `hr_data` and combine them into a
# single DataFrame (`df_hr`) with one row per unique timestamp.
df_list  = []

# os.listdir() returns names in arbitrary order and may include non-JSON
# entries (hidden files, checkpoint folders). Filter by extension, like the
# GPS cells do, and sort so that drop_duplicates() below keeps the same row
# on every run.
file_list = sorted(file_name for file_name in os.listdir(hr_data) if file_name.endswith('.json'))

for json_file in file_list:
    with open(os.path.join(hr_data, json_file), encoding='utf-8') as f:
        data = json.load(f)

    # Flatten nested records; nested keys are joined with '_' so the
    # 'value_' prefix can be stripped from the column names afterwards
    df = pd.json_normalize(data, sep='_')
    df.columns = df.columns.str.lower().str.replace('value_', '', regex=False)

    # Parse the timestamps as UTC, then drop the timezone so the column is
    # timezone-naive
    df['datetime'] = ( pd.to_datetime(df['datetime'], format='%m/%d/%y %H:%M:%S', utc=True)
                         .dt.tz_localize(None)
                     )
    df_list.append(df)

# Final heart-rate dataframe: the first row seen for a timestamp wins
df_hr = pd.concat(df_list, ignore_index=True).drop_duplicates(subset=['datetime'], ignore_index=True)

In [4]:
# Preview the combined, de-duplicated heart-rate frame
df_hr

Unnamed: 0,datetime,bpm,confidence
0,2024-01-25 07:00:03,77,2
1,2024-01-25 07:00:08,73,3
2,2024-01-25 07:00:13,72,2
3,2024-01-25 07:00:18,81,2
4,2024-01-25 07:00:23,89,2
...,...,...,...
160221,2025-03-16 02:56:21,111,1
160222,2025-03-16 02:56:23,112,1
160223,2025-03-16 02:56:25,114,1
160224,2025-03-16 02:56:27,120,1


## Read GPS Data

In [5]:
''' Read GPS data in GPX format without needing to import a separate GPX 
    package. GPX data looks like this:
        
    <?xml version="1.0" encoding="UTF-8"?>
    <gpx xmlns="http://www.topografix.com/GPX/1/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:gte="http://www.gpstrackeditor.com/xmlschemas/General/1" xsi:schemaLocation="http://www.topografix.com/GPX/1/1 http://www.topografix.com/GPX/1/1/gpx.xsd" version="1.1" creator="Slopes for Android - http://getslopes.com">
      <trk>
        <name>Jan 25, 2024 - Keystone Resort</name>
        <trkseg>
          <trkpt lat="39.605675" lon="-105.941414">
            <ele>2856.891977</ele>
            <time>2024-01-25T09:13:52.453-07:00</time>
            <hdop>19</hdop>
            <vdop>4</vdop>
            <extensions>
              <gte:gps speed="1.317580" azimuth="212.300003"/>
            </extensions>
          </trkpt>
       </trkseg>
      </trk>
    </gpx>
    
    There are two namespaces we need to use:
        1. The gpx namespace: http://www.topografix.com/GPX/1/1
        2. The gte namespace http://www.gpstrackeditor.com/xmlschemas/General/1
        
    The gte namespace is used to extract speed and azimuth data from the 
    <extensions> tag
'''
gpx_namespace = '{http://www.topografix.com/GPX/1/1}'
gte_namespace = '{http://www.gpstrackeditor.com/xmlschemas/General/1}'

all_gps_data = []

# os.listdir() returns names in arbitrary order; sort so that
# drop_duplicates() below keeps the same row for a repeated timestamp on
# every run
file_list    = sorted(file_name for file_name in os.listdir(gps_data) if file_name.endswith(".gpx"))

for gpx_file in file_list:
    # Pass the path (not a text-mode file handle) so the XML parser decodes
    # the bytes itself using the encoding named in the XML declaration,
    # instead of the platform's default text encoding
    root = ET.parse(os.path.join(gps_data, gpx_file))

    for trkpt in root.findall(f'.//{gpx_namespace}trkpt'):
        # speed and azimuth are attributes of the same <gte:gps> element,
        # so look it up once per track point
        gps_ext = trkpt.find(f'.//{gpx_namespace}extensions/{gte_namespace}gps')

        row = {
                # ignoretz=True drops the UTC offset and keeps the wall-clock
                # time exactly as written in the file
                "datetime":  parser.parse(trkpt.find(f'{gpx_namespace}time').text, ignoretz=True),
                "lat":       float(trkpt.get("lat")),
                "lon":       float(trkpt.get("lon")),
                "elevation": float(trkpt.find(f'{gpx_namespace}ele').text),
                "speed":     float(gps_ext.get("speed")),
                "azimuth":   float(gps_ext.get("azimuth"))
              }

        all_gps_data.append(row)

# Final GPS dataframe: one row per unique timestamp (first occurrence wins)
df_gps = pd.DataFrame(all_gps_data).drop_duplicates(subset=['datetime'], ignore_index=True)

## Read GPS Metadata

In [6]:
# Load the GPS metadata that accompanies the tracks. It makes runs and lifts
# easier to define and carries some additional information we may want later.
#
# A .slopes file is a plain zip archive holding a few CSVs plus XML metadata;
# Metadata.xml is the only member read here.
file_list = [file_name for file_name in os.listdir(gps_data) if file_name.endswith(".slopes")]
df_list   = []

for slopes_file in file_list:
    archive_path = os.path.join(gps_data, slopes_file)

    # One row per <Action> element found in the archive's Metadata.xml
    with ZipFile(archive_path, 'r') as zip_file, zip_file.open('Metadata.xml') as xml_file:
        df = pd.read_xml(xml_file, parser='etree', xpath='.//Action')

    # Parse start/end into datetimes, discarding the timezone information
    df[['start', 'end']] = df[['start', 'end']].map(lambda stamp: parser.parse(stamp, ignoretz=True))
    df_list.append(df)

# Stack the per-file frames into the final GPS metadata dataframe
df_gps_meta = pd.concat(df_list, ignore_index=True)

In [7]:
# Set this folder to your favorite output location
out = '../data'

df_hr.to_parquet(os.path.join(out, 'hr.parquet'))
df_gps.to_parquet(os.path.join(out, 'gps.parquet'))
df_gps_meta.to_parquet(os.path.join(out, 'gps_meta.parquet'))