In [15]:
import pandas as pd
import json
import os
import xml.etree.ElementTree as ET
import requests
from io import BytesIO
from zipfile import ZipFile
from dateutil import parser

## Get Data

In [11]:
def read_remote_zip(url, timeout=60):
    """Download a zip archive over HTTP and open it entirely in memory.

    Parameters
    ----------
    url : str
        Address of the zip file to fetch.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``; bounds how long the request may
        wait on the server so a stalled connection cannot hang the kernel.

    Returns
    -------
    zipfile.ZipFile
        Archive backed by an in-memory buffer holding the response body.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    zipfile.BadZipFile
        If the downloaded payload is not a valid zip archive.
    """
    response = requests.get(url, timeout=timeout)

    # Fail here with the HTTP status instead of letting an error page reach
    # ZipFile, where it would surface as a misleading BadZipFile.
    response.raise_for_status()

    # The whole body is needed up front because ZipFile requires a seekable
    # file object; BytesIO provides that over the downloaded bytes.
    return ZipFile(BytesIO(response.content))
    
# Download each source archive once; the cells below only read from these
# in-memory ZipFile objects.

# Biometrics: heart rate and its variability
zip_hr = read_remote_zip(
    url="https://github.com/stu-code/viz/raw/refs/heads/main/snowboarding/data/biometrics/hr/hr.zip"
)
zip_hr_var = read_remote_zip(
    url="https://github.com/stu-code/viz/raw/refs/heads/main/snowboarding/data/biometrics/hr_variability/hr_variability.zip"
)

# Biometrics: SpO2 and its variability
zip_spo2 = read_remote_zip(
    url="https://github.com/stu-code/viz/raw/refs/heads/main/snowboarding/data/biometrics/spo2/spo2.zip"
)
zip_spo2_var = read_remote_zip(
    url="https://github.com/stu-code/viz/raw/refs/heads/main/snowboarding/data/biometrics/spo2_variability/spo2_variability.zip"
)

# GPS tracks (.gpx) and their metadata (.slopes)
zip_gps = read_remote_zip(
    url="https://github.com/stu-code/viz/raw/refs/heads/main/snowboarding/data/gps/gps.zip"
)

## Read Heartrate Data

In [18]:
# Parse every member of the heart-rate archive as JSON, flatten it into a
# DataFrame, and stack the per-file frames into df_hr.
df_list = []

for json_file in zip_hr.namelist():
    # Open the member named by the loop variable so each iteration reads its
    # own file and the cell does not depend on any leftover kernel state.
    with zip_hr.open(json_file) as f:
        data = json.load(f)

    # Nested keys are flattened with '_' as the separator; the 'value_' prefix
    # is then stripped (literal match, not a regex) and names are lower-cased.
    df = pd.json_normalize(data, sep='_')
    df.columns = df.columns.str.lower().str.replace('value_', '', regex=False)

    # Timestamps are parsed as UTC, shifted to US/Mountain, and then made
    # timezone-naive so they hold plain local wall-clock times.
    df['datetime'] = ( pd.to_datetime(df['datetime'], format='%m/%d/%y %H:%M:%S', utc=True)
                         .dt.tz_convert('US/Mountain')
                         .dt.tz_localize(None)
                     )
    df_list.append(df)

# Single heart-rate frame with a fresh 0..n-1 index
df_hr = pd.concat(df_list, ignore_index=True)

## Read Biometric CSV Data

In [23]:
def read_bio_csv(zip_file):
    """Read every member of an opened zip archive as CSV into one DataFrame.

    Parameters
    ----------
    zip_file : zipfile.ZipFile
        Archive whose members are all read with ``pd.read_csv``; each must
        contain a ``timestamp`` column, which is parsed as dates.

    Returns
    -------
    pandas.DataFrame
        Rows of all members stacked in ``namelist()`` order with a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        From ``pd.concat`` when the archive has no members.
    """
    # Accumulate in a local list: a module-level list would survive between
    # calls and leak the rows of one archive into the result for the next.
    frames = []

    for csv_file in zip_file.namelist():
        with zip_file.open(csv_file) as f:
            frames.append(pd.read_csv(f, parse_dates=['timestamp']))

    return pd.concat(frames, ignore_index=True)

# One combined frame per biometric CSV archive

# Heart-rate variability
df_hr_var = read_bio_csv(zip_file=zip_hr_var)

# Blood-oxygen saturation and its variability
df_spo2 = read_bio_csv(zip_file=zip_spo2)
df_spo2_var = read_bio_csv(zip_file=zip_spo2_var)

  df = pd.read_csv(file, parse_dates=['timestamp'])
  df = pd.read_csv(file, parse_dates=['timestamp'])
  df = pd.read_csv(file, parse_dates=['timestamp'])
  df = pd.read_csv(file, parse_dates=['timestamp'])
  df = pd.read_csv(file, parse_dates=['timestamp'])
  df = pd.read_csv(file, parse_dates=['timestamp'])


## Read GPS Data

In [33]:
''' Parse the GPX track files with the standard-library XML parser, so no
    dedicated GPX package is required. A GPX file has this shape:
        
    <?xml version="1.0" encoding="UTF-8"?>
    <gpx xmlns="http://www.topografix.com/GPX/1/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:gte="http://www.gpstrackeditor.com/xmlschemas/General/1" xsi:schemaLocation="http://www.topografix.com/GPX/1/1 http://www.topografix.com/GPX/1/1/gpx.xsd" version="1.1" creator="Slopes for Android - http://getslopes.com">
      <trk>
        <name>Jan 25, 2024 - Keystone Resort</name>
        <trkseg>
          <trkpt lat="39.605675" lon="-105.941414">
            <ele>2856.891977</ele>
            <time>2024-01-25T09:13:52.453-07:00</time>
            <hdop>19</hdop>
            <vdop>4</vdop>
            <extensions>
              <gte:gps speed="1.317580" azimuth="212.300003"/>
            </extensions>
          </trkpt>
       </trkseg>
      </trk>
    </gpx>
    
    Two XML namespaces are involved:
        1. gpx: http://www.topografix.com/GPX/1/1
        2. gte: http://www.gpstrackeditor.com/xmlschemas/General/1
        
    The gte namespace qualifies the <gps> element inside <extensions>, whose
    attributes carry the speed and azimuth of each track point.
'''
# ElementTree addresses namespaced tags as '{namespace-uri}tag'
gpx_namespace = '{http://www.topografix.com/GPX/1/1}'
gte_namespace = '{http://www.gpstrackeditor.com/xmlschemas/General/1}'

# Only the .gpx members of the archive hold track points
file_list  = [name for name in zip_gps.namelist() if name.endswith(".gpx")]
clean_data = []

for gpx_file in file_list:
    # The opened zip member is already a file object, which ET.parse accepts
    with zip_gps.open(gpx_file) as f:
        root = ET.parse(f)

    # Visit every track point in document order
    for trkpt in root.iter(f'{gpx_namespace}trkpt'):
        # Look the child elements up once; their text/attributes are read below
        time_el = trkpt.find(f'{gpx_namespace}time')
        ele_el  = trkpt.find(f'{gpx_namespace}ele')
        gps_el  = trkpt.find(f'.//{gpx_namespace}extensions/{gte_namespace}gps')

        clean_data.append({
            # ignoretz=True drops the UTC offset, leaving the local clock time
            "datetime":  parser.parse(time_el.text, ignoretz=True),
            "lat":       float(trkpt.get("lat")),
            "lon":       float(trkpt.get("lon")),
            "elevation": float(ele_el.text),
            "speed":     float(gps_el.get("speed")),
            "azimuth":   float(gps_el.get("azimuth")),
        })

# One row per track point across all GPX files
df_gps = pd.DataFrame(clean_data)

## Read GPS Metadata

In [41]:
# Load the GPS metadata. It makes runs and lifts easier to define and offers
# some additional information should we want it.

# A .slopes file is itself a zip archive holding some CSVs plus XML metadata;
# only its Metadata.xml is read here.
file_list = [name for name in zip_gps.namelist() if name.endswith(".slopes")]
df_list   = []

for slopes_file in file_list:
    # Pull the nested archive into memory so it can be opened as a ZipFile
    with zip_gps.open(slopes_file) as f:
        raw_data = f.read()

    # Each <Action> element in Metadata.xml becomes one row
    with ZipFile(BytesIO(raw_data), 'r') as zip_file, zip_file.open('Metadata.xml') as xml_file:
        df = pd.read_xml(xml_file, parser='etree', xpath='.//Action')

    # Turn start/end into datetimes with the timezone offset discarded
    time_cols = ['start', 'end']
    df[time_cols] = df[time_cols].map(lambda stamp: parser.parse(stamp, ignoretz=True))
    df_list.append(df)

# Combined metadata for all .slopes files, re-indexed from 0
df_gps_meta = pd.concat(df_list, ignore_index=True)