In [21]:
#Load secrets from .env 
import boto3
import os
from dotenv import load_dotenv
from io import BytesIO

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Read credentials.
# os.getenv() returns None for a missing variable, and passing None on to
# boto3 makes it fall back to its default endpoint/credential lookup instead
# of Supabase. Fail fast here with a message naming what is missing.
_required_env_vars = (
    "SUPABASE_S3_ENDPOINT",
    "SUPABASE_S3_KEY_ID",
    "SUPABASE_S3_SECRET",
    "SUPABASE_BUCKET",
)
_missing_env_vars = [name for name in _required_env_vars if not os.getenv(name)]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in the environment or in the .env file."
    )

s3_endpoint = os.environ["SUPABASE_S3_ENDPOINT"]
s3_key_id = os.environ["SUPABASE_S3_KEY_ID"]
s3_key = os.environ["SUPABASE_S3_SECRET"]
bucket_name = os.environ["SUPABASE_BUCKET"]

In [22]:
# Build the S3-compatible client that talks to Supabase storage.
client_options = {
    "endpoint_url": s3_endpoint,
    "aws_access_key_id": s3_key_id,
    "aws_secret_access_key": s3_key,
}
s3 = boto3.client("s3", **client_options)

# Example: show every object key returned for the "raw/" prefix.
response = s3.list_objects_v2(Bucket=bucket_name, Prefix="raw/")
listed_objects = response.get("Contents", [])  # key is absent when nothing matches
for entry in listed_objects:
    print(entry["Key"])


raw/!!DO_NOT_DELETE!!.txt
raw/Evening_Ride.fit
raw/Evening_Ride.tcx
raw/Solo_Ride.fit


In [23]:
import xml.etree.ElementTree as ET
import pandas as pd
from fitparse import FitFile
import numpy as np
import os

def get_activity_date(file_bytes, extension):
    """Return the date of the first timestamp in an activity file.

    Args:
        file_bytes: Raw contents of the activity file.
        extension: File extension including the leading dot (".fit" or
            ".tcx"). Matching is case-insensitive.

    Returns:
        The date as an ISO ``YYYY-MM-DD`` string, or the sentinel string
        "unknown" when the extension is unsupported, no timestamp is found,
        or parsing fails. Parse failures are printed, not raised.
    """
    ext = (extension or "").lower()

    if ext == ".fit":
        try:
            fitfile = FitFile(BytesIO(file_bytes))
            for record in fitfile.get_messages('record'):
                for field in record:
                    # Skip timestamp fields without a usable datetime value
                    # instead of aborting the whole scan on the first one.
                    if field.name == "timestamp" and hasattr(field.value, "date"):
                        return field.value.date().isoformat()
        except Exception as e:
            print(f"Failed to parse .fit file: {e}")
    elif ext == ".tcx":
        try:
            # The XML parser rejects whitespace before the "<?xml" declaration,
            # so strip it before parsing.
            root = ET.fromstring(file_bytes.lstrip())
            namespaces = {'tcx': 'http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2'}
            time_elem = root.find('.//tcx:Trackpoint/tcx:Time', namespaces)
            # An empty <Time> element would parse to NaT rather than a date.
            time_text = (time_elem.text or "").strip() if time_elem is not None else ""
            if time_text:
                return pd.to_datetime(time_text).date().isoformat()
        except Exception as e:
            print(f"Failed to parse .tcx file: {e}")
    return "unknown"

In [24]:
# Process files in raw/: rename each activity file by its date and move it
# to bronze/original/.
import uuid

SUPPORTED_EXTENSIONS = (".fit", ".tcx")

# A single list_objects_v2 call returns at most 1000 keys, so page through
# the listing. Collect every key up front so the deletes below never run
# while the listing is still being read.
paginator = s3.get_paginator("list_objects_v2")
raw_keys = [
    obj["Key"]
    for page in paginator.paginate(Bucket=bucket_name, Prefix="raw/")
    for obj in page.get("Contents", [])
]

for raw_key in raw_keys:
    # Normalise case so "RIDE.FIT" is handled like "ride.fit".
    extension = os.path.splitext(raw_key)[-1].lower()
    if extension not in SUPPORTED_EXTENSIONS:
        continue

    raw_obj = s3.get_object(Bucket=bucket_name, Key=raw_key)
    file_bytes = raw_obj["Body"].read()

    activity_date = get_activity_date(file_bytes, extension)
    if activity_date == "unknown":
        # Leave the file in raw/ so it can be inspected and retried.
        print(f"Could not extract date from {raw_key}")
        continue

    # A short random suffix keeps keys unique for same-day activities.
    short_id = str(uuid.uuid4())[:8]
    bronze_key = f"bronze/original/bronze_activity_{activity_date}_{short_id}{extension}"

    # Upload the renamed copy first; put_object raises on failure, so the
    # original is only deleted after the upload has succeeded.
    s3.put_object(Bucket=bucket_name, Key=bronze_key, Body=file_bytes)
    s3.delete_object(Bucket=bucket_name, Key=raw_key)

    print(f"Moved: {raw_key} ➜ {bronze_key}")


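# NOTE: this was originally only working for the .tcx files; the run below also moved .fit files - TODO confirm .fit handling is complete, then remove this note.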

Moved: raw/Evening_Ride.fit ➜ bronze/original/bronze_activity_2025-04-14_5047d1b5.fit
Moved: raw/Evening_Ride.tcx ➜ bronze/original/bronze_activity_2025-04-14_d42c58d2.tcx
Moved: raw/Solo_Ride.fit ➜ bronze/original/bronze_activity_2025-04-26_1020f1ec.fit
