In [9]:
import json

# Path (relative to the notebook's working directory) of the JSON file holding the
# activity records; it is passed to extract_fields_from_json further down this cell.
file_path = 'data/activities.json'

# Function to extract all fields from the dataset
def extract_fields_from_json(file_path):
    """Load a JSON list of objects and give every record the same set of fields.

    Parameters
    ----------
    file_path : str or path-like
        Location of a JSON file whose top-level value is a list of objects.

    Returns
    -------
    list of dict
        One dict per input entry, in input order. Each dict contains the union
        of all keys seen anywhere in the file; keys an entry did not have are
        filled with None. Keys are emitted in first-seen order, so the result
        is identical from one run to the next.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Union of field names across all entries. A dict is used instead of a set
    # because it preserves insertion order, whereas the iteration order of a
    # set of strings can differ between interpreter runs.
    all_fields = {}
    for entry in data:
        all_fields.update(dict.fromkeys(entry.keys()))

    # Rebuild every entry against the full field list, padding gaps with None.
    return [
        {field: entry.get(field) for field in all_fields}
        for entry in data
    ]

# Extract the data
activities = extract_fields_from_json(file_path)

print(len(activities))
# Print the date of the first and last activity in the file
print(activities[0]['start_date_local'])
print(activities[-1]['start_date_local'])

# Keep only rides (every other activity type is dropped)
rides = [entry for entry in activities if entry['type'] == 'Ride']

print(len(rides))

# Keep only rides of at least 5 km; 'distance' is compared in metres.
# The extraction step fills fields an entry did not have with None, so guard
# against None before comparing (None >= 5000 would raise TypeError).
MIN_DISTANCE_M = 5000
dataset = [
    entry for entry in rides
    if entry['distance'] is not None and entry['distance'] >= MIN_DISTANCE_M
]

print(len(dataset))

# Print out the activity IDs
for entry in dataset:
    print(entry['id'])

# Show the second remaining ride in full as a sample record
dataset[1]

100
2023-06-30T16:04:06Z
2023-02-12T10:47:03Z
40
22
9037527192
9031302209
9031303362
9031304151
9037528515
8721596194
8721595127
8721555514
8667905944
8663453900
8663444360
8663433697
8663423773
8662385488
8663497049
8623428993
8623427408
8616947338
8607277292
8596146520
8579138367
8580333820


{'average_temp': 9,
 'average_cadence': None,
 'total_photo_count': 0,
 'total_elevation_gain': 239,
 'elev_high': 81.2,
 'has_kudoed': False,
 'max_heartrate': 162,
 'flagged': False,
 'visibility': 'everyone',
 'resource_state': 2,
 'kudos_count': 5,
 'max_watts': None,
 'distance': 90123.2,
 'photo_count': 0,
 'elev_low': 49.8,
 'display_hide_heartrate_option': True,
 'location_city': None,
 'achievement_count': 14,
 'max_speed': 12.128,
 'weighted_average_watts': None,
 'pr_count': 6,
 'comment_count': 0,
 'average_speed': 6.92,
 'device_watts': False,
 'id': 9031302209,
 'private': False,
 'athlete': {'id': 13076767, 'resource_state': 1},
 'commute': False,
 'external_id': '2023-05-07-011636-ELEMNT BOLT 5C99-113-0.fit',
 'start_date_local': '2023-05-07T11:16:40Z',
 'from_accepted_tag': False,
 'timezone': '(GMT+10:00) Australia/Melbourne',
 'name': 'Lunch Ride',
 'average_watts': 115.7,
 'start_latlng': [-37.825782811269164, 144.96354675851762],
 'upload_id_str': '9689152861',
 'u

In [13]:
import requests

# Function to download the GPX file for a given activity ID
def download_gpx(activity_id, timeout=30):
    """Download the GPX export of one activity to data/activity_<activity_id>.gpx.

    Parameters
    ----------
    activity_id : int or str
        Activity identifier; interpolated into the export URL and the file name.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to requests.get.
        Without a timeout a stalled connection would block forever.

    Returns
    -------
    None
        The outcome is reported by printing: 'File saved successfully.' on
        success, or a line starting with 'Error:' on any failure. Nothing is
        written to disk on failure.
    """
    # https://www.strava.com/activities/10123460836/export_gpx
    url = f'https://www.strava.com/activities/{activity_id}/export_gpx'

    # Make the GET request; report network failures instead of aborting a batch
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print('Error:', exc)
        return

    # Check if the request was successful
    if response.status_code != 200:
        print('Error:', response.status_code)
        return

    # requests follows redirects by default, so a 200 status alone does not
    # prove the body is GPX: a redirect to an HTML page also ends in 200.
    # NOTE(review): this URL presumably needs a logged-in session - confirm
    # that unauthenticated calls really return GPX and not a login page.
    if b'<gpx' not in response.content:
        print('Error: response is not a GPX document (login may be required)')
        return

    # Save the file
    with open(f'data/activity_{activity_id}.gpx', 'wb') as file:
        file.write(response.content)
    print('File saved successfully.')

# Single-activity example, kept for reference:
#activity_id = 9037527192
#download_gpx(activity_id)

# Fetch the GPX export for every activity left in the filtered dataset, one at a time.
# The generator keeps the lookups lazy, so each ID is read just before its download.
for ride_id in (ride['id'] for ride in dataset):
    download_gpx(ride_id)


File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.
File saved successfully.


In [14]:
# One-off download of a single activity, identified directly by its ID
# rather than taken from the filtered dataset.
activity_id = 10271577919
download_gpx(activity_id)

File saved successfully.


In [None]:
# Turn a calendar date into a Unix epoch timestamp (whole seconds)

from datetime import datetime, timezone

# The date of interest: midnight at the start of 1 July 2023.
# Built without a timezone, i.e. a naive datetime.
date_to_convert = datetime(year=2023, month=7, day=1)

# Label the naive value as UTC. replace() only attaches the timezone;
# the clock fields are not shifted.
date_to_convert_utc = date_to_convert.replace(tzinfo=timezone.utc)

# timestamp() returns float seconds since the epoch; truncate to an integer
epoch_seconds_float = date_to_convert_utc.timestamp()
epoch_timestamp = int(epoch_seconds_float)

epoch_timestamp

