Description: Fetch all running activities of the authenticated Strava athlete and save the streams of those with heart-rate data as CSV files

In [6]:
# Imports
import glob
import json
import os
import re
import webbrowser
from datetime import datetime, timezone

import pandas as pd
from dotenv import load_dotenv
from stravalib.client import Client

In [7]:
# Constants
# Stream types requested from Strava for each activity; the same names are
# used as the column names of the exported dataframes.
ACTIVITY_STREAM_TYPES = [
    'time', 'latlng', 'distance', 'altitude',
    'velocity_smooth', 'heartrate', 'grade_smooth',
]

In [10]:
# Read the stored Strava token from a JSON file.
# NOTE(review): the path is relative to the notebook's working directory, and
# token expiry is not checked here — confirm the token is refreshed elsewhere.
with open('../../strava_token.json', 'r', encoding='utf-8') as rf:
    stored_token = json.load(rf)

# Fail fast with a clear message instead of silently building an
# unauthenticated client when the file carries no access token.
access_token = stored_token.get('access_token')
if not access_token:
    raise ValueError("No 'access_token' found in ../../strava_token.json")

client = Client(access_token=access_token)


In [11]:
# Fetch the authenticated athlete; the name is used to build the output folder.
current_athlete = client.get_athlete()
athlete_name = (current_athlete.firstname + '_' + current_athlete.lastname).lower()

# Create folder to store data for that athlete.
# The path is two levels deep (<athlete_name>/fit_file_csv/), so for a new
# athlete the parent folder does not exist yet: os.mkdir would raise
# FileNotFoundError, whereas os.makedirs creates the missing parents too.
folder_path = f'../activity_data/{athlete_name}/fit_file_csv/'
if not os.path.exists(folder_path):
    print('Creating new athlete folder...')
    os.makedirs(folder_path, exist_ok=True)

In [12]:
# Define dates for query. Note: there is a Strava API rate limit for given 15 minute period.
# The bounds are timezone-aware UTC values: a naive local `datetime.now()` is
# treated as UTC by stravalib, which shifts the upper bound by the local UTC
# offset and can drop the most recent activities.
date_start = datetime(2020, 1, 1, tzinfo=timezone.utc)
date_end = datetime.now(timezone.utc)
# Get all activities.
# get_activities returns a lazy result iterator; materialise it once so that
# several cells can loop over the activities without the first loop exhausting
# it (and without querying the API again).
activities = list(client.get_activities(before=date_end, after=date_start))

In [26]:
# Summarise the fetched activities: all runs, and the runs with heart-rate data.
# Materialise first so that iterating here does not exhaust a lazy result
# iterator that later cells still need (a no-op copy if it is already a list).
activities = list(activities)

total_runs = [activity for activity in activities if activity.type == "Run"]
runs_with_hr = [run for run in total_runs if run.has_heartrate]
total_count = len(total_runs)
hr_count = len(runs_with_hr)

print("Total runs: %d, runs with hr: %d" % (total_count, hr_count))

# Take the earliest run by start date instead of by list position, so the
# result does not depend on the order in which the API returns activities,
# and guard against empty lists (indexing [-1] would raise IndexError).
if total_runs:
    print("Earliest run: %s" % min(run.start_date for run in total_runs))
else:
    print("Earliest run: n/a")
if runs_with_hr:
    print("Earliest run with hr: %s" % min(run.start_date for run in runs_with_hr))
else:
    print("Earliest run with hr: n/a")

Total runs: 72, runs with hr: 40
Earliest run: 2022-01-12 20:39:01+00:00
Earliest run with hr: 2022-10-04 14:22:18+00:00


In [14]:
# Save running activities with heart rate data as csv files.
# NOTE(review): if `activities` is a lazy, single-pass iterator that another
# cell has already looped over, this loop will see no items — confirm it is a
# list (or re-query) before running this cell.
for activity in activities:
    if not (activity.type == 'Run' and activity.has_heartrate):
        continue

    # Get dictionary of activity streams
    activity_stream = client.get_activity_streams(
        activity_id=activity.id, types=ACTIVITY_STREAM_TYPES, resolution='medium')
    if not activity_stream:
        print(f'Skipping activity {activity.id}: no streams returned')
        continue

    # Extract the streams of interest. Not every activity has every stream
    # (has_heartrate only tells us about heart rate), so a missing stream is
    # skipped here instead of failing on `None.data`.
    data = {}
    for key in ACTIVITY_STREAM_TYPES:
        stream = activity_stream.get(key)
        if stream is not None:
            data[key] = stream.data

    # Create dataframe from streams; reindex so every CSV has the same columns
    # in the same order (streams that were missing become empty columns).
    df = pd.DataFrame(data=data).reindex(columns=ACTIVITY_STREAM_TYPES)

    # Create file name from GMT start date and formatted activity name
    # (everything except ASCII letters is stripped from the name).
    filename = activity.start_date.strftime('%Y-%m-%dT%H.%M.%S') + 'GMT_' + re.sub('[^a-zA-Z]', '', activity.name)
    print(filename)
    # Save dataframe to csv file
    df.to_csv(f'{folder_path}{filename}.csv', index=False)

2023-01-28T15.30.11GMT_Lindseyranak
2023-01-24T23.48.08GMT_EveningRun
2023-01-24T23.09.20GMT_EveningRun
2023-01-15T16.07.50GMT_MorningRun
2022-12-27T19.04.52GMT_AfternoonRun
2022-12-10T14.05.02GMT_MorningRun
2022-12-09T21.28.25GMT_AfternoonRun
2022-12-07T15.17.49GMT_MorningRun
2022-12-06T00.52.40GMT_AfternoonRun
2022-12-03T17.14.47GMT_MorningRun
2022-12-02T23.11.31GMT_AfternoonRun
2022-11-28T23.15.04GMT_AfternoonRun
2022-11-27T16.18.33GMT_MorningRun
2022-11-25T18.41.28GMT_LunchRun
2022-11-17T23.42.41GMT_AfternoonRun
2022-11-13T15.56.27GMT_Teapotlongrun
2022-11-11T23.20.28GMT_AfternoonRun
2022-11-10T14.24.34GMT_MorningRun
2022-11-08T23.20.19GMT_AfternoonRun
2022-11-06T16.02.55GMT_MorningRun
2022-11-05T22.10.22GMT_AfternoonRun
2022-11-04T00.42.22GMT_AfternoonRun
2022-11-02T01.21.04GMT_EveningRun
2022-11-02T00.37.17GMT_WorkoutWarmUp
2022-10-30T15.19.37GMT_MorningRun
2022-10-29T15.41.05GMT_MorningRun
2022-10-27T12.43.20GMT_Wogtempo
2022-10-25T13.51.00GMT_MorningRun
2022-10-23T15.46.57GMT_C

In [None]:
# set parameters, file names

# select the features for EDA graphs
# (options listed by the author: cadence, enhanced_altitude, distance,
# heart_rate, enhanced_speed, rolling_ave_alt)
# NOTE(review): these names differ from the stream names used elsewhere in this
# notebook (e.g. 'heartrate', 'velocity_smooth') — confirm which CSV schema
# `fit_path` is expected to contain.
eda_model_features = ["heart_rate", "enhanced_speed", "rolling_ave_alt", "cadence", "distance", "enhanced_altitude"]

# select the predictors for the model
# (options listed by the author: cadence, altitude, distance, heart_rate,
# enhanced_speed, rolling_ave_alt)
model_features = ["heart_rate", "enhanced_speed", "rolling_ave_alt", "cadence"]
batch_size = 250 # training batch size for the LSTM
epochs = 180 # maximum number of epochs - autostop will work on per file basis
learning_rate = 0.001
decay_rate = 0.001
n_X = 120 # number of timesteps for training
n_y = 22 # number of timesteps in future for prediction
step = 1 # step size of predictors for model training

sequence_length = int(n_X/step)
# number of files held out for the validation dataset
# NOTE(review): the original comment said only 1 validation file is supported
# at the moment, yet 10 files are held out — confirm which is intended.
n_fit_files_test_set = 10

# select the training files and the validation files
# NOTE(review): `fit_path` is not defined in the cells visible here — confirm
# it is set before this cell runs.
# glob returns files in arbitrary order, so list the folder once and sort it to
# make the split reproducible; splitting by index also keeps the training set
# intact when n_fit_files_test_set is 0 (a `[0:-0]` slice would be empty).
all_files = sorted(glob.glob(fit_path+"/*.csv"))
split_index = max(len(all_files) - n_fit_files_test_set, 0)
train_files = all_files[:split_index]
valid_files = all_files[split_index:]
#test_files = glob.glob(fit_test_path+"/*.csv")
