In [1]:
import pandas as pd
import numpy as np

import stravalib
import arrow
import pathlib
import json

from datetime import timedelta


STREAM_TYPES = [
    "time",
    "latlng",
    "distance",
    "altitude",
    "velocity_smooth",
    "heartrate",
    "cadence",
    "watts",
    "temp",
    "moving",
    "grade_smooth",
]

COLUMN_TRANSLATIONS = {
    "altitude": "elevation",
    "velocity_smooth": "speed",
    "watts": "power",
    "temp": "temperature",
    "grade_smooth": "grade",
}

In [2]:
client = stravalib.Client(rate_limit_requests=False)

STRAVA_CLIENT_ID = "66078"
STRAVA_CLIENT_SECRET = "8f136bd4e4b40ce3591293ec8c282114c34e5219"

# STRAVA_CLIENT_ID = "114946"
# STRAVA_CLIENT_SECRET = "ac23f5a1342c1d677344f5470af0a0ce08b6a261"

In [3]:
url = client.authorization_url(
    client_id=STRAVA_CLIENT_ID,
    redirect_uri="http://127.0.0.1:5000",
    scope=["activity:read_all"],
)

In [4]:
url

'https://www.strava.com/oauth/authorize?client_id=66078&redirect_uri=http%3A%2F%2F127.0.0.1%3A5000&approval_prompt=auto&response_type=code&scope=activity%3Aread_all'

In [5]:
# CODE = "bb6d8f63fa1e21427b8a9c5d917717a43a61a5c4"
CODE = "2b9dd5518fa681cb03a214aa87ce85fd852c9b6b"
access_token = client.exchange_code_for_token(
    client_id=STRAVA_CLIENT_ID,
    client_secret=STRAVA_CLIENT_SECRET,
    code=CODE,
)

In [6]:
access_token

{'access_token': 'a53b1bc195b1f99ca22e8cfd84fc97dd26772dc3',
 'refresh_token': '474ef6ea5b89d58c88de5303d21d69599748c94d',
 'expires_at': 1706804698}

In [7]:
# client.create_subscription(
#     client_id=STRAVA_CLIENT_ID,
#     client_secret=STRAVA_CLIENT_SECRET,
#     callback_url="https://lfduxfgravhkgxzpdyqb.supabase.co/functions/v1/hello-world",
#     verify_token="STRAVA",
# )

In [8]:
client.list_subscriptions(client_id=STRAVA_CLIENT_ID, client_secret=STRAVA_CLIENT_SECRET)

<BatchedResultsIterator entity=Subscription>

In [9]:
if arrow.now().timestamp() > access_token["expires_at"]:
    client.refresh_access_token(
        "66078",
        "8f136bd4e4b40ce3591293ec8c282114c34e5219",
        access_token["refresh_token"],
    )

In [10]:
activities = []
for activity in client.get_activities():
    activities.append(activity)

In [11]:
athlete = client.get_athlete()
athlete.to_dict()

{'firstname': 'Julien',
 'lastname': 'Roullé',
 'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/42265442/17138162/1/medium.jpg',
 'profile': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/42265442/17138162/1/large.jpg',
 'city': 'Rennes',
 'state': 'Ille-et-Vilaine',
 'country': 'France',
 'sex': 'M',
 'friend': None,
 'follower': None,
 'premium': False,
 'summit': False,
 'created_at': '2019-05-16T21:20:09+00:00',
 'updated_at': '2023-12-25T16:19:35+00:00',
 'approve_followers': None,
 'badge_type_id': 0,
 'follower_count': None,
 'friend_count': None,
 'mutual_friend_count': None,
 'athlete_type': None,
 'date_preference': None,
 'measurement_preference': None,
 'email': None,
 'clubs': None,
 'bikes': None,
 'shoes': None,
 'super_user': None,
 'email_language': None,
 'weight': 71.0,
 'max_heartrate': None,
 'username': 'julienroulle',
 'description': None,
 'instagram_username': None,
 'offer_in_app_payment': None,
 'global_privacy': None,
 'rec

In [12]:
pathlib.Path("strava_data").mkdir(parents=True, exist_ok=True)

for activity in activities:
    if activity.type != "Run":
        print("Skipping activity " + str(activity.id) + " of type " + activity.type)
        continue

    if pathlib.Path(f"strava_data/{activity.id}").exists():
        print("Skipping activity " + str(activity.id) + " because it already exists")
        continue

    pathlib.Path(f"strava_data/{activity.id}").mkdir(parents=True, exist_ok=True)

    with open(f"strava_data/{activity.id}/activity.json", "w") as fp:
        json.dump(activity.to_dict(), fp)

    with open(f"strava_data/{activity.id}/activity.csv", "w") as fp:
        activity = client.get_activity(activity.id)
        start_datetime = activity.start_date_local

        streams = client.get_activity_streams(
            activity_id=activity.id,
            types=STREAM_TYPES,
            series_type="time",
        )

        raw_data = dict()

        if streams is None:
            continue
        
        for key, value in streams.items():
            if key == "latlng":
                latitude, longitude = list(zip(*value.data))
                raw_data["latitude"] = latitude
                raw_data["longitude"] = longitude
            else:
                try:
                    key = COLUMN_TRANSLATIONS[key]
                except KeyError:
                    pass

                raw_data[key] = value.data

        data = pd.DataFrame(raw_data)

        def time_to_datetime(time):
            return start_datetime + timedelta(seconds=time)

        data["datetime"] = data["time"].apply(time_to_datetime)

        data = data.drop(["time"], axis="columns")

        data = data.set_index("datetime")

        data.to_csv(fp)

Unable to set attribute visibility on entity <Activity id=10666733712 name=None>
Unable to set attribute visibility on entity <SegmentEffort id=3187740073777639242 name='longue coulée verte' resource_state=2>
Unable to set attribute visibility on entity <Activity id=10666733712 name=None>
Unable to set attribute visibility on entity <SegmentEffort id=3187740073778469706 name='Coulée Verte 2 km' resource_state=2>
Unable to set attribute visibility on entity <Activity id=10666733712 name=None>
Unable to set attribute visibility on entity <SegmentEffort id=3187740073778892618 name='Coulée verte partie 1' resource_state=2>
Unable to set attribute visibility on entity <Activity id=10666733712 name=None>
Unable to set attribute visibility on entity <SegmentEffort id=3187740073780750154 name='montée fin coulée verte' resource_state=2>
Unable to set attribute visibility on entity <Activity id=10666733712 name=None>
Unable to set attribute visibility on entity <SegmentEffort id=3187740073780244

Skipping activity 10536016787 because it already exists
Skipping activity 10532209664 because it already exists
Skipping activity 10530077986 because it already exists
Skipping activity 10525855411 because it already exists
Skipping activity 10523971592 because it already exists
Skipping activity 10523971101 because it already exists
Skipping activity 10523969303 because it already exists
Skipping activity 10518183719 because it already exists
Skipping activity 10510461050 because it already exists
Skipping activity 10507854952 because it already exists
Skipping activity 10507907674 because it already exists
Skipping activity 10507854958 because it already exists
Skipping activity 10498329743 because it already exists
Skipping activity 10491701238 because it already exists
Skipping activity 10491701230 because it already exists
Skipping activity 10491701286 because it already exists
Skipping activity 10485499662 because it already exists
Skipping activity 10481362787 because it already

In [13]:
from pathlib import Path

strava_data_path = Path("strava_data")
activities_from_folder = list(strava_data_path.glob("*"))

len(set(activities_from_folder))

1349

In [14]:
activities_from_folder = [activity.name for activity in activities_from_folder]

In [15]:
run_activities = [str(activity.id) for activity in activities if activity.type == "Run"]

In [16]:
set(activities_from_folder).difference(set(run_activities))

{'.DS_Store'}

In [17]:
import datetime

datetime.date(2023, 1, 1) - datetime.date.today()

datetime.timedelta(days=-396)