In [1]:
from dotenv import load_dotenv
import os
import requests
import urllib3
import pandas as pd
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [2]:
print(os.getenv("CLIENT_ID"))

175293


In [3]:
auth_url = "https://www.strava.com/oauth/token"
activites_url = "https://www.strava.com/api/v3/athlete/activities"

load_dotenv()
payload = {
    'client_id': os.getenv("CLIENT_ID"),
    'client_secret': os.getenv("CLIENT_SECRET"),
    'refresh_token': os.getenv("REFRESH_TOKEN"),
    'grant_type': "refresh_token",
    'f': 'json'
}

print("Requesting Token...\n")
res = requests.post(auth_url, data=payload, verify=False)
access_token = res.json()['access_token']

print("Access Token = {}\n".format(access_token))
header = {'Authorization': 'Bearer ' + access_token}

# The first loop, request_page_number will be set to one, so it requests the first page. Increment this number after
# each request, so the next time we request the second page, then third, and so on...
request_page_num = 1
all_activities = []

while True:
    param = {'per_page': 200, 'page': request_page_num}
    # initial request, where we request the first page of activities
    my_dataset = requests.get(activites_url, headers=header, params=param).json()

    # check the response to make sure it is not empty. If it is empty, that means there is no more data left. So if you have
    # 1000 activities, on the 6th request, where we request page 6, there would be no more data left, so we will break out of the loop
    if len(my_dataset) == 0:
        print("breaking out of while loop because the response is zero, which means there must be no more activities")
        break

    # if the all_activities list is already populated, that means we want to add additional data to it via extend.
    if all_activities:
        print("all_activities is populated")
        all_activities.extend(my_dataset)

    # if the all_activities is empty, this is the first time adding data so we just set it equal to my_dataset
    else:
        print("all_activities is NOT populated")
        all_activities = my_dataset

    request_page_num += 1

print(len(all_activities))
for count, activity in enumerate(all_activities):
    print(activity["name"])
    print(count)

Requesting Token...

Access Token = b732aff895758e611e1e0d24fe8990795c120f3a

all_activities is NOT populated
all_activities is populated
all_activities is populated
all_activities is populated
all_activities is populated
all_activities is populated
breaking out of while loop because the response is zero, which means there must be no more activities
1079
Evening Ride
0
Afternoon Weight Training
1
Afternoon Ride
2
Afternoon Walk
3
Afternoon Ride
4
24km Race Practice Long Runü™¶
5
Coffee Race 2025‚òïÔ∏è
6
9km Easy RunüåÖ
7
Afternoon Weight Training
8
Afternoon Ride
9
Rolling 300sü™¶
10
Afternoon Weight Training
11
Progressive Runü™¶
12
22km Long Runüóø
13
Afternoon Weight Training
14
Evening Ride
15
400m RepeatsüòÆ‚Äçüí®
16
9km Easy Run‚ú®
17
Afternoon Weight Training
18
Afternoon Ride
19
Afternoon Ride
20
11km Long Runüóø
21
Evening Ride
22
Morning Ride
23
Afternoon Walk
24
Afternoon Weight Training
25
Lunch Ride
26
5km Time TrialüïØÔ∏è
27
Evening RideüöÄ
28
5km Easy Runüêù
2

# All activities DF

In [4]:
df_activities = pd.DataFrame(all_activities)
df_activities.head()

Unnamed: 0,resource_state,athlete,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,...,upload_id_str,external_id,from_accepted_tag,pr_count,total_photo_count,has_kudoed,suffer_score,average_cadence,max_watts,weighted_average_watts
0,2,"{'id': 81055898, 'resource_state': 1}",Evening Ride,14177.5,3095,3461,53.0,Ride,Ride,,...,16756458099,garmin_ping_476437881496,False,0,0,False,9.0,,,
1,2,"{'id': 81055898, 'resource_state': 1}",Afternoon Weight Training,0.0,4071,4071,0.0,Workout,WeightTraining,,...,16752910671,garmin_ping_476360234705,False,0,0,False,9.0,,,
2,2,"{'id': 81055898, 'resource_state': 1}",Afternoon Ride,11833.2,1778,7158,32.0,Ride,Ride,10.0,...,16753067661,garmin_ping_476364154361,False,0,0,False,13.0,,,
3,2,"{'id': 81055898, 'resource_state': 1}",Afternoon Walk,5062.4,3664,3840,12.0,Walk,Walk,,...,16726897935,garmin_ping_475670331039,False,0,0,False,5.0,52.4,,
4,2,"{'id': 81055898, 'resource_state': 1}",Afternoon Ride,24309.7,5770,7708,63.0,Ride,Ride,10.0,...,16716611559,garmin_ping_475336836414,False,4,0,False,12.0,,,


# Single Activity

In [5]:
id1 = 15645579969
get_activity_url = f'https://www.strava.com/api/v3/activities/{id1}'
my_activity1 = requests.get(get_activity_url, headers=header, params=param).json()


In [6]:
id2 = 15629398208
get_activity_url = f'https://www.strava.com/api/v3/activities/{id2}'
my_activity2 = requests.get(get_activity_url, headers=header, params=param).json()

In [7]:
my_activities = [my_activity1, my_activity2]
my_activities[0].keys()

dict_keys(['resource_state', 'athlete', 'name', 'distance', 'moving_time', 'elapsed_time', 'total_elevation_gain', 'type', 'sport_type', 'workout_type', 'id', 'start_date', 'start_date_local', 'timezone', 'utc_offset', 'location_city', 'location_state', 'location_country', 'achievement_count', 'kudos_count', 'comment_count', 'athlete_count', 'photo_count', 'map', 'trainer', 'commute', 'manual', 'private', 'visibility', 'flagged', 'gear_id', 'start_latlng', 'end_latlng', 'average_speed', 'max_speed', 'average_cadence', 'average_watts', 'max_watts', 'weighted_average_watts', 'device_watts', 'kilojoules', 'has_heartrate', 'average_heartrate', 'max_heartrate', 'heartrate_opt_out', 'display_hide_heartrate_option', 'elev_high', 'elev_low', 'upload_id', 'upload_id_str', 'external_id', 'from_accepted_tag', 'pr_count', 'total_photo_count', 'has_kudoed', 'suffer_score', 'description', 'calories', 'perceived_exertion', 'prefer_perceived_exertion', 'segment_efforts', 'splits_metric', 'splits_stand

# Funkcja

In [8]:
dataframes = {
}
dataframe_columns = {
  'activities' : [
    'name',
    'distance',
    'moving_time',
    'elapsed_time',
    'total_elevation_gain',
    'type',
    'sport_type',
    'workout_type',
    'id',
    'start_date',
    'start_date_local',
    'timezone',
    'utc_offset',
    'location_city',
    'location_state',
    'location_country',
    'achievement_count',
    'kudos_count',
    'comment_count',
    'athlete_count',
    'photo_count',
    'trainer',
    'commute',
    'manual',
    'private',
    'visibility',
    'flagged',
    'gear_id',
    'start_latlng',
    'end_latlng',
    'average_speed',
    'max_speed',
    'average_cadence',
    'average_watts',
    'max_watts',
    'weighted_average_watts',
    'device_watts',
    'kilojoules',
    'has_heartrate',
    'average_heartrate',
    'max_heartrate',
    'heartrate_opt_out',
    'display_hide_heartrate_option',
    'elev_high',
    'elev_low',
    'upload_id',
    'upload_id_str',
    'external_id',
    'from_accepted_tag',
    'pr_count',
    'total_photo_count',
    'has_kudoed',
    'suffer_score',
    'description',
    'calories',
    'perceived_exertion',
    'prefer_perceived_exertion',
    'hide_from_home',
    'device_name',
    'embed_token',
    'available_zones',
    'map.id',
    'gear.id'],
  'maps' : [
    'map.id',
    'map.polyline',
    'map.resource_state',
    'map.summary_polyline'],
  'gear' : [
    'gear.id',
    'gear.primary',
    'gear.name',
    'gear.nickname',
    'gear.resource_state',
    'gear.retired',
    'gear.distance',
    'gear.converted_distance'],
  'segment_efforts' : [
    'segment_efforts'],
  'splits_metric' : [
    'splits_metric'],
  'laps' : [
    'laps'],
  'best_efforts' : [
    'best_efforts'],
  'similar_activities' : [
    'similar_activities.effort_count',
    'similar_activities.average_speed',
    'similar_activities.min_average_speed',
    'similar_activities.mid_average_speed',
    'similar_activities.max_average_speed',
    'similar_activities.pr_rank',
    'similar_activities.frequency_milestone',
    'similar_activities.trend.speeds',
    'similar_activities.trend.current_activity_index',
    'similar_activities.trend.min_speed',
    'similar_activities.trend.mid_speed',
    'similar_activities.trend.max_speed',
    'similar_activities.trend.direction',
    'similar_activities.resource_state']
}

In [9]:
my_activities

[{'resource_state': 3,
  'athlete': {'id': 81055898, 'resource_state': 1},
  'name': '24km Race Practice Long Runü™¶',
  'distance': 8565.3,
  'moving_time': 2985,
  'elapsed_time': 3081,
  'total_elevation_gain': 15.0,
  'type': 'Run',
  'sport_type': 'Run',
  'workout_type': 2,
  'id': 15645579969,
  'start_date': '2025-08-31T08:19:54Z',
  'start_date_local': '2025-08-31T10:19:54Z',
  'timezone': '(GMT+01:00) Europe/Warsaw',
  'utc_offset': 7200.0,
  'location_city': None,
  'location_state': None,
  'location_country': None,
  'achievement_count': 2,
  'kudos_count': 12,
  'comment_count': 3,
  'athlete_count': 1,
  'photo_count': 0,
  'map': {'id': 'a15645579969',
   'polyline': 'w{|vHwpogBRp@PZ\\jAFFJDPINSDAJ?^`@PBVOVWJSj@c@l@o@BG\\[Va@VWZi@Z]NUZ_@f@YP?HB@Dd@~AFJLFh@@\\EVIl@MN?LDRXTd@rBhEH^BV?`@CPMVa@r@}AxBuDjEe@d@qAzAg@b@aElEu@`A]^Y`@cBzBaAdB_@j@ELiBpCm@hAcAzAa@t@}@tAQ^oAnBMXa@j@yC~EU`@y@jB{@pAa@`@[b@w@|AiA`BcAnB_AzAw@zA_AxAI\\G`AGRIPMHe@HQFKJ}B|Dc@h@oAlBoAzBaAnAWb@q@j@wDpEONUZ_

In [10]:
my_activities_df = pd.json_normalize(my_activities, sep=".")
my_activities_df.head()

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,similar_activities.max_average_speed,similar_activities.pr_rank,similar_activities.frequency_milestone,similar_activities.trend.speeds,similar_activities.trend.current_activity_index,similar_activities.trend.min_speed,similar_activities.trend.mid_speed,similar_activities.trend.max_speed,similar_activities.trend.direction,similar_activities.resource_state
0,3,24km Race Practice Long Runü™¶,8565.3,2985,3081,15.0,Run,Run,2,15645579969,...,2.869447,,,[2.869447170749581],0,2.869447,2.869447,2.869447,0,2
1,3,9km Easy RunüåÖ,9110.3,3326,3389,12.0,Run,Run,0,15629398208,...,2.739116,1.0,,"[2.3505316755270576, 2.395654265389671, 2.4235...",4,2.322524,2.513676,2.739116,1,2


In [11]:
normalized_activity_df= pd.json_normalize(my_activities, sep=".")

for dataframe_key in dataframe_columns:
  if dataframe_key not in dataframes:
    dataframes[dataframe_key] = pd.DataFrame()

  if len(dataframe_columns[dataframe_key]) == 1:
    subset_df = pd.json_normalize(my_activities, record_path=[dataframe_key], sep=".")
  else:
    available_columns = [column for column in dataframe_columns[dataframe_key] if column in normalized_activity_df.columns]
    if available_columns:
        subset_df = normalized_activity_df[available_columns]
  dataframes[dataframe_key] = pd.concat([dataframes[dataframe_key], subset_df], ignore_index=True)

In [12]:
dataframes['segment_efforts']

Unnamed: 0,id,resource_state,name,elapsed_time,moving_time,start_date,start_date_local,distance,start_index,end_index,...,segment.end_latlng,segment.elevation_profile,segment.elevation_profiles,segment.climb_category,segment.city,segment.state,segment.country,segment.private,segment.hazardous,segment.starred
0,3397164322778388012,2,Bartoszowicki - Chrobrego,570,570,2025-08-31T08:24:23Z,2025-08-31T10:24:23Z,1611.0,269,839,...,"[51.113338, 17.10795]",,,0,Wroc≈Çaw,Dolno≈õlƒÖskie,Poland,False,False,False
1,3397164322777701932,2,most Chrobrego do g√≥rki,707,707,2025-08-31T08:34:03Z,2025-08-31T10:34:03Z,1966.3,849,1556,...,"[51.125973, 17.088604]",,,0,Wroc≈Çaw,Wojew√≥dztwo dolno≈õlƒÖskie,Poland,False,False,False
2,3397164322780875308,2,od Kochanowskiego do Mostu Warszawskiego,449,353,2025-08-31T08:51:58Z,2025-08-31T10:51:58Z,1150.6,1924,2278,...,"[51.130487, 17.061361]",,,0,Wroc≈Çaw,Wojew√≥dztwo dolno≈õlƒÖskie,Poland,False,False,False
3,3397164322779997740,2,Podbieg agrafkƒÖ,93,93,2025-08-31T08:59:28Z,2025-08-31T10:59:28Z,267.6,2279,2372,...,"[51.13058, 17.06115]",,,0,Wroc≈Çaw,Dolno≈õlƒÖskie,Poland,False,False,False
4,3396581647838912972,2,Bartoszowicki - Chrobrego,564,564,2025-08-29T17:48:54Z,2025-08-29T19:48:54Z,1611.0,269,833,...,"[51.113338, 17.10795]",,,0,Wroc≈Çaw,Dolno≈õlƒÖskie,Poland,False,False,False
5,3396581647840048588,2,most Chrobrego do g√≥rki,719,719,2025-08-29T17:58:29Z,2025-08-29T19:58:29Z,1966.3,844,1563,...,"[51.125973, 17.088604]",,,0,Wroc≈Çaw,Wojew√≥dztwo dolno≈õlƒÖskie,Poland,False,False,False
6,3396581647840285132,2,Olimpijski Clockwise,505,505,2025-08-29T18:14:43Z,2025-08-29T20:14:43Z,1386.4,1756,2261,...,"[51.116407, 17.10362]",,,0,Wroc≈Çaw,Wojew√≥dztwo dolno≈õlƒÖskie,Poland,False,False,False
7,3396581647838773708,2,Most Chrobrego- most Bartoszowicki,583,583,2025-08-29T18:26:12Z,2025-08-29T20:26:12Z,1582.0,2445,3028,...,"[51.102416, 17.122673]",,,0,Wroc≈Çaw,Wojew√≥dztwo dolno≈õlƒÖskie,Poland,False,False,False
8,3396581647838962124,2,Finisz na betonozie,127,127,2025-08-29T18:38:12Z,2025-08-29T20:38:12Z,345.6,3165,3292,...,"[51.107079, 17.123627]",,,0,Wroc≈Çaw,Lower Silesian Voivodeship,Poland,False,False,False


In [13]:
for dataframe_key in dataframe_columns:
  print(dataframe_key)
  print(len(dataframe_columns[dataframe_key]))

activities
63
maps
4
gear
8
segment_efforts
1
splits_metric
1
laps
1
best_efforts
1
similar_activities
14
