### Libraries import

In [11]:
from dotenv import load_dotenv
import os
import requests
import urllib3
import pandas as pd
from tqdm import tqdm
import time
import random
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
pd.set_option('display.max_columns', None)

In [12]:
from sqlalchemy import Text, Integer, BigInteger, Float, Boolean
from sqlalchemy import create_engine
from sqlalchemy.dialects.postgresql import JSONB, TIMESTAMP

### Strava API setup

In [2]:
auth_url = 'https://www.strava.com/oauth/token'
activites_url = 'https://www.strava.com/api/v3/athlete/activities'

load_dotenv()
payload = {
    'client_id': os.getenv('CLIENT_ID'),
    'client_secret': os.getenv('CLIENT_SECRET'),
    'refresh_token': os.getenv('REFRESH_TOKEN'),
    'grant_type': 'refresh_token',
    'f': 'json'
}

### SQL setup

In [3]:
engine = create_engine(os.getenv('DB_URI'))

### Requesing list of all activities

In [4]:
print('Requesting Token...\n')
res = requests.post(auth_url, data=payload, verify=False)
access_token = res.json()['access_token']

print('Access Token = {}\n'.format(access_token))
header = {'Authorization': 'Bearer ' + access_token}

# The first loop, request_page_number will be set to one, so it requests the first page. Increment this number after
# each request, so the next time we request the second page, then third, and so on...
request_page_num = 1
all_activities = []

while True:
    param = {'per_page': 200, 'page': request_page_num}
    # initial request, where we request the first page of activities
    my_dataset = requests.get(activites_url, headers=header, params=param).json()

    # check the response to make sure it is not empty. If it is empty, that means there is no more data left. So if you have
    # 1000 activities, on the 6th request, where we request page 6, there would be no more data left, so we will break out of the loop
    if len(my_dataset) == 0:
        print('breaking out of while loop because the response is zero, which means there must be no more activities')
        break

    # if the all_activities list is already populated, that means we want to add additional data to it via extend.
    if all_activities:
        print('all_activities is populated')
        all_activities.extend(my_dataset)

    # if the all_activities is empty, this is the first time adding data so we just set it equal to my_dataset
    else:
        print('all_activities is NOT populated')
        all_activities = my_dataset

    request_page_num += 1

print(len(all_activities))
for count, activity in enumerate(all_activities):
    print(activity['name'])
    print(count)

all_activities_df = pd.json_normalize(all_activities, sep='_')

Requesting Token...

Access Token = ab4ca255a0f315aaa84ce37fc18df0b666ca53dd

all_activities is NOT populated
all_activities is populated
all_activities is populated
all_activities is populated
all_activities is populated
all_activities is populated
breaking out of while loop because the response is zero, which means there must be no more activities
1080
800m RepeatsüöÄ
0
Evening Ride
1
Afternoon Weight Training
2
Afternoon Ride
3
Afternoon Walk
4
Afternoon Ride
5
24km Race Practice Long Runü™¶
6
Coffee Race 2025‚òïÔ∏è
7
9km Easy RunüåÖ
8
Afternoon Weight Training
9
Afternoon Ride
10
Rolling 300sü™¶
11
Afternoon Weight Training
12
Progressive Runü™¶
13
22km Long Runüóø
14
Afternoon Weight Training
15
Evening Ride
16
400m RepeatsüòÆ‚Äçüí®
17
9km Easy Run‚ú®
18
Afternoon Weight Training
19
Afternoon Ride
20
Afternoon Ride
21
11km Long Runüóø
22
Evening Ride
23
Morning Ride
24
Afternoon Walk
25
Afternoon Weight Training
26
Lunch Ride
27
5km Time TrialüïØÔ∏è
28
Evening RideüöÄ
2

### Limiting size of data download

In [5]:
top_n = 5
all_activities_df = all_activities_df.iloc[:top_n]

In [10]:
all_activities_df.head()

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,start_date,start_date_local,timezone,utc_offset,location_city,location_state,location_country,achievement_count,kudos_count,comment_count,athlete_count,photo_count,trainer,commute,manual,private,visibility,flagged,gear_id,start_latlng,end_latlng,average_speed,max_speed,average_cadence,average_watts,max_watts,weighted_average_watts,device_watts,kilojoules,has_heartrate,average_heartrate,max_heartrate,heartrate_opt_out,display_hide_heartrate_option,elev_high,elev_low,upload_id,upload_id_str,external_id,from_accepted_tag,pr_count,total_photo_count,has_kudoed,suffer_score,athlete_id,athlete_resource_state,map_id,map_summary_polyline,map_resource_state,average_temp
0,2,800m RepeatsüöÄ,10130.5,3404,3404,10.0,Run,Run,3.0,15697639810,2025-09-04T17:19:53Z,2025-09-04T19:19:53Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,1,6,0,1,0,False,False,False,False,everyone,False,g24134620,"[51.107356, 17.124205]","[51.107781, 17.124302]",2.976,4.46,80.1,350.3,574.0,371.0,True,1191.9,True,152.9,178.0,False,True,123.0,115.4,16768190000.0,16768189716,garmin_ping_476772433908,False,0,0,False,65.0,81055898,1,a15697639810,ss{vHo~ngBH\Tn@Nt@RV|@vBb@~@X`Ab@lAxAxJZjAzArE...,2,
1,2,Evening Ride,14177.5,3095,3461,53.0,Ride,Ride,,15686796063,2025-09-03T17:34:35Z,2025-09-03T19:34:35Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,1,7,0,2,0,False,False,False,False,followers_only,False,b12572672,"[51.107091, 17.124618]","[51.108163, 17.123457]",4.581,13.54,,100.9,,,False,312.3,True,103.9,155.0,False,True,127.0,114.4,16756460000.0,16756458099,garmin_ping_476437881496,False,0,0,False,9.0,81055898,1,a15686796063,ok}vHw_ngBUZ[n@cAbBaAzAa@r@a@j@q@pAe@p@}@`BIHs...,2,19.0
2,2,Afternoon Weight Training,0.0,4071,4071,0.0,Workout,WeightTraining,,15683435146,2025-09-03T12:57:35Z,2025-09-03T14:57:35Z,(GMT+02:00) Africa/Blantyre,7200.0,,,,0,5,0,1,0,True,False,False,False,followers_only,False,,[],[],0.0,0.0,,,,,,,True,97.9,137.0,False,True,0.0,0.0,16752910000.0,16752910671,garmin_ping_476360234705,False,0,0,False,9.0,81055898,1,a15683435146,,2,
3,2,Afternoon Ride,11833.2,1778,7158,32.0,Ride,Ride,10.0,15683575988,2025-09-03T12:37:26Z,2025-09-03T14:37:26Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,1,7,0,1,0,False,True,False,False,followers_only,False,b12572672,"[51.107588, 17.123967]","[51.108269, 17.123584]",6.655,10.04,,158.1,,,False,281.0,True,134.1,149.0,False,True,130.4,117.2,16753070000.0,16753067661,garmin_ping_476364154361,False,0,0,False,13.0,81055898,1,a15683575988,{t{vHedogBZpC~@bDd@fANRPh@z@tBf@`Bh@vCV|B^tBT|...,2,26.0
4,2,Afternoon Walk,5062.4,3664,3840,12.0,Walk,Walk,,15660079944,2025-09-01T14:19:56Z,2025-09-01T16:19:56Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,0,8,0,2,0,False,False,False,False,followers_only,False,,"[51.107714, 17.124284]","[51.108194, 17.125157]",1.382,1.88,52.4,,,,,,True,81.2,108.0,False,True,124.2,116.2,16726900000.0,16726897935,garmin_ping_475670331039,False,0,0,False,5.0,81055898,1,a15660079944,wh|vHavogBPl@HPLL^DzAY\ENBLN`@n@zA`DXr@DRDj@Gf...,2,


### Requesing list of all activities with details

In [7]:
print('Requesting Token...\n')
res = requests.post(auth_url, data=payload, verify=False) # auth_url & payload referenced in code already above
access_token = res.json()['access_token']
header = {'Authorization': 'Bearer ' + access_token}
print('Access Token = {}\n'.format(access_token))

all_activities_details = []
count = 0

print('Calculating time to get all activities details....\n')
print(all_activities_df.shape[0] * 8 / 60, ' minutes to obtain data')

print('Getting details of each activity')
for index, row in tqdm(all_activities_df.iterrows(), total=all_activities_df.shape[0]):
    get_activity_url = 'https://www.strava.com/api/v3/activities/{}'.format(row['id'])
    try:
        activity_details = requests.get(get_activity_url, headers=header).json() 
        all_activities_details.append(activity_details)
        time.sleep(random.randint(7, 9))
    except requests.exception.RequestException as e:
        raise SystemExit(e)

all_activities_details_df = pd.json_normalize(all_activities_details, sep='_')

Requesting Token...

Access Token = ab4ca255a0f315aaa84ce37fc18df0b666ca53dd

Calculating time to get all activities details....

0.6666666666666666  minutes to obtain data
Getting details of each activity


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:43<00:00,  8.66s/it]


In [9]:
all_activities_details_df

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,start_date,start_date_local,timezone,utc_offset,location_city,location_state,location_country,achievement_count,kudos_count,comment_count,athlete_count,photo_count,trainer,commute,manual,private,visibility,flagged,gear_id,start_latlng,end_latlng,average_speed,max_speed,average_cadence,average_watts,max_watts,weighted_average_watts,device_watts,kilojoules,has_heartrate,average_heartrate,max_heartrate,heartrate_opt_out,display_hide_heartrate_option,elev_high,elev_low,upload_id,upload_id_str,external_id,from_accepted_tag,pr_count,total_photo_count,has_kudoed,suffer_score,description,calories,perceived_exertion,prefer_perceived_exertion,segment_efforts,splits_metric,splits_standard,laps,best_efforts,stats_visibility,hide_from_home,device_name,embed_token,available_zones,athlete_id,athlete_resource_state,map_id,map_polyline,map_resource_state,map_summary_polyline,gear_primary,gear_name,gear_nickname,gear_resource_state,gear_retired,gear_distance,gear_converted_distance,photos_primary,photos_count,similar_activities_effort_count,similar_activities_average_speed,similar_activities_min_average_speed,similar_activities_mid_average_speed,similar_activities_max_average_speed,similar_activities_pr_rank,similar_activities_frequency_milestone,similar_activities_trend_speeds,similar_activities_trend_current_activity_index,similar_activities_trend_min_speed,similar_activities_trend_mid_speed,similar_activities_trend_max_speed,similar_activities_trend_direction,similar_activities_resource_state,average_temp
0,3,800m RepeatsüöÄ,10130.5,3404,3404,10.0,Run,Run,3.0,15697639810,2025-09-04T17:19:53Z,2025-09-04T19:19:53Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,1,6,0,1,0,False,False,False,False,everyone,False,g24134620,"[51.107356, 17.124205]","[51.107781, 17.124302]",2.976,4.46,80.1,350.3,574.0,371.0,True,1191.9,True,152.9,178.0,False,True,123.0,115.4,16768189716,16768189716,garmin_ping_476772433908,False,0,0,False,65.0,800m Repeats with Runna ‚úÖ\n\n≈öwiate≈Çko w tunel...,769.0,,,"[{'id': 3398749956235867394, 'resource_state':...","[{'distance': 1001.6, 'elapsed_time': 348, 'el...","[{'distance': 1611.1, 'elapsed_time': 553, 'el...","[{'id': 55894421952, 'resource_state': 2, 'nam...","[{'id': 65803499354, 'resource_state': 2, 'nam...","[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Forerunner 970,7f23ab1632af6dddee5e1af4bf0e9ed03591448b,"[heartrate, pace, power]",81055898,1,a15697639810,}{|vHgqogBf@xA^nAHNHBHCV[NKB@PZJHJDNANKr@y@ZYR...,3,ss{vHo~ngBH\Tn@Nt@RV|@vBb@~@X`Ab@lAxAxJZjAzArE...,False,ASICS Novablast 5,,2.0,False,268313.0,268.3,,0,1.0,2.976058,2.976058,2.976058,2.976058,,,[2.9760575793184487],0.0,2.976058,2.976058,2.976058,0.0,2.0,
1,3,Evening Ride,14177.5,3095,3461,53.0,Ride,Ride,,15686796063,2025-09-03T17:34:35Z,2025-09-03T19:34:35Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,1,7,0,2,0,False,False,False,False,followers_only,False,b12572672,"[51.107091, 17.124618]","[51.108163, 17.123457]",4.581,13.54,,100.9,,,False,312.3,True,103.9,155.0,False,True,127.0,114.4,16756458099,16756458099,garmin_ping_476437881496,False,0,0,False,9.0,Szybki test nowego setupuüóø,274.0,,,"[{'id': 3398400794639925554, 'resource_state':...","[{'distance': 1000.5, 'elapsed_time': 315, 'el...","[{'distance': 1613.0, 'elapsed_time': 454, 'el...","[{'id': 55856340694, 'resource_state': 2, 'nam...",,"[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Edge 840,dcab37d8f594ea69e901d6eb3b68c52d552a744d,"[heartrate, power]",81055898,1,a15686796063,iz|vHysogB^g@FEF?f@hAHDX@BBF^`@x@Pn@T\D@jAiAh@...,3,ok}vHw_ngBUZ[n@cAbBaAzAa@r@a@j@q@pAe@p@}@`BIHs...,False,Cube Nuroad Pro,Cube Nuroad Pro,2.0,False,3625151.0,3625.2,,0,,,,,,,,,,,,,,,19.0
2,3,Afternoon Weight Training,0.0,4071,4071,0.0,Workout,WeightTraining,,15683435146,2025-09-03T12:57:35Z,2025-09-03T14:57:35Z,(GMT+02:00) Africa/Blantyre,7200.0,,,,0,5,0,1,0,True,False,False,False,followers_only,False,,[],[],0.0,0.0,,,,,,,True,97.9,137.0,False,True,0.0,0.0,16752910671,16752910671,garmin_ping_476360234705,False,0,0,False,9.0,Reska8Ô∏è‚É£4Ô∏è‚É£,330.0,,,[],,,"[{'id': 55843284355, 'resource_state': 2, 'nam...",,"[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Forerunner 970,af5d26a5609de65ebdc6f37c9c01da44505d3187,[heartrate],81055898,1,a15683435146,,3,,,,,,,,,,0,,,,,,,,,,,,,,,
3,3,Afternoon Ride,11833.2,1778,7158,32.0,Ride,Ride,10.0,15683575988,2025-09-03T12:37:26Z,2025-09-03T14:37:26Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,1,7,0,1,0,False,True,False,False,followers_only,False,b12572672,"[51.107588, 17.123967]","[51.108269, 17.123584]",6.655,10.04,,158.1,,,False,281.0,True,134.1,149.0,False,True,130.4,117.2,16753067661,16753067661,garmin_ping_476364154361,False,0,0,False,13.0,Reska dojazdü´°,312.0,,,"[{'id': 3398333712248566118, 'resource_state':...","[{'distance': 1004.6, 'elapsed_time': 176, 'el...","[{'distance': 1615.6, 'elapsed_time': 265, 'el...","[{'id': 55843769122, 'resource_state': 2, 'nam...",,"[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Edge 840,44142241968bbc7953577176cb7320709c13ccaa,"[heartrate, power]",81055898,1,a15683575988,k}|vHwoogBXa@DOBCD@X`Ax@xBHDFAb@WH@V^FFHBJARKX...,3,{t{vHedogBZpC~@bDd@fANRPh@z@tBf@`Bh@vCV|B^tBT|...,False,Cube Nuroad Pro,Cube Nuroad Pro,2.0,False,3625151.0,3625.2,,0,,,,,,,,,,,,,,,26.0
4,3,Afternoon Walk,5062.4,3664,3840,12.0,Walk,Walk,,15660079944,2025-09-01T14:19:56Z,2025-09-01T16:19:56Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,0,8,0,2,0,False,False,False,False,followers_only,False,,"[51.107714, 17.124284]","[51.108194, 17.125157]",1.382,1.88,52.4,,,,,,True,81.2,108.0,False,True,124.2,116.2,16726897935,16726897935,garmin_ping_475670331039,False,0,0,False,5.0,"Mia≈Çem i≈õƒá pobiegaƒá, ale mi siƒô nie chcia≈Çoü´∂üèª",325.0,,,"[{'id': 3397619569256619512, 'resource_state':...","[{'distance': 1001.3, 'elapsed_time': 906, 'el...","[{'distance': 1612.2, 'elapsed_time': 1357, 'e...","[{'id': 55760693891, 'resource_state': 2, 'nam...",,"[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Forerunner 970,3034afa3a7e9e16959b396df1d06c6260c8db279,[heartrate],81055898,1,a15660079944,e~|vHwqogB@XFFLOHGTEL@f@vA`@t@F^JRa@l@UPm@g@`@...,3,wh|vHavogBPl@HPLL^DzAY\ENBLN`@n@zA`DXr@DRDj@Gf...,,,,,,,,,0,,,,,,,,,,,,,,,


### Requesing kudos related to activities

In [17]:
print('Requesting Token...\n')
res = requests.post(auth_url, data=payload, verify=False) # auth_url & payload referenced in code already above
access_token = res.json()['access_token']
header = {'Authorization': 'Bearer ' + access_token}
print('Access Token = {}\n'.format(access_token))

all_activities_kudos = []
count = 0

print('Calculating time to get all activities details....\n')
print(all_activities_df.shape[0] * 8 / 60, ' minutes to obtain data')

print('Getting details of each activity')
for index, row in tqdm(all_activities_df.iterrows(), total=all_activities_df.shape[0]):
    get_kudos_url = 'https://www.strava.com/api/v3/activities/{}/kudos'.format(row['id'])
    try:
        activity_kudos = requests.get(get_kudos_url, headers=header).json()
        if len(activity_kudos) > 0:
            for kudos in activity_kudos:
                kudos['activity_id'] = row['id']
                all_activities_kudos.extend(activity_kudos)
        time.sleep(random.randint(7, 9))
    except requests.exception.RequestException as e:
        raise SystemExit(e)
    
all_activities_kudos_df = pd.json_normalize(all_activities_kudos, sep='_')

Requesting Token...

Access Token = ab4ca255a0f315aaa84ce37fc18df0b666ca53dd

Calculating time to get all activities details....

0.6666666666666666  minutes to obtain data
Getting details of each activity


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:39<00:00,  7.87s/it]


In [18]:
all_activities_kudos_df

Unnamed: 0,resource_state,firstname,lastname,activity_id
0,2,Jacek,S.,15697639810
1,2,Kacper,G.,15697639810
2,2,Wies≈Çawa,C.,15697639810
3,2,Filip,C.,15697639810
4,2,Karolina,C.,15697639810
...,...,...,...,...
218,2,Ola,≈Å.,15660079944
219,2,Filip,C.,15660079944
220,2,Mal,C.,15660079944
221,2,Wies≈Çawa,C.,15660079944


### Load data to PostgreSQL

In [20]:
with engine.begin() as conn:
    conn.exec_driver_sql("CREATE SCHEMA IF NOT EXISTS bronze;")

activities_dtype_map = {
    "resource_state": Integer,
    "name": Text,
    "distance": Float,
    "moving_time": Integer,
    "elapsed_time": Integer,
    "total_elevation_gain": Float,
    "type": Text,
    "sport_type": Text,
    "workout_type": Float,
    "id": BigInteger,
    "start_date": Text,
    "start_date_local": Text,
    "timezone": Text,
    "utc_offset": Float,
    "location_city": Text,
    "location_state": Text,
    "location_country": Text,
    "achievement_count": Integer,
    "kudos_count": Integer,
    "comment_count": Integer,
    "athlete_count": Integer,
    "photo_count": Integer,
    "trainer": Boolean,
    "commute": Boolean,
    "manual": Boolean,
    "private": Boolean,
    "visibility": Text,
    "flagged": Boolean,
    "gear_id": Text,
    "start_latlng": JSONB,
    "end_latlng": JSONB,
    "average_speed": Float,
    "max_speed": Float,
    "average_cadence": Float,
    "average_watts": Float,
    "max_watts": Float,
    "weighted_average_watts": Float,
    "device_watts": Boolean,
    "kilojoules": Float,
    "has_heartrate": Boolean,
    "average_heartrate": Float,
    "max_heartrate": Float,
    "heartrate_opt_out": Boolean,
    "display_hide_heartrate_option": Boolean,
    "elev_high": Float,
    "elev_low": Float,
    "upload_id": BigInteger,
    "upload_id_str": Text,
    "external_id": Text,
    "from_accepted_tag": Boolean,
    "pr_count": Integer,
    "total_photo_count": Integer,
    "has_kudoed": Boolean,
    "suffer_score": Float,
    "athlete_id": BigInteger,
    "athlete_resource_state": Integer,
    "map_id": Text,
    "map_summary_polyline": Text,
    "map_resource_state": Integer,
    "average_temp": Float,
}

activities_details_dtype_map = {
    "resource_state": Integer,
    "name": Text,
    "distance": Float,
    "moving_time": Integer,
    "elapsed_time": Integer,
    "total_elevation_gain": Float,
    "type": Text,
    "sport_type": Text,
    "workout_type": Float,
    "id": BigInteger,
    "start_date": Text,
    "start_date_local": Text,
    "timezone": Text,
    "utc_offset": Float,
    "location_city": Text,
    "location_state": Text,
    "location_country": Text,
    "achievement_count": Integer,
    "kudos_count": Integer,
    "comment_count": Integer,
    "athlete_count": Integer,
    "photo_count": Integer,
    "trainer": Boolean,
    "commute": Boolean,
    "manual": Boolean,
    "private": Boolean,
    "visibility": Text,
    "flagged": Boolean,
    "gear_id": Text,
    "start_latlng": JSONB,
    "end_latlng": JSONB,
    "average_speed": Float,
    "max_speed": Float,
    "average_cadence": Float,
    "average_watts": Float,
    "max_watts": Float,
    "weighted_average_watts": Float,
    "device_watts": Boolean,
    "kilojoules": Float,
    "has_heartrate": Boolean,
    "average_heartrate": Float,
    "max_heartrate": Float,
    "heartrate_opt_out": Boolean,
    "display_hide_heartrate_option": Boolean,
    "elev_high": Float,
    "elev_low": Float,
    "upload_id": BigInteger,
    "upload_id_str": Text,
    "external_id": Text,
    "from_accepted_tag": Boolean,
    "pr_count": Integer,
    "total_photo_count": Integer,
    "has_kudoed": Boolean,
    "suffer_score": Float,
    "description": Text,
    "calories": Float,
    "perceived_exertion": Text,
    "prefer_perceived_exertion": Text,
    "segment_efforts": JSONB,
    "splits_metric": JSONB,
    "splits_standard": JSONB,
    "laps": JSONB,
    "best_efforts": JSONB,
    "stats_visibility": JSONB,
    "hide_from_home": Boolean,
    "device_name": Text,
    "embed_token": Text,
    "available_zones": JSONB,
    "athlete_id": BigInteger,
    "athlete_resource_state": Integer,
    "map_id": Text,
    "map_polyline": Text,
    "map_resource_state": Integer,
    "map_summary_polyline": Text,
    "gear_primary": Boolean,
    "gear_name": Text,
    "gear_nickname": Text,
    "gear_resource_state": Float,
    "gear_retired": Boolean,
    "gear_distance": Float,
    "gear_converted_distance": Float,
    "photos_primary": JSONB,
    "photos_count": Integer,
    "similar_activities_effort_count": Float,
    "similar_activities_average_speed": Float,
    "similar_activities_min_average_speed": Float,
    "similar_activities_mid_average_speed": Float,
    "similar_activities_max_average_speed": Float,
    "similar_activities_pr_rank": Float,
    "similar_activities_frequency_milestone": Float,
    "similar_activities_trend_speeds": JSONB,
    "similar_activities_trend_current_activity_index": Float,
    "similar_activities_trend_min_speed": Float,
    "similar_activities_trend_mid_speed": Float,
    "similar_activities_trend_max_speed": Float,
    "similar_activities_trend_direction": Float,
    "similar_activities_resource_state": Float,
    "average_temp": Float,
}

activities_kudos_dtype_map = {
    "resource_state": Integer,
    "firstname": Text,
    "lastname": Text,
    "activity_id": BigInteger,
}


all_activities_df.to_sql(
    name="activities",
    schema="bronze",
    con=engine,
    if_exists="replace",
    index=False,
    dtype=activities_dtype_map,
    method="multi",
    chunksize=1000
)

all_activities_details_df.to_sql(
    name="activities_details",
    schema="bronze",
    con=engine,
    if_exists="replace",
    index=False,
    dtype=activities_details_dtype_map,
    method="multi",
    chunksize=1000
)

all_activities_kudos_df.to_sql(
    name="activities_kudos",
    schema="bronze",
    con=engine,
    if_exists="replace",
    index=False,
    dtype=activities_kudos_dtype_map,
    method="multi",
    chunksize=1000
)

-1

### All activities Dataframe

In [331]:
all_activities_df.head()

Unnamed: 0,resource_state,athlete,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,start_date,start_date_local,timezone,utc_offset,location_city,location_state,location_country,achievement_count,kudos_count,comment_count,athlete_count,photo_count,map,trainer,commute,manual,private,visibility,flagged,gear_id,start_latlng,end_latlng,average_speed,max_speed,average_temp,average_watts,device_watts,kilojoules,has_heartrate,average_heartrate,max_heartrate,heartrate_opt_out,display_hide_heartrate_option,elev_high,elev_low,upload_id,upload_id_str,external_id,from_accepted_tag,pr_count,total_photo_count,has_kudoed,suffer_score,average_cadence,max_watts,weighted_average_watts
0,2,"{'id': 81055898, 'resource_state': 1}",Evening Ride,14177.5,3095,3461,53.0,Ride,Ride,,15686796063,2025-09-03T17:34:35Z,2025-09-03T19:34:35Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,1,4,0,2,0,"{'id': 'a15686796063', 'summary_polyline': 'ok...",False,False,False,False,followers_only,False,b12572672,"[51.107091, 17.124618]","[51.108163, 17.123457]",4.581,13.54,19.0,100.9,False,312.3,True,103.9,155.0,False,True,127.0,114.4,16756460000.0,16756458099,garmin_ping_476437881496,False,0,0,False,9.0,,,
1,2,"{'id': 81055898, 'resource_state': 1}",Afternoon Weight Training,0.0,4071,4071,0.0,Workout,WeightTraining,,15683435146,2025-09-03T12:57:35Z,2025-09-03T14:57:35Z,(GMT+02:00) Africa/Blantyre,7200.0,,,,0,3,0,1,0,"{'id': 'a15683435146', 'summary_polyline': '',...",True,False,False,False,followers_only,False,,[],[],0.0,0.0,,,,,True,97.9,137.0,False,True,0.0,0.0,16752910000.0,16752910671,garmin_ping_476360234705,False,0,0,False,9.0,,,
2,2,"{'id': 81055898, 'resource_state': 1}",Afternoon Ride,11833.2,1778,7158,32.0,Ride,Ride,10.0,15683575988,2025-09-03T12:37:26Z,2025-09-03T14:37:26Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,1,4,0,1,0,"{'id': 'a15683575988', 'summary_polyline': '{t...",False,True,False,False,followers_only,False,b12572672,"[51.107588, 17.123967]","[51.108269, 17.123584]",6.655,10.04,26.0,158.1,False,281.0,True,134.1,149.0,False,True,130.4,117.2,16753070000.0,16753067661,garmin_ping_476364154361,False,0,0,False,13.0,,,
3,2,"{'id': 81055898, 'resource_state': 1}",Afternoon Walk,5062.4,3664,3840,12.0,Walk,Walk,,15660079944,2025-09-01T14:19:56Z,2025-09-01T16:19:56Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,0,8,0,2,0,"{'id': 'a15660079944', 'summary_polyline': 'wh...",False,False,False,False,followers_only,False,,"[51.107714, 17.124284]","[51.108194, 17.125157]",1.382,1.88,,,,,True,81.2,108.0,False,True,124.2,116.2,16726900000.0,16726897935,garmin_ping_475670331039,False,0,0,False,5.0,52.4,,
4,2,"{'id': 81055898, 'resource_state': 1}",Afternoon Ride,24309.7,5770,7708,63.0,Ride,Ride,10.0,15650786884,2025-08-31T13:43:42Z,2025-08-31T15:43:42Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,11,13,0,2,0,"{'id': 'a15650786884', 'summary_polyline': 'ql...",False,False,False,False,followers_only,False,b12572672,"[51.10669, 17.123393]","[51.107619, 17.124172]",4.213,6.66,23.0,82.4,False,475.7,True,93.8,119.0,False,True,125.2,113.2,16716610000.0,16716611559,garmin_ping_475336836414,False,4,0,False,12.0,,,


### All activities with details Dataframe

In [None]:
all_activities_details_df = pd.json_normalize(all_activities_details, sep='.')
all_activities_details_df.head()

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,start_date,start_date_local,timezone,utc_offset,location_city,location_state,location_country,achievement_count,kudos_count,comment_count,athlete_count,photo_count,trainer,commute,manual,private,visibility,flagged,gear_id,start_latlng,end_latlng,average_speed,max_speed,average_temp,average_watts,device_watts,kilojoules,has_heartrate,average_heartrate,max_heartrate,heartrate_opt_out,display_hide_heartrate_option,elev_high,elev_low,upload_id,upload_id_str,external_id,from_accepted_tag,pr_count,total_photo_count,has_kudoed,suffer_score,description,calories,perceived_exertion,prefer_perceived_exertion,segment_efforts,splits_metric,splits_standard,laps,stats_visibility,hide_from_home,device_name,embed_token,available_zones,athlete.id,athlete.resource_state,map.id,map.polyline,map.resource_state,map.summary_polyline,gear.id,gear.primary,gear.name,gear.nickname,gear.resource_state,gear.retired,gear.distance,gear.converted_distance,photos.primary,photos.count,average_cadence,max_watts,weighted_average_watts,best_efforts,similar_activities.effort_count,similar_activities.average_speed,similar_activities.min_average_speed,similar_activities.mid_average_speed,similar_activities.max_average_speed,similar_activities.pr_rank,similar_activities.frequency_milestone,similar_activities.trend.speeds,similar_activities.trend.current_activity_index,similar_activities.trend.min_speed,similar_activities.trend.mid_speed,similar_activities.trend.max_speed,similar_activities.trend.direction,similar_activities.resource_state,photos.primary.unique_id,photos.primary.urls.600,photos.primary.urls.100,photos.primary.source,photos.primary.media_type,photos.use_primary_photo
0,3,Evening Ride,14177.5,3095,3461,53.0,Ride,Ride,,15686796063,2025-09-03T17:34:35Z,2025-09-03T19:34:35Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,1,4,0,2,0,False,False,False,False,followers_only,False,b12572672,"[51.107091, 17.124618]","[51.108163, 17.123457]",4.581,13.54,19.0,100.9,False,312.3,True,103.9,155.0,False,True,127.0,114.4,16756458099,16756458099,garmin_ping_476437881496,False,0,0,False,9.0,Szybki test nowego setupuüóø,274.0,,,"[{'id': 3398400794639925554, 'resource_state':...","[{'distance': 1000.5, 'elapsed_time': 315, 'el...","[{'distance': 1613.0, 'elapsed_time': 454, 'el...","[{'id': 55856340694, 'resource_state': 2, 'nam...","[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Edge 840,dcab37d8f594ea69e901d6eb3b68c52d552a744d,"[heartrate, power]",81055898,1,a15686796063,iz|vHysogB^g@FEF?f@hAHDX@BBF^`@x@Pn@T\D@jAiAh@...,3,ok}vHw_ngBUZ[n@cAbBaAzAa@r@a@j@q@pAe@p@}@`BIHs...,b12572672,False,Cube Nuroad Pro,Cube Nuroad Pro,2.0,False,3625151.0,3625.2,,0,,,,,,,,,,,,,,,,,,,,,,,,
1,3,Afternoon Weight Training,0.0,4071,4071,0.0,Workout,WeightTraining,,15683435146,2025-09-03T12:57:35Z,2025-09-03T14:57:35Z,(GMT+02:00) Africa/Blantyre,7200.0,,,,0,3,0,1,0,True,False,False,False,followers_only,False,,[],[],0.0,0.0,,,,,True,97.9,137.0,False,True,0.0,0.0,16752910671,16752910671,garmin_ping_476360234705,False,0,0,False,9.0,Reska8Ô∏è‚É£4Ô∏è‚É£,330.0,,,[],,,"[{'id': 55843284355, 'resource_state': 2, 'nam...","[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Forerunner 970,af5d26a5609de65ebdc6f37c9c01da44505d3187,[heartrate],81055898,1,a15683435146,,3,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,
2,3,Afternoon Ride,11833.2,1778,7158,32.0,Ride,Ride,10.0,15683575988,2025-09-03T12:37:26Z,2025-09-03T14:37:26Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,1,4,0,1,0,False,True,False,False,followers_only,False,b12572672,"[51.107588, 17.123967]","[51.108269, 17.123584]",6.655,10.04,26.0,158.1,False,281.0,True,134.1,149.0,False,True,130.4,117.2,16753067661,16753067661,garmin_ping_476364154361,False,0,0,False,13.0,Reska dojazdü´°,312.0,,,"[{'id': 3398333712248566118, 'resource_state':...","[{'distance': 1004.6, 'elapsed_time': 176, 'el...","[{'distance': 1615.6, 'elapsed_time': 265, 'el...","[{'id': 55843769122, 'resource_state': 2, 'nam...","[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Edge 840,44142241968bbc7953577176cb7320709c13ccaa,"[heartrate, power]",81055898,1,a15683575988,k}|vHwoogBXa@DOBCD@X`Ax@xBHDFAb@WH@V^FFHBJARKX...,3,{t{vHedogBZpC~@bDd@fANRPh@z@tBf@`Bh@vCV|B^tBT|...,b12572672,False,Cube Nuroad Pro,Cube Nuroad Pro,2.0,False,3625151.0,3625.2,,0,,,,,,,,,,,,,,,,,,,,,,,,
3,3,Afternoon Walk,5062.4,3664,3840,12.0,Walk,Walk,,15660079944,2025-09-01T14:19:56Z,2025-09-01T16:19:56Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,0,8,0,2,0,False,False,False,False,followers_only,False,,"[51.107714, 17.124284]","[51.108194, 17.125157]",1.382,1.88,,,,,True,81.2,108.0,False,True,124.2,116.2,16726897935,16726897935,garmin_ping_475670331039,False,0,0,False,5.0,"Mia≈Çem i≈õƒá pobiegaƒá, ale mi siƒô nie chcia≈Çoü´∂üèª",325.0,,,"[{'id': 3397619569256619512, 'resource_state':...","[{'distance': 1001.3, 'elapsed_time': 906, 'el...","[{'distance': 1612.2, 'elapsed_time': 1357, 'e...","[{'id': 55760693891, 'resource_state': 2, 'nam...","[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Forerunner 970,3034afa3a7e9e16959b396df1d06c6260c8db279,[heartrate],81055898,1,a15660079944,e~|vHwqogB@XFFLOHGTEL@f@vA`@t@F^JRa@l@UPm@g@`@...,3,wh|vHavogBPl@HPLL^DzAY\ENBLN`@n@zA`DXr@DRDj@Gf...,,,,,,,,,,0,52.4,,,,,,,,,,,,,,,,,,,,,,,
4,3,Afternoon Ride,24309.7,5770,7708,63.0,Ride,Ride,10.0,15650786884,2025-08-31T13:43:42Z,2025-08-31T15:43:42Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,11,13,0,2,0,False,False,False,False,followers_only,False,b12572672,"[51.10669, 17.123393]","[51.107619, 17.124172]",4.213,6.66,23.0,82.4,False,475.7,True,93.8,119.0,False,True,125.2,113.2,16716611559,16716611559,garmin_ping_475336836414,False,4,0,False,12.0,Po cynamonkƒô na ostatni dzie≈Ñ wakacjiüá∏üá™,419.0,,,"[{'id': 3397265934815032876, 'resource_state':...","[{'distance': 1000.5, 'elapsed_time': 244, 'el...","[{'distance': 1612.0, 'elapsed_time': 369, 'el...","[{'id': 55728953191, 'resource_state': 2, 'nam...","[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Edge 840,bca876417efecf6632e084cb7f69e85037ab07e6,"[heartrate, power]",81055898,1,a15650786884,yw|vHelogBDAH@`@h@JBBEHCf@q@r@q@LIZk@XYPYb@e@f...,3,ql}vHm~mgB{BnD_IfNyAtB_BbCEReAnBsBdDyAjBDZjAdC...,b12572672,False,Cube Nuroad Pro,Cube Nuroad Pro,2.0,False,3625151.0,3625.2,,0,,,,,,,,,,,,,,,,,,,,,,,,


### Separate tables setup

In [310]:
dataframe_columns = {
  'activities' : [
    'id',
    'name',
    'distance',
    'moving_time',
    'elapsed_time',
    'total_elevation_gain',
    'type',
    'sport_type',
    'workout_type',
    'start_date',
    'start_date_local',
    'timezone',
    'utc_offset',
    'location_city',
    'location_state',
    'location_country',
    'achievement_count',
    'kudos_count',
    'comment_count',
    'athlete_count',
    'photo_count',
    'trainer',
    'commute',
    'manual',
    'private',
    'visibility',
    'flagged',
    'gear_id',
    'start_latlng',
    'end_latlng',
    'average_speed',
    'max_speed',
    'average_cadence',
    'average_watts',
    'max_watts',
    'weighted_average_watts',
    'device_watts',
    'kilojoules',
    'has_heartrate',
    'average_heartrate',
    'max_heartrate',
    'heartrate_opt_out',
    'display_hide_heartrate_option',
    'elev_high',
    'elev_low',
    'upload_id',
    'upload_id_str',
    'external_id',
    'from_accepted_tag',
    'pr_count',
    'total_photo_count',
    'has_kudoed',
    'suffer_score',
    'description',
    'calories',
    'perceived_exertion',
    'prefer_perceived_exertion',
    'hide_from_home',
    'device_name',
    'embed_token',
    'available_zones',
    'map_id',
    'gear_id'],
  'maps' : [
    'map_id',
    'map_polyline',
    'map_resource_state',
    'map_summary_polyline'],
  'gear' : [
    'gear_id',
    'gear_primary',
    'gear_name',
    'gear_nickname',
    'gear_resource_state',
    'gear_retired',
    'gear_distance',
    'gear_converted_distance'],
  'segment_efforts' : [
    'id',
    'resource_state',
    'name',
    'elapsed_time',
    'moving_time',
    'start_date',
    'start_date_local',
    'distance',
    'start_index',
    'end_index',
    'average_cadence',
    'device_watts',
    'average_watts',
    'average_heartrate',
    'max_heartrate',
    'pr_rank',
    'achievements',
    'visibility',
    'kom_rank',
    'hidden',
    'activity_id',
    'segment_id'],
  'segments' : [
    'segment_id',
    'segment_resource_state',
    'segment_name',
    'segment_activity_type',
    'segment_distance',
    'segment_average_grade',
    'segment_maximum_grade',
    'segment_elevation_high',
    'segment_elevation_low',
    'segment_start_latlng',
    'segment_end_latlng',
    'segment_elevation_profile',
    'segment_elevation_profiles',
    'segment_climb_category',
    'segment_city',
    'segment_state',
    'segment_country',
    'segment_private',
    'segment_hazardous',
    'segment_starred'],
  'laps' : [
    'id',
    'resource_state',
    'name',
    'elapsed_time',
    'moving_time',
    'start_date',
    'start_date_local',
    'distance',
    'average_speed',
    'max_speed',
    'lap_index',
    'split',
    'start_index',
    'end_index',
    'total_elevation_gain',
    'average_cadence',
    'device_watts',
    'average_watts',
    'average_heartrate',
    'max_heartrate',
    'pace_zone',
    'activity_id'],
  'best_efforts' : [
    'id',
    'activity_id',
    'resource_state',
    'name',
    'elapsed_time',
    'moving_time',
    'start_date',
    'start_date_local',
    'distance',
    'pr_rank',
    'achievements',
    'start_index',
    'end_index']
}

### Spliting data into tables

In [332]:
# Top level normalize
all_activities_details_normalized_df = pd.json_normalize(all_activities_details, sep='_')

# Activities
activities_cols = dataframe_columns['activities']
activities_df = all_activities_details_normalized_df[[c for c in activities_cols if c in all_activities_details_normalized_df.columns]].copy()

# Maps
maps_cols = dataframe_columns['maps']
maps_df = all_activities_details_normalized_df[[c for c in maps_cols if c in all_activities_details_normalized_df.columns]].copy()

# Gear
gear_cols = dataframe_columns['gear']
gear_df = all_activities_details_normalized_df[[c for c in gear_cols if c in all_activities_details_normalized_df.columns]].copy()

# Segment efforts
seg_eff_cols = dataframe_columns['segment_efforts']
segments_eff_df = pd.json_normalize(
    all_activities_details,
    record_path='segment_efforts',
    sep='_'
)
if not segments_eff_df.empty:
    segments_eff_df = segments_eff_df[[c for c in seg_eff_cols if c in segments_eff_df.columns]].copy()

# Segments
seg_cols = dataframe_columns['segments']
segments_df = pd.json_normalize(
    all_activities_details,
    record_path='segment_efforts',
    sep="_"
)

if not segments_df.empty:
    segments_df = segments_df[[c for c in seg_cols if c in segments_df.columns]].copy()

# Laps
lap_cols = dataframe_columns['laps']
laps_df = pd.json_normalize(
    all_activities_details,
    record_path='laps',
    sep="_"
)
if not laps_df.empty:
    laps_df = laps_df[[c for c in lap_cols if c in laps_df.columns]]

# Best efforts
best_eff_cols = dataframe_columns['best_efforts']
activities_with_best_eff = [row for row in all_activities_details if row.get('best_efforts')]
best_efforts_df = pd.json_normalize(
    activities_with_best_eff,
    record_path='best_efforts',
    sep="_",
)

if not best_efforts_df.empty:
    best_efforts_df = best_efforts_df[[c for c in best_eff_cols if c in best_efforts_df.columns]]

# All dataframes in dictoinary
dataframes = {
    "activities": activities_df,
    "maps": maps_df,
    "gear": gear_df,
    "segment_efforts": segments_eff_df,
    "segments": segments_df,
    "laps": laps_df,
    "best_efforts": best_efforts_df
}

### Data cleanup