### Libraries import

In [1]:
from dotenv import load_dotenv
import os
import requests
import urllib3
import pandas as pd
from tqdm import tqdm
import time
import random
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
pd.set_option('display.max_columns', None)

In [2]:
from sqlalchemy import Text, Integer, BigInteger, Float, Boolean
from sqlalchemy import create_engine
from sqlalchemy.dialects.postgresql import JSONB

### Load evn variables

In [18]:
load_dotenv()

True

### Strava API setup

In [3]:
auth_url = 'https://www.strava.com/oauth/token'
activites_url = 'https://www.strava.com/api/v3/athlete/activities'

payload = {
    'client_id': os.getenv('CLIENT_ID'),
    'client_secret': os.getenv('CLIENT_SECRET'),
    'refresh_token': os.getenv('REFRESH_TOKEN'),
    'grant_type': 'refresh_token',
    'f': 'json'
}

### SQL setup

In [17]:
engine = create_engine(os.getenv('DB_URI'))

### Requesing list of all activities

In [5]:
print('Requesting Token...\n')
res = requests.post(auth_url, data=payload, verify=False)
access_token = res.json()['access_token']

print('Access Token = {}\n'.format(access_token))
header = {'Authorization': 'Bearer ' + access_token}

# The first loop, request_page_number will be set to one, so it requests the first page. Increment this number after
# each request, so the next time we request the second page, then third, and so on...
request_page_num = 1
all_activities = []

while True:
    param = {'per_page': 200, 'page': request_page_num}
    # initial request, where we request the first page of activities
    my_dataset = requests.get(activites_url, headers=header, params=param).json()

    # check the response to make sure it is not empty. If it is empty, that means there is no more data left. So if you have
    # 1000 activities, on the 6th request, where we request page 6, there would be no more data left, so we will break out of the loop
    if len(my_dataset) == 0:
        print('breaking out of while loop because the response is zero, which means there must be no more activities')
        break

    # if the all_activities list is already populated, that means we want to add additional data to it via extend.
    if all_activities:
        print('all_activities is populated')
        all_activities.extend(my_dataset)

    # if the all_activities is empty, this is the first time adding data so we just set it equal to my_dataset
    else:
        print('all_activities is NOT populated')
        all_activities = my_dataset

    request_page_num += 1

print(len(all_activities))
for count, activity in enumerate(all_activities):
    print(activity['name'])
    print(count)

all_activities_df = pd.json_normalize(all_activities, sep='_')

Requesting Token...





Access Token = 62885dc0d0678b8b2fadd0eb5bb460f19d28095d

all_activities is NOT populated
all_activities is populated
all_activities is populated
all_activities is populated
all_activities is populated
all_activities is populated
breaking out of while loop because the response is zero, which means there must be no more activities
1085
Lunch Ride
0
24km Race Practice Long Runüî©
1
Evening Ride
2
Afternoon Weight Training
3
Afternoon Ride
4
800m RepeatsüöÄ
5
Evening Ride
6
Afternoon Weight Training
7
Afternoon Ride
8
Afternoon Walk
9
Afternoon Ride
10
24km Race Practice Long Runü™¶
11
Coffee Race 2025‚òïÔ∏è
12
9km Easy RunüåÖ
13
Afternoon Weight Training
14
Afternoon Ride
15
Rolling 300sü™¶
16
Afternoon Weight Training
17
Progressive Runü™¶
18
22km Long Runüóø
19
Afternoon Weight Training
20
Evening Ride
21
400m RepeatsüòÆ‚Äçüí®
22
9km Easy Run‚ú®
23
Afternoon Weight Training
24
Afternoon Ride
25
Afternoon Ride
26
11km Long Runüóø
27
Evening Ride
28
Morning Ride
29
Afternoon Wal

### Limiting size of data download

In [6]:
top_n = 5
all_activities_df = all_activities_df.iloc[:top_n]

In [7]:
all_activities_df.head()

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,has_kudoed,suffer_score,athlete_id,athlete_resource_state,map_id,map_summary_polyline,map_resource_state,average_cadence,max_watts,weighted_average_watts
0,2,Lunch Ride,79588.5,11082,14430,202.0,Ride,Ride,,15729456618,...,False,53.0,81055898,1,a15729456618,kv}vHw~mgBqMlTwFbHa@pA`LzVaAzNy@`Pf@bZFtYrAfPC...,2,,,
1,2,24km Race Practice Long Runüî©,24120.3,8004,8085,56.0,Run,Run,2.0,15716821076,...,False,165.0,81055898,1,a15716821076,{`|vHsrogBz@fBdAbCDX?t@Ob@aBbCmCfD{JnKuChDuBxC...,2,84.8,581.0,375.0
2,2,Evening Ride,16823.7,3683,6122,47.0,Ride,Ride,,15708639235,...,False,9.0,81055898,1,a15708639235,kt}vHkangBkAhBKh@_@x@_CnDa@v@}@|AiA`B}@|A{B~Cw...,2,,,
3,2,Afternoon Weight Training,0.0,3713,3713,0.0,Workout,WeightTraining,,15705468575,...,False,8.0,81055898,1,a15705468575,,2,,,
4,2,Afternoon Ride,13045.3,1871,6563,44.0,Ride,Ride,10.0,15705659558,...,False,13.0,81055898,1,a15705659558,yv}vH_~mgBmFfJeAvAm@fAgC~DqAxAcAtAu@z@g@v@ELDX...,2,,,


### Requesing list of all activities with details

In [8]:
print('Requesting Token...\n')
res = requests.post(auth_url, data=payload, verify=False) # auth_url & payload referenced in code already above
access_token = res.json()['access_token']
header = {'Authorization': 'Bearer ' + access_token}
print('Access Token = {}\n'.format(access_token))

all_activities_details = []
count = 0

print('Calculating time to get all activities details....\n')
print(all_activities_df.shape[0] * 8 / 60, ' minutes to obtain data')

print('Getting details of each activity')
for index, row in tqdm(all_activities_df.iterrows(), total=all_activities_df.shape[0]):
    get_activity_url = 'https://www.strava.com/api/v3/activities/{}'.format(row['id'])
    try:
        activity_details = requests.get(get_activity_url, headers=header).json() 
        all_activities_details.append(activity_details)
        time.sleep(random.randint(7, 9))
    except requests.exception.RequestException as e:
        raise SystemExit(e)

all_activities_details_df = pd.json_normalize(all_activities_details, sep='_')

Requesting Token...





Access Token = 62885dc0d0678b8b2fadd0eb5bb460f19d28095d

Calculating time to get all activities details....

0.6666666666666666  minutes to obtain data
Getting details of each activity


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:47<00:00,  9.55s/it]


In [9]:
all_activities_details_df

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,similar_activities_trend_mid_speed,similar_activities_trend_max_speed,similar_activities_trend_direction,similar_activities_resource_state,photos_primary_unique_id,photos_primary_urls_600,photos_primary_urls_100,photos_primary_source,photos_primary_media_type,photos_use_primary_photo
0,3,Lunch Ride,79588.5,11082,14430,202.0,Ride,Ride,,15729456618,...,,,,,,,,,,
1,3,24km Race Practice Long Runüî©,24120.3,8004,8085,56.0,Run,Run,2.0,15716821076,...,3.013531,3.013531,0.0,2.0,,,,,,
2,3,Evening Ride,16823.7,3683,6122,47.0,Ride,Ride,,15708639235,...,,,,,B2CAAADF-142F-440B-B715-67CDF5940F7F,https://dgtzuqphqg23d.cloudfront.net/Zs-xsaZsY...,https://dgtzuqphqg23d.cloudfront.net/Zs-xsaZsY...,1.0,1.0,False
3,3,Afternoon Weight Training,0.0,3713,3713,0.0,Workout,WeightTraining,,15705468575,...,,,,,,,,,,
4,3,Afternoon Ride,13045.3,1871,6563,44.0,Ride,Ride,10.0,15705659558,...,,,,,,,,,,


### Requesing kudos related to activities

In [10]:
print('Requesting Token...\n')
res = requests.post(auth_url, data=payload, verify=False)
access_token = res.json()['access_token']
header = {'Authorization': 'Bearer ' + access_token}
print('Access Token = {}\n'.format(access_token))

all_activities_kudos = []
count = 0

print('Calculating time to get all activities details....\n')
print(all_activities_df.shape[0] * 8 / 60, ' minutes to obtain data')

print('Getting details of each activity')
for index, row in tqdm(all_activities_df.iterrows(), total=all_activities_df.shape[0]):
    get_kudos_url = 'https://www.strava.com/api/v3/activities/{}/kudos'.format(row['id'])
    try:
        activity_kudos = requests.get(get_kudos_url, headers=header).json()
        if len(activity_kudos) > 0:
            for kudos in activity_kudos:
                kudos['activity_id'] = row['id']
                all_activities_kudos.extend(activity_kudos)
        time.sleep(random.randint(7, 9))
    except requests.exception.RequestException as e:
        raise SystemExit(e)
    
all_activities_kudos_df = pd.json_normalize(all_activities_kudos, sep='_')

Requesting Token...





Access Token = 62885dc0d0678b8b2fadd0eb5bb460f19d28095d

Calculating time to get all activities details....

0.6666666666666666  minutes to obtain data
Getting details of each activity


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:41<00:00,  8.24s/it]


In [11]:
all_activities_kudos_df

Unnamed: 0,resource_state,firstname,lastname,activity_id
0,2,Kacper,G.,15729456618
1,2,Jan,K.,15729456618
2,2,Jacek,S.,15729456618
3,2,jakub,B.,15729456618
4,2,Ola,≈Å.,15729456618
...,...,...,...,...
641,2,Alicja,≈Å.,15705659558
642,2,Mal,C.,15705659558
643,2,Karolina,C.,15705659558
644,2,Wies≈Çawa,C.,15705659558


### Load data to PostgreSQL

In [12]:
with engine.begin() as conn:
    conn.exec_driver_sql("CREATE SCHEMA IF NOT EXISTS bronze;")

activities_dtype_map = {
    "resource_state": Integer,
    "name": Text,
    "distance": Float,
    "moving_time": Integer,
    "elapsed_time": Integer,
    "total_elevation_gain": Float,
    "type": Text,
    "sport_type": Text,
    "workout_type": Float,
    "id": BigInteger,
    "start_date": Text,
    "start_date_local": Text,
    "timezone": Text,
    "utc_offset": Float,
    "location_city": Text,
    "location_state": Text,
    "location_country": Text,
    "achievement_count": Integer,
    "kudos_count": Integer,
    "comment_count": Integer,
    "athlete_count": Integer,
    "photo_count": Integer,
    "trainer": Boolean,
    "commute": Boolean,
    "manual": Boolean,
    "private": Boolean,
    "visibility": Text,
    "flagged": Boolean,
    "gear_id": Text,
    "start_latlng": JSONB,
    "end_latlng": JSONB,
    "average_speed": Float,
    "max_speed": Float,
    "average_cadence": Float,
    "average_watts": Float,
    "max_watts": Float,
    "weighted_average_watts": Float,
    "device_watts": Boolean,
    "kilojoules": Float,
    "has_heartrate": Boolean,
    "average_heartrate": Float,
    "max_heartrate": Float,
    "heartrate_opt_out": Boolean,
    "display_hide_heartrate_option": Boolean,
    "elev_high": Float,
    "elev_low": Float,
    "upload_id": BigInteger,
    "upload_id_str": Text,
    "external_id": Text,
    "from_accepted_tag": Boolean,
    "pr_count": Integer,
    "total_photo_count": Integer,
    "has_kudoed": Boolean,
    "suffer_score": Float,
    "athlete_id": BigInteger,
    "athlete_resource_state": Integer,
    "map_id": Text,
    "map_summary_polyline": Text,
    "map_resource_state": Integer,
    "average_temp": Float,
}

activities_details_dtype_map = {
    "resource_state": Integer,
    "name": Text,
    "distance": Float,
    "moving_time": Integer,
    "elapsed_time": Integer,
    "total_elevation_gain": Float,
    "type": Text,
    "sport_type": Text,
    "workout_type": Float,
    "id": BigInteger,
    "start_date": Text,
    "start_date_local": Text,
    "timezone": Text,
    "utc_offset": Float,
    "location_city": Text,
    "location_state": Text,
    "location_country": Text,
    "achievement_count": Integer,
    "kudos_count": Integer,
    "comment_count": Integer,
    "athlete_count": Integer,
    "photo_count": Integer,
    "trainer": Boolean,
    "commute": Boolean,
    "manual": Boolean,
    "private": Boolean,
    "visibility": Text,
    "flagged": Boolean,
    "gear_id": Text,
    "start_latlng": JSONB,
    "end_latlng": JSONB,
    "average_speed": Float,
    "max_speed": Float,
    "average_cadence": Float,
    "average_watts": Float,
    "max_watts": Float,
    "weighted_average_watts": Float,
    "device_watts": Boolean,
    "kilojoules": Float,
    "has_heartrate": Boolean,
    "average_heartrate": Float,
    "max_heartrate": Float,
    "heartrate_opt_out": Boolean,
    "display_hide_heartrate_option": Boolean,
    "elev_high": Float,
    "elev_low": Float,
    "upload_id": BigInteger,
    "upload_id_str": Text,
    "external_id": Text,
    "from_accepted_tag": Boolean,
    "pr_count": Integer,
    "total_photo_count": Integer,
    "has_kudoed": Boolean,
    "suffer_score": Float,
    "description": Text,
    "calories": Float,
    "perceived_exertion": Text,
    "prefer_perceived_exertion": Text,
    "segment_efforts": JSONB,
    "splits_metric": JSONB,
    "splits_standard": JSONB,
    "laps": JSONB,
    "best_efforts": JSONB,
    "stats_visibility": JSONB,
    "hide_from_home": Boolean,
    "device_name": Text,
    "embed_token": Text,
    "available_zones": JSONB,
    "athlete_id": BigInteger,
    "athlete_resource_state": Integer,
    "map_id": Text,
    "map_polyline": Text,
    "map_resource_state": Integer,
    "map_summary_polyline": Text,
    "gear_primary": Boolean,
    "gear_name": Text,
    "gear_nickname": Text,
    "gear_resource_state": Float,
    "gear_retired": Boolean,
    "gear_distance": Float,
    "gear_converted_distance": Float,
    "photos_primary": JSONB,
    "photos_count": Integer,
    "similar_activities_effort_count": Float,
    "similar_activities_average_speed": Float,
    "similar_activities_min_average_speed": Float,
    "similar_activities_mid_average_speed": Float,
    "similar_activities_max_average_speed": Float,
    "similar_activities_pr_rank": Float,
    "similar_activities_frequency_milestone": Float,
    "similar_activities_trend_speeds": JSONB,
    "similar_activities_trend_current_activity_index": Float,
    "similar_activities_trend_min_speed": Float,
    "similar_activities_trend_mid_speed": Float,
    "similar_activities_trend_max_speed": Float,
    "similar_activities_trend_direction": Float,
    "similar_activities_resource_state": Float,
    "average_temp": Float,
}

activities_kudos_dtype_map = {
    "resource_state": Integer,
    "firstname": Text,
    "lastname": Text,
    "activity_id": BigInteger,
}


all_activities_df.to_sql(
    name="activities",
    schema="bronze",
    con=engine,
    if_exists="replace",
    index=False,
    dtype=activities_dtype_map,
    method="multi",
    chunksize=1000
)

all_activities_details_df.to_sql(
    name="activities_details",
    schema="bronze",
    con=engine,
    if_exists="replace",
    index=False,
    dtype=activities_details_dtype_map,
    method="multi",
    chunksize=1000
)

all_activities_kudos_df.to_sql(
    name="kudos",
    schema="bronze",
    con=engine,
    if_exists="replace",
    index=False,
    dtype=activities_kudos_dtype_map,
    method="multi",
    chunksize=1000
)

-1

### All activities Dataframe

In [13]:
all_activities_df.head()

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,has_kudoed,suffer_score,athlete_id,athlete_resource_state,map_id,map_summary_polyline,map_resource_state,average_cadence,max_watts,weighted_average_watts
0,2,Lunch Ride,79588.5,11082,14430,202.0,Ride,Ride,,15729456618,...,False,53.0,81055898,1,a15729456618,kv}vHw~mgBqMlTwFbHa@pA`LzVaAzNy@`Pf@bZFtYrAfPC...,2,,,
1,2,24km Race Practice Long Runüî©,24120.3,8004,8085,56.0,Run,Run,2.0,15716821076,...,False,165.0,81055898,1,a15716821076,{`|vHsrogBz@fBdAbCDX?t@Ob@aBbCmCfD{JnKuChDuBxC...,2,84.8,581.0,375.0
2,2,Evening Ride,16823.7,3683,6122,47.0,Ride,Ride,,15708639235,...,False,9.0,81055898,1,a15708639235,kt}vHkangBkAhBKh@_@x@_CnDa@v@}@|AiA`B}@|A{B~Cw...,2,,,
3,2,Afternoon Weight Training,0.0,3713,3713,0.0,Workout,WeightTraining,,15705468575,...,False,8.0,81055898,1,a15705468575,,2,,,
4,2,Afternoon Ride,13045.3,1871,6563,44.0,Ride,Ride,10.0,15705659558,...,False,13.0,81055898,1,a15705659558,yv}vH_~mgBmFfJeAvAm@fAgC~DqAxAcAtAu@z@g@v@ELDX...,2,,,


### All activities with details Dataframe

In [14]:
all_activities_details_df = pd.json_normalize(all_activities_details, sep='.')
all_activities_details_df.head()

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,similar_activities.trend.mid_speed,similar_activities.trend.max_speed,similar_activities.trend.direction,similar_activities.resource_state,photos.primary.unique_id,photos.primary.urls.600,photos.primary.urls.100,photos.primary.source,photos.primary.media_type,photos.use_primary_photo
0,3,Lunch Ride,79588.5,11082,14430,202.0,Ride,Ride,,15729456618,...,,,,,,,,,,
1,3,24km Race Practice Long Runüî©,24120.3,8004,8085,56.0,Run,Run,2.0,15716821076,...,3.013531,3.013531,0.0,2.0,,,,,,
2,3,Evening Ride,16823.7,3683,6122,47.0,Ride,Ride,,15708639235,...,,,,,B2CAAADF-142F-440B-B715-67CDF5940F7F,https://dgtzuqphqg23d.cloudfront.net/Zs-xsaZsY...,https://dgtzuqphqg23d.cloudfront.net/Zs-xsaZsY...,1.0,1.0,False
3,3,Afternoon Weight Training,0.0,3713,3713,0.0,Workout,WeightTraining,,15705468575,...,,,,,,,,,,
4,3,Afternoon Ride,13045.3,1871,6563,44.0,Ride,Ride,10.0,15705659558,...,,,,,,,,,,


### Separate tables setup

In [15]:
dataframe_columns = {
  'activities' : [
    'id',
    'name',
    'distance',
    'moving_time',
    'elapsed_time',
    'total_elevation_gain',
    'type',
    'sport_type',
    'workout_type',
    'start_date',
    'start_date_local',
    'timezone',
    'utc_offset',
    'location_city',
    'location_state',
    'location_country',
    'achievement_count',
    'kudos_count',
    'comment_count',
    'athlete_count',
    'photo_count',
    'trainer',
    'commute',
    'manual',
    'private',
    'visibility',
    'flagged',
    'gear_id',
    'start_latlng',
    'end_latlng',
    'average_speed',
    'max_speed',
    'average_cadence',
    'average_watts',
    'max_watts',
    'weighted_average_watts',
    'device_watts',
    'kilojoules',
    'has_heartrate',
    'average_heartrate',
    'max_heartrate',
    'heartrate_opt_out',
    'display_hide_heartrate_option',
    'elev_high',
    'elev_low',
    'upload_id',
    'upload_id_str',
    'external_id',
    'from_accepted_tag',
    'pr_count',
    'total_photo_count',
    'has_kudoed',
    'suffer_score',
    'description',
    'calories',
    'perceived_exertion',
    'prefer_perceived_exertion',
    'hide_from_home',
    'device_name',
    'embed_token',
    'available_zones',
    'map_id',
    'gear_id'],
  'maps' : [
    'map_id',
    'map_polyline',
    'map_resource_state',
    'map_summary_polyline'],
  'gear' : [
    'gear_id',
    'gear_primary',
    'gear_name',
    'gear_nickname',
    'gear_resource_state',
    'gear_retired',
    'gear_distance',
    'gear_converted_distance'],
  'segment_efforts' : [
    'id',
    'resource_state',
    'name',
    'elapsed_time',
    'moving_time',
    'start_date',
    'start_date_local',
    'distance',
    'start_index',
    'end_index',
    'average_cadence',
    'device_watts',
    'average_watts',
    'average_heartrate',
    'max_heartrate',
    'pr_rank',
    'achievements',
    'visibility',
    'kom_rank',
    'hidden',
    'activity_id',
    'segment_id'],
  'segments' : [
    'segment_id',
    'segment_resource_state',
    'segment_name',
    'segment_activity_type',
    'segment_distance',
    'segment_average_grade',
    'segment_maximum_grade',
    'segment_elevation_high',
    'segment_elevation_low',
    'segment_start_latlng',
    'segment_end_latlng',
    'segment_elevation_profile',
    'segment_elevation_profiles',
    'segment_climb_category',
    'segment_city',
    'segment_state',
    'segment_country',
    'segment_private',
    'segment_hazardous',
    'segment_starred'],
  'laps' : [
    'id',
    'resource_state',
    'name',
    'elapsed_time',
    'moving_time',
    'start_date',
    'start_date_local',
    'distance',
    'average_speed',
    'max_speed',
    'lap_index',
    'split',
    'start_index',
    'end_index',
    'total_elevation_gain',
    'average_cadence',
    'device_watts',
    'average_watts',
    'average_heartrate',
    'max_heartrate',
    'pace_zone',
    'activity_id'],
  'best_efforts' : [
    'id',
    'activity_id',
    'resource_state',
    'name',
    'elapsed_time',
    'moving_time',
    'start_date',
    'start_date_local',
    'distance',
    'pr_rank',
    'achievements',
    'start_index',
    'end_index']
}

### Spliting data into tables

In [16]:
# Top level normalize
all_activities_details_normalized_df = pd.json_normalize(all_activities_details, sep='_')

# Activities
activities_cols = dataframe_columns['activities']
activities_df = all_activities_details_normalized_df[[c for c in activities_cols if c in all_activities_details_normalized_df.columns]].copy()

# Maps
maps_cols = dataframe_columns['maps']
maps_df = all_activities_details_normalized_df[[c for c in maps_cols if c in all_activities_details_normalized_df.columns]].copy()

# Gear
gear_cols = dataframe_columns['gear']
gear_df = all_activities_details_normalized_df[[c for c in gear_cols if c in all_activities_details_normalized_df.columns]].copy()

# Segment efforts
seg_eff_cols = dataframe_columns['segment_efforts']
segments_eff_df = pd.json_normalize(
    all_activities_details,
    record_path='segment_efforts',
    sep='_'
)
if not segments_eff_df.empty:
    segments_eff_df = segments_eff_df[[c for c in seg_eff_cols if c in segments_eff_df.columns]].copy()

# Segments
seg_cols = dataframe_columns['segments']
segments_df = pd.json_normalize(
    all_activities_details,
    record_path='segment_efforts',
    sep="_"
)

if not segments_df.empty:
    segments_df = segments_df[[c for c in seg_cols if c in segments_df.columns]].copy()

# Laps
lap_cols = dataframe_columns['laps']
laps_df = pd.json_normalize(
    all_activities_details,
    record_path='laps',
    sep="_"
)
if not laps_df.empty:
    laps_df = laps_df[[c for c in lap_cols if c in laps_df.columns]]

# Best efforts
best_eff_cols = dataframe_columns['best_efforts']
activities_with_best_eff = [row for row in all_activities_details if row.get('best_efforts')]
best_efforts_df = pd.json_normalize(
    activities_with_best_eff,
    record_path='best_efforts',
    sep="_",
)

if not best_efforts_df.empty:
    best_efforts_df = best_efforts_df[[c for c in best_eff_cols if c in best_efforts_df.columns]]

# All dataframes in dictoinary
dataframes = {
    "activities": activities_df,
    "maps": maps_df,
    "gear": gear_df,
    "segment_efforts": segments_eff_df,
    "segments": segments_df,
    "laps": laps_df,
    "best_efforts": best_efforts_df
}

### Data cleanup