### Import and config

In [1]:
# Imports
import os
import logging
from datetime import timezone, timedelta

from dotenv import load_dotenv

import pandas as pd

from sqlalchemy import create_engine, text, Integer, Float, String, Boolean, DateTime, Interval, Text, BigInteger

# Configuration
load_dotenv()

# DB
DB_URI = os.getenv('DB_URI')

# Bronze tables
TARGET_B_SCHEMA = os.getenv('TARGET_B_SCHEMA')
ACTIVITIES_B_TABLE = os.getenv('ACTIVITIES_B_TABLE')
DETAILS_B_TABLE = os.getenv('DETAILS_B_TABLE')

# Silver tables
TARGET_S_SCHEMA = os.getenv('TARGET_S_SCHEMA')
ACTIVITIES_S_TABLE = os.getenv('ACTIVITIES_S_TABLE')
BEST_EFFORTS_S_TABLE = os.getenv('BEST_EFFORTS_S_TABLE')
GEAR_S_TABLE = os.getenv('GEAR_S_TABLE')
LAPS_S_TABLE = os.getenv('LAPS_S_TABLE')
MAPS_S_TABLE = os.getenv('MAPS_S_TABLE')
SEGMENTS_S_TABLE = os.getenv('SEGMENTS_S_TABLE')
SEGMENTS_EFFORTS_S_TABLE = os.getenv('SEGMENTS_EFFORTS_S_TABLE')

# Other
LOG_LEVEL = os.getenv('LOG_LEVEL')

logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
    format="%(asctime)s | %(levelname)s | %(message)s"
)

pd.set_option('display.max_columns', None)

### DB names validation

In [2]:
REQUIRED_DB_ENV = ['DB_URI', 'TARGET_B_SCHEMA', 'ACTIVITIES_B_TABLE', 'DETAILS_B_TABLE', 'TARGET_S_SCHEMA', 'ACTIVITIES_S_TABLE', 'BEST_EFFORTS_S_TABLE', 'GEAR_S_TABLE', 'LAPS_S_TABLE', 'MAPS_S_TABLE', 'SEGMENTS_S_TABLE', 'SEGMENTS_EFFORTS_S_TABLE']
missing_db_env = [env for env in REQUIRED_DB_ENV if not os.getenv(env)]
if missing_db_env:
  raise RuntimeError(f"Missing env variables: {', '.join(missing_db_env)}.")

### Request data from `bronze.activities_details`

In [3]:
engine = create_engine(
  DB_URI, 
  pool_pre_ping=True, 
  pool_size=5, 
  max_overflow=10
)
logging.info("Connection established")

2025-09-17 12:36:18,500 | INFO | Connection established


In [4]:
with engine.begin() as conn:
  activities_details_df = pd.read_sql(text(f"SELECT * FROM {TARGET_B_SCHEMA}.{DETAILS_B_TABLE}"), conn)
logging.info(f"Data from {TARGET_B_SCHEMA}.{DETAILS_B_TABLE} downloaded.")

2025-09-17 12:36:18,858 | INFO | Data from bronze.activities_details downloaded.


In [5]:
activities_details_df.head()

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,start_date,start_date_local,timezone,utc_offset,location_city,location_state,location_country,achievement_count,kudos_count,comment_count,athlete_count,photo_count,trainer,commute,manual,private,visibility,flagged,gear_id,start_latlng,end_latlng,average_speed,max_speed,average_cadence,average_watts,max_watts,weighted_average_watts,device_watts,kilojoules,has_heartrate,average_heartrate,max_heartrate,heartrate_opt_out,display_hide_heartrate_option,elev_high,elev_low,upload_id,upload_id_str,external_id,from_accepted_tag,pr_count,total_photo_count,has_kudoed,suffer_score,description,calories,perceived_exertion,prefer_perceived_exertion,segment_efforts,splits_metric,splits_standard,laps,best_efforts,stats_visibility,hide_from_home,device_name,embed_token,available_zones,athlete_id,athlete_resource_state,map_id,map_polyline,map_resource_state,map_summary_polyline,gear_primary,gear_name,gear_nickname,gear_resource_state,gear_retired,gear_distance,gear_converted_distance,photos_primary,photos_count,similar_activities_effort_count,similar_activities_average_speed,similar_activities_min_average_speed,similar_activities_mid_average_speed,similar_activities_max_average_speed,similar_activities_pr_rank,similar_activities_frequency_milestone,similar_activities_trend_speeds,similar_activities_trend_current_activity_index,similar_activities_trend_min_speed,similar_activities_trend_mid_speed,similar_activities_trend_max_speed,similar_activities_trend_direction,similar_activities_resource_state,average_temp,private_note,photos_primary_unique_id,photos_primary_urls_600,photos_primary_urls_100,photos_primary_source,photos_primary_media_type,photos_use_primary_photo
0,3,Lunch Weight Training,0.0,6066,6066,0.0,WeightTraining,WeightTraining,,14086094444,2025-04-05T09:39:40Z,2025-04-05T11:39:40Z,(GMT+02:00) Africa/Blantyre,7200.0,,,,0,9,0,1,0,True,False,False,False,followers_only,False,,[],[],0.0,0.0,,,,,,,True,106.9,149.0,False,True,0.0,0.0,15035940000.0,15035936606,garmin_ping_426122504181,False,0,0,False,17.0,,552.0,,False,[],,,"[{'id': 49980673377, 'name': 'Lap 1', 'split':...",,"[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Forerunner 945,82075ce6443833456de2feeb79ad874381da038b,[heartrate],81055898,1,a14086094444,,3,,,,,,,,,,0,,,,,,,,,,,,,,,27.0,,,,,,,
1,3,Evening Walk,5035.5,3637,3829,17.0,Walk,Walk,,14080030310,2025-04-04T16:37:40Z,2025-04-04T18:37:40Z,(GMT+01:00) Europe/Warsaw,7200.0,,,,0,8,0,2,0,False,False,False,False,followers_only,False,g19800575,"[51.107619, 17.124085]","[51.108098, 17.124984]",1.385,3.4,52.2,,,,,,True,94.3,107.0,False,True,125.6,114.6,15029560000.0,15029564885,garmin_ping_425899048795,False,0,0,False,7.0,Dokrƒôcanie krok√≥wüôÇ‚Äç‚ÜîÔ∏è,344.0,,False,"[{'id': 3343296484852327920, 'name': 'BƒÖczek c...","[{'split': 1, 'distance': 1000.4, 'pace_zone':...","[{'split': 1, 'distance': 1615.1, 'pace_zone':...","[{'id': 49956432295, 'name': 'Lap 1', 'split':...",,"[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Forerunner 945,33a9d220712243f8e49e182755025746b35e4118,[heartrate],81055898,1,a14080030310,q}|vHopogB?BH@VJFGD]\|@H\?HBDDAJJJd@DJFANJ@HJL...,3,mh|vH{togBP`@F\G|@GTSVGAKd@IRKJ?BOZQVMDIHCJUHG...,False,Nike Invincible Run 3 White,White,2.0,True,430102.0,430.1,,0,,,,,,,,,,,,,,,25.0,,,,,,,
2,3,Morning Run,10030.0,3953,3953,21.0,Run,Run,0.0,12956260994,2024-11-22T05:54:04Z,2024-11-22T06:54:04Z,(GMT+01:00) Europe/Warsaw,3600.0,,,,0,8,0,1,0,False,False,False,False,everyone,False,g19800575,"[51.107261, 17.123889]","[51.107572, 17.124193]",2.537,3.42,82.7,295.0,601.0,295.0,True,1166.1,True,150.5,157.0,False,True,127.6,111.2,13816150000.0,13816147384,garmin_ping_387799325704,False,0,0,False,78.0,10km Easy Run with Runna ‚úÖ\n\n10km easy run at...,787.0,,False,"[{'id': 3294936864762577520, 'name': 'Pƒôtla od...","[{'split': 1, 'distance': 1001.7, 'pace_zone':...","[{'split': 1, 'distance': 1609.9, 'pace_zone':...","[{'id': 45542468202, 'name': 'Lap 1', 'split':...","[{'id': 55038759090, 'name': '400m', 'athlete'...","[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Forerunner 945,8ba65750ff79254f8eba7f6d29358d57bc623c1a,"[heartrate, pace, power]",81055898,1,a12956260994,k{|vHgoogB^`AZd@BLHJF?DAXa@FCJJLPRPf@]^a@HQb@]...,3,uh|vHmuogB\`ABTAZGZKTe@p@a@v@c@l@KJ{@vAURcAvA}...,False,Nike Invincible Run 3 White,White,2.0,True,430102.0,430.1,,0,1.0,2.537313,2.537313,2.537313,2.537313,,,[2.537313432835821],0.0,2.537313,2.537313,2.537313,0.0,2.0,9.0,,,,,,,
3,3,Evening Weight Training,0.0,3846,3846,0.0,WeightTraining,WeightTraining,,12945894446,2024-11-20T17:02:56Z,2024-11-20T18:02:56Z,(GMT+01:00) Africa/Algiers,3600.0,,,,0,5,0,1,0,True,False,False,False,followers_only,False,,[],[],0.0,0.0,,,,,,,True,105.8,142.0,False,True,0.0,0.0,13805260000.0,13805258478,garmin_ping_387403002282,False,0,0,False,10.0,Reska5Ô∏è‚É£,367.0,,False,[],,,"[{'id': 45504588397, 'name': 'Lap 1', 'split':...",,"[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Forerunner 945,f75f171b141157b24ebe3c0237814fc4baaa88e8,[heartrate],81055898,1,a12945894446,,3,,,,,,,,,,0,,,,,,,,,,,,,,,25.0,,,,,,,
4,3,Afternoon Run,8283.8,3154,3154,22.0,Run,Run,0.0,12944852535,2024-11-20T14:44:03Z,2024-11-20T15:44:03Z,(GMT+01:00) Europe/Warsaw,3600.0,,,,3,4,0,1,0,False,False,False,False,everyone,False,g19800575,"[51.10735, 17.12442]","[51.107565, 17.124136]",2.626,4.62,81.8,318.6,448.0,318.0,True,1005.0,True,149.6,163.0,False,True,128.0,114.8,13804140000.0,13804143758,garmin_ping_387374107952,False,0,0,False,57.0,8km Easy Run with Runna ‚úÖ\n\n8km easy run at a...,637.0,,False,"[{'id': 3294344204331294624, 'name': 'Pƒôtla od...","[{'split': 1, 'distance': 1000.5, 'pace_zone':...","[{'split': 1, 'distance': 1611.3, 'pace_zone':...","[{'id': 45500475514, 'name': 'Lap 1', 'split':...","[{'id': 54997510055, 'name': '400m', 'athlete'...","[{'type': 'heart_rate', 'visibility': 'everyon...",False,Garmin Forerunner 945,7f488a045f51ea466f08f8cfec3c4e5916058cea,"[heartrate, pace, power]",81055898,1,a12944852535,}{|vHsrogB^zAF^h@lAPNJ@NIV_@BDH\TRLDJEJ[HEd@i@...,3,oh|vHuuogBRd@H\?^AVKR_A|Ag@~@[`@m@j@gCpDa@v@[\...,False,Nike Invincible Run 3 White,White,2.0,True,430102.0,430.1,,0,6.0,2.52247,2.071395,2.385717,2.812496,3.0,,"[2.248123840707158, 2.3815517814976417, 2.4764...",4.0,2.071395,2.385717,2.812496,1.0,2.0,11.0,,,,,,,


### Separate tables setup

In [6]:
dataframe_columns = {
  'activities' : [
    'id',
    'name',
    'distance',
    'moving_time',
    'elapsed_time',
    'total_elevation_gain',
    'type',
    'sport_type',
    'workout_type',
    'start_date',
    'start_date_local',
    'timezone',
    'achievement_count',
    'kudos_count',
    'comment_count',
    'athlete_count',
    'photo_count',
    'trainer',
    'commute',
    'manual',
    'visibility',
    'start_latlng',
    'end_latlng',
    'average_speed',
    'max_speed',
    'average_cadence',
    'average_watts',
    'max_watts',
    'weighted_average_watts',
    'has_heartrate',
    'average_heartrate',
    'max_heartrate',
    'elev_high',
    'elev_low',
    'pr_count',
    'total_photo_count',
    'suffer_score',
    'description',
    'calories',
    'device_name',
    'map_id',
    'gear_id'],
  'maps' : [
    'map_id',
    'map_polyline',
    'map_summary_polyline'],
  'gear' : [
    'gear_id',
    'gear_name',
    'gear_distance',
    'gear_converted_distance',
    'start_date',
    'start_date_local'],
  'segment_efforts' : [
    'id',
    'name',
    'elapsed_time',
    'moving_time',
    'start_date',
    'start_date_local',
    'distance',
    'start_index',
    'end_index',
    'average_cadence',
    'device_watts',
    'average_watts',
    'average_heartrate',
    'max_heartrate',
    'pr_rank',
    'achievements',
    'visibility',
    'kom_rank',
    'hidden',
    'activity_id',
    'segment_id'],
  'segments' : [
    'segment_id',
    'segment_name',
    'segment_activity_type',
    'segment_distance',
    'segment_average_grade',
    'segment_maximum_grade',
    'segment_elevation_high',
    'segment_elevation_low',
    'segment_start_latlng',
    'segment_end_latlng',
    'segment_elevation_profile',
    'segment_elevation_profiles',
    'segment_climb_category',
    'segment_city',
    'segment_state',
    'segment_country',
    'segment_private',
    'segment_hazardous',
    'segment_starred'],
  'laps' : [
    'id',
    'name',
    'elapsed_time',
    'moving_time',
    'start_date',
    'start_date_local',
    'distance',
    'average_speed',
    'max_speed',
    'lap_index',
    'split',
    'start_index',
    'end_index',
    'total_elevation_gain',
    'average_cadence',
    'device_watts',
    'average_watts',
    'average_heartrate',
    'max_heartrate',
    'pace_zone',
    'activity_id'],
  'best_efforts' : [
    'id',
    'activity_id',
    'name',
    'elapsed_time',
    'moving_time',
    'start_date',
    'start_date_local',
    'distance',
    'pr_rank',
    'achievements',
    'start_index',
    'end_index']
}

### Spliting data into tables

In [7]:
def select_cols(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
  """
  Select only the specified columns from a DataFrame if they exist.

  Parameters
  ----------
  df : pd.DataFrame
      The input DataFrame.
  cols : list of str
      List of column names to select.

  Returns
  -------
  pd.DataFrame
      A new DataFrame containing only the specified columns that exist 
      in the input DataFrame. If none of the columns exist, 
      an empty DataFrame is returned.
  """
  
  existing = [c for c in cols if c in df.columns]
  
  return df[existing].copy() if existing else pd.DataFrame()

def explode_normalize_json(df: pd.DataFrame, col: str, id_col: str | None = None, id_name: str | None = None) -> pd.DataFrame:
  """
  Explode a list-like column into multiple rows and normalize nested JSON/dict objects 
  into a flat tabular structure.

  This function is useful for columns containing arrays of JSON objects 
  (e.g. laps, segment efforts). Each element of the list becomes a separate row, 
  and nested fields are flattened into individual columns. Optionally, 
  a parent identifier column can be retained/renamed to act as a foreign key.

  Parameters
  ----------
  df : pd.DataFrame
      Input DataFrame containing the column to explode.
  col : str
      Name of the column with list- or dict-like values to explode and normalize.
  id_col : str, optional
      Name of the column in the input DataFrame to keep as a parent identifier.
      If provided, it will be included in the output.
  id_name : str, optional
      If provided together with `id_col`, renames the identifier column 
      in the result (e.g. from "id" to "activity_id").

  Returns
  -------
  pd.DataFrame
      A new DataFrame where:
        * each list element from `col` is a separate row,
        * nested JSON/dict objects are flattened into columns with names joined by "_",
        * the parent identifier (`id_col`) is preserved and optionally renamed.
      If `col` is missing or contains only null/empty values, 
      an empty DataFrame is returned.
  """

  if col not in df.columns:
    return pd.DataFrame()
  
  base_cols = [col]

  if id_col and id_col in df.columns:
    base_cols.insert(0, id_col)

  base = df[base_cols].copy()
  exploded = base.explode(col, ignore_index=True)
  values = exploded[col].dropna()

  if values.empty:
    return pd.DataFrame()
  
  norm = pd.json_normalize(values, sep='_')
  out = exploded.loc[values.index].drop(columns=[col]).reset_index(drop=True)
  res = pd.concat([out.reset_index(drop=True), norm.reset_index(drop=True)], axis=1)
  
  if id_col and id_name and id_col in df.columns:
    res = res.rename(columns={id_col: id_name})
  return res

In [8]:
# Activities
activities_cols = dataframe_columns['activities']
activities_df = select_cols(activities_details_df, activities_cols)
logging.info("DataFrame 'activities_df' created.")

# Maps
maps_cols = dataframe_columns['maps']
maps_df = select_cols(activities_details_df, maps_cols)
logging.info("DataFrame 'maps_df' created.")

# Gear
gear_cols = dataframe_columns['gear']
gear_df = select_cols(activities_details_df, gear_cols)
logging.info("DataFrame 'gear_df' created.")

# Segment efforts
seg_eff_cols = dataframe_columns['segment_efforts']
segments_eff_df = explode_normalize_json(activities_details_df, 'segment_efforts')
segments_eff_df = select_cols(segments_eff_df, seg_eff_cols)
logging.info("DataFrame 'segments_eff_df' created.")

# Segments
seg_cols = dataframe_columns['segments']
segments_df = explode_normalize_json(activities_details_df, 'segment_efforts')
segments_df = select_cols(segments_df, seg_cols)
logging.info("DataFrame 'segments_df' created.")

# Laps
lap_cols = dataframe_columns['laps']
laps_df = explode_normalize_json(activities_details_df, 'laps')
laps_df = select_cols(laps_df, lap_cols)
logging.info("DataFrame 'laps_df' created.")

# Best efforts
best_eff_cols = dataframe_columns['best_efforts']
best_eff_df = explode_normalize_json(activities_details_df, 'best_efforts')
best_eff_df = select_cols(best_eff_df, best_eff_cols)
logging.info("DataFrame 'best_eff_df' created.")

# Workout types
workout_types_df = pd.DataFrame([
    {"id": 0.0, "type": "Running - None"},
    {"id": 1.0, "type": "Running - Race"},
    {"id": 2.0, "type": "Running - Long Run"},
    {"id": 3.0, "type": "Running - Workout"},
    {"id": 10.0, "type": "Riding - None"},
    {"id": 11.0, "type": "Riding - Race"},
    {"id": 12.0, "type": "Riding - Race"},
    {"id": 20.0, "type": "Other"}
])
logging.info("DataFrame 'workout_types_df' created.")

# All dataframes in dictionary
dataframes = {
    "activities": activities_df,
    "maps": maps_df,
    "gear": gear_df,
    "segment_efforts": segments_eff_df,
    "segments": segments_df,
    "laps": laps_df,
    "best_efforts": best_eff_df,
    "workout_types" : workout_types_df
}

2025-09-17 12:36:18,931 | INFO | DataFrame 'activities_df' created.
2025-09-17 12:36:18,932 | INFO | DataFrame 'maps_df' created.
2025-09-17 12:36:18,933 | INFO | DataFrame 'gear_df' created.
2025-09-17 12:36:18,992 | INFO | DataFrame 'segments_eff_df' created.
2025-09-17 12:36:19,048 | INFO | DataFrame 'segments_df' created.
2025-09-17 12:36:19,092 | INFO | DataFrame 'laps_df' created.
2025-09-17 12:36:19,106 | INFO | DataFrame 'best_eff_df' created.
2025-09-17 12:36:19,107 | INFO | DataFrame 'workout_types_df' created.


### Activities Dataframe

In [9]:
def speed_to_pace_str(speed: float) -> str | None:
  """
  Convert speed in meters per second to running pace as a string.

  Parameters
  ----------
  speed : float
      Speed value in meters per second. Must be greater than zero.

  Returns
  -------
  str or None
      Running pace in the format "M:SS" representing minutes per kilometer.
      For example, "5:32" means 5 minutes and 32 seconds per kilometer.
      Returns None if the speed is less than or equal to zero.
  """

  if speed <= 0:
    return None
  
  seconds = 1000/speed
  minutes = int(seconds // 60)
  sec = int(round(seconds % 60))

  if sec == 60:
    minutes += 1
    sec = 0

  return f"{minutes}:{sec:02d}"

def speed_to_pace_float(speed: float) -> float | None:
  """
  Convert speed in meters per second to running pace as a float.

  Parameters
  ----------
  speed : float
      Speed value in meters per second. Must be greater than zero.

  Returns
  -------
  float or None
      Running pace in minutes per kilometer, represented as a float.
      For example, 5.53 means approximately 5 minutes and 32 seconds per kilometer.
      Returns None if the speed is less than or equal to zero.
  """

  if speed <= 0:
    return None
  
  return 1000 / speed / 60

def extract_timedelta(time: pd.Series) -> pd.Series:
  """
  Convert a Series of numeric values (seconds) into timedeltas.

  Parameters
  ----------
  time : pd.Series
      Series containing durations expressed in seconds (int/float). 
      Null values are preserved as None.

  Returns
  -------
  pd.Series
      Series of Python ``datetime.timedelta`` objects. 
      Each element corresponds to the given number of seconds or None if missing.
  """

  return pd.Series([(timedelta(seconds=int(t)) if pd.notnull(t) else None) for t in time], dtype="object")

def extract_latlng(latlng: pd.Series) -> pd.DataFrame:
  """
  Split a Series of latitude/longitude pairs into a DataFrame with separate columns.

  Parameters
  ----------
  latlng : pd.Series
      Series where each element is expected to be a list or tuple of length 2 
      (latitude, longitude). If the element is not a valid pair, it is replaced 
      with [None, None].

  Returns
  -------
  pd.DataFrame
      DataFrame with two columns:
        * first column: latitude
        * second column: longitude
      The index is preserved from the input Series.
  """
  latlng = latlng.apply(
    lambda row: row if isinstance(row, (list, tuple)) and len(row) == 2 else [None, None]
  )
  return pd.DataFrame(latlng.tolist(), index=latlng.index)

def create_datetime_cols(df: pd.DataFrame, date_col: str, date_col_local: str) -> pd.DataFrame:
  """
  Create UTC and localized datetime columns from string timestamps.

  Parameters
  ----------
  df : pd.DataFrame
      Input DataFrame containing UTC and local datetime string columns.
  date_col : str
      Name of the column with UTC datetime strings.
  date_col_local : str
      Name of the column with local datetime strings.

  Returns
  -------
  pd.DataFrame
      A DataFrame with two timezone-aware datetime columns:
        * ``start_date_dt`` ‚Äî parsed UTC datetime,
        * ``start_date_local_dt`` ‚Äî localized datetime (converted from UTC using the inferred offset).
  """
  
  if date_col not in df.columns or date_col_local not in df.columns:
        raise KeyError(f"Missing required columns: {date_col}, {date_col_local}")

  temp_df = pd.DataFrame()
  temp_df["start_date_dt"] = pd.to_datetime(df[date_col], utc=True)
  temp_df["start_date_local_dt"] = pd.to_datetime(df[date_col_local], utc=False)
  temp_df["utc_offset"] = (temp_df["start_date_local_dt"] - temp_df["start_date_dt"]).dt.total_seconds() / 60

  temp_df["tz"] = temp_df["utc_offset"].apply(
    lambda m: timezone(timedelta(minutes=int(m))) if pd.notna(m) else None
    )
  
  temp_df["start_date_local_dt"] = temp_df.apply(
    lambda row: row["start_date_dt"].tz_convert(row["tz"]) if row["tz"] is not None or not pd.isna(row["tz"]) else pd.NaT,
    axis=1
    )
  
  return temp_df[["start_date_dt", "start_date_local_dt"]]


In [10]:
activities_df.loc[:, "moving_time_td"]  = extract_timedelta(activities_df["moving_time"])
activities_df.loc[:, "elapsed_time_td"] = extract_timedelta(activities_df["elapsed_time"])

activities_df[["start_date_dt", "start_date_local_dt"]] = create_datetime_cols(activities_df, "start_date", "start_date_local")

activities_df[["start_lat", "start_lng"]]  = extract_latlng(activities_df["start_latlng"])
activities_df[["end_lat", "end_lng"]] = extract_latlng(activities_df["end_latlng"])

is_run = activities_df['type'] == 'Run'

activities_df.loc[is_run, 'average_cadence'] = activities_df['average_cadence'].apply(lambda x: x * 2)

activities_df.loc[is_run, 'avg_pace_str'] = activities_df['average_speed'].apply(speed_to_pace_str)
activities_df.loc[is_run, 'avg_pace_float'] = activities_df['average_speed'].apply(speed_to_pace_float)

activities_df.loc[is_run, 'max_pace_str'] = activities_df['max_speed'].apply(speed_to_pace_str)
activities_df.loc[is_run, 'max_pace_float'] = activities_df['max_speed'].apply(speed_to_pace_float)


In [11]:
activities_cols_clean = [
    'id',
    'name',
    'start_date_dt',
    'start_date_local_dt',
    'distance',
    'moving_time',
    'moving_time_td',
    'elapsed_time',
    'elapsed_time_td',
    'total_elevation_gain',
    'elev_low',
    'elev_high',
    'type',
    'sport_type',
    'workout_type',
    'achievement_count',
    'kudos_count',
    'comment_count',
    'athlete_count',
    'photo_count',
    'trainer',
    'commute',
    'manual',
    'visibility',
    'average_speed',
    'avg_pace_str',
    'avg_pace_float',
    'max_speed',
    'max_pace_str',
    'max_pace_float',
    'average_cadence',
    'average_watts',
    'max_watts',
    'weighted_average_watts',
    'has_heartrate',
    'average_heartrate',
    'max_heartrate',
    'pr_count',
    'total_photo_count',
    'suffer_score',
    'description',
    'calories',
    'device_name',
    'start_lat',
    'start_lng',
    'map_id',
    'gear_id'
]
activities_df = activities_df[activities_cols_clean]
activities_df = activities_df.sort_values(by='start_date_dt', ascending=False)

In [12]:
activities_df.head()

Unnamed: 0,id,name,start_date_dt,start_date_local_dt,distance,moving_time,moving_time_td,elapsed_time,elapsed_time_td,total_elevation_gain,elev_low,elev_high,type,sport_type,workout_type,achievement_count,kudos_count,comment_count,athlete_count,photo_count,trainer,commute,manual,visibility,average_speed,avg_pace_str,avg_pace_float,max_speed,max_pace_str,max_pace_float,average_cadence,average_watts,max_watts,weighted_average_watts,has_heartrate,average_heartrate,max_heartrate,pr_count,total_photo_count,suffer_score,description,calories,device_name,start_lat,start_lng,map_id,gear_id
1092,15831049874,Afternoon Weight Training,2025-09-16 13:01:07+00:00,2025-09-16 15:01:07+02:00,0.0,3825,1:03:45,3825,1:03:45,0.0,0.0,0.0,Workout,WeightTraining,,0,5,1,1,0,True,False,False,followers_only,0.0,,,0.0,,,,,,,True,94.0,222.0,0,0,9.0,Reska8Ô∏è‚É£8Ô∏è‚É£\nBench press PR: 85kgü•≥,254.0,Garmin Forerunner 970,,,a15831049874,
1091,15820198827,Tempo 2kmü•µ,2025-09-15 14:23:21+00:00,2025-09-15 16:23:21+02:00,9521.9,3241,0:54:01,3241,0:54:01,13.0,115.4,125.0,Run,Run,3.0,0,8,0,1,0,False,False,False,everyone,2.938,5:40,5.672793,4.94,3:22,3.373819,167.8,353.5,493.0,369.0,True,153.1,178.0,0,0,66.0,Tempo 2km Repeats with Runna ‚úÖ\n\nWysz≈Ço troch...,735.0,Garmin Forerunner 970,51.107301,17.124098,a15820198827,g24134620
973,15805849875,15km Long Run‚òîÔ∏è,2025-09-14 07:59:25+00:00,2025-09-14 09:59:25+02:00,15059.0,5461,1:31:01,5488,1:31:28,31.0,114.6,125.4,Run,Run,2.0,0,4,0,1,0,False,False,False,everyone,2.758,6:03,6.043026,3.44,4:51,4.844961,172.0,335.5,455.0,334.0,True,144.8,153.0,0,0,66.0,15km Long Run with Runna ‚úÖ\n\nOkrutny beton po...,1170.0,Garmin Forerunner 970,51.107336,17.124136,a15805849875,g24134620
974,15798063578,Tempo 4kmüòÆ‚Äçüí®,2025-09-13 13:59:16+00:00,2025-09-13 15:59:16+02:00,7531.2,2400,0:40:00,2400,0:40:00,11.0,115.2,125.2,Run,Run,3.0,0,7,0,1,0,False,False,False,everyone,3.138,5:19,5.311239,4.98,3:21,3.34672,167.8,364.2,509.0,377.0,True,155.3,181.0,0,0,53.0,Tempo 4km with Runna ‚úÖ\n\nWygrana walka z wiat...,579.0,Garmin Forerunner 970,51.107367,17.124221,a15798063578,g24134620
1090,15786538213,6km Easy Runüòå,2025-09-12 13:40:46+00:00,2025-09-12 15:40:46+02:00,6062.9,2140,0:35:40,2140,0:35:40,17.0,111.2,123.0,Run,Run,,0,7,0,1,0,False,False,False,everyone,2.833,5:53,5.883045,3.56,4:41,4.681648,166.2,351.5,445.0,350.0,True,143.7,152.0,0,0,24.0,6km Easy Run with Runna ‚úÖ\n\nOdgruzowywanie n√≥...,469.0,Garmin Forerunner 970,51.10733,17.124162,a15786538213,g24134620


In [13]:
activities_df_dtype_map = {
    "id": BigInteger,
    "name": String,
    "start_date_dt": DateTime(timezone=True),
    "start_date_local_dt":  DateTime(timezone=True),
    "distance": Float,
    "moving_time": Integer,
    "moving_time_td": Interval,
    "elapsed_time": Integer,
    "elapsed_time_td": Interval,
    "total_elevation_gain": Float,
    "elev_low": Float,
    "elev_high": Float,
    "type": String,
    "sport_type": String,
    "workout_type": Integer,
    "achievement_count": Integer,
    "kudos_count": Integer,
    "comment_count": Integer,
    "athlete_count": Integer,
    "photo_count": Integer,
    "trainer": Boolean,
    "commute": Boolean,
    "manual": Boolean,
    "visibility": String,
    "average_speed": Float,
    "avg_pace_str": String,
    "avg_pace_float": Float,
    "max_speed": Float,
    "max_pace_str": String,
    "max_pace_float": Float,
    "average_cadence": Float,
    "average_watts": Float,
    "max_watts": Float,
    "weighted_average_watts": Float,
    "has_heartrate": Boolean,
    "average_heartrate": Float,
    "max_heartrate": Float,
    "pr_count": Integer,
    "total_photo_count": Integer,
    "suffer_score": Float,
    "description": Text,
    "calories": Float,
    "device_name": String,
    'start_lat' : Float,
    'start_lng' : Float,
    "map_id": String,
    "gear_id": String,
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_S_SCHEMA};"))

logging.warning(f"Whole table {TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE} will be overwritten.")

activities_df.to_sql(
    name=ACTIVITIES_S_TABLE,
    schema=TARGET_S_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=activities_df_dtype_map,
    method="multi",
    chunksize=1000
)



InternalError: (psycopg.errors.DependentObjectsStillExist) cannot drop table silver.activities because other objects depend on it
DETAIL:  constraint fk_seg_eff_act on table silver.segments_efforts depends on table silver.activities
constraint fk_lap_act on table silver.laps depends on table silver.activities
constraint fk_best_eff_act on table silver.best_efforts depends on table silver.activities
HINT:  Use DROP ... CASCADE to drop the dependent objects too.
[SQL: 
DROP TABLE silver.activities]
(Background on this error at: https://sqlalche.me/e/20/2j85)

## Maps Dataframe

In [14]:
maps_df.columns = maps_df.columns.str.replace("^map_", "", regex=True)

In [15]:
maps_cols_clean = [
  'id',
  'polyline',
  'summary_polyline'
]
maps_df = maps_df[maps_cols_clean]

In [None]:
maps_df.head()

In [None]:
maps_df_dtype_map = {
    "id": String,
    "polyline": Text,
    "summary_polyline": Text
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_S_SCHEMA};"))

logging.warning(f"Whole table {TARGET_S_SCHEMA}.{MAPS_S_TABLE} will be overwritten.")

maps_df.to_sql(
    name=MAPS_S_TABLE,
    schema=TARGET_S_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=maps_df_dtype_map,
    method="multi",
    chunksize=1000
)

## Gear Dataframe

In [None]:
gear_df.columns = gear_df.columns.str.replace("^gear_", "", regex=True)
gear_df = gear_df.dropna(subset=["id"])
gear_df = gear_df.rename(columns={'distance' : 'distance_m', 'converted_distance' : 'distance_km'})
gear_df[["start_date_dt", "start_date_local_dt"]] = create_datetime_cols(gear_df, "start_date", "start_date_local")
gear_df = (
  gear_df.sort_values(by='start_date_dt', ascending=False)
  .drop_duplicates(subset=["id"], keep="first")
  .reset_index(drop=True)
)

In [None]:
gear_df.head()

In [None]:
gear_df_dtype_map = {
    "id": String,
    "name": String,
    "distance_m": Float,
    "distance_km": Float,
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_S_SCHEMA};"))

logging.warning(f"Whole table {TARGET_S_SCHEMA}.{GEAR_S_TABLE} will be overwritten.")

gear_df.to_sql(
    name=GEAR_S_TABLE,
    schema=TARGET_S_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=gear_df_dtype_map,
    method="multi",
    chunksize=1000
)

## Segments efforts Dataframe

In [20]:
segments_types_df = segments_df[['segment_id', 'segment_activity_type']].copy()
segments_types_df.drop_duplicates(inplace=True)

segments_eff_df = pd.merge(segments_eff_df, segments_types_df, on='segment_id', how='left')
segments_eff_df = pd.merge(segments_eff_df.drop(columns="achievements"), explode_normalize_json(segments_eff_df, 'achievements', 'id'), on='id', how='left')

segments_eff_df.loc[:, "moving_time_td"]  = extract_timedelta(segments_eff_df["moving_time"])
segments_eff_df.loc[:, "elapsed_time_td"] = extract_timedelta(segments_eff_df["elapsed_time"])

segments_eff_df[["start_date_dt", "start_date_local_dt"]] = create_datetime_cols(segments_eff_df, "start_date", "start_date_local")

is_run = segments_eff_df['segment_activity_type'] == 'Run'

segments_eff_df.loc[is_run, 'average_cadence'] = segments_eff_df['average_cadence'].apply(lambda x: x * 2)

In [21]:
segments_eff_cols_clean = [
  'id',
  'name',
  'start_date_dt',
  'start_date_local_dt',
  'distance',
  'moving_time',
  'moving_time_td',
  'elapsed_time',
  'elapsed_time_td',
  'average_cadence',
  'device_watts',
  'average_watts',
  'average_heartrate',
  'max_heartrate',
  'pr_rank',
  'visibility',
  'kom_rank',
  'hidden',
  'rank',
  'type',
  'activity_id',
  'segment_id'
]
segments_eff_df = segments_eff_df[segments_eff_cols_clean]

In [None]:
segments_eff_df.head()

In [None]:
segments_eff_df_dtype_map = {
    "id": BigInteger,
    "name": String,
    "start_date_dt": DateTime(timezone=True),
    "start_date_local_dt": DateTime(timezone=True),
    "distance": Float,
    "moving_time": Integer,
    "moving_time_td": Interval,
    "elapsed_time": Integer,
    "elapsed_time_td": Interval,
    "average_cadence": Float,
    "device_watts": Boolean,
    "average_watts": Float,
    "average_heartrate": Float,
    "max_heartrate": Float,
    "pr_rank": Integer,
    "visibility": String,
    "kom_rank": Integer,
    "hidden": Boolean,
    "rank": Integer,
    "type": String,
    "activity_id": BigInteger,
    "segment_id": BigInteger,
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_S_SCHEMA};"))

logging.warning(f"Whole table {TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE} will be overwritten.")

segments_eff_df.to_sql(
    name=SEGMENTS_EFFORTS_S_TABLE,
    schema=TARGET_S_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=segments_eff_df_dtype_map,
    method="multi",
    chunksize=1000
)

## Segments Dataframe

In [25]:
segments_df.columns = segments_df.columns.str.replace("^segment_", "", regex=True)

segments_df[["start_lat", "start_lng"]]  = extract_latlng(segments_df["start_latlng"])
segments_df[["end_lat", "end_lng"]] = extract_latlng(segments_df["end_latlng"])

segments_df = segments_df.drop(columns=["start_latlng", "end_latlng"])
segments_df = segments_df.drop_duplicates()

In [None]:
segments_df.head()

In [None]:
segments_df_dtype_map = {
    "id": BigInteger,
    "name": String,
    "activity_type": String,
    "distance": Float,
    "average_grade": Float,
    "maximum_grade": Float,
    "elevation_high": Float,
    "elevation_low": Float,
    "elevation_profile": Float,
    "elevation_profiles": Float,
    "climb_category": Float,
    "city": String,
    "state": String,
    "country": String,
    "private": Boolean,
    "hazardous": Boolean,
    "starred": Boolean,
    "start_lat": Float,
    "start_lng": Float,
    "end_lat": Float,
    "end_lng": Float,
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_S_SCHEMA};"))

logging.warning(f"Whole table {TARGET_S_SCHEMA}.{SEGMENTS_S_TABLE} will be overwritten.")

segments_df.to_sql(
    name=SEGMENTS_S_TABLE,
    schema=TARGET_S_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=segments_df_dtype_map,
    method="multi",
    chunksize=1000
)

## Laps Dataframe

In [28]:
laps_types_df = activities_df[['id', 'type']].copy()
laps_types_df.drop_duplicates(inplace=True)
laps_df = pd.merge(laps_df, laps_types_df, left_on='activity_id', right_on='id', how='left')

In [29]:
laps_df.loc[:, "moving_time_td"]  = extract_timedelta(laps_df["moving_time"])
laps_df.loc[:, "elapsed_time_td"] = extract_timedelta(laps_df["elapsed_time"])

laps_df[["start_date_dt", "start_date_local_dt"]] = create_datetime_cols(laps_df, "start_date", "start_date_local")

is_run = laps_df['type'] == 'Run'

laps_df.loc[is_run, 'average_cadence'] = laps_df['average_cadence'].apply(lambda x: x * 2)

laps_df.loc[is_run, 'avg_pace_str'] = laps_df['average_speed'].apply(speed_to_pace_str)
laps_df.loc[is_run, 'avg_pace_float'] = laps_df['average_speed'].apply(speed_to_pace_float)

laps_df.loc[is_run, 'max_pace_str'] = laps_df['max_speed'].apply(speed_to_pace_str)
laps_df.loc[is_run, 'max_pace_float'] = laps_df['max_speed'].apply(speed_to_pace_float)


In [30]:
laps_cols_df_clean = [
    'id_x',
    'name',
    'lap_index',
    'split',
    'start_date_dt',
    'start_date_local_dt',
    'distance',
    'moving_time',
    'moving_time_td',
    'elapsed_time',
    'elapsed_time_td',
    'total_elevation_gain',
    'type',
    'average_speed',
    'avg_pace_str',
    'avg_pace_float',
    'pace_zone',
    'max_speed',
    'max_pace_str',
    'max_pace_float',
    'average_cadence',
    'device_watts',
    'average_watts',
    'average_heartrate',
    'max_heartrate',
    'activity_id'
]

laps_df = laps_df[laps_cols_df_clean]
laps_df = laps_df.rename(columns={'id_x': 'id'})

In [None]:
laps_df.head()

In [None]:
laps_df_dtype_map = {
"id": BigInteger,
"name": String,
"lap_index": Integer,
"split": Integer,
"start_date_dt": DateTime(timezone=True),
"start_date_local_dt": DateTime(timezone=True),
"distance": Float,
"moving_time": Integer,
"moving_time_td": Interval,
"elapsed_time": Integer,
"elapsed_time_td": Interval,
"total_elevation_gain": Float,
"type": String,
"average_speed": Float,
"avg_pace_str": String,
"avg_pace_float": Float,
"pace_zone": Float,
"max_speed": Float,
"max_pace_str": String,
"max_pace_float": Float,
"average_cadence": Float,
"device_watts": Boolean,
"average_watts": Float,
"average_heartrate": Float,
"max_heartrate": Float,
"activity_id": BigInteger,
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_S_SCHEMA};"))

logging.warning(f"Whole table {TARGET_S_SCHEMA}.{LAPS_S_TABLE} will be overwritten.")

laps_df.to_sql(
    name=LAPS_S_TABLE,
    schema=TARGET_S_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=laps_df_dtype_map,
    method="multi",
    chunksize=1000
)


## Best efforts Dataframe

In [15]:

best_eff_df.loc[:, "moving_time_td"]  = extract_timedelta(best_eff_df["moving_time"])
best_eff_df.loc[:, "elapsed_time_td"] = extract_timedelta(best_eff_df["elapsed_time"])

best_eff_df[["start_date_dt", "start_date_local_dt"]] = create_datetime_cols(best_eff_df, "start_date", "start_date_local")

In [17]:
best_eff_df = pd.merge(best_eff_df.drop(columns="achievements"), explode_normalize_json(best_eff_df, 'achievements', 'id'), on='id', how='left')


In [18]:
best_eff_df

Unnamed: 0,id,activity_id,name,elapsed_time,moving_time,start_date,start_date_local,distance,pr_rank,start_index,end_index,moving_time_td,elapsed_time_td,start_date_dt,start_date_local_dt,rank,type,type_id
0,55038759090,12956260994,400m,147,147,2024-11-22T05:54:04Z,2024-11-22T06:54:04Z,400,,3765,3912,0:02:27,0:02:27,2024-11-22 05:54:04+00:00,2024-11-22 06:54:04+01:00,,,
1,55038759091,12956260994,1/2 mile,301,301,2024-11-22T05:54:04Z,2024-11-22T06:54:04Z,805,,3601,3902,0:05:01,0:05:01,2024-11-22 05:54:04+00:00,2024-11-22 06:54:04+01:00,,,
2,55038759092,12956260994,1K,376,376,2024-11-22T05:54:04Z,2024-11-22T06:54:04Z,1000,,3532,3908,0:06:16,0:06:16,2024-11-22 05:54:04+00:00,2024-11-22 06:54:04+01:00,,,
3,55038759086,12956260994,1 mile,612,612,2024-11-22T05:54:04Z,2024-11-22T06:54:04Z,1609,,3287,3899,0:10:12,0:10:12,2024-11-22 05:54:04+00:00,2024-11-22 06:54:04+01:00,,,
4,55038759087,12956260994,2 mile,1238,1238,2024-11-22T05:54:04Z,2024-11-22T06:54:04Z,3219,,2672,3908,0:20:38,0:20:38,2024-11-22 05:54:04+00:00,2024-11-22 06:54:04+01:00,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3242,66383793143,15820198827,1/2 mile,219,219,2025-09-15T14:23:21Z,2025-09-15T16:23:21Z,805,,1065,1284,0:03:39,0:03:39,2025-09-15 14:23:21+00:00,2025-09-15 16:23:21+02:00,,,
3243,66383793144,15820198827,1K,272,272,2025-09-15T14:23:21Z,2025-09-15T16:23:21Z,1000,,1121,1393,0:04:32,0:04:32,2025-09-15 14:23:21+00:00,2025-09-15 16:23:21+02:00,,,
3244,66383793139,15820198827,1 mile,440,440,2025-09-15T14:23:21Z,2025-09-15T16:23:21Z,1609,2.0,953,1393,0:07:20,0:07:20,2025-09-15 14:23:21+00:00,2025-09-15 16:23:21+02:00,2.0,pr,3.0
3245,66383793140,15820198827,2 mile,946,946,2025-09-15T14:23:21Z,2025-09-15T16:23:21Z,3219,,886,1832,0:15:46,0:15:46,2025-09-15 14:23:21+00:00,2025-09-15 16:23:21+02:00,,,


In [20]:
best_eff_df_cols_clean = [
    'id',
    'name',
    'start_date_dt',
    'start_date_local_dt',
    'distance',
    'moving_time',
    'moving_time_td',
    'elapsed_time',
    'elapsed_time_td',
    'rank',
    'type',
    'activity_id'
]
best_eff_df = best_eff_df[best_eff_df_cols_clean]

In [28]:
best_eff_df[best_eff_df['activity_id'] == 15729456618]

Unnamed: 0,id,name,start_date_dt,start_date_local_dt,distance,moving_time,moving_time_td,elapsed_time,elapsed_time_td,rank,type,activity_id


In [None]:
best_eff_df_dtype_map = {
    "id": BigInteger,
    "name": String,
    "start_date_dt": DateTime(timezone=True),
    "start_date_local_dt": DateTime(timezone=True),
    "distance": Float,
    "moving_time": Integer,
    "moving_time_td": Interval,
    "elapsed_time": Integer,
    "elapsed_time_td": Interval,
    "rank": Integer,
    "type": String,
    "activity_id": BigInteger
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_S_SCHEMA};"))

logging.warning(f"Whole table {TARGET_S_SCHEMA}.{BEST_EFFORTS_S_TABLE} will be overwritten.")

best_eff_df.to_sql(
    name=BEST_EFFORTS_S_TABLE,
    schema=TARGET_S_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=best_eff_df_dtype_map,
    method="multi",
    chunksize=1000
)

In [None]:
print(dataframe_columns.keys())

In [None]:
best_eff_df

### Primary and foreign keys definition

In [44]:
keys_instructions= [
    f"""CREATE SCHEMA IF NOT EXISTS {TARGET_S_SCHEMA};""",
    # ********** PRIMARY KEYS **********
    # --- activities ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}
          ADD CONSTRAINT {ACTIVITIES_S_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- maps ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{MAPS_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{MAPS_S_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{MAPS_S_TABLE}
          ADD CONSTRAINT {MAPS_S_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- gear ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{GEAR_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{GEAR_S_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{GEAR_S_TABLE}
          ADD CONSTRAINT {GEAR_S_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- segments efforts ---
    # PK
    # f"""
    # DO $$
    # BEGIN
    #   IF to_regclass('{TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE}') IS NOT NULL
    #      AND NOT EXISTS (
    #        SELECT 1 FROM pg_constraint
    #        WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE}')
    #          AND contype = 'p'
    #      )
    #   THEN
    #     ALTER TABLE {TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE}
    #       ADD CONSTRAINT {SEGMENTS_EFFORTS_S_TABLE}_pkey PRIMARY KEY (id);
    #   END IF;
    # END $$;
    # """,
    # --- segments ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{SEGMENTS_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{SEGMENTS_S_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{SEGMENTS_S_TABLE}
          ADD CONSTRAINT {SEGMENTS_S_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- laps ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{LAPS_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{LAPS_S_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{LAPS_S_TABLE}
          ADD CONSTRAINT {LAPS_S_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- best efforts ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{BEST_EFFORTS_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{BEST_EFFORTS_S_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{BEST_EFFORTS_S_TABLE}
          ADD CONSTRAINT {BEST_EFFORTS_S_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # ********** FOREIGN KEYS **********
    # --- activities ---
    # FK - maps
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}') IS NOT NULL
         AND to_regclass('{TARGET_S_SCHEMA}.{MAPS_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}')
           AND conname = 'fk_act_map'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}
          ADD CONSTRAINT fk_act_map
          FOREIGN KEY (map_id) REFERENCES {TARGET_S_SCHEMA}.{MAPS_S_TABLE}(id);
      END IF;
    END $$;
    """,
    # --- activities ---
    # FK - gear
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}') IS NOT NULL
         AND to_regclass('{TARGET_S_SCHEMA}.{GEAR_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}')
           AND conname = 'fk_act_gear'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}
          ADD CONSTRAINT fk_act_gear
          FOREIGN KEY (gear_id) REFERENCES {TARGET_S_SCHEMA}.{GEAR_S_TABLE}(id);
      END IF;
    END $$;
    """,
    # --- segments efforts ---
    # FK - activity
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE}') IS NOT NULL
         AND to_regclass('{TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE}')
           AND conname = 'fk_seg_eff_act'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE}
          ADD CONSTRAINT fk_seg_eff_act
          FOREIGN KEY (activity_id) REFERENCES {TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}(id);
      END IF;
    END $$;
    """,
    # --- segments efforts ---
    # FK - segments
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE}') IS NOT NULL
         AND to_regclass('{TARGET_S_SCHEMA}.{SEGMENTS_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE}')
           AND conname = 'fk_seg_eff_seg'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE}
          ADD CONSTRAINT fk_seg_eff_seg
          FOREIGN KEY (segment_id) REFERENCES {TARGET_S_SCHEMA}.{SEGMENTS_S_TABLE}(id);
      END IF;
    END $$;
    """,
    # --- laps ---
    # FK - activities
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{LAPS_S_TABLE}') IS NOT NULL
         AND to_regclass('{TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{LAPS_S_TABLE}')
           AND conname = 'fk_lap_act'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{LAPS_S_TABLE}
          ADD CONSTRAINT fk_lap_act
          FOREIGN KEY (activity_id) REFERENCES {TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}(id);
      END IF;
    END $$;
    """,
    # --- best efforts ---
    # FK - activities
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_S_SCHEMA}.{BEST_EFFORTS_S_TABLE}') IS NOT NULL
         AND to_regclass('{TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_S_SCHEMA}.{BEST_EFFORTS_S_TABLE}')
           AND conname = 'fk_best_eff_act'
         )
      THEN
        ALTER TABLE {TARGET_S_SCHEMA}.{BEST_EFFORTS_S_TABLE}
          ADD CONSTRAINT fk_best_eff_act
          FOREIGN KEY (activity_id) REFERENCES {TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}(id);
      END IF;
    END $$;
    """
]

### Create constrains

In [45]:
with engine.begin() as conn:
    for sql in keys_instructions:
        conn.execute(text(sql))