### Import and config

In [1]:
# Imports
import os
import logging
from datetime import timedelta

from dotenv import load_dotenv

import pandas as pd
import numpy as np

from sqlalchemy import create_engine, text, Integer, Float, String, Boolean, Date, Interval, Text, BigInteger, Time, DateTime


# Configuration
load_dotenv()

# DB
DB_URI = os.getenv('DB_URI')

# Silver tables
TARGET_S_SCHEMA = os.getenv('TARGET_S_SCHEMA')
ACTIVITIES_S_TABLE = os.getenv('ACTIVITIES_S_TABLE')
LOCATIONS_S_TABLE = os.getenv('LOCATIONS_S_TABLE')
GEAR_S_TABLE = os.getenv('GEAR_S_TABLE')
SEGMENTS_S_TABLE = os.getenv('SEGMENTS_S_TABLE')
BEST_EFFORTS_S_TABLE = os.getenv('BEST_EFFORTS_S_TABLE')
SEGMENTS_EFFORTS_S_TABLE = os.getenv('SEGMENTS_EFFORTS_S_TABLE')
LAPS_S_TABLE = os.getenv('LAPS_S_TABLE')
KUDOS_S_TABLE = os.getenv('KUDOS_S_TABLE')
ZONES_S_TABLE = os.getenv('ZONES_S_TABLE')
RELATIVE_EFFORT_S_TABLE = os.getenv('RELATIVE_EFFORT_S_TABLE')
MAPS_S_TABLE = os.getenv('MAPS_S_TABLE')

# Gold tables
TARGET_G_SCHEMA = os.getenv('TARGET_G_SCHEMA')
FACT_ACTIVITIES_TABLE = os.getenv('FACT_ACTIVITIES_TABLE')
FACT_SEGMENTS_EFFORTS_TABLE = os.getenv('FACT_SEGMENTS_EFFORTS_TABLE')
FACT_BEST_EFFORTS_TABLE = os.getenv('FACT_BEST_EFFORTS_TABLE')
FACT_KUDOS_TABLE = os.getenv('FACT_KUDOS_TABLE')
FACT_LAPS_TABLE = os.getenv('FACT_LAPS_TABLE')
FACT_ZONES_TABLE = os.getenv('FACT_ZONES_TABLE')
FACT_MAPS_ACT_TABLE = os.getenv('FACT_MAPS_ACT_TABLE')
FACT_MAPS_SEG_TABLE = os.getenv('FACT_MAPS_SEG_TABLE')

DIM_CALENDAR_TABLE = os.getenv('DIM_CALENDAR_TABLE')
DIM_TIME_TABLE = os.getenv('DIM_TIME_TABLE')
DIM_SPORT_TYPE_TABLE = os.getenv('DIM_SPORT_TYPE_TABLE')
DIM_DEVICE_TABLE = os.getenv('DIM_DEVICE_TABLE')
DIM_LOCATION_TABLE = os.getenv('DIM_LOCATION_TABLE')
DIM_GEAR_TABLE = os.getenv('DIM_GEAR_TABLE')
DIM_SEGMENT_TABLE = os.getenv('DIM_SEGMENT_TABLE')
DIM_EFFORT_TYPE_TABLE = os.getenv('DIM_EFFORT_TYPE_TABLE')
DIM_WORKOUT_TYPE_TABLE = os.getenv('DIM_WORKOUT_TYPE_TABLE')

# Other
LOG_LEVEL = os.getenv('LOG_LEVEL')

logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
    format="%(asctime)s | %(levelname)s | %(message)s"
)

pd.set_option('display.max_columns', None)

### DB names validation

In [2]:
REQUIRED_ENV = [
  'DB_URI',
  'TARGET_S_SCHEMA','TARGET_G_SCHEMA',
  'ACTIVITIES_S_TABLE','LOCATIONS_S_TABLE','GEAR_S_TABLE','SEGMENTS_S_TABLE','BEST_EFFORTS_S_TABLE','SEGMENTS_EFFORTS_S_TABLE','LAPS_S_TABLE','KUDOS_S_TABLE', 'ZONES_S_TABLE', 'RELATIVE_EFFORT_S_TABLE', 'MAPS_S_TABLE',
  'DIM_CALENDAR_TABLE','DIM_TIME_TABLE','DIM_DEVICE_TABLE','DIM_SPORT_TYPE_TABLE','DIM_LOCATION_TABLE','DIM_GEAR_TABLE','DIM_SEGMENT_TABLE','DIM_EFFORT_TYPE_TABLE','DIM_WORKOUT_TYPE_TABLE',
  'FACT_ACTIVITIES_TABLE','FACT_SEGMENTS_EFFORTS_TABLE','FACT_BEST_EFFORTS_TABLE','FACT_LAPS_TABLE','FACT_KUDOS_TABLE', 'FACT_ZONES_TABLE', 'FACT_MAPS_ACT_TABLE', 'FACT_MAPS_SEG_TABLE'
]
missing = [k for k in REQUIRED_ENV if not os.getenv(k)]
if missing:
    raise RuntimeError(f"Missing env variables: {', '.join(missing)}")


### Request data from `silver` layer

In [3]:
engine = create_engine(
  DB_URI, 
  pool_pre_ping=True, 
  pool_size=5, 
  max_overflow=10
)
logging.info("Connection established")

2025-12-09 15:20:01,273 | INFO | Connection established


In [4]:
with engine.begin() as conn:
    activities_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}"), conn)
    logging.info(f"Data from {TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE} downloaded. shape={activities_df.shape}")

    locations_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{LOCATIONS_S_TABLE}"), conn)
    logging.info(f"Data from {TARGET_S_SCHEMA}.{LOCATIONS_S_TABLE} downloaded. shape={locations_df.shape}")

    gear_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{GEAR_S_TABLE}"), conn)
    logging.info(f"Data from {TARGET_S_SCHEMA}.{GEAR_S_TABLE} downloaded. shape={gear_df.shape}")

    segments_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{SEGMENTS_S_TABLE}"), conn)
    logging.info(f"Data from {TARGET_S_SCHEMA}.{SEGMENTS_S_TABLE} downloaded. shape={segments_df.shape}")

    best_eff_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{BEST_EFFORTS_S_TABLE}"), conn)
    logging.info(f"Data from {TARGET_S_SCHEMA}.{BEST_EFFORTS_S_TABLE} downloaded. shape={best_eff_df.shape}")

    seg_eff_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE}"), conn)
    logging.info(f"Data from {TARGET_S_SCHEMA}.{SEGMENTS_EFFORTS_S_TABLE} downloaded. shape={seg_eff_df.shape}")

    laps_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{LAPS_S_TABLE}"), conn)
    logging.info(f"Data from {TARGET_S_SCHEMA}.{LAPS_S_TABLE} downloaded. shape={laps_df.shape}")

    kudos_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{KUDOS_S_TABLE}"), conn)
    logging.info(f"Data from {TARGET_S_SCHEMA}.{KUDOS_S_TABLE} downloaded. shape={kudos_df.shape}")

    zones_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{ZONES_S_TABLE}"), conn)
    logging.info(f"Data from {TARGET_S_SCHEMA}.{ZONES_S_TABLE} downloaded. shape={zones_df.shape}")

    relative_effort_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{RELATIVE_EFFORT_S_TABLE}"), conn)
    logging.info(f"Data from {TARGET_S_SCHEMA}.{RELATIVE_EFFORT_S_TABLE} downloaded. shape={relative_effort_df.shape}")

    maps_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{MAPS_S_TABLE}"), conn)
    logging.info(f"Data from {TARGET_S_SCHEMA}.{MAPS_S_TABLE} downloaded. shape={relative_effort_df.shape}")


2025-12-09 15:20:01,572 | INFO | Data from silver.activities downloaded. shape=(1175, 49)
2025-12-09 15:20:01,575 | INFO | Data from silver.locations downloaded. shape=(249, 4)
2025-12-09 15:20:01,578 | INFO | Data from silver.gear downloaded. shape=(13, 4)
2025-12-09 15:20:01,624 | INFO | Data from silver.segments downloaded. shape=(1684, 43)
2025-12-09 15:20:01,655 | INFO | Data from silver.best_efforts downloaded. shape=(3533, 13)
2025-12-09 15:20:01,706 | INFO | Data from silver.segments_efforts downloaded. shape=(6991, 23)
2025-12-09 15:20:01,773 | INFO | Data from silver.laps downloaded. shape=(8996, 27)
2025-12-09 15:20:01,793 | INFO | Data from silver.kudos downloaded. shape=(8295, 4)
2025-12-09 15:20:01,814 | INFO | Data from silver.zones downloaded. shape=(7790, 8)
2025-12-09 15:20:01,817 | INFO | Data from silver.relative_effort downloaded. shape=(1012, 2)
2025-12-09 15:20:02,506 | INFO | Data from silver.maps downloaded. shape=(1012, 2)


In [5]:
activities_df.head()

Unnamed: 0,id,name,start_date_utc_dt,start_date_local_dt,local_timezone,distance,moving_time,moving_time_td,elapsed_time,elapsed_time_td,total_elevation_gain,elev_low,elev_high,type,sport_type,workout_type,achievement_count,kudos_count,comment_count,athlete_count,photo_count,trainer,commute,manual,visibility,average_speed,avg_pace_str,avg_pace_float,max_speed,max_pace_str,max_pace_float,average_cadence,average_watts,max_watts,weighted_average_watts,has_heartrate,average_heartrate,max_heartrate,pr_count,total_photo_count,suffer_score,description,calories,device_name,start_lat,start_lng,map_id,gear_id,location_id
0,16691408683,Morning Yoga,2025-12-09 08:02:45,2025-12-09 08:02:45,Etc/GMT-1.0,0.0,4537,0 days 01:15:37,4537,0 days 01:15:37,0.0,0.0,0.0,Yoga,Yoga,31.0,0,1,0,1,0,True,False,False,everyone,0.0,,,0.0,,,,,,,True,77.4,123.0,0,0,6.0,,233.0,Garmin Forerunner 970,,,a16691408683,x00000000,
1,16684277150,6.5km Easy RunüôÇ‚Äç‚ÜîÔ∏è,2025-12-08 13:45:47,2025-12-08 13:45:47,Etc/GMT-1.0,6565.0,2317,0 days 00:38:37,2355,0 days 00:39:15,62.0,21.6,60.4,Run,Run,,4,8,0,1,0,False,False,False,everyone,2.833,5:53,5.883045,3.8,4:23,4.385965,168.6,351.5,536.0,347.0,True,142.8,155.0,3,0,25.0,Easy z g√≥rkamiüóª\n\n6.5km easy run at a convers...,510.0,Garmin Forerunner 970,52.755966,15.226957,a16684277150,g24134620,1203.0
2,16678081919,Drop Set Hill Repsüè°,2025-12-07 17:57:40,2025-12-07 17:57:40,Etc/GMT-1.0,9222.0,3286,0 days 00:54:46,3292,0 days 00:54:52,194.0,23.4,53.0,Run,Run,3.0,1,9,0,1,0,False,False,False,everyone,2.806,5:56,5.939653,4.5,3:42,3.703704,164.4,352.8,765.0,369.0,True,148.6,173.0,0,0,51.0,Gorzowskie g√≥ry > Wroc≈Çawskie pag√≥rkiüôÇ‚Äç‚ÜîÔ∏è\n\n2...,745.0,Garmin Forerunner 970,52.755944,15.226775,a16678081919,g24134620,1203.0
3,16663625820,24km Long Runüá™üá∏,2025-12-06 08:19:02,2025-12-06 08:19:02,Etc/GMT-1.0,24159.5,8178,0 days 02:16:18,9074,0 days 02:31:14,61.0,112.2,125.0,Run,Run,2.0,15,8,0,1,0,False,False,False,everyone,2.954,5:39,5.642067,4.54,3:40,3.671072,171.6,344.4,507.0,327.0,True,143.5,155.0,7,0,92.0,Mini Walencja w domu:\n\n(Trochƒô wiƒôcej stania...,1755.0,Garmin Forerunner 970,51.107712,17.124042,a16663625820,g23642256,1183.0
4,16648928719,12km Easy Runüòã,2025-12-04 15:20:16,2025-12-04 15:20:16,Etc/GMT-1.0,12328.4,4326,0 days 01:12:06,4366,0 days 01:12:46,42.0,106.6,120.6,Run,Run,,9,8,0,1,0,False,False,False,everyone,2.85,5:51,5.847953,3.48,4:47,4.789272,170.2,322.6,411.0,321.0,True,142.2,152.0,6,0,43.0,Lu≈∫ne klepanieü§ùüèª\n\n12km easy run at a convers...,960.0,Garmin Forerunner 970,51.107732,17.123874,a16648928719,g24134620,1183.0


In [6]:
locations_df.head()

Unnamed: 0,id,country,region,locality
0,1000,Austria,Lower Austria,Sankt Michael am Bruckbach
1,1001,Austria,Lower Austria,Waidhofen an der Ybbs
2,1002,Austria,Upper Austria,Garsten
3,1003,Austria,Upper Austria,Gr√ºnburg
4,1004,Austria,Upper Austria,Linz


In [7]:
gear_df.head()

Unnamed: 0,id,name,distance_m,distance_km
0,x00000000,No gear,0.0,0.0
1,g24134620,ASICS Novablast 5,579895.0,579.9
2,g23642256,Adidas EVO SL,345476.0,345.5
3,b13100260,Cube Nuroad Pro Wirtualnie,567349.0,567.3
4,g27111424,Nike Vaporfly 3,10036.0,10.0


In [8]:
segments_df.head()

Unnamed: 0,id,name,activity_type,distance,average_grade,maximum_grade,elevation_high,elevation_low,climb_category,private,hazardous,start_lat,start_lng,end_lat,end_lng,location_id,elevation_profile,created_date,updated_date,total_elevation_gain,effort_count,athlete_count,star_count,elevation_profiles_light_url,elevation_profiles_dark_url,map_id,athlete_segment_stats_pr_elapsed_time,athlete_segment_stats_pr_date,athlete_segment_stats_pr_visibility,athlete_segment_stats_pr_activity_id,athlete_segment_stats_pr_activity_visibility,athlete_segment_stats_effort_count,xoms_kom,xoms_qom,xoms_overall,xoms_destination_type,xoms_destination_name,local_legend_athlete_id,local_legend_title,local_legend_profile,local_legend_effort_description,local_legend_effort_count,local_legend_effort_counts_overall
0,11065825,Odcinek miƒôdzy kana≈Çami,Ride,1386.8,0.2,2.3,118.8,114.6,0.0,False,False,51.104562,17.123725,51.113834,17.110484,1183,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,11740949,Most Swojczycki- zjazd pod mosty Jagielo≈Ñskie,Ride,2310.6,0.0,5.4,124.0,115.1,0.0,False,False,51.113875,17.107271,51.126718,17.083225,1183,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,11584293,Kana≈Çowa,Ride,549.4,0.0,1.1,115.0,113.4,0.0,False,False,51.127217,17.075984,51.127622,17.068148,1183,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,12380127,Stara Odra do centrum,Ride,1490.4,0.3,5.8,119.9,113.7,0.0,False,False,51.127377,17.067455,51.114944,17.072457,1183,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,7667882,Z g√≥rki na pazurki (Odra-wa≈Çy-od Grunwaldzkiej...,Ride,443.8,-0.5,2.6,122.3,117.0,0.0,False,False,51.11465,17.072658,51.110929,17.071034,1183,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [9]:
best_eff_df.head()

Unnamed: 0,id,name,start_date_utc_dt,start_date_local_dt,local_timezone,distance,moving_time,moving_time_td,elapsed_time,elapsed_time_td,rank,type,activity_id
0,31466450585,400m,2021-01-19 09:05:55,2021-01-19 10:05:55,Etc/GMT-1.0,400.0,142,0 days 00:02:22,142,0 days 00:02:22,,,4961015103
1,31466450824,1/2 mile,2021-01-19 09:03:43,2021-01-19 10:03:43,Etc/GMT-1.0,805.0,286,0 days 00:04:46,286,0 days 00:04:46,,,4961015103
2,31466451099,1K,2021-01-19 09:02:40,2021-01-19 10:02:40,Etc/GMT-1.0,1000.0,367,0 days 00:06:07,367,0 days 00:06:07,,,4961015103
3,31466451392,1 mile,2021-01-19 09:03:21,2021-01-19 10:03:21,Etc/GMT-1.0,1609.0,629,0 days 00:10:29,629,0 days 00:10:29,,,4961015103
4,31466450030,2 mile,2021-01-19 09:03:34,2021-01-19 10:03:34,Etc/GMT-1.0,3219.0,1323,0 days 00:22:03,1323,0 days 00:22:03,,,4961015103


In [10]:
seg_eff_df.head()

Unnamed: 0,id,name,start_date_utc_dt,start_date_local_dt,local_timezone,distance,moving_time,moving_time_td,elapsed_time,elapsed_time_td,average_cadence,device_watts,average_watts,average_heartrate,max_heartrate,pr_rank,visibility,kom_rank,hidden,rank,type,activity_id,segment_id
0,3404548530909393958,Odcinek miƒôdzy kana≈Çami,2025-09-20 19:06:32,2025-09-20 21:06:32,Etc/GMT-2.0,1386.8,310,0 days 00:05:10,310,0 days 00:05:10,,False,,82.5,91.0,,followers_only,,False,,,15879687027,11065825
1,3404548530908871718,Most Swojczycki- zjazd pod mosty Jagielo≈Ñskie,2025-09-20 19:13:13,2025-09-20 21:13:13,Etc/GMT-2.0,2310.6,519,0 days 00:08:39,519,0 days 00:08:39,,False,,87.5,99.0,,followers_only,,False,,,15879687027,11740949
2,3404548530910437414,Kana≈Çowa,2025-09-20 19:23:51,2025-09-20 21:23:51,Etc/GMT-2.0,549.4,116,0 days 00:01:56,116,0 days 00:01:56,,False,,91.9,106.0,,followers_only,,False,,,15879687027,11584293
3,3404548530907957286,Stara Odra do centrum,2025-09-20 19:26:00,2025-09-20 21:26:00,Etc/GMT-2.0,1490.4,360,0 days 00:06:00,376,0 days 00:06:16,,False,,89.8,101.0,,followers_only,,False,,,15879687027,12380127
4,3404548530908100646,Z g√≥rki na pazurki (Odra-wa≈Çy-od Grunwaldzkiej...,2025-09-20 19:34:16,2025-09-20 21:34:16,Etc/GMT-2.0,443.8,101,0 days 00:01:41,101,0 days 00:01:41,,False,,93.6,107.0,,followers_only,,False,,,15879687027,7667882


In [11]:
laps_df.head()

Unnamed: 0,id,name,lap_index,split,start_date_utc_dt,start_date_local_dt,local_timezone,distance,moving_time,moving_time_td,elapsed_time,elapsed_time_td,total_elevation_gain,type,average_speed,avg_pace_str,avg_pace_float,pace_zone,max_speed,max_pace_str,max_pace_float,average_cadence,device_watts,average_watts,average_heartrate,max_heartrate,activity_id
0,56505060057,Lap 1,1,1,2025-09-19 15:02:12,2025-09-19 17:02:12,Etc/GMT-2.0,0.0,3673,0 days 01:01:13,3673,0 days 01:01:13,0.0,Workout,0.0,,,,0.0,,,,False,,101.1,153.0,15865360447
1,56559813306,Lap 1,1,1,2025-09-20 19:03:29,2025-09-20 21:03:29,Etc/GMT-2.0,5000.0,1153,0 days 00:19:13,1153,0 days 00:19:13,12.4,Ride,4.34,,,,5.98,,,,False,88.2,87.1,109.0,15879687027
2,56559813315,Lap 2,2,2,2025-09-20 19:22:44,2025-09-20 21:22:44,Etc/GMT-2.0,5000.0,1189,0 days 00:19:49,1204,0 days 00:20:04,15.8,Ride,4.21,,,,8.0,,,,False,88.5,92.9,116.0,15879687027
3,56559813324,Lap 3,3,3,2025-09-20 19:42:48,2025-09-20 21:42:48,Etc/GMT-2.0,5000.0,1007,0 days 00:16:47,1041,0 days 00:17:21,31.2,Ride,4.97,,,,9.98,,,,False,112.0,93.9,114.0,15879687027
4,56559813335,Lap 4,4,4,2025-09-20 20:00:10,2025-09-20 22:00:10,Etc/GMT-2.0,4351.3,915,0 days 00:15:15,976,0 days 00:16:16,10.6,Ride,4.76,,,,11.5,,,,False,104.3,91.8,134.0,15879687027


In [12]:
kudos_df.head()

Unnamed: 0,first_name,last_name,full_name,activity_id
0,Kacper,G.,Kacper G.,15716821076
1,Jan,K.,Jan K.,15716821076
2,Jacek,S.,Jacek S.,15716821076
3,Ola,≈Å.,Ola ≈Å.,15716821076
4,Kacper,K.,Kacper K.,15716821076


In [13]:
zones_df.head()

Unnamed: 0,id,activity_id,type,zone_number,zone_name,time,min,max
0,15923268347-heartrate-1,15923268347,heartrate,1,Z1 - Recovery,73.0,0.0,133.0
1,15923268347-heartrate-2,15923268347,heartrate,2,Z2 - Endurance,1706.0,134.0,147.0
2,15923268347-heartrate-3,15923268347,heartrate,3,Z3 - Tempo,1290.0,148.0,160.0
3,15923268347-heartrate-4,15923268347,heartrate,4,Z4 - Threshold,0.0,161.0,166.0
4,15923268347-heartrate-5,15923268347,heartrate,5,Z5 - Anaerobic,0.0,167.0,-1.0


In [14]:
relative_effort_df.head()

Unnamed: 0,activity_id,relative_effort
0,15923268347,39.0
1,8254517069,17.0
2,8252893698,13.0
3,8247520568,23.0
4,8239641985,16.0


In [15]:
maps_df.head()

Unnamed: 0,id,point_id,lat,lng
0,a15865360447,0,,
1,a15879687027,0,51.10732,17.12439
2,a15879687027,1,51.10729,17.12443
3,a15879687027,2,51.10717,17.12455
4,a15879687027,3,51.10692,17.12486


### Setup `gold.dim_calendar`

In [16]:
dim_calendar_df = pd.DataFrame(
  {'date': pd.date_range(
    activities_df['start_date_local_dt'].dt.date.min(), 
    pd.Timestamp('today').normalize(), 
    freq="D")}
)
dim_calendar_df = dim_calendar_df.sort_values(by='date', ascending=False).reset_index(drop=True)

In [17]:
#Year
dim_calendar_df['year'] = dim_calendar_df['date'].dt.year
dim_calendar_df['year_start_date'] = dim_calendar_df['date'].dt.to_period('Y').dt.start_time

# Month
dim_calendar_df['month'] = dim_calendar_df['date'].dt.month
dim_calendar_df['month_year'] = dim_calendar_df['date'].dt.to_period('M').astype('str')
dim_calendar_df['month_start_date'] = dim_calendar_df['date'].dt.to_period('M').dt.start_time
dim_calendar_df['month_name_year'] = dim_calendar_df['date'].dt.strftime('%b-%Y')
dim_calendar_df['month_name'] = dim_calendar_df['date'].dt.strftime('%B') 

# Week
dim_calendar_df['week'] = dim_calendar_df['date'].dt.isocalendar().week
dim_calendar_df['week_start_date'] = dim_calendar_df['date'].dt.to_period('W').dt.start_time

# Day
dim_calendar_df['day'] = dim_calendar_df['date'].dt.day
dim_calendar_df['day_of_year'] = dim_calendar_df['date'].dt.day_of_year
dim_calendar_df['day_of_week'] = dim_calendar_df['date'].dt.weekday + 1
dim_calendar_df['day_of_week_name'] = dim_calendar_df['date'].dt.day_name()
dim_calendar_df['is_weekend'] = dim_calendar_df['date'].dt.weekday + 1 >= 6

In [18]:
dim_calendar_df.head()

Unnamed: 0,date,year,year_start_date,month,month_year,month_start_date,month_name_year,month_name,week,week_start_date,day,day_of_year,day_of_week,day_of_week_name,is_weekend
0,2025-12-09,2025,2025-01-01,12,2025-12,2025-12-01,Dec-2025,December,50,2025-12-08,9,343,2,Tuesday,False
1,2025-12-08,2025,2025-01-01,12,2025-12,2025-12-01,Dec-2025,December,50,2025-12-08,8,342,1,Monday,False
2,2025-12-07,2025,2025-01-01,12,2025-12,2025-12-01,Dec-2025,December,49,2025-12-01,7,341,7,Sunday,True
3,2025-12-06,2025,2025-01-01,12,2025-12,2025-12-01,Dec-2025,December,49,2025-12-01,6,340,6,Saturday,True
4,2025-12-05,2025,2025-01-01,12,2025-12,2025-12-01,Dec-2025,December,49,2025-12-01,5,339,5,Friday,False


In [19]:
dim_calendar_df_dtype_map = {
    "date": Date,
    "year": Integer,
    "year_start_date": Date,
    "month": Integer,
    "month_year": String,
    "month_start_date": Date,
    "month_name_year": String,
    "month_name": String,
    "week": Integer,
    "week_start_date": Date,
    "day": Integer,
    "day_of_year": Integer,
    "day_of_week": Integer,
    "day_of_week_name": String,
    "is_weekend": Boolean
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{DIM_CALENDAR_TABLE} will be overwritten.")

dim_calendar_df.to_sql(
    name=DIM_CALENDAR_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=dim_calendar_df_dtype_map,
    method="multi",
    chunksize=1000
)



-4

### Setup `gold.dim_time`

In [20]:
times = pd.date_range("1900-01-01 00:00:00", "1900-01-01 23:59:59", freq="s")
dim_time_df = pd.DataFrame({"datetime": times})

In [21]:
dim_time_df['time'] = dim_time_df['datetime'].dt.time
dim_time_df['hour'] = dim_time_df['datetime'].dt.hour
dim_time_df['minute'] = dim_time_df['datetime'].dt.minute
dim_time_df['second'] = dim_time_df['datetime'].dt.second
dim_time_df['hour_minute'] = dim_time_df['datetime'].dt.strftime("%H:%M")
dim_time_df['hour_label'] = dim_time_df['datetime'].dt.strftime("%H:00")
dim_time_df['day_part'] = pd.cut(dim_time_df['hour'], 
                                 bins=[0,3,10,12,17,20,23],
                                 labels=['Night','Morning','Lunch','Afternoon','Evening', 'Night'],
                                 include_lowest=True,
                                 ordered=False)
dim_time_df['day_part_number'] = pd.cut(dim_time_df['hour'], 
                                 bins=[0,3,10,12,17,20,23],
                                 labels=[5, 1, 2, 3, 4, 5],
                                 include_lowest=True,
                                 ordered=False)
dim_time_df = dim_time_df.drop(columns='datetime')

In [22]:
dim_time_df.head()

Unnamed: 0,time,hour,minute,second,hour_minute,hour_label,day_part,day_part_number
0,00:00:00,0,0,0,00:00,00:00,Night,5
1,00:00:01,0,0,1,00:00,00:00,Night,5
2,00:00:02,0,0,2,00:00,00:00,Night,5
3,00:00:03,0,0,3,00:00,00:00,Night,5
4,00:00:04,0,0,4,00:00,00:00,Night,5


In [23]:
dim_time_df_dtype_map = {
    "time": Time,
    "hour": Integer,
    "minute": Integer,
    "second": Integer,
    "hour_minute": String,
    "hour_label": String,
    "day_part": String,
    "day_part_number": Integer
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{DIM_TIME_TABLE} will be overwritten.")

dim_time_df.to_sql(
    name=DIM_TIME_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=dim_time_df_dtype_map,
    method="multi",
    chunksize=1000
)



-87

In [24]:
activities_df['sport_type'].value_counts()

sport_type
Run               578
Walk              245
Ride              123
WeightTraining    118
Swim               35
VirtualRide        23
Yoga               21
Squash             14
Workout             8
RockClimbing        5
Hike                3
Soccer              1
Rowing              1
Name: count, dtype: int64

### Setup `gold.dim_sport_type`

In [25]:
dim_sport_type_df = (activities_df['sport_type']
                     .copy()
                     .drop_duplicates()
                     .to_frame()
                     .reset_index(drop=True))
dim_sport_type_df['sport_type_id'] = 1000 + np.arange(len(dim_sport_type_df))

In [26]:
dim_sport_type_df = dim_sport_type_df[['sport_type_id', 'sport_type']]
dim_sport_type_df = dim_sport_type_df.rename(columns={'sport_type_id': 'id'})

In [27]:
dim_sport_type_df['sport_type_summary'] = np.where(
    dim_sport_type_df['sport_type'].isin(['Ride', 'Run', 'Swim']),
    dim_sport_type_df['sport_type'],
    'Other'
)
mapping = {
    'Run': 1,
    'Ride': 2,
    'Swim': 3,
    'Other': 4
}

dim_sport_type_df['sport_type_summary_number'] = (
    dim_sport_type_df['sport_type_summary'].map(mapping)
)

In [28]:
dim_sport_type_df.head()

Unnamed: 0,id,sport_type,sport_type_summary,sport_type_summary_number
0,1000,Yoga,Other,4
1,1001,Run,Run,1
2,1002,WeightTraining,Other,4
3,1003,VirtualRide,Other,4
4,1004,Walk,Other,4


In [29]:
dim_sport_type_df_dtype_map = {
    "id": Integer,
    "sport_type": String,
    "sport_type_summary": String,
    "sport_type_summary_number": Integer
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{DIM_SPORT_TYPE_TABLE} will be overwritten.")

dim_sport_type_df.to_sql(
    name=DIM_SPORT_TYPE_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=dim_sport_type_df_dtype_map,
    method="multi",
    chunksize=1000
)



-1

### Setup `gold.dim_device`

In [30]:
dim_device_df = (activities_df['device_name']
                     .copy()
                     .drop_duplicates()
                     .to_frame()
                     .reset_index(drop=True))
dim_device_df['device_id'] = 1000 + np.arange(len(dim_device_df))

In [31]:
dim_device_df = dim_device_df[['device_id', 'device_name']]
dim_device_df = dim_device_df.rename(columns={'device_id': 'id'})
dim_device_df['device_name'] = dim_device_df['device_name'].fillna('No device')

In [32]:
dim_device_df.head()

Unnamed: 0,id,device_name
0,1000,Garmin Forerunner 970
1,1001,No device
2,1002,Apple Watch SE
3,1003,Garmin Edge 1030 Plus
4,1004,Garmin Edge 840


In [33]:
dim_device_df_dtype_map = {
    "id": Integer,
    "device_name": String
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{DIM_DEVICE_TABLE} will be overwritten.")

dim_device_df.to_sql(
    name=DIM_DEVICE_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=dim_device_df_dtype_map,
    method="multi",
    chunksize=1000
)



-1

### Setup `gold.dim_location`

In [34]:
dim_location_df = locations_df.copy()

In [35]:
dim_location_df.head()

Unnamed: 0,id,country,region,locality
0,1000,Austria,Lower Austria,Sankt Michael am Bruckbach
1,1001,Austria,Lower Austria,Waidhofen an der Ybbs
2,1002,Austria,Upper Austria,Garsten
3,1003,Austria,Upper Austria,Gr√ºnburg
4,1004,Austria,Upper Austria,Linz


In [36]:
dim_location_df_dtype_map = {
    "id": Integer,
    "locality": String,
    "region": String,
    "country": String
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{DIM_LOCATION_TABLE} will be overwritten.")

dim_location_df.to_sql(
    name=DIM_LOCATION_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=dim_location_df_dtype_map,
    method="multi",
    chunksize=1000
)



-1

### Setup `gold.dim_gear`

In [37]:
dim_gear_df = gear_df.copy()

In [38]:
dim_gear_df = dim_gear_df.sort_values(by='id')
dim_gear_df = dim_gear_df.reset_index(drop=True)
dim_gear_df['gear_type'] = dim_gear_df['id'].str[0].str.lower().map({'b': 'Bike', 'g': 'Shoes'}).fillna('Other')


In [39]:
retired_gear = [
  "Adidas Ultraboost 19",
  "New Balance 1080 v12",
  "Nike Invincible Run 3 Black",
  "Nike Invincible Run 3 Blueprint",
  "Nike Invincible Run 3 White",
  "Nike Pegasus 38",
  "Nike Zoom Fly 4"
]

dim_gear_df['status'] = np.where(
    dim_gear_df['name'].isin(retired_gear),
    "Retired",
    "Active"
)

In [40]:
dim_gear_df.head()

Unnamed: 0,id,name,distance_m,distance_km,gear_type,status
0,b12572672,Cube Nuroad Pro,3788632.0,3788.6,Bike,Active
1,b13100260,Cube Nuroad Pro Wirtualnie,567349.0,567.3,Bike,Active
2,g11165677,New Balance 1080 v12,1194617.0,1194.6,Shoes,Retired
3,g11783267,Nike Zoom Fly 4,272798.0,272.8,Shoes,Retired
4,g17673165,Nike Invincible Run 3 Black,666029.0,666.0,Shoes,Retired


In [41]:
dim_gear_df_dtype_map = {
    "id": String,
    "name": String,
    "distance_m": Float,
    "distance_km": Float,
    "gear_type": String,
    "status": String
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{DIM_GEAR_TABLE} will be overwritten.")

dim_gear_df.to_sql(
    name=DIM_GEAR_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=dim_gear_df_dtype_map,
    method="multi",
    chunksize=1000
)



-1

### Setup `gold.dim_segment`

In [42]:
dim_segment_df = segments_df.copy()

In [43]:
dim_segment_df = dim_segment_df[dim_segment_df['activity_type'] == 'Run']

In [44]:
dim_segment_cols_clean = [
  'id',
  'name',
  'activity_type',
  'distance',
  'average_grade',
  'maximum_grade',
  'elevation_high',
  'elevation_low',
  'climb_category',
  'elevation_profile',
  'created_date',
  'updated_date',
  'total_elevation_gain',
  'athlete_segment_stats_pr_elapsed_time',
  'athlete_segment_stats_pr_date',
  'athlete_segment_stats_pr_activity_id',
  'athlete_segment_stats_effort_count',
  'xoms_kom',
  'xoms_qom',
  'xoms_overall',
  'local_legend_athlete_id',
  'local_legend_title',
  'local_legend_profile',
  'local_legend_effort_description',
  'local_legend_effort_count',
  'map_id',
  'location_id'
]

In [45]:
dim_segment_df = dim_segment_df[dim_segment_cols_clean]
dim_segment_df = dim_segment_df.sort_values(by='id')
dim_segment_df = dim_segment_df.reset_index(drop=True)

In [46]:
dim_segment_df.head()

Unnamed: 0,id,name,activity_type,distance,average_grade,maximum_grade,elevation_high,elevation_low,climb_category,elevation_profile,created_date,updated_date,total_elevation_gain,athlete_segment_stats_pr_elapsed_time,athlete_segment_stats_pr_date,athlete_segment_stats_pr_activity_id,athlete_segment_stats_effort_count,xoms_kom,xoms_qom,xoms_overall,local_legend_athlete_id,local_legend_title,local_legend_profile,local_legend_effort_description,local_legend_effort_count,map_id,location_id
0,1137415,Passeig De Garcia F√†ria Climb,Run,361.742,1.0,18.1,17.2,-1.3,0.0,https://d3o5xota0a1fcr.cloudfront.net/v6/chart...,2012-03-26,2021-05-20,32.6848,142,2023-02-19,8586119000.0,1.0,33s,28s,28s,105512113.0,Adam Richards,https://lh3.googleusercontent.com/a/ACg8ocKCuK...,63 efforts in the last 90 days,63.0,s1137415,1236
1,1137416,Carretera De Montju√Øc Climb,Run,439.109,2.8,8.1,19.2,6.9,0.0,https://d3o5xota0a1fcr.cloudfront.net/v6/chart...,2012-03-26,2021-05-20,39.8947,161,2023-02-19,8586119000.0,1.0,1:01,56s,56s,67973198.0,Mark Dale,https://dgalywyr863hv.cloudfront.net/pictures/...,47 efforts in the last 90 days,47.0,s1137416,1236
2,1617416,Estaci√≥n de Francia - Col√≥n,Run,1011.51,-0.5,33.1,30.8,2.6,0.0,https://d3o5xota0a1fcr.cloudfront.net/v6/chart...,2012-06-23,2021-05-20,43.8,366,2023-02-19,8586119000.0,1.0,2:44,2:58,2:44,63344153.0,Andrea Vilanova,https://dgalywyr863hv.cloudfront.net/pictures/...,40 efforts in the last 90 days,40.0,s1617416,1236
3,1996637,Unisee,Run,1743.44,0.0,4.1,3.4,-1.8,0.0,https://d3o5xota0a1fcr.cloudfront.net/v6/chart...,2012-08-05,2021-05-20,9.21209,611,2019-09-12,6012663000.0,11.0,5:22,6:22,5:22,175014577.0,J√∂rn Wolters,https://dgalywyr863hv.cloudfront.net/pictures/...,19 efforts in the last 90 days,19.0,s1996637,1032
4,2309147,Uniseerunde,Run,2416.68,0.0,5.4,7.5,-1.7,0.0,https://d3o5xota0a1fcr.cloudfront.net/v6/chart...,2012-09-05,2021-05-15,16.622,868,2019-08-21,6012663000.0,10.0,8:48,8:52,8:48,124589068.0,Joona R√∂mer,https://lh3.googleusercontent.com/a/ACg8ocJ6oG...,9 efforts in the last 90 days,9.0,s2309147,1032


In [47]:
dim_segment_df_dtype_map = {
    "id": BigInteger,
    "name": String,
    "activity_type": String,
    "distance": Float,
    "average_grade": Float,
    "maximum_grade": Float,
    "elevation_high": Float,
    "elevation_low": Float,
    "climb_category": Float,
    "elevation_profile": Text,
    "created_date": Date,
    "updated_date": Date,
    "total_elevation_gain": Float,
    "athlete_segment_stats_pr_elapsed_time": Text,
    "athlete_segment_stats_pr_date": Text,
    "athlete_segment_stats_pr_activity_id": BigInteger,
    "athlete_segment_stats_effort_count": Integer,
    "xoms_kom": Text,
    "xoms_qom": Text,
    "xoms_overall": Text,
    "local_legend_athlete_id": BigInteger,
    "local_legend_title": Text,
    "local_legend_profile": Text,
    "local_legend_effort_description": Text,
    "local_legend_effort_count": Integer,
    "map_id": Text,
    "location_id": Integer
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{DIM_SEGMENT_TABLE} will be overwritten.")

dim_segment_df.to_sql(
    name=DIM_SEGMENT_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=dim_segment_df_dtype_map,
    method="multi",
    chunksize=1000
)



-1

### Setup `gold.dim_effort_type`

In [48]:
dim_effort_type_df = (best_eff_df['name']
                     .copy()
                     .drop_duplicates()
                     .to_frame()
                     .reset_index(drop=True))
dim_effort_type_df['id'] = 1000 + np.arange(len(dim_effort_type_df))

In [49]:
dim_effort_type_df = dim_effort_type_df[['id', 'name']]

In [50]:
dim_effort_type_df.head()

Unnamed: 0,id,name
0,1000,400m
1,1001,1/2 mile
2,1002,1K
3,1003,1 mile
4,1004,2 mile


In [51]:
dim_effort_type_df_dtype_map = {
    "id": Integer,
    "name": String
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{DIM_EFFORT_TYPE_TABLE} will be overwritten.")

dim_effort_type_df.to_sql(
    name=DIM_EFFORT_TYPE_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=dim_effort_type_df_dtype_map,
    method="multi",
    chunksize=1000
)



-1

### Setup `gold.dim_workout_type`

In [52]:
def extract_timedelta(time: pd.Series) -> pd.Series:
  """
  Convert a Series of numeric values (seconds) into timedeltas.

  Parameters
  ----------
  time : pd.Series
      Series containing durations expressed in seconds (int/float). 
      Null values are preserved as None.

  Returns
  -------
  pd.Series
      Series of Python ``datetime.timedelta`` objects. 
      Each element corresponds to the given number of seconds or None if missing.
  """

  return pd.Series([(timedelta(seconds=int(t)) if pd.notnull(t) else None) for t in time], dtype="object")

In [53]:
dim_workout_type_df = pd.DataFrame([
    {"id": 0.0, "type": "Run - General"},
    {"id": 1.0, "type": "Run - Race"},
    {"id": 2.0, "type": "Run - Long Run"},
    {"id": 3.0, "type": "Run - Workout"},
    {"id": 10.0, "type": "Ride - General"},
    {"id": 11.0, "type": "Ride - Race"},
    {"id": 12.0, "type": "Ride - Workout"},
    {"id": 20.0, "type": "Other"}
])

In [54]:
dim_workout_type_df.head()

Unnamed: 0,id,type
0,0.0,Run - General
1,1.0,Run - Race
2,2.0,Run - Long Run
3,3.0,Run - Workout
4,10.0,Ride - General


In [55]:
dim_workout_type_df_dtype_map = {
    "id": Integer,
    "type": String
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{DIM_WORKOUT_TYPE_TABLE} will be overwritten.")

dim_workout_type_df.to_sql(
    name=DIM_WORKOUT_TYPE_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=dim_workout_type_df_dtype_map,
    method="multi",
    chunksize=1000
)



-1

### Setup `gold.fact_activities`

In [56]:
def extract_timedelta(time: pd.Series) -> pd.Series:
  """
  Convert a Series of numeric values (seconds) into timedeltas.

  Parameters
  ----------
  time : pd.Series
      Series containing durations expressed in seconds (int/float). 
      Null values are preserved as None.

  Returns
  -------
  pd.Series
      Series of Python ``datetime.timedelta`` objects. 
      Each element corresponds to the given number of seconds or None if missing.
  """

  return pd.Series([(timedelta(seconds=int(t)) if pd.notnull(t) else None) for t in time], dtype="object")

In [57]:
fact_activities_df = activities_df.copy()

In [58]:
fact_activities_df = fact_activities_df.drop(columns=['moving_time_td', 'elapsed_time_td'])
fact_activities_df.loc[:, "moving_time_td"]  = extract_timedelta(fact_activities_df["moving_time"])
fact_activities_df.loc[:, "elapsed_time_td"] = extract_timedelta(fact_activities_df["elapsed_time"])

In [59]:
fact_activities_df = pd.merge(fact_activities_df, dim_sport_type_df, how='left', on='sport_type')
fact_activities_df = fact_activities_df.rename(columns={'id_y': 'sport_type_id', 'id_x': 'id'})
fact_activities_df = pd.merge(fact_activities_df, dim_device_df, how='left', on='device_name')
fact_activities_df = fact_activities_df.rename(columns={'id_y': 'device_id', 'id_x': 'id'})
fact_activities_df = pd.merge(fact_activities_df, relative_effort_df, how='left', left_on='id', right_on='activity_id')


In [60]:
fact_activities_df['date'] = fact_activities_df['start_date_local_dt'].dt.date
fact_activities_df['time'] = fact_activities_df['start_date_local_dt'].dt.time

In [61]:
fact_activities_df = fact_activities_df.rename(columns={"start_date_local_dt": "datetime"})

In [62]:
fact_activities_df['workout_type'] = fact_activities_df.apply(
    lambda r: 0 if pd.isna(r['workout_type']) and r['sport_type'] == 'Run'
              else 20 if pd.isna(r['workout_type'])
              else r['workout_type'],
    axis=1
)
fact_activities_df = fact_activities_df.rename(columns={'workout_type': 'workout_type_id'})

In [63]:
fact_activities_cols_clean = [
  'id',
  'name',
  'description',
  'date',
  'time',
  'datetime',
  'relative_effort',
  'distance',
  'moving_time',
  'moving_time_td',
  'elapsed_time',
  'elapsed_time_td',
  'total_elevation_gain',
  'achievement_count',
  'kudos_count',
  'comment_count',
  'athlete_count',
  'photo_count',
  'commute',
  'manual',
  'visibility',
  'average_speed',
  'avg_pace_str',
  'avg_pace_float',
  'max_speed',
  'max_pace_str',
  'max_pace_float',
  'average_cadence',
  'average_watts',
  'max_watts',
  'weighted_average_watts',
  'has_heartrate',
  'average_heartrate',
  'max_heartrate',
  'pr_count',
  'suffer_score',
  'calories',
  'map_id',
  'gear_id',
  'location_id',
  'sport_type_id',
  'device_id',
  'workout_type_id'
]
fact_activities_df = fact_activities_df[fact_activities_cols_clean]

In [64]:
fact_activities_df.head()

Unnamed: 0,id,name,description,date,time,datetime,relative_effort,distance,moving_time,moving_time_td,elapsed_time,elapsed_time_td,total_elevation_gain,achievement_count,kudos_count,comment_count,athlete_count,photo_count,commute,manual,visibility,average_speed,avg_pace_str,avg_pace_float,max_speed,max_pace_str,max_pace_float,average_cadence,average_watts,max_watts,weighted_average_watts,has_heartrate,average_heartrate,max_heartrate,pr_count,suffer_score,calories,map_id,gear_id,location_id,sport_type_id,device_id,workout_type_id
0,16691408683,Morning Yoga,,2025-12-09,08:02:45,2025-12-09 08:02:45,6.0,0.0,4537,1:15:37,4537,1:15:37,0.0,0,1,0,1,0,False,False,everyone,0.0,,,0.0,,,,,,,True,77.4,123.0,0,6.0,233.0,a16691408683,x00000000,,1000,1000.0,31.0
1,16684277150,6.5km Easy RunüôÇ‚Äç‚ÜîÔ∏è,Easy z g√≥rkamiüóª\n\n6.5km easy run at a convers...,2025-12-08,13:45:47,2025-12-08 13:45:47,25.0,6565.0,2317,0:38:37,2355,0:39:15,62.0,4,8,0,1,0,False,False,everyone,2.833,5:53,5.883045,3.8,4:23,4.385965,168.6,351.5,536.0,347.0,True,142.8,155.0,3,25.0,510.0,a16684277150,g24134620,1203.0,1001,1000.0,0.0
2,16678081919,Drop Set Hill Repsüè°,Gorzowskie g√≥ry > Wroc≈Çawskie pag√≥rkiüôÇ‚Äç‚ÜîÔ∏è\n\n2...,2025-12-07,17:57:40,2025-12-07 17:57:40,51.0,9222.0,3286,0:54:46,3292,0:54:52,194.0,1,9,0,1,0,False,False,everyone,2.806,5:56,5.939653,4.5,3:42,3.703704,164.4,352.8,765.0,369.0,True,148.6,173.0,0,51.0,745.0,a16678081919,g24134620,1203.0,1001,1000.0,3.0
3,16663625820,24km Long Runüá™üá∏,Mini Walencja w domu:\n\n(Trochƒô wiƒôcej stania...,2025-12-06,08:19:02,2025-12-06 08:19:02,92.0,24159.5,8178,2:16:18,9074,2:31:14,61.0,15,8,0,1,0,False,False,everyone,2.954,5:39,5.642067,4.54,3:40,3.671072,171.6,344.4,507.0,327.0,True,143.5,155.0,7,92.0,1755.0,a16663625820,g23642256,1183.0,1001,1000.0,2.0
4,16648928719,12km Easy Runüòã,Lu≈∫ne klepanieü§ùüèª\n\n12km easy run at a convers...,2025-12-04,15:20:16,2025-12-04 15:20:16,43.0,12328.4,4326,1:12:06,4366,1:12:46,42.0,9,8,0,1,0,False,False,everyone,2.85,5:51,5.847953,3.48,4:47,4.789272,170.2,322.6,411.0,321.0,True,142.2,152.0,6,43.0,960.0,a16648928719,g24134620,1183.0,1001,1000.0,0.0


In [65]:
fact_activities_df_dtype_map = {
    "id": BigInteger,
    "name": String,
    "description": Text,
    "date": Date,
    "time": Time,
    "datetime": DateTime,
    "relative_effort": Float,
    "moving_time": Integer,
    "moving_time_td": Interval,
    "elapsed_time": Integer,
    "elapsed_time_td": Interval,
    "total_elevation_gain": Float,
    "achievement_count": Integer,
    "kudos_count": Integer,
    "comment_count": Integer,
    "athlete_count": Integer,
    "photo_count": Integer,
    "commute": Boolean,
    "manual": Boolean,
    "visibility": String,
    "average_speed": Float,
    "avg_pace_str": String,
    "avg_pace_float": Float,
    "max_speed": Float,
    "max_pace_str": String,
    "max_pace_float": Float,
    "average_cadence": Float,
    "average_watts": Float,
    "max_watts": Float,
    "weighted_average_watts": Float,
    "has_heartrate": Boolean,
    "average_heartrate": Float,
    "max_heartrate": Float,
    "pr_count": Integer,
    "suffer_score": Float,
    "calories": Float,
    "gear_id": String,
    "location_id": Integer,
    "sport_type_id": Integer,
    "device_id": Integer,
    "workout_type_id": Integer
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{FACT_ACTIVITIES_TABLE} will be overwritten.")

fact_activities_df.to_sql(
    name=FACT_ACTIVITIES_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=fact_activities_df_dtype_map,
    method="multi",
    chunksize=1000
)



-2

### Setup `gold.fact_segment_efforts`

In [66]:
fact_segment_efforts_df = seg_eff_df.copy()

In [67]:
fact_segment_efforts_df = fact_segment_efforts_df.drop(columns=['moving_time_td', 'elapsed_time_td'])
fact_segment_efforts_df.loc[:, "moving_time_td"]  = extract_timedelta(fact_segment_efforts_df["moving_time"])
fact_segment_efforts_df.loc[:, "elapsed_time_td"] = extract_timedelta(fact_segment_efforts_df["elapsed_time"])

In [68]:
fact_segment_efforts_df['date'] = fact_segment_efforts_df['start_date_local_dt'].dt.date
fact_segment_efforts_df['time'] = fact_segment_efforts_df['start_date_local_dt'].dt.time

In [69]:
fact_segment_efforts_cols_clean = [
  'id',
  'date',
  'time',
  'moving_time',
  'moving_time_td',
  'elapsed_time',
  'elapsed_time_td',
  'average_cadence',
  'device_watts',
  'average_watts',
  'average_heartrate',
  'max_heartrate',
  'pr_rank',
  'visibility',
  'kom_rank',
  'rank',
  'type',
  'segment_id',
  'activity_id'
]
fact_segment_efforts_df = fact_segment_efforts_df[fact_segment_efforts_cols_clean]

In [70]:
fact_segment_efforts_df.head()

Unnamed: 0,id,date,time,moving_time,moving_time_td,elapsed_time,elapsed_time_td,average_cadence,device_watts,average_watts,average_heartrate,max_heartrate,pr_rank,visibility,kom_rank,rank,type,segment_id,activity_id
0,3404548530909393958,2025-09-20,21:06:32,310,0:05:10,310,0:05:10,,False,,82.5,91.0,,followers_only,,,,11065825,15879687027
1,3404548530908871718,2025-09-20,21:13:13,519,0:08:39,519,0:08:39,,False,,87.5,99.0,,followers_only,,,,11740949,15879687027
2,3404548530910437414,2025-09-20,21:23:51,116,0:01:56,116,0:01:56,,False,,91.9,106.0,,followers_only,,,,11584293,15879687027
3,3404548530907957286,2025-09-20,21:26:00,360,0:06:00,376,0:06:16,,False,,89.8,101.0,,followers_only,,,,12380127,15879687027
4,3404548530908100646,2025-09-20,21:34:16,101,0:01:41,101,0:01:41,,False,,93.6,107.0,,followers_only,,,,7667882,15879687027


In [71]:
fact_segment_efforts_df_dtype_map = {
    "id": BigInteger,
    "date": Date,
    "time": Time,
    "moving_time": Integer,
    "moving_time_td": Interval,
    "elapsed_time": Integer,
    "elapsed_time_td": Interval,
    "average_cadence": Float,
    "device_watts": Boolean,
    "average_watts": Float,
    "average_heartrate": Float,
    "max_heartrate": Float,
    "pr_rank": Integer,
    "visibility": String,
    "kom_rank": Integer,
    "rank": Integer,
    "type": String,
    "activity_id": BigInteger,
    "segment_id": BigInteger
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{FACT_SEGMENTS_EFFORTS_TABLE} will be overwritten.")

fact_segment_efforts_df.to_sql(
    name=FACT_SEGMENTS_EFFORTS_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=fact_segment_efforts_df_dtype_map,
    method="multi",
    chunksize=1000
)



-7

### Setup `gold.fact_best_efforts`

In [72]:
fact_best_efforts_df = best_eff_df.copy()

In [73]:
fact_best_efforts_df = pd.merge(fact_best_efforts_df, dim_effort_type_df, how='left', on='name')
fact_best_efforts_df = fact_best_efforts_df.rename(columns={'id_y': 'effort_type_id', 'id_x': 'id'})

In [74]:
fact_best_efforts_df = fact_best_efforts_df.drop(columns=['moving_time_td', 'elapsed_time_td'])
fact_best_efforts_df.loc[:, "moving_time_td"]  = extract_timedelta(fact_best_efforts_df["moving_time"])
fact_best_efforts_df.loc[:, "elapsed_time_td"] = extract_timedelta(fact_best_efforts_df["elapsed_time"])

In [75]:
fact_best_efforts_df['date'] = fact_best_efforts_df['start_date_local_dt'].dt.date
fact_best_efforts_df['time'] = fact_best_efforts_df['start_date_local_dt'].dt.time

In [76]:
fact_best_efforts_cols_clean = [
  'id',
  'date',
  'time',
  'moving_time',
  'moving_time_td',
  'elapsed_time',
  'elapsed_time_td',
  'rank',
  'type',
  'effort_type_id',
  'activity_id'
]
fact_best_efforts_df = fact_best_efforts_df[fact_best_efforts_cols_clean]

In [77]:
fact_best_efforts_df.head()

Unnamed: 0,id,date,time,moving_time,moving_time_td,elapsed_time,elapsed_time_td,rank,type,effort_type_id,activity_id
0,31466450585,2021-01-19,10:05:55,142,0:02:22,142,0:02:22,,,1000,4961015103
1,31466450824,2021-01-19,10:03:43,286,0:04:46,286,0:04:46,,,1001,4961015103
2,31466451099,2021-01-19,10:02:40,367,0:06:07,367,0:06:07,,,1002,4961015103
3,31466451392,2021-01-19,10:03:21,629,0:10:29,629,0:10:29,,,1003,4961015103
4,31466450030,2021-01-19,10:03:34,1323,0:22:03,1323,0:22:03,,,1004,4961015103


In [78]:
fact_best_efforts_df_dtype_map = {
    "id": BigInteger,
    "date": Date,
    "time": Time,
    "moving_time": Integer,
    "moving_time_td": Interval,
    "elapsed_time": Integer,
    "elapsed_time_td": Interval,
    "rank": Integer,
    "type": String,
    "effort_type_id": BigInteger,
    "activity_id": BigInteger
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{FACT_BEST_EFFORTS_TABLE} will be overwritten.")

fact_best_efforts_df.to_sql(
    name=FACT_BEST_EFFORTS_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=fact_best_efforts_df_dtype_map,
    method="multi",
    chunksize=1000
)



-4

### Setup `gold.fact_kudos`

In [79]:
fact_kudos_df = kudos_df.copy()

In [80]:
fact_kudos_df.head()

Unnamed: 0,first_name,last_name,full_name,activity_id
0,Kacper,G.,Kacper G.,15716821076
1,Jan,K.,Jan K.,15716821076
2,Jacek,S.,Jacek S.,15716821076
3,Ola,≈Å.,Ola ≈Å.,15716821076
4,Kacper,K.,Kacper K.,15716821076


In [81]:
fact_kudos_df_dtype_map = {
    "first_name": String,
    "last_name": String,
    "full_name": String,
    "activity_id": BigInteger
}
with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{FACT_KUDOS_TABLE} will be overwritten.")

fact_kudos_df.to_sql(
    name=FACT_KUDOS_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=fact_kudos_df_dtype_map,
    method="multi",
    chunksize=1000
)



-9

### Setup `gold.fact_laps`

In [82]:
fact_laps_df = laps_df.copy()

In [83]:
fact_laps_df = fact_laps_df.drop(columns=['moving_time_td', 'elapsed_time_td'])
fact_laps_df.loc[:, "moving_time_td"]  = extract_timedelta(fact_laps_df["moving_time"])
fact_laps_df.loc[:, "elapsed_time_td"] = extract_timedelta(fact_laps_df["elapsed_time"])

In [84]:
fact_laps_df['date'] = fact_laps_df['start_date_local_dt'].dt.date
fact_laps_df['time'] = fact_laps_df['start_date_local_dt'].dt.time

In [85]:
fact_laps_cols_clean = [
  'id',
  'name',
  'lap_index',
  'split',
  'date',
  'time',
  'distance',
  'moving_time',
  'moving_time_td',
  'elapsed_time',
  'elapsed_time_td',
  'total_elevation_gain',
  'average_speed',
  'avg_pace_str',
  'avg_pace_float',
  'max_speed',
  'max_pace_str',
  'max_pace_float',
  'average_cadence',
  'average_heartrate',
  'max_heartrate',
  'activity_id'
]
fact_laps_df = fact_laps_df[fact_laps_cols_clean]

In [86]:
fact_laps_df.head()

Unnamed: 0,id,name,lap_index,split,date,time,distance,moving_time,moving_time_td,elapsed_time,elapsed_time_td,total_elevation_gain,average_speed,avg_pace_str,avg_pace_float,max_speed,max_pace_str,max_pace_float,average_cadence,average_heartrate,max_heartrate,activity_id
0,56505060057,Lap 1,1,1,2025-09-19,17:02:12,0.0,3673,1:01:13,3673,1:01:13,0.0,0.0,,,0.0,,,,101.1,153.0,15865360447
1,56559813306,Lap 1,1,1,2025-09-20,21:03:29,5000.0,1153,0:19:13,1153,0:19:13,12.4,4.34,,,5.98,,,,87.1,109.0,15879687027
2,56559813315,Lap 2,2,2,2025-09-20,21:22:44,5000.0,1189,0:19:49,1204,0:20:04,15.8,4.21,,,8.0,,,,92.9,116.0,15879687027
3,56559813324,Lap 3,3,3,2025-09-20,21:42:48,5000.0,1007,0:16:47,1041,0:17:21,31.2,4.97,,,9.98,,,,93.9,114.0,15879687027
4,56559813335,Lap 4,4,4,2025-09-20,22:00:10,4351.3,915,0:15:15,976,0:16:16,10.6,4.76,,,11.5,,,,91.8,134.0,15879687027


In [87]:
fact_laps_df_dtype_map = {
"id": BigInteger,
"name": String,
"lap_index": Integer,
"split": Integer,
"date": Date,
"time": Time,
"distance": Float,
"moving_time": Integer,
"moving_time_td": Interval,
"elapsed_time": Integer,
"elapsed_time_td": Interval,
"total_elevation_gain": Float,
"average_speed": Float,
"avg_pace_str": String,
"avg_pace_float": Float,
"max_speed": Float,
"max_pace_str": String,
"max_pace_float": Float,
"average_cadence": Float,
"average_heartrate": Float,
"max_heartrate": Float,
"activity_id": BigInteger
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{FACT_LAPS_TABLE} will be overwritten.")

fact_laps_df.to_sql(
    name=FACT_LAPS_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=fact_laps_df_dtype_map,
    method="multi",
    chunksize=1000
)




-9

### Setup `gold.fact_zones`

In [88]:
fact_zones_df = zones_df.copy()

In [89]:
fact_zones_cols_clean = [
    'activity_id',
    'type',
    'zone_number',
    'zone_name',
    'time',
    'min',
    'max'
]
fact_zones_df = fact_zones_df[fact_zones_cols_clean]

In [90]:
fact_zones_df.head()

Unnamed: 0,activity_id,type,zone_number,zone_name,time,min,max
0,15923268347,heartrate,1,Z1 - Recovery,73.0,0.0,133.0
1,15923268347,heartrate,2,Z2 - Endurance,1706.0,134.0,147.0
2,15923268347,heartrate,3,Z3 - Tempo,1290.0,148.0,160.0
3,15923268347,heartrate,4,Z4 - Threshold,0.0,161.0,166.0
4,15923268347,heartrate,5,Z5 - Anaerobic,0.0,167.0,-1.0


In [91]:
fact_zones_dtype_map = {
    "activity_id": BigInteger,
    "type": String,
    'zone_number': Integer,
    "zone_name": String,
    "time": Float,
    "min": Float,
    "max": Float
}

with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{FACT_ZONES_TABLE} will be overwritten.")

fact_zones_df.to_sql(
    name=FACT_ZONES_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=fact_zones_dtype_map,
    method="multi",
    chunksize=1000
)



-8

### Setup `gold.fact_maps`

In [92]:
fact_maps_df = maps_df.copy()

In [93]:
fact_maps_df = fact_maps_df.rename(columns={'id': 'map_id'})

In [94]:
fact_maps_df

Unnamed: 0,map_id,point_id,lat,lng
0,a15865360447,0,,
1,a15879687027,0,51.10732,17.12439
2,a15879687027,1,51.10729,17.12443
3,a15879687027,2,51.10717,17.12455
4,a15879687027,3,51.10692,17.12486
...,...,...,...,...
439698,s33691647,1312,50.04438,19.94368
439699,s33691647,1313,50.04427,19.94332
439700,s33691647,1314,50.04421,19.94292
439701,s33691647,1315,50.04416,19.94218


In [95]:
fact_maps_activitites_df = fact_maps_df[fact_maps_df["map_id"].str.startswith("a")]
fact_maps_activitites_df = fact_maps_activitites_df.reset_index(drop=True)
fact_maps_activitites_df

Unnamed: 0,map_id,point_id,lat,lng
0,a15865360447,0,,
1,a15879687027,0,51.10732,17.12439
2,a15879687027,1,51.10729,17.12443
3,a15879687027,2,51.10717,17.12455
4,a15879687027,3,51.10692,17.12486
...,...,...,...,...
400514,a16684277150,233,52.75619,15.22816
400515,a16684277150,234,52.75625,15.22838
400516,a16684277150,235,52.75633,15.22883
400517,a16691408683,0,,


In [96]:
fact_maps_segments_df = fact_maps_df[fact_maps_df["map_id"].str.startswith("s")]
fact_maps_segments_df = fact_maps_segments_df.reset_index(drop=True)
fact_maps_segments_df

Unnamed: 0,map_id,point_id,lat,lng
0,s19517444,1,54.40947,18.63622
1,s19517444,2,54.40966,18.63558
2,s19517444,3,54.40978,18.63502
3,s19517444,4,54.40991,18.63463
4,s19517444,5,54.41010,18.63393
...,...,...,...,...
39179,s33691647,1312,50.04438,19.94368
39180,s33691647,1313,50.04427,19.94332
39181,s33691647,1314,50.04421,19.94292
39182,s33691647,1315,50.04416,19.94218


In [97]:
fact_maps_dtype_map = {
    "map_id": String,
    "point_id": Integer,
    'lat': Float,
    "lng": Float
}

In [98]:
with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{FACT_MAPS_ACT_TABLE} will be overwritten.")

fact_maps_activitites_df.to_sql(
    name=FACT_MAPS_ACT_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=fact_maps_dtype_map,
    method="multi",
    chunksize=1000
)



-401

In [99]:
with engine.begin() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};"))

logging.warning(f"Whole table {TARGET_G_SCHEMA}.{FACT_MAPS_SEG_TABLE} will be overwritten.")

fact_maps_segments_df.to_sql(
    name=FACT_MAPS_SEG_TABLE,
    schema=TARGET_G_SCHEMA,
    con=engine,
    if_exists="replace",
    index=False,
    dtype=fact_maps_dtype_map,
    method="multi",
    chunksize=1000
)



-40

### Primary keys definition

In [100]:
keys_instructions= [
    f"""CREATE SCHEMA IF NOT EXISTS {TARGET_G_SCHEMA};""",
    # ********** PRIMARY KEYS **********
    # --- dim_calendar ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{DIM_CALENDAR_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{DIM_CALENDAR_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{DIM_CALENDAR_TABLE}
          ADD CONSTRAINT {DIM_CALENDAR_TABLE}_pkey PRIMARY KEY (date);
      END IF;
    END $$;
    """,
    # --- dim_device ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{DIM_DEVICE_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{DIM_DEVICE_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{DIM_DEVICE_TABLE}
          ADD CONSTRAINT {DIM_DEVICE_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- dim_effort_type ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{DIM_EFFORT_TYPE_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{DIM_EFFORT_TYPE_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{DIM_EFFORT_TYPE_TABLE}
          ADD CONSTRAINT {DIM_EFFORT_TYPE_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- dim_gear ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{DIM_GEAR_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{DIM_GEAR_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{DIM_GEAR_TABLE}
          ADD CONSTRAINT {DIM_GEAR_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- dim_location ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{DIM_LOCATION_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{DIM_LOCATION_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{DIM_LOCATION_TABLE}
          ADD CONSTRAINT {DIM_LOCATION_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- dim_segment ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{DIM_SEGMENT_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{DIM_SEGMENT_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{DIM_SEGMENT_TABLE}
          ADD CONSTRAINT {DIM_SEGMENT_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- dim_sport_type ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{DIM_SPORT_TYPE_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{DIM_SPORT_TYPE_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{DIM_SPORT_TYPE_TABLE}
          ADD CONSTRAINT {DIM_SPORT_TYPE_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- dim_time ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{DIM_TIME_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{DIM_TIME_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{DIM_TIME_TABLE}
          ADD CONSTRAINT {DIM_TIME_TABLE}_pkey PRIMARY KEY (time);
      END IF;
    END $$;
    """,
    # --- dim_workout_type ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{DIM_WORKOUT_TYPE_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{DIM_WORKOUT_TYPE_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{DIM_WORKOUT_TYPE_TABLE}
          ADD CONSTRAINT {DIM_WORKOUT_TYPE_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- fact_activities ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{FACT_ACTIVITIES_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{FACT_ACTIVITIES_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{FACT_ACTIVITIES_TABLE}
          ADD CONSTRAINT {FACT_ACTIVITIES_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- fact_best_efforts ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{FACT_BEST_EFFORTS_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{FACT_BEST_EFFORTS_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{FACT_BEST_EFFORTS_TABLE}
          ADD CONSTRAINT {FACT_BEST_EFFORTS_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """,
    # --- fact_laps ---
    # PK
    f"""
    DO $$
    BEGIN
      IF to_regclass('{TARGET_G_SCHEMA}.{FACT_LAPS_TABLE}') IS NOT NULL
         AND NOT EXISTS (
           SELECT 1 FROM pg_constraint
           WHERE conrelid = to_regclass('{TARGET_G_SCHEMA}.{FACT_LAPS_TABLE}')
             AND contype = 'p'
         )
      THEN
        ALTER TABLE {TARGET_G_SCHEMA}.{FACT_LAPS_TABLE}
          ADD CONSTRAINT {FACT_LAPS_TABLE}_pkey PRIMARY KEY (id);
      END IF;
    END $$;
    """
]

In [101]:
with engine.begin() as conn:
    for sql in keys_instructions:
        conn.execute(text(sql))