### Import and config

In [1]:
# Imports
import os
import logging
from datetime import timezone, timedelta

from dotenv import load_dotenv

import pandas as pd
import numpy as np

from sqlalchemy import create_engine, text, Integer, Float, String, Boolean, DateTime, Interval, Text, BigInteger


# Configuration
load_dotenv()

# DB
DB_URI = os.getenv('DB_URI')

# Silver tables
TARGET_S_SCHEMA = os.getenv('TARGET_S_SCHEMA')
ACTIVITIES_S_TABLE = os.getenv('ACTIVITIES_S_TABLE')

# Gold tables
TARGET_G_SCHEMA = os.getenv('TARGET_S_SCHEMA')
DIM_CALENDAR_TABLE = os.getenv('DIM_CALENDAR_TABLE')

# Other
LOG_LEVEL = os.getenv('LOG_LEVEL')

logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
    format="%(asctime)s | %(levelname)s | %(message)s"
)

pd.set_option('display.max_columns', None)

### DB names validation

In [2]:
REQUIRED_DB_ENV = ['DB_URI', 'TARGET_S_SCHEMA','ACTIVITIES_S_TABLE', 'TARGET_S_SCHEMA', 'DIM_CALENDAR_TABLE']
missing_db_env = [env for env in REQUIRED_DB_ENV if not os.getenv(env)]
if missing_db_env:
  raise RuntimeError(f"Missing env variables: {', '.join(missing_db_env)}.")

### Request data from `silver` layer

In [3]:
engine = create_engine(
  DB_URI, 
  pool_pre_ping=True, 
  pool_size=5, 
  max_overflow=10
)
logging.info("Connection established")

2025-09-19 12:17:35,980 | INFO | Connection established


In [4]:
with engine.begin() as conn:
  activities_df = pd.read_sql(text(f"SELECT * FROM {TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE}"), conn)
logging.info(f"Data from {TARGET_S_SCHEMA}.{ACTIVITIES_S_TABLE} downloaded.")

2025-09-19 12:17:36,267 | INFO | Data from silver.activities downloaded.


In [5]:
activities_df.head()

Unnamed: 0,id,name,start_date_utc_dt,start_date_local_dt,local_timezone,distance,moving_time,moving_time_td,elapsed_time,elapsed_time_td,total_elevation_gain,elev_low,elev_high,type,sport_type,workout_type,achievement_count,kudos_count,comment_count,athlete_count,photo_count,trainer,commute,manual,visibility,average_speed,avg_pace_str,avg_pace_float,max_speed,max_pace_str,max_pace_float,average_cadence,average_watts,max_watts,weighted_average_watts,has_heartrate,average_heartrate,max_heartrate,pr_count,total_photo_count,suffer_score,description,calories,device_name,start_lat,start_lng,map_id,gear_id,location_id
0,15855640218,K200süèéÔ∏è,2025-09-18 17:30:30,2025-09-18 19:30:30,Etc/GMT-2.0,9915.6,3272,0 days 00:54:32,3314,0 days 00:55:14,10.0,115.6,122.8,Run,Run,3.0,0,9,0,1,0,False,False,False,everyone,3.03,5:30,5.50055,4.82,3:27,3.457815,164.4,365.2,546.0,382.0,True,151.7,176.0,0,0,60.0,K200s with Runna ‚úÖ\n\nKilometr√≥wki z dwusetkam...,765.0,Garmin Forerunner 970,51.107177,17.123797,a15855640218,g24134620,1179.0
1,15843349072,9km Easy RunüëΩ,2025-09-17 16:08:34,2025-09-17 18:08:34,Etc/GMT-2.0,9051.8,3095,0 days 00:51:35,3142,0 days 00:52:22,16.0,114.8,123.0,Run,Run,,0,6,0,1,0,False,False,False,everyone,2.925,5:42,5.698006,4.12,4:03,4.045307,169.4,367.5,513.0,364.0,True,145.1,152.0,0,0,41.0,9km Easy Run with Runna ‚úÖ\n\nWyjƒÖtkowo ≈ºwawe e...,699.0,Garmin Forerunner 970,51.107162,17.123739,a15843349072,g24134620,1179.0
2,15831049874,Afternoon Weight Training,2025-09-16 15:01:07,2025-09-16 17:01:07,Etc/GMT-2.0,0.0,3825,0 days 01:03:45,3825,0 days 01:03:45,0.0,0.0,0.0,Workout,WeightTraining,,0,5,1,1,0,True,False,False,followers_only,0.0,,,0.0,,,,,,,True,94.0,222.0,0,0,9.0,Reska8Ô∏è‚É£8Ô∏è‚É£\nBench press PR: 85kgü•≥,254.0,Garmin Forerunner 970,,,a15831049874,,
3,15820198827,Tempo 2kmü•µ,2025-09-15 16:23:21,2025-09-15 18:23:21,Etc/GMT-2.0,9521.9,3241,0 days 00:54:01,3241,0 days 00:54:01,13.0,115.4,125.0,Run,Run,3.0,0,8,0,1,0,False,False,False,everyone,2.938,5:40,5.672793,4.94,3:22,3.373819,167.8,353.5,493.0,369.0,True,153.1,178.0,0,0,66.0,Tempo 2km Repeats with Runna ‚úÖ\n\nWysz≈Ço troch...,735.0,Garmin Forerunner 970,51.107301,17.124098,a15820198827,g24134620,1179.0
4,15805849875,15km Long Run‚òîÔ∏è,2025-09-14 09:59:25,2025-09-14 11:59:25,Etc/GMT-2.0,15059.0,5461,0 days 01:31:01,5488,0 days 01:31:28,31.0,114.6,125.4,Run,Run,2.0,0,4,0,1,0,False,False,False,everyone,2.758,6:03,6.043026,3.44,4:51,4.844961,172.0,335.5,455.0,334.0,True,144.8,153.0,0,0,66.0,15km Long Run with Runna ‚úÖ\n\nOkrutny beton po...,1170.0,Garmin Forerunner 970,51.107336,17.124136,a15805849875,g24134620,1179.0


### Setup `gold.dim_calendar`

In [153]:
activities_df['start_date_local_dt'].dt.date.min()

datetime.date(2016, 7, 11)

In [159]:
dim_calendar_df = pd.DataFrame(
  {'date_key': pd.date_range(
    activities_df['start_date_local_dt'].dt.date.min(), 
    activities_df['start_date_local_dt'].dt.date.max(), 
    freq="D")}
)
dim_calendar_df = dim_calendar_df.sort_values(by='date_key', ascending=False).reset_index(drop=True)

In [161]:
#Year
dim_calendar_df['year'] = dim_calendar_df['date_key'].dt.year
dim_calendar_df['year_start_date'] = dim_calendar_df['date_key'].dt.to_period('Y').dt.start_time

# Month
dim_calendar_df['month'] = dim_calendar_df['date_key'].dt.month
dim_calendar_df['month_year'] = dim_calendar_df['date_key'].dt.to_period('M')
dim_calendar_df['month_start_date'] = dim_calendar_df['date_key'].dt.to_period('M').dt.start_time
dim_calendar_df['month_name_year'] = dim_calendar_df['date_key'].dt.strftime('%b-%Y')
dim_calendar_df['month_name'] = dim_calendar_df['date_key'].dt.strftime('%B') 

# Week
dim_calendar_df['week'] = dim_calendar_df['date_key'].dt.isocalendar().week
dim_calendar_df['week_start_date'] = dim_calendar_df['date_key'].dt.to_period('W-MON').dt.start_time

# Day
dim_calendar_df['day'] = dim_calendar_df['date_key'].dt.day
dim_calendar_df['day_of_year'] = dim_calendar_df['date_key'].dt.day_of_year
dim_calendar_df['day_of_week'] = dim_calendar_df['date_key'].dt.weekday + 1
dim_calendar_df['day_of_week_name'] = dim_calendar_df['date_key'].dt.day_name()
dim_calendar_df['is_weekend'] = dim_calendar_df['date_key'].dt.weekday + 1 >= 6

dim_calendar_df

Unnamed: 0,date_key,year,year_start_date,month,month_year,month_start_date,month_name_year,month_name,week,week_start_date,day,day_of_year,day_of_week,day_of_week_name,is_weekend
0,2025-09-18,2025,2025-01-01,9,2025-09,2025-09-01,Sep-2025,September,38,2025-09-16,18,261,4,Thursday,False
1,2025-09-17,2025,2025-01-01,9,2025-09,2025-09-01,Sep-2025,September,38,2025-09-16,17,260,3,Wednesday,False
2,2025-09-16,2025,2025-01-01,9,2025-09,2025-09-01,Sep-2025,September,38,2025-09-16,16,259,2,Tuesday,False
3,2025-09-15,2025,2025-01-01,9,2025-09,2025-09-01,Sep-2025,September,38,2025-09-09,15,258,1,Monday,False
4,2025-09-14,2025,2025-01-01,9,2025-09,2025-09-01,Sep-2025,September,37,2025-09-09,14,257,7,Sunday,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3352,2016-07-15,2016,2016-01-01,7,2016-07,2016-07-01,Jul-2016,July,28,2016-07-12,15,197,5,Friday,False
3353,2016-07-14,2016,2016-01-01,7,2016-07,2016-07-01,Jul-2016,July,28,2016-07-12,14,196,4,Thursday,False
3354,2016-07-13,2016,2016-01-01,7,2016-07,2016-07-01,Jul-2016,July,28,2016-07-12,13,195,3,Wednesday,False
3355,2016-07-12,2016,2016-01-01,7,2016-07,2016-07-01,Jul-2016,July,28,2016-07-12,12,194,2,Tuesday,False
