In [1]:
# ============================================
# nb_config – Central Configuration Notebook
# ============================================

# Block 1a: Parameters & Constants
# Set the Spark SQL session time zone (config key "spark.sql.session.timeZone")
# to Europe/Amsterdam.
# NOTE(review): `spark` is not defined in this notebook; presumably it is the
# session object injected by the notebook runtime — confirm.
spark.conf.set("spark.sql.session.timeZone", "Europe/Amsterdam")
# Load-mode flag. It is not read anywhere in this notebook.
# NOTE(review): presumably consumed by the notebooks that run this config to
# choose between a full and an incremental load — confirm.
FULL_LOAD = False

# Dataset inclusion switches: a dataset whose switch is False is left out of
# `source_configs`, which is built at the end of this cell.
is_included_energy = True
is_included_weather = True
is_included_air_quality = True

# Sub-folder appended to the bronze and silver roots further down.
_SOURCE = "smart_city"
# NOTE(review): presumably the format of the files read from bronze — confirm.
FILE_FORMAT = "json"

# Names of the technical columns. The per-dataset configs refer to these
# constants instead of repeating the literals, so a rename happens in one place.
PRIM_COL_NAME = "_id"
SOURCE_COL_NAME = "_source"
IMPORTDATE_COL_NAME = "_datetime_import"
_META_COLS = (PRIM_COL_NAME, SOURCE_COL_NAME, IMPORTDATE_COL_NAME)

# Lakehouse "Files" root, defined once; every layer path is this root plus
# the layer folder name.
_LAKEHOUSE_FILES = (
    "abfss://ce7fad14-2d0e-40e4-8c22-a560e4ffafd3"
    "@onelake.dfs.fabric.microsoft.com/"
    "880a2fb2-8672-4c97-a3a0-56f067dc2207/Files"
)
PATH_BRONZE = f"{_LAKEHOUSE_FILES}/bronze"
PATH_SILVER = f"{_LAKEHOUSE_FILES}/silver"
PATH_GOLD = f"{_LAKEHOUSE_FILES}/gold"

# Derived locations: source data under bronze, destination under silver.
# Both keep a trailing slash.
LKHS_PATH_SRC = f"{PATH_BRONZE}/{_SOURCE}/"
LKHS_PATH_DES = f"{PATH_SILVER}/{_SOURCE}/"

# Per-dataset configuration. Every dataset uses the same keys:
#   source             -> {"src_directory": ...}
#   simplify_structure -> ordered list of steps, each a dict with a "method"
#   columns_to_select  -> technical columns followed by the dataset's own
#   primary_col        -> primary column name, "lokaal_id_cols", "objecttype"
#   sink               -> {"sink_directory": ...}
#   included           -> the matching inclusion switch
# NOTE(review): how each key is interpreted is decided by the consuming
# notebooks, which are not visible here — confirm before changing the shape.
energy_config = {
    "source": {"src_directory": "energy_prices"},
    "simplify_structure": [
        {"method": "explode", "col": "Prices"},
        {"method": "flatten", "col": "Prices"},
    ],
    "columns_to_select": [*_META_COLS, "time", "price"],
    "primary_col": {
        "primary_col_name": PRIM_COL_NAME,
        "lokaal_id_cols": ["time", "price"],
        "objecttype": "energy_price",
    },
    "sink": {"sink_directory": "energy_prices"},
    "included": is_included_energy,
}

weather_config = {
    "source": {"src_directory": "weather"},
    "simplify_structure": [
        {
            "method": "zip_explode",
            "parent": "hourly",
            "children": [
                "time",
                "temperature_2m",
                "wind_speed_10m",
                "direct_radiation",
            ],
        },
    ],
    "columns_to_select": [
        *_META_COLS,
        "time",
        "temperature_2m",
        "wind_speed_10m",
        "direct_radiation",
    ],
    "primary_col": {
        "primary_col_name": PRIM_COL_NAME,
        "lokaal_id_cols": ["time"],
        "objecttype": "weather_forecast",
    },
    "sink": {"sink_directory": "weather"},
    "included": is_included_weather,
}

air_quality_config = {
    "source": {"src_directory": "air_quality"},
    "simplify_structure": [
        {
            "method": "zip_explode",
            "parent": "hourly",
            "children": ["time", "carbon_monoxide", "nitrogen_dioxide", "pm10"],
        },
    ],
    "columns_to_select": [
        *_META_COLS,
        "time",
        "carbon_monoxide",
        "nitrogen_dioxide",
        "pm10",
    ],
    "primary_col": {
        "primary_col_name": PRIM_COL_NAME,
        "lokaal_id_cols": ["time"],
        "objecttype": "air_quality",
    },
    "sink": {"sink_directory": "air_quality"},
    "included": is_included_air_quality,
}

# Registry of every known dataset, keyed by config name.
all_source_configs = {
    "energy_config": energy_config,
    "weather_config": weather_config,
    "air_quality_config": air_quality_config,
}

# Keep only the datasets that are switched on; a config without an
# "included" key is treated as excluded.
source_configs = {
    name: cfg
    for name, cfg in all_source_configs.items()
    if cfg.get("included", False)
}

print("Config Loaded Successfully")



StatementMeta(, 3232fcf1-7dbb-4bc5-8289-905892c6167c, 3, Finished, Available, Finished)

Config Loaded Successfully
