In [0]:
import dlt
import json
from pyspark.sql.functions import col

# Select the catalog and schema that subsequent unqualified names resolve against.
for _use_statement in ("USE CATALOG principal_lab_db", "USE SCHEMA dev_silver"):
    spark.sql(_use_statement)

# Load the metadata row for this table from the lookup table.
config = (
    spark.table("principal_lab_db.config_dev.table_lookup")
    .filter(col("table_name") == "dim_policies")
    .select("keys", "scd_type", "description")
    .first()
)

# .first() yields None when no row matches the filter. Fail here with a clear
# message instead of an opaque "'NoneType' object is not subscriptable" below.
if config is None:
    raise ValueError(
        "No entry for table 'dim_policies' found in "
        "principal_lab_db.config_dev.table_lookup"
    )

# Unpack the metadata fields.
keys_raw = config["keys"]
scd_type_raw = config["scd_type"]
description = config["description"]

# If 'keys' is stored as a JSON-encoded string, decode it; otherwise use the
# value exactly as Spark returned it.
if isinstance(keys_raw, str):
    business_keys = json.loads(keys_raw)
else:
    business_keys = keys_raw

# Map the configured SCD type to the integer used by the branch below.
# Unrecognised values map to 0.
scd_type_map = {
    "SCD1": 1,
    "SCD2": 2,
    "1": 1,
    "2": 2,
    None: 0,
    "": 0
}
# Normalise before the lookup. None is looked up as-is: str(None) would give
# "NONE" and bypass the explicit None entry above. Surrounding whitespace is
# stripped so a padded value such as "scd2 " is still recognised.
if scd_type_raw is None:
    scd_type_key = None
else:
    scd_type_key = str(scd_type_raw).strip().upper()
scd_type = scd_type_map.get(scd_type_key, 0)

print(f"Business Keys: {business_keys}")
print(f"SCD Type: {scd_type}")

# View defined ahead of the SCD2 computation; it streams from the bronze
# policies table and is registered under the name "policies_bronze_clean".
@dlt.view(name="policies_bronze_clean")
def policies_bronze_clean():
    # Documented with comments rather than a docstring so the function's
    # __doc__ stays unchanged.
    stream_reader = spark.readStream
    bronze_stream = stream_reader.table("principal_lab_db.dev_bronze.policies_bronze")
    return bronze_stream

# Create the Silver table as SCD type 2. Any other resolved type is rejected
# up front, so the table definitions below run only for SCD2.
if scd_type != 2:
    raise ValueError(f"Unsupported or missing SCD type: {scd_type_raw}")

# Declare the streaming target table, using the description from the lookup
# row as its comment.
dlt.create_streaming_table(
    name="dim_policies",
    comment=description,
    table_properties={"quality": "silver"},
)

# Feed changes from the bronze view into the target, keyed by the configured
# business keys and sequenced by snapshot_date. The columns in
# track_history_except_column_list are excluded from history tracking.
dlt.apply_changes(
    target="dim_policies",
    source="policies_bronze_clean",
    keys=business_keys,
    sequence_by=col("snapshot_date"),
    ignore_null_updates=False,
    stored_as_scd_type="2",
    track_history_except_column_list=["ingestion_ts", "source_file", "snapshot_date"],
)
