In [0]:
%run ../_config

In [0]:
# Read the Silver clickstream table from Unity Catalog; every Gold table in
# this notebook is derived from this DataFrame.
# `catalog` is not defined in this notebook's visible cells — presumably it is
# supplied by the `%run ../_config` cell above (verify).
silver_table_name = f"{catalog}.clickstream_silver.silver_clickstream"
silver_df = spark.read.table(silver_table_name)

In [0]:
from pyspark.sql.functions import col, count, countDistinct, hour, dayofweek, date_format

# 1. DIM_DATE
# One row per distinct calendar date observed in the Silver events, enriched
# with display names and a weekend flag.
#
# Rows whose event_timestamp is NULL (or cannot be cast) yield a NULL date_key.
# They are dropped before de-duplication: date_key is altered to NOT NULL
# further down in this notebook, and that ALTER fails if the freshly written
# table already contains a NULL in the column.
dim_date = (silver_df
    .selectExpr("CAST(event_timestamp AS DATE) AS date_key")
    .where(col("date_key").isNotNull())
    .distinct()
    .withColumn("day_name", date_format(col("date_key"), "EEEE"))    # full weekday name
    .withColumn("month_name", date_format(col("date_key"), "MMMM"))  # full month name
    .withColumn("day_of_week", dayofweek(col("date_key")))           # Spark: 1 = Sunday ... 7 = Saturday
    .withColumn("is_weekend", col("day_of_week").isin(1, 7))         # Sunday or Saturday
)
# Full refresh: replace both the data and the schema of the Gold table.
dim_date.write.mode("overwrite").option("overwriteSchema", "true").saveAsTable(f"{catalog}.clickstream_gold.dim_date")

# 2. DIM_USERS
# One row per hashed user id.
#
# Events without a user id (NULL user_id_hashed) are excluded: dropDuplicates
# would otherwise keep a single NULL-keyed row, and the later
# ALTER COLUMN user_id_hashed SET NOT NULL would then fail on the written table.
#
# NOTE(review): dropDuplicates on a key subset keeps an arbitrary row per key,
# so if one user_id_hashed appears with several email_hashed values, which one
# lands in the dimension is not deterministic — confirm Silver guarantees a
# single email per user or pick one explicitly.
dim_users = (silver_df
    .select("user_id_hashed", "email_hashed")
    .where(col("user_id_hashed").isNotNull())
    .dropDuplicates(["user_id_hashed"])
)
# Full refresh: replace both the data and the schema of the Gold table.
dim_users.write.mode("overwrite").option("overwriteSchema", "true").saveAsTable(f"{catalog}.clickstream_gold.dim_users")

# 3. DIM_GEO
# Every distinct (city, country) combination seen in the Silver events.
geo_pairs = silver_df.select("city", "country")
dim_geo = geo_pairs.distinct()

# Full refresh: replace both the data and the schema of the Gold table.
(
    dim_geo.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{catalog}.clickstream_gold.dim_geo")
)

# 4. FACT_WEB_EVENTS
# One row per Silver event, projected down to the event id, the dimension
# keys (date, user, city/country) and the page / traffic-source attributes.
# NOTE(review): event_id is altered to NOT NULL later in this notebook, which
# presumes Silver never carries a NULL event_id — confirm upstream.
fact_column_exprs = [
    "event_id",
    "CAST(event_timestamp AS DATE) AS date_key",
    "user_id_hashed",
    "city",
    "country",
    "clean_page_url",
    "utm_source",
]
fact_web_events = silver_df.selectExpr(*fact_column_exprs)

# Full refresh: replace both the data and the schema of the Gold table.
(
    fact_web_events.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{catalog}.clickstream_gold.fact_web_events")
)

# 5. FIX NULLABILITY (The "Secret Sauce")
# Mark the key column of each Gold table as NOT NULL in the catalog metadata.
# The closing message indicates this is a prerequisite for the ADD CONSTRAINT
# commands that are run afterwards.
not_null_key_columns = [
    ("dim_date", "date_key"),
    ("dim_users", "user_id_hashed"),
    ("fact_web_events", "event_id"),
]
for gold_table, key_column in not_null_key_columns:
    spark.sql(f"ALTER TABLE {catalog}.clickstream_gold.{gold_table} ALTER COLUMN {key_column} SET NOT NULL")

print("Tables created and nullability fixed. You can now run your ADD CONSTRAINT commands!")