In [0]:
# Build dim_date from daily, monthly, and quarterly datasets

from pyspark.sql.functions import (
    year, month, dayofmonth, dayofweek, weekofyear,
    quarter, dayofyear, date_format
)
from pyspark.sql import SparkSession
from datetime import datetime
import time
from pyspark.sql.types import StructType, StructField, StringType, TimestampType, LongType

# Wall-clock start of the job; the metadata-logging section further down
# subtracts this from its own timestamp to obtain the run duration.
start_time = time.time()

# === Load unique dates from all frequencies ===
# Only the "date" column is read from each source Delta table.
df_daily = spark.read.format("delta").load("/mnt/fred/daily").select("date")
df_monthly = spark.read.format("delta").load("/mnt/fred/monthly").select("date")
df_quarterly = spark.read.format("delta").load("/mnt/fred/quarterly").select("date")

# union() matches columns by position, which is safe here because every input
# has exactly one column. Rows with a NULL date are dropped before
# de-duplication: they would otherwise become a dimension row whose calendar
# attributes are all NULL and which no equality join on "date" can match.
dim_date = (
    df_daily.union(df_monthly)
    .union(df_quarterly)
    .dropna(subset=["date"])
    .distinct()
)

# === Add calendar attributes ===
# Every attribute is derived from the "date" column alone. A single projection
# keeps the existing columns ("*") and appends the derived ones in a fixed
# order, instead of stacking one projection per column.
dim_date = dim_date.select(
    "*",
    year("date").alias("year"),
    quarter("date").alias("quarter"),
    month("date").alias("month"),
    dayofmonth("date").alias("day"),
    # Spark's dayofweek numbers days 1 (Sunday) through 7 (Saturday).
    dayofweek("date").alias("day_of_week"),
    weekofyear("date").alias("week_of_year"),
    dayofyear("date").alias("day_of_year"),
    # Weekend = Sunday (1) or Saturday (7) under the numbering above.
    dayofweek("date").isin([1, 7]).cast("boolean").alias("is_weekend"),
    # "MMMM" / "EEEE" are the full-text month and weekday patterns.
    date_format("date", "MMMM").alias("month_name"),
    date_format("date", "EEEE").alias("day_name"),
    (dayofmonth("date") == 1).cast("boolean").alias("is_start_of_month"),
)

# === Save as Delta Table ===
# Session-level Delta setting enabling automatic schema merging; note that it
# stays in effect for the rest of the Spark session, not just this write.
spark.conf.set("spark.databricks.delta.schema.autoMerge.enabled", "true")

# Replace the contents of the dimension table at its storage path.
(
    dim_date.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .save("/mnt/fred/dim_date")
)

# === Register in metastore ===
# Drop any previous registration, then point the table name at the Delta
# location written above. The two statements run in this exact order.
for statement in (
    "DROP TABLE IF EXISTS dim_date",
    "CREATE TABLE dim_date USING DELTA LOCATION '/mnt/fred/dim_date'",
):
    spark.sql(statement)

# === Metadata Logging ===
# Appends one row describing this run to the shared job-metadata Delta table
# and (re)registers that location in the metastore.
from datetime import timezone  # local import: only this section needs it

# Duration covers everything since start_time (build, write, registration);
# it is taken before the row count below, so the count is not included.
end_time = time.time()
duration = int(end_time - start_time)

# Count the rows that were actually persisted. Calling dim_date.count() here
# would re-execute the whole lineage (three source reads, union, distinct) and
# could disagree with the written table if a source changed in the meantime.
row_count = spark.read.format("delta").load("/mnt/fred/dim_date").count()

log_data = [(
    "dim_date_builder",
    "dim_date",
    # Timezone-aware UTC instant. A naive datetime (as returned by the
    # deprecated datetime.utcnow()) is interpreted by PySpark in the driver's
    # local timezone, which shifts run_date on non-UTC drivers.
    datetime.now(timezone.utc),
    row_count,
    "success",
    duration,
    "utility"
)]

# Explicit schema so the column types do not depend on inference from the
# single Python tuple above.
log_schema = StructType([
    StructField("job_name", StringType(), True),
    StructField("table_name", StringType(), True),
    StructField("run_date", TimestampType(), True),
    StructField("row_count", LongType(), True),
    StructField("status", StringType(), True),
    StructField("duration_sec", LongType(), True),
    StructField("frequency", StringType(), True)
])

log_df = spark.createDataFrame(log_data, schema=log_schema)

# Append-only log, partitioned by the "frequency" column.
log_df.write \
    .format("delta") \
    .mode("append") \
    .partitionBy("frequency") \
    .save("/mnt/fred/logs/job_metadata")

# Register log for Fabric
spark.sql("DROP TABLE IF EXISTS log_dim_date")
spark.sql("CREATE TABLE log_dim_date USING DELTA LOCATION '/mnt/fred/logs/job_metadata'")