In [0]:
import os

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, expr, lit, sequence
from pyspark.sql.types import DateType

# Initialize Spark session
spark = SparkSession.builder.appName("DateDimensionLoad").getOrCreate()

# Azure Storage connection details
storage_account_name = "cdmo"
# Prefer a key injected through the environment (or a secret store) over a
# literal saved with the notebook. The placeholder is kept only as a fallback
# so the variable is always defined, exactly as before.
storage_account_key = os.environ.get("AZURE_STORAGE_KEY", "XXXXXXXXXXXXX")

# Register the account key for the ADLS Gen2 (dfs) endpoint: the abfss:// path
# defined below resolves against dfs.core.windows.net, so a key registered only
# under the blob endpoint is not the one the ABFS driver looks up.
spark.conf.set(f"fs.azure.account.key.{storage_account_name}.dfs.core.windows.net", storage_account_key)
# Keep the original blob-endpoint registration so any wasb(s):// access that
# relied on it continues to work.
spark.conf.set(f"fs.azure.account.key.{storage_account_name}.blob.core.windows.net", storage_account_key)

# Define the Gold layer container path
gold_layer_path = f"abfss://03-gold@{storage_account_name}.dfs.core.windows.net/date_dimension"

# Create a function to generate the date dimension DataFrame
def create_date_dimension(start_date, end_date):
    """Build a date dimension with one row per calendar day.

    Uses the module-level ``spark`` session. The two bounds are interpolated
    into the SQL text as string literals, so they must be trusted values.

    Args:
        start_date: First day of the range (inclusive), as a string that
            Spark's ``to_date`` can parse, e.g. ``"2000-01-01"``.
        end_date: Last day of the range (inclusive), in the same format.

    Returns:
        A Spark DataFrame with a ``date`` column and the derived columns
        ``Year``, ``Month``, ``Day``, ``DayOfWeek``, ``Quarter``,
        ``WeekOfYear``, ``IsWeekend``, ``IsHoliday``, ``MonthName``,
        ``DayOfMonth`` and ``DayOfYear``.
    """
    # Generate a sequence of dates between start_date and end_date
    date_df = spark.sql(f"""
        SELECT explode(sequence(to_date('{start_date}'), to_date('{end_date}'), interval 1 day)) as date
    """)

    # Add additional date-related columns
    date_dimension_df = (
        date_df.withColumn("Year", expr("year(date)"))
               .withColumn("Month", expr("month(date)"))
               .withColumn("Day", expr("day(date)"))
               # Full weekday name, e.g. 'Monday'
               .withColumn("DayOfWeek", expr("date_format(date, 'EEEE')"))
               .withColumn("Quarter", expr("quarter(date)"))
               .withColumn("WeekOfYear", expr("weekofyear(date)"))
               .withColumn("IsWeekend", expr("case when date_format(date, 'EEEE') IN ('Saturday', 'Sunday') then true else false end"))
               # `lit` must be imported from pyspark.sql.functions; without it
               # this line raises NameError the first time the function runs.
               .withColumn("IsHoliday", lit(False))  # Placeholder, update for specific holidays
               # Full month name, e.g. 'January'
               .withColumn("MonthName", expr("date_format(date, 'MMMM')"))
               .withColumn("DayOfMonth", expr("dayofmonth(date)"))
               .withColumn("DayOfYear", expr("dayofyear(date)"))
    )
    return date_dimension_df

# Define start and end dates
start_date = "2000-01-01"  # Replace with your desired start date
end_date = "2099-12-31"    # Replace with your desired end date

# Create the date dimension DataFrame
date_dimension_df = create_date_dimension(start_date, end_date)

# Write the DataFrame to the Delta table in the Gold layer
try:
    date_dimension_df.write.format("delta").mode("overwrite").save(gold_layer_path)
except Exception as e:
    print(f"Error writing to Gold layer: {e}")
    # Re-raise after logging: swallowing the failure would let the run finish
    # as if the load had succeeded, and later cells would read a stale or
    # missing table.
    raise
else:
    print(f"Date dimension successfully written to: {gold_layer_path}")


In [0]:
# Define the path to the date dimension table in the Gold layer
gold_layer_path = f"abfss://03-gold@{storage_account_name}.dfs.core.windows.net/date_dimension"

# Read the Delta table from the Gold layer
try:
    date_dimension_df = spark.read.format("delta").load(gold_layer_path)
    print(f"Successfully loaded date dimension from: {gold_layer_path}")

    # Display the first 10 rows of the DataFrame. Ordering by date makes
    # "first" well defined, and the limit keeps the cell from rendering the
    # whole table.
    date_dimension_df.orderBy("date").limit(10).display()
except Exception as e:
    print(f"Error loading date dimension: {e}")
    # Re-raise after logging so a failed load is not mistaken for success.
    raise
