In [0]:
pip install pyspark

In [0]:
import os

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, expr, lit

# ----------------------------------------------------------------------------------
# 1. SPARK CONFIGURATION & AZURE STORAGE AUTHENTICATION
# ----------------------------------------------------------------------------------

# Azure Storage account details (authenticated with a SAS token)
STORAGE_ACCOUNT_NAME = "cdmo"  # Replace with your storage account name
CONTAINER_NAME = "03-gold"  # Target container for Gold layer

# SECURITY: the SAS token is read from the environment instead of being written
# into the notebook, so it never ends up in source control or shared exports.
# A token that has ever been committed in a notebook should be treated as leaked
# and rotated.
SAS_TOKEN = os.environ.get("AZURE_STORAGE_SAS_TOKEN")
if not SAS_TOKEN:
    # Fail fast with a clear message rather than letting Spark fail later with
    # an opaque authentication error on the first read/write.
    raise RuntimeError(
        "AZURE_STORAGE_SAS_TOKEN is not set; provide a valid SAS token for "
        f"container '{CONTAINER_NAME}' before running this notebook."
    )

# Initialize Spark session
spark = SparkSession.builder.appName("DateDimensionLoad").getOrCreate()

# Configure Spark to use the SAS Token for authentication via `wasbs://`
spark.conf.set(f"fs.azure.sas.{CONTAINER_NAME}.{STORAGE_ACCOUNT_NAME}.blob.core.windows.net", SAS_TOKEN)

# Define the Gold layer container path using `wasbs://`
gold_layer_path = f"wasbs://{CONTAINER_NAME}@{STORAGE_ACCOUNT_NAME}.blob.core.windows.net/date_dimension"

# ----------------------------------------------------------------------------------
# 2. GENERATE DATE DIMENSION DATAFRAME
# ----------------------------------------------------------------------------------

def create_date_dimension(start_date, end_date):
    """
    Build a date dimension DataFrame with one row per day from start_date to end_date.

    Both arguments are interpolated into Spark SQL `to_date('...')` calls, so they
    are expected to be date strings that `to_date` can parse (e.g. "2000-01-01").
    The returned DataFrame has a `date` column followed by the derived calendar
    attribute columns listed below, in that order.
    """
    calendar_days = spark.sql(f"""
        SELECT explode(sequence(to_date('{start_date}'), to_date('{end_date}'), interval 1 day)) as date
    """)

    # (column name, column value) pairs; they are appended in this exact order.
    derived_columns = [
        ("Year", expr("year(date)")),
        ("Month", expr("month(date)")),
        ("Day", expr("day(date)")),
        ("DayOfWeek", expr("date_format(date, 'EEEE')")),
        ("Quarter", expr("quarter(date)")),
        ("WeekOfYear", expr("weekofyear(date)")),
        ("IsWeekend", expr("case when date_format(date, 'EEEE') IN ('Saturday', 'Sunday') then true else false end")),
        ("IsHoliday", lit(False)),  # Placeholder for specific holidays
        ("MonthName", expr("date_format(date, 'MMMM')")),
        ("DayOfMonth", expr("dayofmonth(date)")),
        ("DayOfYear", expr("dayofyear(date)")),
    ]

    enriched = calendar_days
    for column_name, column_value in derived_columns:
        enriched = enriched.withColumn(column_name, column_value)
    return enriched

# ----------------------------------------------------------------------------------
# 3. CREATE AND WRITE DATE DIMENSION TO GOLD LAYER
# ----------------------------------------------------------------------------------

# Define start and end dates
start_date = "2000-01-01"
end_date = "2099-12-31"

# Create the date dimension DataFrame
date_dimension_df = create_date_dimension(start_date, end_date)

# Write the DataFrame to the Delta table in the Gold layer.
# mode("overwrite") replaces whatever is currently stored at gold_layer_path.
try:
    date_dimension_df.write.format("delta").mode("overwrite").save(gold_layer_path)
    print(f"✅ Date dimension successfully written to: {gold_layer_path}")
except Exception as e:
    print(f"❌ Error writing to Gold layer: {e}")
    # Re-raise so the cell is marked as failed and "Run All" stops here instead
    # of letting later cells run against a table that was never written.
    raise


✅ Date dimension successfully written to: wasbs://03-gold@cdmo.blob.core.windows.net/date_dimension


In [0]:
# Path to the date dimension table in the Gold layer, built from the
# CONTAINER_NAME / STORAGE_ACCOUNT_NAME constants so it cannot drift from the
# container the SAS token is configured for.
gold_layer_path = f"wasbs://{CONTAINER_NAME}@{STORAGE_ACCOUNT_NAME}.blob.core.windows.net/date_dimension"

# Read the Delta table from the Gold layer
try:
    date_dimension_df = spark.read.format("delta").load(gold_layer_path)
    print(f"✅ Successfully loaded date dimension from: {gold_layer_path}")

    # Display the first 10 rows of the DataFrame
    date_dimension_df.show(10)  # Use .show(10) instead of .display()
except Exception as e:
    print(f"❌ Error loading date dimension: {e}")
    # Re-raise: swallowing the error would leave the previously built in-memory
    # date_dimension_df in place and make a failed read look like a success.
    raise


✅ Successfully loaded date dimension from: wasbs://03-gold@cdmo.blob.core.windows.net/date_dimension
+----------+----+-----+---+---------+-------+----------+---------+---------+---------+----------+---------+
|      date|Year|Month|Day|DayOfWeek|Quarter|WeekOfYear|IsWeekend|IsHoliday|MonthName|DayOfMonth|DayOfYear|
+----------+----+-----+---+---------+-------+----------+---------+---------+---------+----------+---------+
|2000-01-01|2000|    1|  1| Saturday|      1|        52|     true|    false|  January|         1|        1|
|2000-01-02|2000|    1|  2|   Sunday|      1|        52|     true|    false|  January|         2|        2|
|2000-01-03|2000|    1|  3|   Monday|      1|         1|    false|    false|  January|         3|        3|
|2000-01-04|2000|    1|  4|  Tuesday|      1|         1|    false|    false|  January|         4|        4|
|2000-01-05|2000|    1|  5|Wednesday|      1|         1|    false|    false|  January|         5|        5|
|2000-01-06|2000|    1|  6| Thursda