## Date Dimension
###### * Create a Dim_Date dimension table
###### * Parameterize Start and End date based on orders_gold Min and Max dates
###### * Create a surrogate key for use in the Fact_Sales table

In [0]:
# Import modules
from pyspark.sql.functions import (
    col, lit, year, month, dayofmonth, dayofweek, weekofyear,
    quarter, date_format, when, current_date, date_trunc, to_date, datediff, floor, trunc, months_between
)

from datetime import date

from pyspark.sql.types import IntegerType, DateType
from delta.tables import DeltaTable


#### Parameterize Start Date and End Date

In [0]:
# Read the earliest and latest order dates from the orders_gold table.
# The aggregate query yields exactly one row, so collect()[0] is that row.
df_date_range = spark.sql(
                """
                    SELECT
                        min(order_date) as min_order_date,
                        max(order_date) as max_order_date
                    FROM
                        gold_dev.global_mart_retail.orders_gold
                """
            ).collect()[0]

# min()/max() come back as NULL (None) when the table is empty or every
# order_date is NULL. Fail with a clear message here instead of an opaque
# AttributeError on `.year` below.
if df_date_range['min_order_date'] is None or df_date_range['max_order_date'] is None:
    raise ValueError(
        "Cannot derive the Dim_Date range: "
        "gold_dev.global_mart_retail.orders_gold has no non-null order_date values."
    )

# Pad the range to whole calendar years: start on 1 January of the earliest
# order year and end on 31 December of the year AFTER the latest order year.
min_order_date = date(df_date_range['min_order_date'].year, 1, 1)
max_order_date = date(df_date_range['max_order_date'].year + 1, 12, 31)

print(f"Min Order Date: {min_order_date}")
print(f"Max Order Date: {max_order_date}")


#### Generate a continuous date column

In [0]:
# Build the dim_date spine: one row per calendar day, from min_order_date
# through max_order_date inclusive, exposed as a single `date` column.
date_sequence_sql = f"""
    SELECT explode(
        sequence(
          to_date('{min_order_date}'),
          to_date('{max_order_date}'),
          INTERVAL 1 DAY
        )
    ) AS date
  """
df_date = spark.sql(date_sequence_sql)

                       

#### Add standard date table attributes

In [0]:
# Calendar attributes derived from the `date` column, listed in the order in
# which they are added to the DataFrame.
date_col = col('date')
calendar_attributes = [
    # Integer key in yyyyMMdd form (e.g. 20240131).
    ('date_key', date_format(date_col, "yyyyMMdd").cast(IntegerType())),
    ('year', year(date_col)),
    ('quarter', quarter(date_col)),
    ('quarter_name', date_format(date_col, "QQQ")),
    ('month', month(date_col)),
    ('month_name', date_format(date_col, "MMMM")),
    ('month_short', date_format(date_col, "MMM")),
    ('week_of_year', weekofyear(date_col)),
    ('day', dayofmonth(date_col)),
    ('day_name', date_format(date_col, "EEEE")),
    ('day_short', date_format(date_col, "EEE")),
    ('day_of_week', dayofweek(date_col)),
    # Must come after day_of_week, which it reads. Spark's dayofweek numbers
    # Sunday as 1 and Saturday as 7.
    ('is_weekend', col('day_of_week').isin(1, 7)),
]

for attribute_name, attribute_expr in calendar_attributes:
    df_date = df_date.withColumn(attribute_name, attribute_expr)


#### Add Offset columns 
###### * Useful for calculating Time Intelligence measures in Power BI

In [0]:
# Add Current Week, Month and Year Offsets.
#
# Each offset counts whole periods between a row's date and "today":
# 0 = current period, negative = past, positive = future.
# NOTE: "today" is current_date(), resolved when the DataFrame is executed,
# so the stored offsets reflect the run date of this notebook.
current_week_start = date_trunc("week", current_date())
current_month_start = trunc(current_date(), "month")
current_year = year(current_date())

df_date = (
    df_date
    # Week Offset: both sides are truncated to the start of their week, so the
    # day difference is a multiple of 7. The division must sit INSIDE floor():
    # floor(...) / 7 would leave a double column (e.g. -3.0), whereas
    # floor(... / 7) yields an integer column like the month and year offsets.
    .withColumn(
        'current_week_offset',
        floor(
            datediff(
                date_trunc('week', col('date')),
                current_week_start
            ) / 7
        )
    )

    # Month Offset: whole months between the two month starts.
    .withColumn(
        "current_month_offset",
        floor(
            months_between(
                trunc(col("date"), "month"),
                current_month_start
            )
        )
    )

    # Year Offset: simple difference of calendar years.
    .withColumn(
        "current_year_offset",
        year(col("date")) - current_year
    )
)


#### Save as Gold Dim_Date Delta Table

In [0]:
# Persist dim_date as a Delta table, replacing any existing data and schema.
gold_dim_date_table = "gold_dev.global_mart_retail.dim_date"

dim_date_writer = df_date.write.format("delta")
dim_date_writer = dim_date_writer.mode("overwrite")
# overwriteSchema lets the overwrite replace the table schema as well.
dim_date_writer = dim_date_writer.option("overwriteSchema", "true")

dim_date_writer.saveAsTable(gold_dim_date_table)
