# **Bronze to Silver Transformation — DimCalendar**

## Introduction
This notebook loads the Bronze AdventureWorks Calendar dataset, parses the raw date strings, adds derived calendar fields, performs data quality checks, and writes the cleaned DimCalendar (date dimension) table to the Silver layer in Delta format.

In [1]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

StatementMeta(, 906315b2-7364-477f-9509-101022bb7b96, 3, Finished, Available, Finished)

In [2]:
# Read the raw AdventureWorks calendar CSV from the Bronze lakehouse.
# The first row is used as the header and Spark infers the column types.
df_calendar_Bronze = (
    spark.read
    .format("csv")
    .options(header=True, inferSchema=True)
    .load("abfss://MonoWS@onelake.dfs.fabric.microsoft.com/MonoLH_Bronze.Lakehouse/Files/Raw/AdventureWorks_Calendar/AdventureWorks_Calendar.csv")
)

# Inspect the inferred schema, then preview the first rows
df_calendar_Bronze.printSchema()
df_calendar_Bronze.show()

StatementMeta(, 906315b2-7364-477f-9509-101022bb7b96, 4, Finished, Available, Finished)

root
 |-- Date: string (nullable = true)

+---------+
|     Date|
+---------+
| 1/1/2015|
| 1/2/2015|
| 1/3/2015|
| 1/4/2015|
| 1/5/2015|
| 1/6/2015|
| 1/7/2015|
| 1/8/2015|
| 1/9/2015|
|1/10/2015|
|1/11/2015|
|1/12/2015|
|1/13/2015|
|1/14/2015|
|1/15/2015|
|1/16/2015|
|1/17/2015|
|1/18/2015|
|1/19/2015|
|1/20/2015|
+---------+
only showing top 20 rows



In [3]:
# Parse the raw "M/d/yyyy" text into a real date, then derive every calendar
# attribute from that parsed value.
# The column reference is resolved lazily, so after the first withColumn it
# points at the parsed date rather than the original string.
_calendar_date = col("Date")

df_calendar_Silver = (
    df_calendar_Bronze
    .withColumn("Date", to_date(_calendar_date, "M/d/yyyy"))
    # Numeric date parts
    .withColumn("Year", year(_calendar_date))
    .withColumn("Month", month(_calendar_date))
    .withColumn("Day", dayofmonth(_calendar_date))
    .withColumn("Quarter", quarter(_calendar_date))
    # Text labels (full and abbreviated)
    .withColumn("Month_Name", date_format(_calendar_date, "MMMM"))
    .withColumn("Month_Short", date_format(_calendar_date, "MMM"))
    .withColumn("Day_Name", date_format(_calendar_date, "EEEE"))
    .withColumn("Day_Short", date_format(_calendar_date, "EEE"))
    .withColumn("Month_Year", date_format(_calendar_date, "MMM-yyyy"))
    # Week and weekday numbers; Day_of_Week runs 1 = Sunday ... 7 = Saturday
    .withColumn("Week_of_Year", weekofyear(_calendar_date))
    .withColumn("Day_of_Week", dayofweek(_calendar_date))
    # NOTE(review): the month is not zero-padded here (e.g. "2015-1"), so the
    # value does not sort chronologically as text — confirm this is intended.
    .withColumn("Year_Month", concat(col("Year"), lit("-"), col("Month")))
    # Weekend = Sunday (1) or Saturday (7)
    .withColumn("Is_Weekend", col("Day_of_Week").isin(1, 7).cast("boolean"))
)

# Show the resulting schema and a sample of rows
df_calendar_Silver.printSchema()
df_calendar_Silver.show(n=10, truncate=False)

StatementMeta(, 906315b2-7364-477f-9509-101022bb7b96, 5, Finished, Available, Finished)

root
 |-- Date: date (nullable = true)
 |-- Year: integer (nullable = true)
 |-- Month: integer (nullable = true)
 |-- Day: integer (nullable = true)
 |-- Quarter: integer (nullable = true)
 |-- Month_Name: string (nullable = true)
 |-- Month_Short: string (nullable = true)
 |-- Day_Name: string (nullable = true)
 |-- Day_Short: string (nullable = true)
 |-- Month_Year: string (nullable = true)
 |-- Week_of_Year: integer (nullable = true)
 |-- Day_of_Week: integer (nullable = true)
 |-- Year_Month: string (nullable = true)
 |-- Is_Weekend: boolean (nullable = true)

+----------+----+-----+---+-------+----------+-----------+---------+---------+----------+------------+-----------+----------+----------+
|Date      |Year|Month|Day|Quarter|Month_Name|Month_Short|Day_Name |Day_Short|Month_Year|Week_of_Year|Day_of_Week|Year_Month|Is_Weekend|
+----------+----+-----+---+-------+----------+-----------+---------+---------+----------+------------+-----------+----------+----------+
|2015-01-01|2015

In [4]:
# Data Quality Checks
# Date is the key of the calendar dimension, so it must be unique and non-null.
# to_date() returns NULL for text that does not match the pattern when ANSI
# mode is disabled, and a single NULL key would not show up in a "count > 1"
# listing, so NULLs are counted separately.
duplicate_dates = (
    df_calendar_Silver
    .groupBy("Date")
    .count()
    .filter(col("count") > 1)
)

# Same listing as before: any Date value that occurs more than once
duplicate_dates.show()

duplicate_date_count = duplicate_dates.count()
null_date_count = df_calendar_Silver.filter(col("Date").isNull()).count()
print(f"Duplicated dates: {duplicate_date_count} | NULL dates: {null_date_count}")

# Fail the run instead of only printing, so that later cells do not persist a
# dimension with a broken key when the notebook runs unattended.
if duplicate_date_count > 0 or null_date_count > 0:
    raise ValueError(
        "DimCalendar data quality check failed: "
        f"{duplicate_date_count} duplicated date(s), "
        f"{null_date_count} NULL date(s)"
    )

StatementMeta(, 906315b2-7364-477f-9509-101022bb7b96, 6, Finished, Available, Finished)

+----+-----+
|Date|count|
+----+-----+
+----+-----+



In [5]:
# Write the cleaned calendar dimension to the Silver layer (Delta format).
#
# df_calendar_Silver is rebuilt from the complete Bronze file on every run, so
# the table is replaced rather than appended to. With "append", each re-run of
# the notebook would add another full copy of every date to DimCalendar.
# NOTE(review): assumes this notebook is the only writer of DimCalendar — confirm.
(
    df_calendar_Silver.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("DimCalendar")
)

StatementMeta(, 906315b2-7364-477f-9509-101022bb7b96, 7, Finished, Available, Finished)

In [6]:
# Read the Silver table back to confirm the write succeeded:
# preview a few rows first, then print the persisted schema.
df_dimdate_silver_check = spark.table("DimCalendar")
df_dimdate_silver_check.show(n=5)
df_dimdate_silver_check.printSchema()


StatementMeta(, 906315b2-7364-477f-9509-101022bb7b96, 8, Finished, Available, Finished)

+----------+----+-----+---+-------+----------+-----------+--------+---------+----------+------------+-----------+----------+----------+
|      Date|Year|Month|Day|Quarter|Month_Name|Month_Short|Day_Name|Day_Short|Month_Year|Week_of_Year|Day_of_Week|Year_Month|Is_Weekend|
+----------+----+-----+---+-------+----------+-----------+--------+---------+----------+------------+-----------+----------+----------+
|2015-01-01|2015|    1|  1|      1|   January|        Jan|Thursday|      Thu|  Jan-2015|           1|          5|    2015-1|     false|
|2015-01-02|2015|    1|  2|      1|   January|        Jan|  Friday|      Fri|  Jan-2015|           1|          6|    2015-1|     false|
|2015-01-03|2015|    1|  3|      1|   January|        Jan|Saturday|      Sat|  Jan-2015|           1|          7|    2015-1|      true|
|2015-01-04|2015|    1|  4|      1|   January|        Jan|  Sunday|      Sun|  Jan-2015|           1|          1|    2015-1|      true|
|2015-01-05|2015|    1|  5|      1|   January|  