# **Bronze to Silver Transformation - FactReturns**

## Introduction
This notebook loads the AdventureWorks Returns dataset from the Bronze layer, applies cleaning, type conversions, optional enrichment, DQ checks and writes the resulting table to the Silver layer as a managed Delta table named
`FactReturns`.

In [1]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

StatementMeta(, 9efc9766-a2ce-4c4e-a406-5129cb4214f3, 3, Finished, Available, Finished)

In [2]:
# Load Bronze Data
df_returns_Bronze = \
    spark.read.format("csv")\
    .option("header", True)\
    .option("inferScheme", True)\
    .load("abfss://MonoWS@onelake.dfs.fabric.microsoft.com/MonoLH_Bronze.Lakehouse/Files/Raw/AdventureWorks_Returns/AdventureWorks_Returns.csv")

# Preview dtype and data
df_returns_Bronze.printSchema()
df_returns_Bronze.show()

StatementMeta(, 9efc9766-a2ce-4c4e-a406-5129cb4214f3, 4, Finished, Available, Finished)

root
 |-- ReturnDate: string (nullable = true)
 |-- TerritoryKey: string (nullable = true)
 |-- ProductKey: string (nullable = true)
 |-- ReturnQuantity: string (nullable = true)

+----------+------------+----------+--------------+
|ReturnDate|TerritoryKey|ProductKey|ReturnQuantity|
+----------+------------+----------+--------------+
| 1/18/2015|           9|       312|             1|
| 1/18/2015|          10|       310|             1|
| 1/21/2015|           8|       346|             1|
| 1/22/2015|           4|       311|             1|
|  2/2/2015|           6|       312|             1|
| 2/15/2015|           1|       312|             1|
| 2/19/2015|           9|       311|             1|
| 2/24/2015|           8|       314|             1|
|  3/8/2015|           8|       350|             1|
| 3/13/2015|           9|       350|             1|
| 3/14/2015|           4|       346|             1|
| 3/15/2015|           9|       340|             1|
| 3/22/2015|           4|       311|    

In [3]:
# Convert to date and int columns
df_returns_Silver = df_returns_Bronze \
    .withColumn("ReturnDate", to_date(col("ReturnDate"), "M/d/yyyy")) \
    .withColumn("TerritoryKey", col("TerritoryKey").cast("int")) \
    .withColumn("ProductKey", col("ProductKey").cast("int")) \
    .withColumn("ReturnQuantity", col("ReturnQuantity").cast("int"))

# Display the result
df_returns_Silver.printSchema()
df_returns_Silver.show(10, truncate=False)

StatementMeta(, 9efc9766-a2ce-4c4e-a406-5129cb4214f3, 5, Finished, Available, Finished)

root
 |-- ReturnDate: date (nullable = true)
 |-- TerritoryKey: integer (nullable = true)
 |-- ProductKey: integer (nullable = true)
 |-- ReturnQuantity: integer (nullable = true)

+----------+------------+----------+--------------+
|ReturnDate|TerritoryKey|ProductKey|ReturnQuantity|
+----------+------------+----------+--------------+
|2015-01-18|9           |312       |1             |
|2015-01-18|10          |310       |1             |
|2015-01-21|8           |346       |1             |
|2015-01-22|4           |311       |1             |
|2015-02-02|6           |312       |1             |
|2015-02-15|1           |312       |1             |
|2015-02-19|9           |311       |1             |
|2015-02-24|8           |314       |1             |
|2015-03-08|8           |350       |1             |
|2015-03-13|9           |350       |1             |
+----------+------------+----------+--------------+
only showing top 10 rows



In [4]:
# Write DimReturns Table to Silver Layer (Delta Format)

df_returns_Silver.write\
    .mode("append")\
    .format("delta")\
    .saveAsTable("DimReturns")

StatementMeta(, 9efc9766-a2ce-4c4e-a406-5129cb4214f3, 6, Finished, Available, Finished)

In [5]:
# Verify if Silver write Succeeded
df_returns_Silver_check = spark.read.table("DimReturns")
df_returns_Silver_check.show(5)
df_returns_Silver_check.printSchema()

StatementMeta(, 9efc9766-a2ce-4c4e-a406-5129cb4214f3, 7, Finished, Available, Finished)

+----------+------------+----------+--------------+
|ReturnDate|TerritoryKey|ProductKey|ReturnQuantity|
+----------+------------+----------+--------------+
|2015-01-18|           9|       312|             1|
|2015-01-18|          10|       310|             1|
|2015-01-21|           8|       346|             1|
|2015-01-22|           4|       311|             1|
|2015-02-02|           6|       312|             1|
+----------+------------+----------+--------------+
only showing top 5 rows

root
 |-- ReturnDate: date (nullable = true)
 |-- TerritoryKey: integer (nullable = true)
 |-- ProductKey: integer (nullable = true)
 |-- ReturnQuantity: integer (nullable = true)

