# **Bronze to Silver Transformation â€” DimTerritories**


## Introduction
This notebook loads the AdventureWorks Territories dataset from the Bronze layer, cleans and standardizes the data, and outputs a curated Silver-layer table named DimTerritory.

In [1]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

StatementMeta(, 6bc189f6-b20f-47d9-8084-b05e49696c31, 3, Finished, Available, Finished)

In [2]:
# Load Bronze Data
df_territories_Bronze = \
    spark.read.format("csv")\
    .option("header", True)\
    .option("inferScheme", True)\
    .load("abfss://MonoWS@onelake.dfs.fabric.microsoft.com/MonoLH_Bronze.Lakehouse/Files/Raw/AdventureWorks_Territories/AdventureWorks_Territories.csv")


# Preview dtype and data
df_territories_Bronze.printSchema()
df_territories_Bronze.show()

StatementMeta(, 6bc189f6-b20f-47d9-8084-b05e49696c31, 4, Finished, Available, Finished)

root
 |-- SalesTerritoryKey: string (nullable = true)
 |-- Region: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Continent: string (nullable = true)

+-----------------+--------------+--------------+-------------+
|SalesTerritoryKey|        Region|       Country|    Continent|
+-----------------+--------------+--------------+-------------+
|                1|     Northwest| United States|North America|
|                2|     Northeast| United States|North America|
|                3|       Central| United States|North America|
|                4|     Southwest| United States|North America|
|                5|     Southeast| United States|North America|
|                6|        Canada|        Canada|North America|
|                7|        France|        France|       Europe|
|                8|       Germany|       Germany|       Europe|
|                9|     Australia|     Australia|      Pacific|
|               10|United Kingdom|United Kingdom|       Eur

In [3]:
# Transform Territories Data

df_territories_Silver = (
    df_territories_Bronze
        # Cast key column
        .withColumn("SalesTerritoryKey", col("SalesTerritoryKey").cast("int"))
        
        # Drop duplicate keys
        .dropDuplicates(["SalesTerritoryKey"])
)

df_territories_Silver.printSchema()
df_territories_Silver.show()

StatementMeta(, 6bc189f6-b20f-47d9-8084-b05e49696c31, 5, Finished, Available, Finished)

root
 |-- SalesTerritoryKey: integer (nullable = true)
 |-- Region: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Continent: string (nullable = true)

+-----------------+--------------+--------------+-------------+
|SalesTerritoryKey|        Region|       Country|    Continent|
+-----------------+--------------+--------------+-------------+
|                1|     Northwest| United States|North America|
|                2|     Northeast| United States|North America|
|                3|       Central| United States|North America|
|                4|     Southwest| United States|North America|
|                5|     Southeast| United States|North America|
|                6|        Canada|        Canada|North America|
|                7|        France|        France|       Europe|
|                8|       Germany|       Germany|       Europe|
|                9|     Australia|     Australia|      Pacific|
|               10|United Kingdom|United Kingdom|       Eu

In [4]:
# Write DimTerritories Table to Silver Layer (Delta Format)

df_territories_Silver.write\
    .mode("append")\
    .format("delta")\
    .saveAsTable("DimTerritories")

StatementMeta(, 6bc189f6-b20f-47d9-8084-b05e49696c31, 6, Finished, Available, Finished)

In [5]:
df_territories_Silver_check = spark.read.table("DimTerritories")
df_territories_Silver_check.show(5)
df_territories_Silver_check.printSchema()

StatementMeta(, 6bc189f6-b20f-47d9-8084-b05e49696c31, 7, Finished, Available, Finished)

+-----------------+---------+-------------+-------------+
|SalesTerritoryKey|   Region|      Country|    Continent|
+-----------------+---------+-------------+-------------+
|                1|Northwest|United States|North America|
|                2|Northeast|United States|North America|
|                3|  Central|United States|North America|
|                4|Southwest|United States|North America|
|                5|Southeast|United States|North America|
+-----------------+---------+-------------+-------------+
only showing top 5 rows

root
 |-- SalesTerritoryKey: integer (nullable = true)
 |-- Region: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Continent: string (nullable = true)

