# Imports

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

# Read from Bronze table

In [0]:
# Load the ERP product-category table from the bronze layer.
bronze_table_name = "workspace.bronze.erp_px_cat_g1v2"
df = spark.table(bronze_table_name)

# Preview the rows exactly as they were read, before any cleaning.
display(df)

# Trim whitespace from string columns

In [0]:
# Strip leading/trailing whitespace from every string-typed column.
# All columns are rebuilt in a single select() instead of calling withColumn()
# once per column: each withColumn() adds another projection to the plan, and
# the PySpark docs advise using select() when several columns change at once.
# Non-string columns pass through untouched; column names and order are kept.
df1 = df.select(
    [
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
)

display(df1)

# Normalize maintenance flag to boolean

In [0]:
# Convert the trimmed 'Yes'/'No' text flag into a real boolean column.
#
# Every branch of the CASE expression must be boolean. Falling back to the
# original string column would mix boolean and string branches, so the result
# could not be a boolean column. Values other than exactly 'Yes' or 'No'
# (including NULL) therefore become NULL rather than leaking the raw text.
#
# The column is referenced as "MAINTENANCE" throughout, matching the source
# column name used by the rename map further down.
maintenance_text = col("MAINTENANCE")

df2 = df1.withColumn(
    "MAINTENANCE",
    when(maintenance_text == "Yes", lit(True))
    .when(maintenance_text == "No", lit(False))
    .otherwise(lit(None).cast(BooleanType())),
)
display(df2)

# Renaming the columns

In [0]:
# Source (bronze) column name -> descriptive silver column name.
rename_map = {
    "ID": "category_id",
    "CAT": "category",
    "SUBCAT": "subcategory",
    "MAINTENANCE": "maintenance_flag"
}

# Start from the normalized frame and apply the renames one at a time.
df3 = df2
for source_col in rename_map:
    df3 = df3.withColumnRenamed(source_col, rename_map[source_col])

display(df3)

# Write to Silver Table

In [0]:
# Persist the cleaned frame as the silver table, replacing any existing contents.
(
    df3.write
    .mode("overwrite")
    .saveAsTable("workspace.silver.erp_product_category")
)

# Sanity check for the table

In [0]:
%sql
-- Preview a few rows of the silver table that was just written.
SELECT *
FROM workspace.silver.erp_product_category
LIMIT 10