In [0]:
from pyspark.sql import Window
from pyspark.sql import functions as F
from pyspark.sql.functions import trim,col

from silver.Transform_Functions.Trim import func_trim_col

# READING FROM BRONZE TABLE

In [0]:
# Load the raw CRM customer table from the bronze layer as the starting DataFrame.
df = spark.read.table("workspace.bronze.crm_cust_info_raw")

# Data Transformation

## Trim whitespace

In [0]:
# Run the project's trim helper over the whole DataFrame and continue with its result.
# NOTE(review): presumably strips leading/trailing whitespace from the string columns;
# the helper lives in silver.Transform_Functions.Trim, so confirm its exact behaviour there.
df = func_trim_col(df)

## Check that all values in column "cst_key" have the prefix "AW"

In [0]:
# Rows whose cst_key is populated but does not start with "AW".
# Null keys are excluded on purpose: a missing key is not a wrong prefix.
nonMatch_prefix = df.filter(
    F.col("cst_key").isNotNull() & ~F.col("cst_key").startswith("AW")
)

# The DataFrame above is only a lazy plan; without an action the check reports nothing.
# Only the count is printed so that no customer rows end up in the notebook output.
nonMatch_prefix_count = nonMatch_prefix.count()
print(f"Rows with a cst_key that lacks the 'AW' prefix: {nonMatch_prefix_count}")

### Rows whose "cst_key" does not start with "AW" have null values in all other columns, so we remove those rows.

In [0]:
# Keep rows whose cst_key either carries the "AW" prefix or is missing altogether;
# rows with a populated key that has any other prefix are dropped.
key_col = F.col("cst_key")
df = df.where(key_col.startswith("AW") | key_col.isNull())

## Drop duplicates - Keeping unique customer id

In [0]:
# Detect duplicated customer ids by comparing the row count with the number of
# distinct cst_id values (a null cst_id counts as one distinct value here).
total_rows = df.count()
distinct_ids = df.select("cst_id").distinct().count()

if distinct_ids != total_rows:
    print("Duplicates present...")
    print("Removing duplicate cst_id records...")
    # dropDuplicates(["cst_id"]) keeps an arbitrary row per id, so two runs over the
    # same data could retain different (possibly stale) records. Rank the rows of each
    # cst_id from newest to oldest cst_create_date and keep only the newest one.
    # Rows with a null cst_id form a single group, exactly as dropDuplicates treated them.
    # NOTE(review): assumes cst_create_date sorts chronologically (date/timestamp or
    # ISO-formatted string) - confirm against the bronze schema. Rows tied on the
    # newest date are still resolved arbitrarily.
    newest_first = Window.partitionBy("cst_id").orderBy(
        F.col("cst_create_date").desc_nulls_last()
    )
    df = (
        df.withColumn("_row_rank", F.row_number().over(newest_first))
        .filter(F.col("_row_rank") == 1)
        .drop("_row_rank")
    )
else:
    print("No duplicates")

## Normalize Abbreviations

In [0]:
# Expand the single-letter codes into readable labels. Matching is done on the
# upper-cased value; anything that is not a recognised code (including null) becomes "n/a".
marital_code = F.upper(F.col("cst_marital_status"))
marital_label = (
    F.when(marital_code == "M", "Married")
    .when(marital_code == "S", "Single")
    .otherwise("n/a")
)

gender_code = F.upper(F.col("cst_gndr"))
gender_label = (
    F.when(gender_code == "M", "Male")
    .when(gender_code == "F", "Female")
    .otherwise("n/a")
)

# Overwrite both columns in place with their expanded labels.
df = df.withColumn("cst_marital_status", marital_label)
df = df.withColumn("cst_gndr", gender_label)

## Validate Data Types

In [0]:
# Print the DataFrame's column names and data types for a visual check.
# NOTE(review): nothing is asserted here, so an unexpected type will not stop the run.
df.printSchema()

## Renaming the columns

In [0]:
# Mapping from the source column names (cst_*) to the names used for the output table.
cust_table_header = dict(
    cst_id="customer_id",
    cst_key="customer_key",
    cst_firstname="first_name",
    cst_lastname="last_name",
    cst_marital_status="marital_status",
    cst_gndr="gender",
    cst_create_date="create_date",
)

In [0]:
# Apply every rename from the mapping, one column at a time; withColumnRenamed
# leaves the DataFrame unchanged when the source column is not present.
for source_name in cust_table_header:
    df = df.withColumnRenamed(source_name, cust_table_header[source_name])

# Write into Silver Schema

In [0]:
# Persist the transformed customers as a Delta table, replacing any existing contents.
df.write.format("delta").mode("overwrite").saveAsTable("silver.crm_customers")


In [0]:
%sql
-- Preview a few rows of the silver table written by the previous cell.
SELECT * FROM silver.crm_customers
LIMIT 5;