In [0]:
from datetime import datetime
from pyspark.sql import Row

def log_etl(step: str, status: str, message: str) -> None:
    """Append one audit record to the ``silver.log_etl`` Delta table.

    Args:
        step: Name of the ETL step being reported.
        status: Outcome label for the step (this notebook passes
            ``"SUCCESS"`` or ``"ERROR"``).
        message: Free-text detail stored next to the status.

    The record is stamped with ``datetime.now()`` (a naive, local-clock
    timestamp) and written with ``mode("append")``.

    Relies on the notebook-provided ``spark`` session.
    NOTE(review): assumes the ``silver`` schema already exists - nothing in
    the visible notebook creates it; confirm.
    """
    # Spell the schema out instead of letting Spark infer it from the values:
    # inference fails when a field is None, and an explicit schema keeps every
    # appended row typed the same way. Column names and order match the
    # original Row(step, status, timestamp, message).
    log_schema = "`step` string, `status` string, `timestamp` timestamp, `message` string"
    log_row = (step, status, datetime.now(), message)

    log_df = spark.createDataFrame([log_row], schema=log_schema)
    log_df.write.format("delta").mode("append").saveAsTable("silver.log_etl")

In [0]:
%sql
-- Preview the raw staging rows that the bronze transformation reads from.
SELECT *
FROM staging.test_customer_data

In [0]:
# Build the bronze frame from staging.test_customer_data:
#   * home_phone / cell_phone are reduced to digits only;
#   * the pipe-delimited `rfid` column is split and exploded so each RFID value
#     gets its own row, exposed as `extra_number_rfid` (the raw `rfid` column is
#     not carried through).
#
# The delimiter is written as the character class [|] rather than an escaped
# pipe. Spark SQL unescapes string literals by default, so an escaped pipe that
# passes through a Python string and then the SQL parser can degrade to the bare
# regex `|`, which matches the empty string and splits the value into single
# characters. [|] contains no backslash and is immune to both escaping layers.
#
# NOTE(review): LATERAL VIEW without OUTER emits no row when `rfid` is NULL, so
# customers without an RFID are absent from the result (same as before this
# change). Switch to LATERAL VIEW OUTER if those rows must be kept - confirm.
df_bronze = spark.sql("""
SELECT
    site_id,
    site__name,
    bus_addr1,
    bus_addr2,
    bus_city,
    bus_state,
    bus_zip_code,
    first__name,
    middle__name,
    last__name,
    address1,
    address2,
    city,
    state,
    zip_code,
    -- keep digits only
    regexp_replace(home_phone, '[^0-9]', '') AS home_phone,
    regexp_replace(cell_phone, '[^0-9]', '') AS cell_phone,
    email,
    exploded_rfid AS extra_number_rfid,
    veh_desc,
    arm_p_lan,
    arm_status,
    arm__joined,
    arm__last_bill_date,
    arm__expires,
    customer_id,
    card_id,
    payment__token,
    token__authorizer__code
FROM staging.test_customer_data
-- one output row per pipe-separated RFID value
LATERAL VIEW explode(split(rfid, '[|]')) rfid_view AS exploded_rfid
""")
df_bronze.display()


In [0]:
%sql
-- Make sure the target schema is present before anything is saved into it.
CREATE SCHEMA IF NOT EXISTS bronze

In [0]:
# Overwrite bronze.cleaned_customer_data with the transformed frame and record
# the outcome through log_etl.
try:
    df_bronze.write.format("delta").mode("overwrite").saveAsTable("bronze.cleaned_customer_data")
except Exception as e:
    log_etl("Load cleaned_customer_data", "ERROR", f"Failed to load Customer data: {e}")
    # Re-raise so the cell (and any job running this notebook) fails visibly
    # instead of carrying on as if the table had been refreshed.
    raise
else:
    # Logged outside the try body so that a failure while writing the log row
    # is not misreported as a failed data load.
    log_etl("Load cleaned_customer_data", "SUCCESS", "cleaned_customer_data data successfully loaded into bronze.cleaned_customer_data")

In [0]:
%sql
-- Inspect what is currently stored in the bronze table.
SELECT *
FROM bronze.cleaned_customer_data