In [0]:
# Build the silver customer frame from bronze.cleaned_customer_data.
#
# Steps:
#   1. keyed        - derive the surrogate customer_id and normalise the
#                     'nan' / 'nat' string sentinels to NULL.
#   2. deduplicated - keep one row per (derived customer_id, phone).
#
# The surrogate key is derived in its own CTE *before* the window function.
# When the CASE alias and ROW_NUMBER() share a SELECT list, the name
# `customer_id` inside PARTITION BY binds to the raw source column, so every
# row whose raw id is 'nan' and that shares a phone (including NULL phones)
# would be collapsed into a single row regardless of its hashed id.
df_customer = spark.sql("""
WITH keyed AS (
  SELECT
    CASE
      -- Rows without a real id get a SHA-256 surrogate built from
      -- name / phone / email.
      -- NOTE(review): concat_ws treats its FIRST argument as the separator,
      -- so first__name is used as the delimiter here rather than hashed as a
      -- field. Left unchanged so ids stay aligned with the same expression
      -- used for silver.vehicle; change both together if this is revisited.
      WHEN customer_id = 'nan' THEN sha2(
        concat_ws(
          coalesce(first__name, '-1'),
          coalesce(last__name, '-1'),
          coalesce(cell_phone, '-1'),
          coalesce(email, '-1')
        ), 256)
      ELSE customer_id
    END                                  AS customer_id,
    first__name                          AS first_name,
    last__name                           AS last_name,
    cell_phone                           AS phone,
    arm_status,
    -- 'nat' (any casing) marks a missing timestamp in the bronze data.
    CAST(CASE
           WHEN LOWER(arm__last_bill_date) = 'nat' THEN NULL
           ELSE arm__last_bill_date
         END AS TIMESTAMP)               AS arm_lastbilldate,
    CAST(CASE
           WHEN LOWER(arm__expires) = 'nat' THEN NULL
           ELSE arm__expires
         END AS TIMESTAMP)               AS arm_expires,
    -- 'nan' marks a missing token; NULLIF maps exactly that value to NULL.
    NULLIF(payment__token, 'nan')        AS payment_token
  FROM bronze.cleaned_customer_data
),
deduplicated AS (
  SELECT *
  FROM (
    SELECT
      *,
      -- ORDER BY is constant within a partition, so which duplicate
      -- survives is arbitrary (same as before this change).
      ROW_NUMBER() OVER (
        PARTITION BY customer_id, phone
        ORDER BY customer_id
      ) AS rn
    FROM keyed
  ) t
  WHERE rn = 1
)

SELECT * EXCEPT (rn)
FROM deduplicated
""")
df_customer.display()


In [0]:
%sql
-- Idempotent: a plain CREATE SCHEMA raises once the schema already exists,
-- which would break every re-run of this notebook.
CREATE SCHEMA IF NOT EXISTS silver

In [0]:
# Persist the customer frame as a Delta table, replacing both the data and
# the schema of any existing silver.customer table.
(
    df_customer.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", True)
    .saveAsTable("silver.customer")
)

In [0]:
# Build the silver vehicle frame: one row per distinct (customer_id, rfid)
# pair from bronze.cleaned_customer_data, skipping rows with no RFID.
#
# The projection contains only the two key columns, so SELECT DISTINCT gives
# the same rows as the former ROW_NUMBER() ... WHERE rn = 1 pattern
# partitioned on both of them, without the window sort.
df_vehicle = spark.sql("""
SELECT DISTINCT
  CASE
    -- Same surrogate-key expression as used for silver.customer; the two
    -- must stay identical so the ids line up across tables.
    WHEN customer_id = 'nan' THEN sha2(
      concat_ws(
        coalesce(first__name, '-1'),
        coalesce(last__name, '-1'),
        coalesce(cell_phone, '-1'),
        coalesce(email, '-1')
      ), 256)
    ELSE customer_id
  END               AS customer_id,
  extra_number_rfid AS rfid
FROM bronze.cleaned_customer_data
-- NOTE(review): other columns in this table encode missing values as the
-- string 'nan'; confirm whether extra_number_rfid can too.
WHERE extra_number_rfid IS NOT NULL
  AND extra_number_rfid != ''
""")
df_vehicle.display()

In [0]:
# Persist the vehicle frame as a Delta table, replacing both the data and
# the schema of any existing silver.vehicle table.
(
    df_vehicle.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", True)
    .saveAsTable("silver.vehicle")
)

In [0]:
%sql
-- Customers whose payment_token is NULL.
SELECT *
FROM silver.customer AS c
WHERE c.payment_token IS NULL;

In [0]:
%sql
-- Rows with arm_status 'Active' whose last bill date or expiry timestamp
-- falls before today.
SELECT *
FROM silver.customer AS c
WHERE c.arm_status = 'Active'
  AND (
    c.arm_lastbilldate < current_date()
    OR c.arm_expires < current_date()
  );

In [0]:
%sql
-- Customers whose arm_expires timestamp is later than today.
SELECT *
FROM silver.customer AS c
WHERE c.arm_expires > current_date();