## Transform Customer Data
1. Remove records with NULL customer_id
1. Remove exact duplicate records
1. Remove duplicate records based on created_timestamp (keep the latest record per customer_id)
1. CAST the columns to the correct data type
1. Write transformed data to the Silver schema

### 1. Remove records with NULL customer_id

In [0]:
-- Make pavan_catalog_all.bronze the session default so unqualified names resolve there.
USE CATALOG pavan_catalog_all;
USE SCHEMA bronze;

In [0]:
-- NOTE(review): commented-out view definition, not executed by this notebook.
-- Presumably this is how pavan_catalog_all.bronze.v_customers (queried below) was created -- confirm.
-- create or replace view 
--   pavan_catalog_all.bronze.v_customers
-- AS
-- SELECT * from bronze.bronze_customers

In [0]:
-- Step 1: keep only the rows that carry a customer_id.
SELECT c.*
FROM pavan_catalog_all.bronze.v_customers AS c
WHERE c.customer_id IS NOT NULL;

### 2. Remove exact duplicate records

In [0]:
-- Preview the non-NULL rows sorted by customer_id so rows sharing an id sit next to each other.
SELECT c.*
FROM pavan_catalog_all.bronze.v_customers AS c
WHERE c.customer_id IS NOT NULL
ORDER BY c.customer_id ASC;

In [0]:
-- Step 2: same preview with exact duplicate rows collapsed by DISTINCT.
SELECT DISTINCT c.*
FROM pavan_catalog_all.bronze.v_customers AS c
WHERE c.customer_id IS NOT NULL
ORDER BY c.customer_id ASC;

In [0]:
-- Alternative: collapse to one row per customer_id by taking MAX of every other column.
-- Each MAX is evaluated independently, so the values in one output row
-- may originate from different source rows.
SELECT
    customer_id,
    MAX(created_timestamp),
    MAX(customer_name),
    MAX(date_of_birth),
    MAX(email),
    MAX(member_since),
    MAX(telephone)
FROM pavan_catalog_all.bronze.v_customers
WHERE customer_id IS NOT NULL
GROUP BY customer_id
ORDER BY customer_id ASC;

In [0]:
-- Step 2 result: bronze customers with NULL customer_ids and exact duplicate rows removed.
-- Temporary view read by the later de-duplication, CAST and write cells.
-- Deliberately no ORDER BY: a view definition does not guarantee row order to the
-- queries that read it, so sorting here only adds work. Sort in the consuming query.
CREATE OR REPLACE TEMPORARY VIEW v_customers_distinct
AS
SELECT DISTINCT *
FROM pavan_catalog_all.bronze.v_customers
WHERE customer_id IS NOT NULL;

### 3. Remove duplicate records based on created_timestamp (keep the latest record per customer_id)

In [0]:
-- Latest created_timestamp recorded for each customer.
SELECT
    d.customer_id,
    MAX(d.created_timestamp) AS max_created_timestamp
FROM v_customers_distinct AS d
GROUP BY d.customer_id;

In [0]:
-- Keep, for each customer, only the row(s) whose created_timestamp equals that customer's maximum.
WITH latest_per_customer AS (
    -- newest created_timestamp per customer_id
    SELECT
        d.customer_id,
        MAX(d.created_timestamp) AS max_created_timestamp
    FROM v_customers_distinct AS d
    GROUP BY d.customer_id
)
SELECT cust.*
FROM v_customers_distinct AS cust
INNER JOIN latest_per_customer AS latest
    ON cust.customer_id = latest.customer_id
   AND cust.created_timestamp = latest.max_created_timestamp;

### 4. CAST the column values to the correct data type

In [0]:
-- Latest row(s) per customer, with the timestamp and date columns CAST to proper types.
WITH latest_per_customer AS (
    -- newest created_timestamp per customer_id
    SELECT
        d.customer_id,
        MAX(d.created_timestamp) AS max_created_timestamp
    FROM v_customers_distinct AS d
    GROUP BY d.customer_id
)
SELECT
    CAST(cust.created_timestamp AS TIMESTAMP) AS created_timestamp,
    cust.customer_id,
    cust.customer_name,
    CAST(cust.date_of_birth AS DATE) AS date_of_birth,
    cust.email,
    CAST(cust.member_since AS DATE) AS member_since,
    cust.telephone
FROM v_customers_distinct AS cust
INNER JOIN latest_per_customer AS latest
    ON cust.customer_id = latest.customer_id
   AND cust.created_timestamp = latest.max_created_timestamp;

### 5. Write transformed data to a Delta table in the Silver schema

In [0]:
-- Materialise the cleaned customers as pavan_catalog_all.silver.customers.
-- CREATE OR REPLACE keeps this cell re-runnable: a plain CREATE TABLE fails
-- once the table exists from an earlier run of the notebook.
-- Source rows come from v_customers_distinct, restricted to each customer's
-- latest created_timestamp, with timestamp/date columns CAST to proper types.
-- NOTE(review): rows tied on the latest created_timestamp for the same
-- customer_id all pass the join -- confirm that is acceptable for Silver.
CREATE OR REPLACE TABLE pavan_catalog_all.silver.customers
AS
WITH cte_max AS
(
  -- newest created_timestamp per customer_id
  SELECT customer_id,
         MAX(created_timestamp) AS max_created_timestamp
  FROM v_customers_distinct
  GROUP BY customer_id
)
SELECT CAST(t.created_timestamp AS TIMESTAMP) AS created_timestamp,
       t.customer_id,
       t.customer_name,
       CAST(t.date_of_birth AS DATE) AS date_of_birth,
       t.email,
       CAST(t.member_since AS DATE) AS member_since,
       t.telephone
  FROM v_customers_distinct t
  INNER JOIN cte_max m
    ON t.customer_id = m.customer_id
    AND t.created_timestamp = m.max_created_timestamp;

In [0]:
-- Read back the rows that were written to the Silver table.
SELECT s.*
FROM pavan_catalog_all.silver.customers AS s;

In [0]:
-- Show the column definitions and extended metadata of the Silver table.
DESCRIBE TABLE EXTENDED pavan_catalog_all.silver.customers;