# Transform Customer Data
1. Remove records with NULL customer_id
2. Remove exact duplicate records
3. Remove duplicate records based on created_timestamp (keep only the latest record per customer_id)
4. CAST the columns to the correct Data Type
5. Write transformed data to the Silver schema

## 1. Remove records with NULL customer_id

In [0]:
%sql
-- Step 1: keep only the rows that carry a customer_id.
SELECT c.*
FROM gizmobox.bronze.v_customers AS c
WHERE c.customer_id IS NOT NULL;

## 2. Remove exact duplicate records

In [0]:
%sql
-- Preview the rows that have a customer_id, sorted by customer_id so that
-- rows sharing the same customer_id appear next to each other.
SELECT c.*
FROM gizmobox.bronze.v_customers AS c
WHERE c.customer_id IS NOT NULL
ORDER BY c.customer_id ASC;

In [0]:
%sql
-- Same preview, with rows that are identical in every column collapsed
-- into a single row by DISTINCT.
SELECT DISTINCT c.*
FROM gizmobox.bronze.v_customers AS c
WHERE c.customer_id IS NOT NULL
ORDER BY c.customer_id ASC;

In [0]:
%sql
-- Temporary view over the bronze customers with NULL customer_id rows removed
-- and exact duplicate rows (identical in every column) collapsed by DISTINCT.
-- It is the input of the "latest record per customer_id" queries in this notebook.
--
-- Deliberately no ORDER BY: a sort inside a view definition gives no ordering
-- guarantee to queries that read the view, and the consumers here only
-- aggregate or join it. Add ORDER BY to the outer query when sorted output is needed.
CREATE OR REPLACE TEMPORARY VIEW v_customers_distinct
AS
SELECT DISTINCT *
FROM gizmobox.bronze.v_customers
WHERE customer_id IS NOT NULL;

In [0]:
%sql
-- Greatest created_timestamp value found for each customer_id.
SELECT
  d.customer_id,
  MAX(d.created_timestamp) AS max_created_timestamp
FROM v_customers_distinct AS d
GROUP BY d.customer_id;

In [0]:
%sql
-- For every customer_id keep only the row(s) whose created_timestamp equals
-- that customer's maximum created_timestamp.
-- NOTE: rows tied on the maximum are all returned.
SELECT cust.*
FROM v_customers_distinct AS cust
INNER JOIN (
  -- one row per customer_id with its greatest created_timestamp
  SELECT
    customer_id,
    MAX(created_timestamp) AS max_created_timestamp
  FROM v_customers_distinct
  GROUP BY customer_id
) AS latest
  ON cust.customer_id = latest.customer_id
  AND cust.created_timestamp = latest.max_created_timestamp;

## 3. Remove duplicate records based on created_timestamp (keep only the latest record per customer_id)

In [0]:
%sql
-- Latest row per customer_id, with the timestamp/date columns cast to proper types.
WITH latest_per_customer AS (
  -- one row per customer_id with its greatest created_timestamp
  SELECT
    customer_id,
    MAX(created_timestamp) AS max_created_timestamp
  FROM v_customers_distinct
  GROUP BY customer_id
)
SELECT
  CAST(cust.created_timestamp AS timestamp) AS created_timestamp,
  cust.customer_id,
  cust.customer_name,
  CAST(cust.date_of_birth AS date) AS date_of_birth,
  cust.email,
  CAST(cust.member_since AS date) AS member_since,
  cust.telephone
FROM v_customers_distinct AS cust
INNER JOIN latest_per_customer AS latest
  ON cust.customer_id = latest.customer_id
  -- the match uses the un-cast source value, exactly as MAX() computed it
  AND cust.created_timestamp = latest.max_created_timestamp;

## 4. CAST the columns to the correct Data Type

In [0]:
%sql
-- Project the latest row of each customer_id and cast created_timestamp to
-- timestamp and date_of_birth / member_since to date; the remaining columns
-- are passed through unchanged.
SELECT
  CAST(src.created_timestamp AS timestamp) AS created_timestamp,
  src.customer_id,
  src.customer_name,
  CAST(src.date_of_birth AS date) AS date_of_birth,
  src.email,
  CAST(src.member_since AS date) AS member_since,
  src.telephone
FROM v_customers_distinct AS src
INNER JOIN (
  -- greatest created_timestamp per customer_id
  SELECT
    customer_id,
    MAX(created_timestamp) AS max_created_timestamp
  FROM v_customers_distinct
  GROUP BY customer_id
) AS newest
  ON src.customer_id = newest.customer_id
  AND src.created_timestamp = newest.max_created_timestamp;

## 5. Write transformed data to the Silver schema

In [0]:
%sql
-- Materialize the cleaned customers into the Silver schema.
--
-- CREATE OR REPLACE TABLE swaps the table contents in a single statement.
-- The previous DROP TABLE + CREATE TABLE pair left a window in which the table
-- did not exist (and left no table at all if the CTAS failed), and dropping
-- also discards the table's history and grants.
-- NOTE(review): assumes the target is a Delta table (the Databricks default
-- for managed tables) - confirm before running against an existing non-Delta table.
--
-- Source: v_customers_distinct (NULL customer_id rows and exact duplicates
-- already removed). Only the row(s) carrying the greatest created_timestamp
-- per customer_id are kept; rows tied on that maximum are all written.
CREATE OR REPLACE TABLE gizmobox.silver.customers
AS
WITH cte_max AS
(
  -- greatest created_timestamp per customer_id
  SELECT customer_id, MAX(created_timestamp) AS max_created_timestamp
  FROM v_customers_distinct
  GROUP BY customer_id
)
SELECT
  CAST(t.created_timestamp AS timestamp) AS created_timestamp,
  t.customer_id,
  t.customer_name,
  CAST(t.date_of_birth AS date) AS date_of_birth,
  t.email,
  CAST(t.member_since AS date) AS member_since,
  t.telephone
FROM v_customers_distinct t
INNER JOIN cte_max m
  ON t.customer_id = m.customer_id
  AND t.created_timestamp = m.max_created_timestamp
;

In [0]:
%sql
-- Inspect the rows stored in the silver customers table.
SELECT s.*
FROM gizmobox.silver.customers AS s;

In [0]:
%sql 
-- Show the column definitions and the extended table metadata of the silver table.
DESCRIBE TABLE EXTENDED gizmobox.silver.customers;