# Transform Customers Data
1. Remove records with NULL customer_id
2. Remove exact duplicate records
3. Remove duplicate records per customer_id, keeping the row with the latest created_timestamp
4. CAST the columns to the correct data types
5. Write Transformed data into Silver Schema

### 1. Remove records with NULL customer_id

In [0]:
%sql
-- Keep only the rows that carry a customer_id; rows where it is NULL are dropped.
select *
from gizmobox.bronze.v_customers
where customer_id is not null;

### 2. Remove exact duplicate records

In [0]:
-- Baseline before de-duplication: every row with a customer_id,
-- sorted so that repeated ids appear next to each other.
select *
from gizmobox.bronze.v_customers
where customer_id is not null
order by customer_id;
-- Observed: 60 records

In [0]:
-- DISTINCT collapses rows that are identical in every column (exact duplicates).
select distinct *
from gizmobox.bronze.v_customers
where customer_id is not null
order by customer_id;
-- Observed: 55 records, i.e. 5 exact duplicate records are removed

### 3. Remove duplicate records per customer_id, keeping the latest created_timestamp

In [0]:
-- do not use below approach to remove duplicate records (not exact duplicates)
select customer_id,
        max(created_timestamp),
        max(customer_name),
        max(date_of_birth),
        max(email),
        max(member_since),
        max(telephone)
from gizmobox.bronze.v_customers
where customer_id is not null
group by customer_id
order by customer_id;
-- It gives 50 records but this is not accurate

### Use this approach instead: keep the row with the latest created_timestamp for each customer_id

In [0]:
-- Staging view: rows that have a customer_id, with exact duplicate rows collapsed.
-- The view deliberately has no ORDER BY: a sort inside a view definition gives
-- its consumers no ordering guarantee and only adds work. Any query that needs
-- ordered output should apply its own ORDER BY.
create or replace temporary view tv_customers_distinct
as
select distinct *
from gizmobox.bronze.v_customers
where customer_id is not null;

In [0]:
-- Latest created_timestamp seen for each customer_id.
select
    customer_id,
    max(created_timestamp) as max_created_timestamp
from tv_customers_distinct
group by customer_id;
-- Observed: 50 records (one row per customer_id)

In [0]:
-- Keep, for every customer_id, only the row(s) whose created_timestamp equals
-- that customer's latest created_timestamp.
-- Note: if several distinct rows share both the customer_id and the latest
-- created_timestamp, all of them are returned.
with latest_per_customer as (
    -- one row per customer_id with its most recent created_timestamp
    select
        customer_id,
        max(created_timestamp) as max_created_timestamp
    from tv_customers_distinct
    group by customer_id
)
select src.*
from tv_customers_distinct as src
inner join latest_per_customer as latest
    on  src.customer_id = latest.customer_id
    and src.created_timestamp = latest.max_created_timestamp;

### 4. CAST the columns to the correct data types

In [0]:
-- Same latest-row-per-customer selection as before, now with explicit types:
-- created_timestamp -> timestamp, date_of_birth and member_since -> date.
-- The join still compares the un-cast created_timestamp values from the view.
with latest_per_customer as (
    -- one row per customer_id with its most recent created_timestamp
    select
        customer_id,
        max(created_timestamp) as max_created_timestamp
    from tv_customers_distinct
    group by customer_id
)
select
    cast(src.created_timestamp as timestamp) as created_timestamp,
    src.customer_id,
    src.customer_name,
    cast(src.date_of_birth as date) as date_of_birth,
    src.email,
    cast(src.member_since as date) as member_since,
    src.telephone
from tv_customers_distinct as src
inner join latest_per_customer as latest
    on  src.customer_id = latest.customer_id
    and src.created_timestamp = latest.max_created_timestamp;

### 5. Write Transformed data into Silver Schema

In [0]:
-- Persist the de-duplicated, typed customer rows into the silver schema.
-- `create or replace` keeps this cell re-runnable: a plain `create table`
-- fails on a second run because gizmobox.silver.customers already exists.
-- Reads from the temporary view tv_customers_distinct, which must exist in
-- the current session.
create or replace table gizmobox.silver.customers
as
with cte_max as (
    -- latest created_timestamp for each customer_id
    select
        customer_id,
        max(created_timestamp) as max_created_timestamp
    from tv_customers_distinct
    group by customer_id
)
select
    cast(t.created_timestamp as timestamp) as created_timestamp,
    t.customer_id,
    t.customer_name,
    cast(t.date_of_birth as date) as date_of_birth,
    t.email,
    cast(t.member_since as date) as member_since,
    t.telephone
from tv_customers_distinct as t
inner join cte_max as c
    -- keep only the row(s) carrying each customer's latest created_timestamp
    on  t.customer_id = c.customer_id
    and t.created_timestamp = c.max_created_timestamp;

In [0]:
-- Sanity check of the table written to the silver schema.
select *
from gizmobox.silver.customers;
-- Observed: 50 records
-- Column data types match the casts applied when the table was created

In [0]:
-- Show the column definitions plus the extended table metadata of the silver table.
describe table extended gizmobox.silver.customers;