In [0]:
%sql
-- Regresando al estado inicial de orders (Borrando datos insertados)
DELETE FROM company.sales.orders
WHERE OrderDate >= '2024-02-11'

num_affected_rows
0


In [0]:
%sql
-- Limpiando la base de datos
DROP DATABASE IF EXISTS company.orders_dwh CASCADE

# DATAWAREHOUSING

In [0]:
%sql
CREATE DATABASE company.orders_dwh

### Stagging Layer

In [0]:
%sql
CREATE OR REPLACE TABLE company.orders_dwh.stg_sales (
    OrderID INT,
    OrderDate DATE,
    CustomerID INT,
    CustomerName VARCHAR(100),
    CustomerEmail VARCHAR(100),
    ProductID INT,
    ProductName VARCHAR(100),
    ProductCategory VARCHAR(50),
    RegionID INT,
    RegionName VARCHAR(50),
    Country VARCHAR(50),
    Quantity INT,
    UnitPrice DECIMAL(10,2),
    TotalAmount DECIMAL(10,2)
);


In [0]:
%sql
-- Initial Load
--INSERT INTO company.orders_dwh.stg_sales
CREATE OR REPLACE TABLE company.orders_dwh.stg_sales
AS
SELECT * FROM company.sales.orders
WHERE OrderDate > (SELECT COALESCE(MAX(OrderDate), '1900-01-01') FROM company.orders_dwh.stg_sales)

num_affected_rows,num_inserted_rows


### Transformation

In [0]:
%sql
CREATE OR REPLACE VIEW company.orders_dwh.vw_trans_sales
AS
SELECT 
    CAST(OrderID AS INT) AS order_id,
    CAST(OrderDate AS DATE) AS order_date,
    CAST(CustomerID AS INT) AS customer_id,
    CAST(CustomerName AS VARCHAR(100)) AS customer_name,
    CAST(CustomerEmail AS VARCHAR(100)) AS customer_email,
    CAST(ProductID AS INT) AS product_id,
    CAST(ProductName AS VARCHAR(100)) AS product_name,
    CAST(ProductCategory AS VARCHAR(50)) AS product_category,
    CAST(RegionID AS INT) AS region_id,
    CAST(RegionName AS VARCHAR(50)) AS region_name,
    CAST(Country AS VARCHAR(50)) AS country,
    CAST(Quantity AS INT) AS quantity,
    CAST(UnitPrice AS DECIMAL(10,2)) AS unit_price,
    CAST(TotalAmount AS DECIMAL(10,2)) AS total_amount
FROM company.orders_dwh.stg_sales
WHERE Quantity IS NOT NULL;

### Core Layer

#### Dim Customers

In [0]:
%sql
CREATE OR REPLACE TABLE company.orders_dwh.dim_customers
(
  dim_customers_key BIGINT GENERATED ALWAYS AS IDENTITY,
  customer_id INT,
  customer_name STRING,
  customer_email STRING
)

In [0]:
%sql
CREATE OR REPLACE VIEW company.orders_dwh.vw_dim_customers AS
SELECT DISTINCT
  customer_id,
  customer_name,
  customer_email
FROM company.orders_dwh.vw_trans_sales

In [0]:
%sql
MERGE INTO company.orders_dwh.dim_customers dc
USING company.orders_dwh.vw_dim_customers vdc ON dc.customer_id = vdc.customer_id
WHEN MATCHED THEN UPDATE SET
  dc.customer_name = vdc.customer_name,
  dc.customer_email = vdc.customer_email
WHEN NOT MATCHED THEN INSERT
  (customer_id, customer_name, customer_email)
VALUES
  (vdc.customer_id, vdc.customer_name, vdc.customer_email)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
7,0,0,7


#### Dim Products

In [0]:
%sql
CREATE OR REPLACE TABLE company.orders_dwh.dim_products
(
  dim_products_key BIGINT GENERATED ALWAYS AS IDENTITY,
  product_id INT,
  product_name STRING,
  product_category STRING
)

In [0]:
%sql
CREATE OR REPLACE VIEW company.orders_dwh.vw_dim_products
AS 
SELECT DISTINCT
  product_id,
  product_name,
  product_category
FROM company.orders_dwh.vw_trans_sales

In [0]:
%sql
MERGE INTO company.orders_dwh.dim_products dc
USING company.orders_dwh.vw_dim_products vdp ON dc.product_id = vdp.product_id
WHEN MATCHED THEN UPDATE SET
  dc.product_name = vdp.product_name,
  dc.product_category = vdp.product_category
WHEN NOT MATCHED THEN INSERT
  (product_id, product_name, product_category)
VALUES
  (vdp.product_id, vdp.product_name, vdp.product_category)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
9,0,0,9


#### Dim Regions

In [0]:
%sql
CREATE OR REPLACE TABLE company.orders_dwh.dim_regions
(
  dim_regions_key BIGINT GENERATED ALWAYS AS IDENTITY,
  region_id INT,
  region_name STRING,
  country STRING
)

In [0]:
%sql
CREATE OR REPLACE VIEW company.orders_dwh.vw_dim_regions
AS 
SELECT DISTINCT
  region_id,
  region_name,
  country
FROM company.orders_dwh.vw_trans_sales

In [0]:
%sql
MERGE INTO company.orders_dwh.dim_regions dc
USING company.orders_dwh.vw_dim_regions vdr ON dc.country = vdr.country
WHEN MATCHED THEN UPDATE SET
  dc.region_id = vdr.region_id,
  dc.region_name = vdr.region_name
WHEN NOT MATCHED THEN INSERT
  (region_id, region_name, country)
VALUES
  (vdr.region_id, vdr.region_name, vdr.country)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
8,0,0,8


#### Dim Dates

In [0]:
%sql
CREATE OR REPLACE TABLE company.orders_dwh.dim_dates
(
  dim_dates_key BIGINT GENERATED ALWAYS AS IDENTITY,
  order_date Date
)

In [0]:
%sql
CREATE OR REPLACE VIEW company.orders_dwh.vw_dim_dates
AS 
SELECT DISTINCT
  order_date
FROM company.orders_dwh.vw_trans_sales

In [0]:
%sql
MERGE INTO company.orders_dwh.dim_dates dc
USING company.orders_dwh.vw_dim_dates vdd ON dc.order_date = vdd.order_date
WHEN NOT MATCHED THEN INSERT
  (order_date)
VALUES
  (vdd.order_date)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
10,0,0,10


### FACT TABLE

In [0]:
%sql
CREATE OR REPLACE TABLE company.orders_dwh.fact_sales
(
  order_id INT,
  quantity DECIMAL,
  unit_price DECIMAL,
  total_amount DECIMAL,
  dim_products_key INT,
  dim_customers_key INT,
  dim_regions_key INT,
  dim_dates_key INT
)

In [0]:
%sql
CREATE OR REPLACE VIEW company.orders_dwh.vw_fact_sales
AS
SELECT 
  F.order_id,
  F.quantity,
  F.unit_price,
  F.total_amount,
  DP.dim_products_key,
  DC.dim_customers_key,
  DR.dim_regions_key,
  DD.dim_dates_key
FROM  
  company.orders_dwh.vw_trans_sales F 
LEFT JOIN 
  company.orders_dwh.dim_customers DC 
  ON F.customer_id = DC.customer_id
LEFT JOIN 
  company.orders_dwh.dim_products DP 
  ON F.product_id = DP.product_id
LEFT JOIN 
  company.orders_dwh.dim_regions DR 
  ON F.country = DR.country
LEFT JOIN 
  company.orders_dwh.dim_dates DD 
  ON F.order_date = DD.order_date

In [0]:
%sql
INSERT INTO company.orders_dwh.fact_sales
SELECT * FROM company.orders_dwh.vw_fact_sales

num_affected_rows,num_inserted_rows
10,10


### Business Questions

In [0]:
%sql
-- ¿Qué productos se venden más en cada región?
WITH ranked_products AS (
  SELECT
    dr.region_name,
    dp.product_name,
    SUM(fs.quantity) AS total_quantity,
    ROW_NUMBER() OVER (PARTITION BY dr.region_name ORDER BY SUM(fs.quantity) DESC) AS rank
  FROM company.orders_dwh.fact_sales fs
  LEFT JOIN company.orders_dwh.dim_regions dr ON fs.dim_regions_key = dr.dim_regions_key
  LEFT JOIN company.orders_dwh.dim_products dp ON fs.dim_products_key = dp.dim_products_key
  GROUP BY
    dr.region_name,
    dp.product_name
)
SELECT
  rp.region_name,
  rp.product_name,
  rp.total_quantity
FROM ranked_products rp
WHERE rp.rank <= 2

region_name,product_name,total_quantity
Asia,Tablet,3
Asia,Keyboard,3
Europe,Monitor,2
Europe,Smartphone,1
North America,Laptop,3
North America,Mouse,1


In [0]:
%sql
-- ¿Qué clientes generan mayores ingresos?
WITH ranked_customers AS (
    SELECT
        dc.customer_name,
        SUM(fs.total_amount) AS total_income,
        ROW_NUMBER() OVER (ORDER BY SUM(fs.total_amount) DESC) AS rank
    FROM company.orders_dwh.fact_sales fs
    LEFT JOIN company.orders_dwh.dim_customers dc ON fs.dim_customers_key = dc.dim_customers_key
    GROUP BY dc.customer_name
)
SELECT
    rc.customer_name,
    rc.total_income
FROM ranked_customers rc
WHERE rc.rank <= 3


customer_name,total_income
Alice Johnson,1750
Charlie Brown,900
Bob Smith,900


In [0]:
%sql
-- ¿Cómo varían las ventas día a día?
SELECT
    dd.order_date,
    SUM(fs.total_amount) AS total_income
FROM company.orders_dwh.fact_sales fs
LEFT JOIN company.orders_dwh.dim_dates dd ON fs.dim_dates_key = dd.dim_dates_key
GROUP BY dd.order_date
ORDER BY dd.order_date

order_date,total_income
2024-02-01,1600
2024-02-02,500
2024-02-03,900
2024-02-04,150
2024-02-05,400
2024-02-06,400
2024-02-07,800
2024-02-08,500
2024-02-09,300
2024-02-10,50


### Llegaron nuevos datos!

In [0]:
%sql
-- Dos nuevos registros: Actualización de Nombre e Email para CustomerID 101, y Nuevo CustomerID 108
INSERT INTO company.sales.orders (OrderID, OrderDate, CustomerID, CustomerName, CustomerEmail, ProductID, ProductName, ProductCategory, RegionID, RegionName, Country, Quantity, UnitPrice, TotalAmount) 
VALUES 
(11, '2024-02-11', 101, 'Pepe', 'pepe@utec.com', 201, 'Laptop', 'Electronics', 301, 'North America', 'USA', 2, 800.00, 1600.00),
(12, '2024-02-12', 108, 'Angel', 'angel@utec.com', 202, 'Smartphone', 'Electronics', 302, 'Europe', 'Germany', 1, 500.00, 500.00);


num_affected_rows,num_inserted_rows
2,2


#### Stagging Layer

In [0]:
%sql
CREATE OR REPLACE TABLE company.orders_dwh.stg_sales
SELECT * FROM company.sales.orders
WHERE OrderDate > (SELECT COALESCE(MAX(OrderDate), '1900-01-01') FROM company.orders_dwh.stg_sales)

num_affected_rows,num_inserted_rows


In [0]:
%sql
SELECT * FROM company.orders_dwh.stg_sales

OrderID,OrderDate,CustomerID,CustomerName,CustomerEmail,ProductID,ProductName,ProductCategory,RegionID,RegionName,Country,Quantity,UnitPrice,TotalAmount
11,2024-02-11,101,Pepe,pepe@utec.com,201,Laptop,Electronics,301,North America,USA,2,800.0,1600.0
12,2024-02-12,108,Angel,angel@utec.com,202,Smartphone,Electronics,302,Europe,Germany,1,500.0,500.0


#### Transformation

In [0]:
%sql
SELECT * FROM company.orders_dwh.vw_trans_sales

order_id,order_date,customer_id,customer_name,customer_email,product_id,product_name,product_category,region_id,region_name,country,quantity,unit_price,total_amount
11,2024-02-11,101,Pepe,pepe@utec.com,201,Laptop,Electronics,301,North America,USA,2,800.0,1600.0
12,2024-02-12,108,Angel,angel@utec.com,202,Smartphone,Electronics,302,Europe,Germany,1,500.0,500.0


#### Core Layer

In [0]:
%sql
SELECT * FROM company.orders_dwh.dim_customers

dim_customers_key,customer_id,customer_name,customer_email
1,101,Alice Johnson,alice@example.com
2,103,Charlie Brown,charlie@example.com
3,105,Eve Adams,eve@example.com
4,102,Bob Smith,bob@example.com
5,107,Grace White,grace@example.com
6,104,David Lee,david@example.com
7,106,Frank Miller,frank@example.com


In [0]:
%sql
SELECT * FROM company.orders_dwh.vw_dim_customers

customer_id,customer_name,customer_email
101,Pepe,pepe@utec.com
108,Angel,angel@utec.com


In [0]:
%sql
MERGE INTO company.orders_dwh.dim_customers dc
USING company.orders_dwh.vw_dim_customers vdc ON dc.customer_id = vdc.customer_id
WHEN MATCHED THEN UPDATE SET
  dc.customer_name = vdc.customer_name,
  dc.customer_email = vdc.customer_email
WHEN NOT MATCHED THEN INSERT
  (customer_id, customer_name, customer_email)
VALUES
  (vdc.customer_id, vdc.customer_name, vdc.customer_email)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
2,1,0,1


In [0]:
%sql
SELECT * FROM company.orders_dwh.dim_customers

dim_customers_key,customer_id,customer_name,customer_email
2,103,Charlie Brown,charlie@example.com
3,105,Eve Adams,eve@example.com
4,102,Bob Smith,bob@example.com
5,107,Grace White,grace@example.com
6,104,David Lee,david@example.com
7,106,Frank Miller,frank@example.com
8,108,Angel,angel@utec.com
1,101,Pepe,pepe@utec.com


In [0]:
%sql
MERGE INTO company.orders_dwh.dim_products dc
USING company.orders_dwh.vw_dim_products vdp ON dc.product_id = vdp.product_id
WHEN MATCHED THEN UPDATE SET
  dc.product_name = vdp.product_name,
  dc.product_category = vdp.product_category
WHEN NOT MATCHED THEN INSERT
  (product_id, product_name, product_category)
VALUES
  (vdp.product_id, vdp.product_name, vdp.product_category)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
2,2,0,0


In [0]:
%sql
MERGE INTO company.orders_dwh.dim_regions dc
USING company.orders_dwh.vw_dim_regions vdr ON dc.country = vdr.country
WHEN MATCHED THEN UPDATE SET
  dc.region_id = vdr.region_id,
  dc.region_name = vdr.region_name
WHEN NOT MATCHED THEN INSERT
  (region_id, region_name, country)
VALUES
  (vdr.region_id, vdr.region_name, vdr.country)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
2,2,0,0


In [0]:
%sql
MERGE INTO company.orders_dwh.dim_dates dc
USING company.orders_dwh.vw_dim_dates vdd ON dc.order_date = vdd.order_date
WHEN NOT MATCHED THEN INSERT
  (order_date)
VALUES
  (vdd.order_date)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
2,0,0,2


#### FACT TABLE

In [0]:
%sql
SELECT * FROM company.orders_dwh.vw_fact_sales

order_id,quantity,unit_price,total_amount,dim_products_key,dim_customers_key,dim_regions_key,dim_dates_key
11,2,800.0,1600.0,5,1,6,11
12,1,500.0,500.0,6,8,7,12


In [0]:
%sql
INSERT INTO company.orders_dwh.fact_sales
SELECT * FROM company.orders_dwh.vw_fact_sales

num_affected_rows,num_inserted_rows
2,2


In [0]:
%sql
SELECT * FROM company.orders_dwh.fact_sales

order_id,quantity,unit_price,total_amount,dim_products_key,dim_customers_key,dim_regions_key,dim_dates_key
1,2,800,1600,5,1,6,2
2,1,500,500,6,4,7,1
3,3,300,900,3,2,3,3
4,1,150,150,9,1,6,9
5,1,400,400,4,6,4,8
6,2,200,400,2,4,1,4
7,1,800,800,5,3,2,10
8,2,250,500,7,7,8,5
9,3,100,300,8,5,5,6
10,1,50,50,1,6,6,7
