In [None]:
# Data Warehouse Design – Star Schema

# a. Design a star schema for the e-commerce data

Refer to the diagram `2.ecommerce_star_schema_valid.drawio` in `/docs`.

# b. Create Dimension and Fact Tables

FactSales

| Column                           | Description                     |
| -------------------------------- | ------------------------------- |
| order\_id (PK)                   | Order ID                        |
| order\_item\_id (PK)             | Item index in the order         |
| product\_id (FK)                 | Product sold                    |
| seller\_id (FK)                  | Seller                          |
| customer\_id (FK)                | Customer                        |
| price                            | Product price                   |
| freight\_value                   | Freight cost                    |
| payment\_value                   | Total paid (from payment table) |
| payment\_type                    | Payment method                  |
| review\_score                    | Score from review               |
| order\_status                    | Delivered, shipped, etc.        |
| order\_purchase\_timestamp       | Purchase date                   |
| order\_delivered\_customer\_date | Delivery date                   |
| order\_estimated\_delivery\_date | Estimated delivery              |


DimCustomer

| Column                           | Description        |
| -------------------------------- | ------------------ |
| customer\_id (PK)                | Unique customer ID |
| customer\_unique\_id             | Logical person ID  |
| customer\_zip\_code\_prefix (FK) | ZIP prefix         |
| customer\_city                   | City               |
| customer\_state                  | State              |


DimProduct

| Column                       | Description        |
| ---------------------------- | ------------------ |
| product\_id (PK)             | Product ID         |
| product\_category\_name      | In Portuguese      |
| product\_name\_length        | Text length        |
| product\_description\_length | Description length |
| product\_photos\_qty         | Number of photos   |
| product\_weight\_g           | Weight in grams    |
| product\_length\_cm          | Length in cm       |
| product\_height\_cm          | Height in cm       |
| product\_width\_cm           | Width in cm        |


DimSeller

| Column                         | Description |
| ------------------------------ | ----------- |
| seller\_id (PK)                | Seller ID   |
| seller\_zip\_code\_prefix (FK) | ZIP prefix  |
| seller\_city                   | City        |
| seller\_state                  | State       |


DimDate

| Column        | Description  |
| ------------- | ------------ |
| date (PK)     | Date value   |
| year          | Year         |
| month         | Month        |
| day           | Day          |
| week          | ISO week     |
| weekday\_name | e.g., Monday |
| is\_weekend   | True/False   |


DimGeolocation

| Column                 | Description |
| ---------------------- | ----------- |
| zip\_code\_prefix (PK) | ZIP prefix  |
| latitude               | Avg lat     |
| longitude              | Avg long    |
| city                   | City        |
| state                  | State       |


In [None]:
# c. Implement Schema in SQL (e.g., PostgreSQL / SQLite)

-- Customer dimension: one row per customer_id.
-- NOTE(review): the design table marks customer_zip_code_prefix as an FK, but
-- no FOREIGN KEY constraint is declared here -- confirm whether that is intended.
CREATE TABLE DimCustomer (
    -- NOT NULL is spelled out because SQLite (unlike PostgreSQL) accepts NULL in
    -- a non-INTEGER PRIMARY KEY column unless the column is declared NOT NULL.
    customer_id TEXT NOT NULL PRIMARY KEY,
    customer_unique_id TEXT,
    customer_zip_code_prefix INTEGER,
    customer_city TEXT,
    customer_state TEXT
);

-- Product dimension: one row per product_id, carrying the category name and
-- the listing/physical attributes of the product.
CREATE TABLE DimProduct (
    -- NOT NULL is spelled out because SQLite (unlike PostgreSQL) accepts NULL in
    -- a non-INTEGER PRIMARY KEY column unless the column is declared NOT NULL.
    product_id TEXT NOT NULL PRIMARY KEY,
    product_category_name TEXT,
    product_name_length INTEGER,
    product_description_length INTEGER,
    product_photos_qty INTEGER,
    product_weight_g INTEGER,
    product_length_cm INTEGER,
    product_height_cm INTEGER,
    product_width_cm INTEGER
);

-- Seller dimension: one row per seller_id.
-- NOTE(review): the design table marks seller_zip_code_prefix as an FK, but
-- no FOREIGN KEY constraint is declared here -- confirm whether that is intended.
CREATE TABLE DimSeller (
    -- NOT NULL is spelled out because SQLite (unlike PostgreSQL) accepts NULL in
    -- a non-INTEGER PRIMARY KEY column unless the column is declared NOT NULL.
    seller_id TEXT NOT NULL PRIMARY KEY,
    seller_zip_code_prefix INTEGER,
    seller_city TEXT,
    seller_state TEXT
);

-- Calendar dimension: one row per calendar date, with pre-split date parts.
-- NOTE(review): FactSales stores TIMESTAMP columns and declares no foreign key
-- to this table, so joins need a cast to DATE -- confirm the intended join key.
CREATE TABLE DimDate (
    -- NOT NULL is spelled out because SQLite (unlike PostgreSQL) accepts NULL in
    -- a non-INTEGER PRIMARY KEY column unless the column is declared NOT NULL.
    date DATE NOT NULL PRIMARY KEY,
    year INTEGER,
    month INTEGER,
    day INTEGER,
    week INTEGER,          -- ISO week, per the design table
    weekday_name TEXT,     -- e.g. Monday
    is_weekend BOOLEAN
);

-- Geolocation dimension: one row per ZIP-code prefix with its coordinates
-- (described as averages in the design table) and city/state.
CREATE TABLE DimGeolocation (
    zip_code_prefix INTEGER PRIMARY KEY,
    -- DOUBLE PRECISION instead of REAL: on PostgreSQL, REAL is a 4-byte float
    -- that only guarantees about 6 significant decimal digits, which is too
    -- coarse for coordinates. SQLite maps both spellings to REAL affinity, so
    -- nothing changes there.
    latitude DOUBLE PRECISION,
    longitude DOUBLE PRECISION,
    city TEXT,
    state TEXT
);

-- Sales fact table at order-item grain: one row per (order_id, order_item_id),
-- linked to the customer, product and seller dimensions.
-- NOTE(review): payment_value is described as the total paid from the payment
-- table; if it is an order-level amount repeated on every item row, SUM() over
-- it will double count -- confirm against the load logic.
-- NOTE: SQLite only enforces the FOREIGN KEY clauses below when the connection
-- has run `PRAGMA foreign_keys = ON`.
CREATE TABLE FactSales (
    -- Key columns are NOT NULL explicitly: SQLite (unlike PostgreSQL) accepts
    -- NULL in PRIMARY KEY columns unless they are declared NOT NULL.
    order_id TEXT NOT NULL,
    order_item_id INTEGER NOT NULL,
    customer_id TEXT,
    product_id TEXT,
    seller_id TEXT,
    -- Measures use DOUBLE PRECISION instead of REAL: on PostgreSQL, REAL is a
    -- 4-byte float, so amounts pick up visible rounding error when aggregated.
    -- SQLite maps both spellings to REAL affinity, so nothing changes there.
    -- (NUMERIC was not used because SQLite's NUMERIC affinity stores whole
    -- amounts as integers, which changes the result of division in queries.)
    price DOUBLE PRECISION,
    freight_value DOUBLE PRECISION,
    payment_value DOUBLE PRECISION,
    payment_type TEXT,
    review_score INTEGER,
    order_status TEXT,
    order_purchase_timestamp TIMESTAMP,
    order_delivered_customer_date TIMESTAMP,
    order_estimated_delivery_date TIMESTAMP,
    PRIMARY KEY (order_id, order_item_id),
    FOREIGN KEY (customer_id) REFERENCES DimCustomer(customer_id),
    FOREIGN KEY (product_id) REFERENCES DimProduct(product_id),
    FOREIGN KEY (seller_id) REFERENCES DimSeller(seller_id)
);