### Gold Layer — Business-Ready Data
Dimension and fact tables built from the Silver layer.

#### Environment Setup

In [None]:

from pyspark.sql import SparkSession
# Obtain the Spark session for this notebook: getOrCreate() reuses an
# already-active session or starts a new one under the given app name.
spark = (
    SparkSession.builder
    .appName("Medallion-SQL")
    .getOrCreate()
)
print("Spark version:", spark.version)

# Base directory for data files.
# NOTE(review): not referenced by any cell visible in this notebook section —
# confirm it is still needed.
DATA_BASE = "/content"


#### Create `gold` database

In [None]:
# Make sure the target database exists; a no-op when `gold` is already there.
spark.sql("CREATE DATABASE IF NOT EXISTS gold")

#### Dimension: Customers

In [None]:
# Remove any earlier copy so the CREATE TABLE that follows can be re-run.
spark.sql("DROP TABLE IF EXISTS gold.dim_customers")

In [None]:
# Customer dimension: a parquet table created from silver.crm_cust_info,
# with the cst_* source columns renamed to business-friendly names.
dim_customers_ctas = """CREATE TABLE gold.dim_customers
USING parquet
AS
SELECT
  cst_id        AS customer_id,
  cst_firstname AS first_name,
  cst_lastname  AS last_name,
  cst_gndr      AS gender,
  cst_marital_status AS marital_status,
  cst_create_date    AS record_created_at
FROM silver.crm_cust_info"""
spark.sql(dim_customers_ctas)

In [None]:
# Print up to 20 customer rows with full (untruncated) column values.
(
    spark.sql("SELECT * FROM gold.dim_customers LIMIT 20")
    .show(20, truncate=False)
)

In [None]:
# Row count of the customer dimension, as a quick sanity check.
(
    spark.sql("SELECT COUNT(*) AS rows FROM gold.dim_customers")
    .show(truncate=False)
)

#### Dimension: Products

In [None]:
# Remove any earlier copy so the CREATE TABLE that follows can be re-run.
spark.sql("DROP TABLE IF EXISTS gold.dim_products")

In [None]:
# Product dimension: a parquet table created from silver.crm_prd_info.
# prd_id/prd_key/prd_nm/prd_line are renamed to product_*; prd_cost,
# prd_start_dt and prd_end_dt keep their Silver names.
# NOTE(review): no filter or de-duplication is applied here, so every row of
# the Silver table is carried over — confirm whether product_key is unique.
dim_products_ctas = """CREATE TABLE gold.dim_products
USING parquet
AS
SELECT
  prd_id      AS product_id,
  prd_key     AS product_key,
  prd_nm      AS product_name,
  prd_line    AS product_line,
  prd_cost    AS prd_cost,
  prd_start_dt,
  prd_end_dt
FROM silver.crm_prd_info"""
spark.sql(dim_products_ctas)

In [None]:
# Print up to 20 product rows with full (untruncated) column values.
(
    spark.sql("SELECT * FROM gold.dim_products LIMIT 20")
    .show(20, truncate=False)
)

In [None]:
# Row count of the product dimension, as a quick sanity check.
(
    spark.sql("SELECT COUNT(*) AS rows FROM gold.dim_products")
    .show(truncate=False)
)

#### Fact: Sales (includes `product_key` for joining to product dimension)

In [None]:
# Remove any earlier copy so the CREATE TABLE that follows can be re-run.
spark.sql("DROP TABLE IF EXISTS gold.fact_sales")

In [None]:
# Sales fact: a parquet table created from silver.crm_sales_details, one
# output row per Silver sales row. cst_id and prd_key are exposed as
# customer_id and product_key so the fact can be joined to the dimensions
# at query time.
#
# The fact is deliberately NOT joined to gold.dim_products while being built:
# no dimension column is selected, so a LEFT JOIN could not add or remove any
# information — it could only repeat a sale once for every dimension row that
# shares its product_key, inflating quantities and amounts.
# NOTE(review): dim_products carries prd_start_dt / prd_end_dt, which suggests
# a product_key may appear on several rows — confirm against the Silver data.
spark.sql("""CREATE TABLE gold.fact_sales
USING parquet
AS
SELECT
  s.sale_id,
  s.cst_id      AS customer_id,
  s.prd_key     AS product_key,
  s.qty, s.price, s.amount,
  s.order_dt, s.ship_dt, s.due_dt
FROM silver.crm_sales_details s""")

In [None]:
# Print up to 20 sales rows with full (untruncated) column values.
(
    spark.sql("SELECT * FROM gold.fact_sales LIMIT 20")
    .show(20, truncate=False)
)

In [None]:
# Row count of the sales fact, as a quick sanity check.
(
    spark.sql("SELECT COUNT(*) AS rows FROM gold.fact_sales")
    .show(truncate=False)
)

In [None]:
# Print up to 20 rows of gold.vw_sales_star with untruncated column values.
# NOTE(review): no cell visible in this notebook section creates
# gold.vw_sales_star — confirm the view is defined elsewhere before this runs,
# otherwise this query fails with a table-or-view-not-found error.
(
    spark.sql("SELECT * FROM gold.vw_sales_star LIMIT 20")
    .show(20, truncate=False)
)