In [0]:
# ============================================================
# BRONZE INGESTION - RETAIL MEDALLION DATA PIPELINE
# ============================================================
#
# Objetivo:
# Ingerir datos crudos (CSV) almacenados en un Volume de Unity Catalog
# y crear tablas Delta Bronze mediante PySpark.
#
# Arquitectura:
# CSV (Volume) -> Bronze (Delta Tables)
#
# Dominio:
# Retail (customers, products, orders)
#
# NOTA:
# - No se crean tablas desde la UI
# - Todo el proceso se ejecuta por código
# - Compatible con Databricks Serverless + Unity Catalog
# ============================================================


# ------------------------------------------------------------
# 1. GENERAL CONFIGURATION
# ------------------------------------------------------------

# Commented-out alternative values (apparently an earlier configuration;
# TODO confirm whether they can be removed):
# # Project schema where the Bronze tables will be created
# BRONZE_SCHEMA = "workspace.retail_medallion_pipeline"

# # Path to the Volume that contains the raw CSV files
# RAW_PATH = "data/raw"

# Schema name prefixed to every Bronze table written by this notebook.
BRONZE_SCHEMA = "workspace.retail_medallion_pipeline_schema"
# Directory that is listed and read from when loading the raw CSV files.
RAW_PATH = "/Volumes/workspace/retail_medallion_pipeline_schema/raw_volume"

# ------------------------------------------------------------
# 2. RAW FILE CHECK (VOLUME)
# ------------------------------------------------------------
# List the contents of RAW_PATH and render the listing in the notebook
# so the raw files can be inspected before ingestion starts.

raw_listing = dbutils.fs.ls(RAW_PATH)
display(raw_listing)

# ------------------------------------------------------------
# BRONZE INGESTION HELPER (shared by sections 3-5)
# ------------------------------------------------------------
# - Reads the CSV without transformations
# - Keeps the columns exactly as they appear in the file header
# - Writes the result as a Delta table via saveAsTable


def _ingest_csv_to_bronze(entity: str):
    """Load ``{RAW_PATH}/{entity}.csv`` into ``{BRONZE_SCHEMA}.bronze_{entity}``.

    The three Bronze ingestions differ only in the entity name, so the
    read/write sequence lives here instead of being copy-pasted per table.

    Args:
        entity: Base name shared by the raw file (``<entity>.csv``) and the
            target table (``bronze_<entity>``), e.g. ``"customers"``.

    Returns:
        The DataFrame read from the CSV file (the same object that was
        written), so later steps can keep working with it.
    """
    # header=True: the first line of the file supplies the column names.
    # No schema and no inferSchema option are given, so the reader's
    # defaults decide the column types.
    raw_df = (
        spark.read
        .option("header", True)
        .csv(f"{RAW_PATH}/{entity}.csv")
    )

    # mode("overwrite"): every run replaces the table contents.
    (
        raw_df.write
        .format("delta")
        .mode("overwrite")
        .saveAsTable(f"{BRONZE_SCHEMA}.bronze_{entity}")
    )
    return raw_df


# ------------------------------------------------------------
# 3. BRONZE INGESTION - CUSTOMERS
# ------------------------------------------------------------

df_customers = _ingest_csv_to_bronze("customers")


# ------------------------------------------------------------
# 4. BRONZE INGESTION - PRODUCTS
# ------------------------------------------------------------

df_products = _ingest_csv_to_bronze("products")


# ------------------------------------------------------------
# 5. BRONZE INGESTION - ORDERS
# ------------------------------------------------------------

df_orders = _ingest_csv_to_bronze("orders")


# ------------------------------------------------------------
# 6. BASIC VALIDATION OF THE BRONZE LAYER
# ------------------------------------------------------------
# Print one row count per Bronze table. Nothing is asserted here: the
# counts are only shown, so an empty table has to be spotted by eye.

row_count_query = f"""
SELECT 'customers' AS table_name, COUNT(*) AS row_count
FROM {BRONZE_SCHEMA}.bronze_customers
UNION ALL
SELECT 'products', COUNT(*)
FROM {BRONZE_SCHEMA}.bronze_products
UNION ALL
SELECT 'orders', COUNT(*)
FROM {BRONZE_SCHEMA}.bronze_orders
"""

row_counts_df = spark.sql(row_count_query)
row_counts_df.show()