
# Movimiento de órdenes de FarmAhorra de Raw a Bronze

## 0. Prerequisitos

In [0]:
from pyspark.sql import functions as F, types as T
from delta.tables import DeltaTable
from pyspark.sql.window import Window

## 1. Contexto Unity-Catalog

In [0]:
%sql
USE CATALOG farma;
CREATE SCHEMA IF NOT EXISTS bronze;
USE SCHEMA bronze;

## 2. Definición del contrato operativo

In [0]:
# Config para Bronze
EXTERNAL_BASE = "abfss://datalake@adlsfarmahorra.dfs.core.windows.net/farma"

cfg = {
    "raw_path": f"{EXTERNAL_BASE}/raw/orders_farmahorra",  # de aquí leemos JSON
    "bronze_orders_tbl": "farma.bronze.orders",
    "bronze_items_tbl":  "farma.bronze.order_items",
}

## 3. Creación y actualización de tabla farma.bronze.orders

In [0]:
# 1) Lee todo RAW (JSON)
raw_glob = f"{cfg['raw_path']}/*/*"   # .../orders_farmahorra/ingest_date=YYYY-MM-DD/part-*.json
raw_df = spark.read.json(raw_glob)

# 2) Normaliza campos de órdenes
# Esquema esperado para items (array de struct)
items_cast_expr = """
transform(items, i ->
  named_struct(
    'package_ndc_11', cast(i.package_ndc_11 as string),
    'quantity',       cast(i.quantity       as int),
    'unit_price',     cast(i.unit_price     as double),
    'line_total',     cast(i.line_total     as double),
    'descripcion',    cast(i.descripcion    as string),
    'generic_name',   cast(i.generic_name   as string)
  )
)
"""

bronze_df = (
    raw_df
      .withColumnRenamed("_id", "mongo_id")
      .withColumn("discount_pct", F.col("discount_pct").cast("int"))
      .withColumn("subtotal",     F.col("subtotal").cast("double"))
      .withColumn("discount",     F.col("discount").cast("double"))
      .withColumn("total",        F.col("total").cast("double"))
      .withColumn("confirmed_at", F.to_timestamp("confirmed_at"))
      .withColumn("items", F.expr(items_cast_expr))
      # Tomamos el primer (y único) item; si el array está vacío → null
      .withColumn("first_item", F.element_at(F.col("items"), 1))
      # Aplanamos campos del item
      .withColumn("package_ndc_11", F.col("first_item.package_ndc_11"))
      .withColumn("quantity",       F.col("first_item.quantity"))
      .withColumn("unit_price",     F.col("first_item.unit_price"))
      .withColumn("line_total",     F.col("first_item.line_total"))
      .withColumn("descripcion",    F.col("first_item.descripcion"))
      .withColumn("generic_name",   F.col("first_item.generic_name"))
      .withColumn("items_count",    F.size("items"))           # para validar que sea 1
      .withColumn("confirmed_date", F.to_date("confirmed_at"))
      .withColumn("_load_ts",       F.current_timestamp())
      .drop("first_item", "items")  # ya no necesitamos el array
      .select(
          "mongo_id",
          "external_order_id","farmacia_order_id","id_farmacia","client_id",
          "discount_pct","subtotal","discount","total",
          "confirmed_at","confirmed_date","source",
          "package_ndc_11","quantity","unit_price","line_total","descripcion","generic_name",
          "items_count", "_load_ts"
      )
)

# 3) Dedupe por mongo_id para escoger la versión de orden más reciente
w = Window.partitionBy("mongo_id").orderBy(
        F.col("confirmed_at").desc_nulls_last(),
        F.col("_load_ts").desc()  # desempate
)
bronze_src = (bronze_df
              .withColumn("_rn", F.row_number().over(w))
              .where("_rn = 1")
              .drop("_rn"))

# 4) Crea y actualiza la tabla Delta en Bronze ----
target_tbl = "farma.bronze.orders"

if spark.catalog.tableExists(target_tbl):
    # UPSERT por clave
    tgt = DeltaTable.forName(spark, target_tbl)
    (tgt.alias("t")
        .merge(bronze_src.alias("s"), "t.mongo_id = s.mongo_id")
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute())
else:
    # Primera carga: crea la tabla gestionada por UC en farma.bronze
    bronze_src.write.format("delta").saveAsTable(target_tbl)

print("Bronze actualizado: ", target_tbl, " y filas:", spark.table(target_tbl).count())


Bronze actualizado:  farma.bronze.orders  y filas: 400


## 4. Verificaciones

In [0]:
%sql
-- Deben existir y ser consultables
SELECT COUNT(*) FROM farma.bronze.orders;

-- Muestra 20 registros
SELECT external_order_id, package_ndc_11, quantity, unit_price, line_total, total
FROM farma.bronze.orders
ORDER BY confirmed_at DESC
LIMIT 20;

external_order_id,package_ndc_11,quantity,unit_price,line_total,total
FAC-002-00400,68462018449,1,2.15,2.15,2.04
FAC-001-00399,93206450,2,773.4,1546.8,1469.46
FAC-002-00398,52709170103,2,39.36,78.72,74.78
FAC-003-00397,70010002703,1,66.4,66.4,63.08
FAC-003-00396,68084080521,1,4.81,4.81,4.57
FAC-002-00395,51672423501,2,52.7,105.4,100.13
FAC-002-00394,51672405104,2,78.81,157.62,149.74
FAC-003-00393,378027493,2,10.4,20.8,19.76
FAC-003-00392,57237005820,2,6.95,13.9,13.21
FAC-002-00391,64380026101,2,24.29,48.58,46.15
