In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, to_date, when, lit
from delta import *

# Spark session for the Silver stage: standalone cluster master plus the
# Delta Lake package, SQL extension and catalog settings.
delta_settings = {
    "spark.jars.packages": "io.delta:delta-spark_2.12:3.0.0",
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
}

builder = SparkSession.builder.appName("Lab_SECOP_Silver").master("spark://spark-master:7077")
# Apply the settings in declaration order (dicts preserve insertion order).
for clave, valor in delta_settings.items():
    builder = builder.config(clave, valor)

# The Delta helper receives the configured builder before the session is created.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

# Read the Bronze Delta table (actual path inside the container)
df_bronze = spark.read.format("delta").load("/app/data/lakehouse/bronze/secop")

# Strong typing: convert each contract date column with to_date using the
# "yyyy-MM-dd" pattern, replacing the column in place (same column name).
columnas_fecha = (
    "fecha_de_firma",
    "fecha_de_inicio_del_contrato",
    "fecha_de_fin_del_contrato",
)

df_transformed = df_bronze
for nombre_columna in columnas_fecha:
    df_transformed = df_transformed.withColumn(
        nombre_columna, to_date(col(nombre_columna), "yyyy-MM-dd")
    )

# Quality gate
# Every rule below always evaluates to TRUE or FALSE, never NULL.
# Why: `precio_base > 0` is NULL when precio_base is NULL, and NOT NULL is
# still NULL. DataFrame.filter keeps only rows whose predicate is TRUE, so a
# nullable rule makes rows with a NULL price (and a valid date) disappear from
# BOTH the silver and the quarantine outputs. eqNullSafe(TRUE) maps a NULL
# comparison result to FALSE so those rows are routed to quarantine instead.
precio_positivo = col("precio_base") > 0          # may be NULL
regla_precio = precio_positivo.eqNullSafe(lit(True))  # TRUE / FALSE only
regla_fecha = col("fecha_de_firma").isNotNull()       # TRUE / FALSE only
registro_valido = regla_precio & regla_fecha

# Split: the two predicates are exact complements, so each row of
# df_transformed lands in exactly one of the two DataFrames.
df_silver = df_transformed.filter(registro_valido)

# Quarantine rows carry the first failing rule as the rejection reason.
# The NULL-comparison case is reported separately because "precio_base <= 0"
# would be inaccurate for a missing (or non-comparable) price.
df_quarantine = df_transformed.filter(~registro_valido) \
    .withColumn(
        "motivo_rechazo",
        when(precio_positivo.isNull(), lit("precio_base nulo o invalido"))
        .when(~regla_precio, lit("precio_base <= 0"))
        .when(~regla_fecha, lit("fecha_de_firma nula"))
        .otherwise(lit("error_desconocido"))
    )

# Write: persist each output as a Delta table in overwrite mode.
# Silver is written first, then quarantine.
destinos = (
    (df_silver, "/app/data/lakehouse/silver/secop"),
    (df_quarantine, "/app/data/lakehouse/quarantine/secop_errors"),
)

for tabla, ruta in destinos:
    tabla.write.format("delta").mode("overwrite").save(ruta)

print("Plata y Cuarentena generadas correctamente.")


:: loading settings :: url = jar:file:/opt/spark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /root/.ivy2/cache
The jars for the packages stored in: /root/.ivy2/jars
io.delta#delta-spark_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-c0fec0c8-7675-424b-a109-2404595cc689;1.0
	confs: [default]
	found io.delta#delta-spark_2.12;3.0.0 in central
	found io.delta#delta-storage;3.0.0 in central
	found org.antlr#antlr4-runtime;4.9.3 in central
:: resolution report :: resolve 465ms :: artifacts dl 20ms
	:: modules in use:
	io.delta#delta-spark_2.12;3.0.0 from central in [default]
	io.delta#delta-storage;3.0.0 from central in [default]
	org.antlr#antlr4-runtime;4.9.3 from central in [default]
	---------------------------------------------------------------------
	|                  |            modules            ||   artifacts   |
	|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
	---------------------------------------------------------------------
	|      default     |   3   |   0   |   0   |  

Plata y Cuarentena generadas correctamente.


                                                                                