In [3]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, to_date, when, lit
from delta import *

# Session builder for the Silver-layer job: application name, cluster master
# URL, and the Delta Lake package / SQL extension / catalog settings.
builder = (
    SparkSession.builder
    .appName("Lab_SECOP_Silver")
    .master("spark://spark-master:7077")
    .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.0.0")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)

# Pass the builder through the delta helper, then create the session
# (or reuse one that is already active).
spark = configure_spark_with_delta_pip(builder).getOrCreate()

# Read the Bronze layer (actual path inside the container).
df_bronze = spark.read.format("delta").load("/app/data/lakehouse/bronze/secop")

# Strong typing: parse each contract date column in place as a DATE using the
# "yyyy-MM-dd" pattern.
df_transformed = df_bronze
for _columna_fecha in (
    "fecha_de_firma",
    "fecha_de_inicio_del_contrato",
    "fecha_de_fin_del_contrato",
):
    df_transformed = df_transformed.withColumn(
        _columna_fecha,
        to_date(col(_columna_fecha), "yyyy-MM-dd"),
    )

# Quality gate.
# Every rule must evaluate to a definite True/False. A bare `precio_base > 0`
# yields NULL when precio_base is NULL, and both filter(NULL) and filter(~NULL)
# discard the row, so a record with a NULL price and a valid signing date would
# end up in neither Silver nor Quarantine. eqNullSafe(True) maps a NULL
# comparison result to False, which makes the two filters below exact
# complements of each other.
regla_precio = (col("precio_base") > 0).eqNullSafe(True)
regla_fecha = col("fecha_de_firma").isNotNull()

# Split: rows that satisfy every rule go to Silver ...
df_silver = df_transformed.filter(regla_precio & regla_fecha)

# ... and every remaining row goes to Quarantine, tagged with the first
# matching rejection reason. The branch order keeps the reason unchanged for
# rows that were already being quarantined; the NULL-price branch labels the
# rows that used to be dropped silently. Anything else that fails the price
# rule falls through to "error_desconocido".
df_quarantine = df_transformed.filter(~(regla_precio & regla_fecha)) \
    .withColumn(
        "motivo_rechazo",
        when(col("precio_base") <= 0, lit("precio_base <= 0"))
        .when(~regla_fecha, lit("fecha_de_firma nula"))
        .when(col("precio_base").isNull(), lit("precio_base nulo"))
        .otherwise(lit("error_desconocido"))
    )

# Write both outputs as Delta tables using overwrite mode.
(
    df_silver.write
    .format("delta")
    .mode("overwrite")
    .save("/app/data/lakehouse/silver/secop")
)

(
    df_quarantine.write
    .format("delta")
    .mode("overwrite")
    .save("/app/data/lakehouse/quarantine/secop_errors")
)

print("Plata y Cuarentena generadas correctamente.")


26/02/02 00:41:01 WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient resources
26/02/02 00:41:16 WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient resources
26/02/02 00:41:31 WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient resources
26/02/02 00:41:46 WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient resources
26/02/02 00:42:01 WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient resources
26/02/02 00:42:16 WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure th

Plata y Cuarentena generadas correctamente.


                                                                                