## Librerías

In [0]:
from pyspark.sql import DataFrame
from pyspark.sql.functions import col, current_timestamp
from pyspark.sql.types import StringType
from delta.tables import DeltaTable

## Variables

In [0]:
def variables_globales() -> dict:
    """Return the storage settings used to build ABFSS paths.

    The container and storage-account names are read from the ``scope-mbc``
    Databricks secret scope. Each secret is fetched exactly once and then
    reused to compose the base path.

    Returns
    -------
    dict
        A dictionary with the keys ``container``, ``storage_account`` and
        ``path_base``, where ``path_base`` has the form
        ``abfss://<container>@<storage_account>.dfs.core.windows.net``.
    """
    container = dbutils.secrets.get("scope-mbc", "secret-env-container")
    storage_account = dbutils.secrets.get("scope-mbc", "secret-env-storage-account")

    return {
        "container": container,
        "storage_account": storage_account,
        "path_base": f"abfss://{container}@{storage_account}.dfs.core.windows.net",
    }

## Funciones

In [0]:
def read_landing(path: str) -> DataFrame:
    """Read a Parquet dataset and return it with every column cast to string.

    Parameters
    ----------
    path : str
        Location of the dataset relative to the ``path_base`` value returned
        by ``variables_globales()``.

    Returns
    -------
    DataFrame
        The data loaded from ``<path_base>/<path>``, with each column cast to
        string and keeping its original name.
    """
    source_uri = f"{variables_globales()['path_base']}/{path}"
    raw_df = spark.read.format("parquet").load(source_uri)

    # The alias keeps the source column name after the cast.
    return raw_df.select(
        *(col(name).cast("string").alias(name) for name in raw_df.columns)
    )



In [0]:
def write_bronze(df: DataFrame, tabla: str) -> None:
    """Append a DataFrame to a Delta table.

    Parameters
    ----------
    df : DataFrame
        Rows to append.
    tabla : str
        Name of the destination table, passed to ``saveAsTable``.
    """
    # The table is addressed by name, so no storage path (and therefore no
    # secret lookup) is needed for this write.
    df.write.format("delta").mode("append").saveAsTable(tabla)

In [0]:
def merge(table_name: str, df: DataFrame, identity_column: "list[str] | None" = None) -> None:
    """
    Upsert ``df`` into a Delta table, matching rows on the table's primary key.

    The primary-key columns are looked up in ``system.information_schema``
    for the ``lakehouse`` catalog. Rows that match on every key column are
    updated; rows without a match are inserted.

    Parameters
    ----------
    table_name : str
        Target Delta table in ``schema.table`` form.
    df : DataFrame
        Source rows to insert or update. Every target column used in the
        UPDATE/INSERT clauses is read from the same-named column of ``df``.
    identity_column : list of str, optional
        Target columns to leave out of the INSERT clause (presumably columns
        whose values are generated by the table itself). Defaults to no
        exclusions.

    Raises
    ------
    ValueError
        If no primary-key columns are found for ``table_name``; without them
        no merge condition can be built.
    """
    # A None default avoids sharing one mutable list across calls.
    excluded_from_insert = set(identity_column or ())

    # Look up the primary-key columns in the catalog. The separator is
    # single-quoted so it stays a string literal even when double-quoted
    # identifiers are enabled in the SQL session.
    query = f"""
    SELECT cu.column_name
    FROM system.information_schema.key_column_usage AS cu
    INNER JOIN system.information_schema.table_constraints AS tc
      USING (constraint_catalog, constraint_schema, constraint_name)
    WHERE concat_ws('.', cu.table_schema, cu.table_name) = '{table_name}'
      AND tc.constraint_type = 'PRIMARY KEY'
      AND cu.table_catalog = 'lakehouse'
    ORDER BY ordinal_position
    """
    key_columns = [row["column_name"] for row in spark.sql(query).collect()]

    if not key_columns:
        # An empty key list would produce an empty merge condition and an
        # opaque failure inside Delta; fail early with a clear message.
        raise ValueError(
            f"No PRIMARY KEY columns found for table '{table_name}' "
            "in catalog 'lakehouse'; cannot build the merge condition."
        )

    # Target is aliased "m" and source "in" in the merge below.
    merge_condition = " AND ".join(f"m.{key} = in.{key}" for key in key_columns)

    # Load the target Delta table and its column list.
    delta_table = DeltaTable.forName(spark, table_name)
    target_columns = delta_table.toDF().columns

    # Key columns and the creation-audit column are never overwritten on update.
    excluded_from_update = set(key_columns) | {"FechaAuditoriaCreacion"}

    # The loop variable is named so that it does not shadow the imported
    # pyspark.sql.functions.col.
    columns_to_update = {
        name: f"in.{name}"
        for name in target_columns
        if name not in excluded_from_update
    }
    columns_to_insert = {
        name: f"in.{name}"
        for name in target_columns
        if name not in excluded_from_insert
    }

    # Run the merge.
    (
        delta_table.alias("m")
        .merge(df.alias("in"), merge_condition)
        .whenMatchedUpdate(set=columns_to_update)
        .whenNotMatchedInsert(values=columns_to_insert)
        .execute()
    )