## Widgets

In [0]:
# Notebook parameters. Each text widget is declared with an empty default and
# read back immediately, so `proceso` and `tabla` hold "" unless the caller
# supplies a value.
dbutils.widgets.text("proceso", "")
proceso = dbutils.widgets.get("proceso")

dbutils.widgets.text("tabla", "")
tabla = dbutils.widgets.get("tabla")

## Librerías

In [0]:
from pyspark.sql.functions import col
from pyspark.sql.types import StringType

## Variables

In [0]:
# The container and storage account names are read from the "scope-mbc"
# secret scope instead of being hard-coded in the notebook.
container = dbutils.secrets.get(scope="scope-mbc", key="secret-env-container")
storage_account = dbutils.secrets.get(
    scope="scope-mbc", key="secret-env-storage-account"
)

# Root abfss:// URI that the landing, checkpoint and schema paths are built on.
path_base = f"abfss://{container}@{storage_account}.dfs.core.windows.net"

## Funciones

In [0]:
# Look up the landing folder and the Bronze table configured for this
# proceso/tabla pair in auditoria.parametros.
# The widget values are compared through column expressions instead of being
# interpolated into SQL text, so a quote inside a parameter cannot break or
# alter the query.
df_query = (
    spark.table("auditoria.parametros")
    .where((col("proceso") == proceso) & (col("tabla_bronze") == tabla))
    .select("path_landing", "tabla_bronze")
)

# Fetch the row a single time: every first() call launches a new Spark job.
fila_parametros = df_query.first()
if fila_parametros is None:
    # first() returns None when nothing matches; fail with a clear message
    # instead of the TypeError raised by subscripting None.
    raise ValueError(
        f"No row found in auditoria.parametros for proceso='{proceso}' "
        f"and tabla_bronze='{tabla}'"
    )

landing = fila_parametros["path_landing"]
tabla_bronze = fila_parametros["tabla_bronze"]

print(landing)
print(tabla_bronze)


In [0]:
def get_all_columns_as_string(df):
    """Return `df` with every column cast to string, keeping the column names."""
    string_columns = []
    for column_name in df.columns:
        # Re-alias after the cast so the output column keeps its original name.
        as_string = col(column_name).cast(StringType())
        string_columns.append(as_string.alias(column_name))
    return df.select(string_columns)

## Lee landing e inserta incremental en Bronze

In [0]:
# Locations derived from the parameter row: the source folder under landing,
# plus the checkpoint and schema folders kept per Bronze table.
path_landing = f"{path_base}/landing/{landing}/"
path_checkpoint = f"{path_base}/checkpoints/landing/{tabla_bronze}/"
path_schema = f"{path_base}/schema/landing/{tabla_bronze}/"

# Manual reset: uncomment to delete the checkpoint and the stored schema.
# NOTE(review): presumably this makes the next run re-ingest every file - confirm before using.
# dbutils.fs.rm(path_checkpoint, recurse=True)
# dbutils.fs.rm(path_schema, recurse=True)

# Source: Auto Loader ("cloudFiles") over the parquet files in the landing
# folder, including files that were already there before the stream started.
landing_reader = (
    spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "parquet")
    .option("cloudFiles.schemaLocation", path_schema)
    .option("cloudFiles.includeExistingFiles", "true")
)

# Drop the `_rescued_data` column explicitly so it is not written to Bronze.
landing_stream = landing_reader.load(path_landing).drop("_rescued_data")

# Every remaining column is cast to string before the write.
bronze_rows = get_all_columns_as_string(landing_stream)

# Sink: append to the Bronze Delta table, tracking progress in the checkpoint.
# NOTE(review): no trigger is set, so the query runs with the default trigger;
# confirm whether the disabled availableNow trigger below should be enabled.
bronze_writer = (
    bronze_rows.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", path_checkpoint)
    # .trigger(availableNow=True)
)

# toTable() starts the query; despite the name, df_stream holds the running
# streaming query handle, not a DataFrame.
df_stream = bronze_writer.toTable(tabla_bronze)