In [0]:
%run ../00_functions/functions

In [0]:
from pyspark.sql import functions as F
from delta.tables import DeltaTable
from datetime import datetime
from pyspark.sql.functions import current_date

In [0]:
# Build the guest ("hospedes") batch that is upserted into the raw layer:
# read the transient table, keep the columns listed below, encrypt the
# sensitive ones and attach the audit dates used by the merge step.
# `spark` and `display` are provided by the notebook runtime; `encrypt_udf`
# is expected to be defined by the `%run ../00_functions/functions` cell.
df_hospedes = (
    spark.read.table("production.transient.hospedes")
    .select(
        "hospede_id",
        "nome",
        "cpf",
        "data_nascimento",
        "sexo",
        "nacionalidade",
        "tipo_cliente",
        "email",
        "telefone",
        "estado",
        "cidade",
        "bairro",
        "rua",
        "numero",
        "complemento",
        "cep",
    )
    # Use `F.col` explicitly: a bare `col` is not imported in this notebook and
    # would only resolve if the `%run` notebook happened to export it.
    .withColumn("cpf", encrypt_udf(F.col("cpf")))
    # Considered sensitive under LGPD when it captures a gender distinction.
    .withColumn("sexo", encrypt_udf(F.col("sexo")))
    # Load date of this batch.
    .withColumn("start_date", F.current_date())
    # Typed NULL placeholder; the merge step fills it in when a row is updated.
    .withColumn("update_date", F.lit(None).cast("date"))
)

# NOTE(review): this preview still shows unencrypted personal data (nome, email,
# telefone, address fields) — clear the cell output before sharing the notebook.
display(df_hospedes.limit(5))

In [0]:
catalog_table = "production.raw.tb_hospedes"

# Upsert the current batch into the raw guest table.
#
# The existence of the target is checked explicitly instead of wrapping the
# whole merge in `try/except Exception`: with the blanket handler, ANY merge
# failure (duplicate keys in the source, schema mismatch, transient error)
# fell through to an overwrite and replaced the production table with only
# the current batch.
if spark.catalog.tableExists(catalog_table):
    delta_table = DeltaTable.forName(spark, catalog_table)

    # Columns that must not be copied from the source on update:
    #   - hospede_id: merge key, already equal on both sides;
    #   - start_date: the source always carries today's date, so copying it
    #     would erase the date the row was first loaded
    #     (assumes start_date means "first load date" — TODO confirm);
    #   - update_date: set explicitly below.
    preserved_on_update = {"hospede_id", "start_date", "update_date"}
    update_assignments = {
        column_name: f"source.{column_name}"
        for column_name in df_hospedes.columns
        if column_name not in preserved_on_update
    }
    update_assignments["update_date"] = F.current_date()

    (
        delta_table.alias("target")
        .merge(
            df_hospedes.alias("source"),
            "target.hospede_id = source.hospede_id",
        )
        .whenMatchedUpdate(set=update_assignments)
        .whenNotMatchedInsertAll()
        .execute()
    )
else:
    # First load: create the table. `errorifexists` guarantees this branch can
    # never clobber an existing table, even if it is created concurrently.
    (
        df_hospedes.write
        .format("delta")
        .mode("errorifexists")
        .saveAsTable(catalog_table)
    )