In [0]:
import delta

def table_exists(database, table):
    """Tell whether ``table`` is listed exactly once under ``database``.

    Runs ``SHOW TABLES FROM <database>`` through the notebook's ``spark``
    session and keeps only the rows whose ``database`` and ``tableName``
    columns equal the given values.

    Args:
        database: Name of the schema/database to list.
        table: Name of the table to look for.

    Returns:
        True when exactly one listed row matches, False otherwise.
    """
    listing = spark.sql(f"SHOW TABLES FROM {database}")
    predicate = f"database='{database}' AND tableName='{table}'"
    matches = listing.filter(predicate).count()
    return matches == 1

In [0]:
# Target schema for the tables handled by this notebook.
schema = "bronze"

# Remaining settings are read from notebook widgets, in this order:
# the table to load, its key column, and the column used to order its rows.
tablename, id_field, timestamp_field = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("tablename", "id_field", "timestamp_field")
)

In [0]:
# Initial (full) load: only runs when the target table is not there yet.
if table_exists(schema, tablename):
    print("Tabela já existente, ignorando carga completa.")
else:
    print("Tabela não existente, criando.")
    # Read the ';'-separated CSV files (with header row) of the full load.
    df_full = (
        spark.read
        .format("csv")
        .options(sep=";", header=True)
        .load(f"/Volumes/workspace/upsell/full_load/{tablename}/")
    )
    # Collapse to a single partition and save as a Delta table.
    (
        df_full
        .coalesce(1)
        .write
        .format("delta")
        .mode("overwrite")
        .saveAsTable(f"{schema}.{tablename}")
    )

### Atualização da tabela - Incremental

In [0]:
# Load the CDC CSV files (';'-separated, with header row) and register them
# as a temporary view so they can be queried with SQL.
df_cdc_raw = (
    spark.read
    .format("csv")
    .options(sep=";", header=True)
    .load(f"/Volumes/workspace/upsell/cdc/{tablename}/")
)
df_cdc_raw.createOrReplaceTempView(f"view_{tablename}")

# Keep only the most recent row per key (highest timestamp first), which is
# the row used for the incremental update.
query = f"""
    SELECT * 
    FROM view_{tablename}
    QUALIFY ROW_NUMBER() OVER (PARTITION BY {id_field} ORDER BY {timestamp_field} DESC) = 1
"""

df_cdc_unique = spark.sql(query)

In [0]:
# Handle to the existing bronze Delta table that receives the CDC changes.
bronze = delta.DeltaTable.forName(spark, f"{schema}.{tablename}")

# Upsert: consolidate the bronze table with the latest CDC row per key.
#   - key matched and OP = 'D'            -> delete the bronze row
#   - key matched and OP = 'U'            -> overwrite the bronze row with the CDC row
#   - key not matched and OP = 'I' or 'U' -> insert the CDC row
# The chained calls only describe the merge; .execute() is what actually
# applies it to the table, so it must stay at the end of the chain.
(bronze.alias("b")
       .merge(df_cdc_unique.alias("d"), f"b.{id_field} = d.{id_field}")
       .whenMatchedDelete(condition = "d.OP = 'D'")
       .whenMatchedUpdateAll(condition = "d.OP = 'U'")
       .whenNotMatchedInsertAll(condition = "d.OP = 'I' or d.OP = 'U'")
       .execute()
)