In [0]:
%run ../../../utils

## Parámetros y rutas

In [0]:
# Fully qualified name of the target Silver table, in "<layer>.<table>" form.
# It is passed to merge() as the destination and later split on '.' to
# report the layer and the table name separately.
silver_table_name = "silver.sales_creditcard"

## Lectura de Bronze

In [0]:
# Load the Bronze credit-card table that feeds this Silver transformation.
# The identifier is written without any surrounding whitespace so it matches
# the catalog name exactly (the previous literal carried a trailing space).
df_bronze = spark.read.table("bronze.sales_creditcard")

## Casting y estandarización de tipos

In [0]:
# Target Spark SQL type for each column standardized in this layer.
# The insertion order below is the order in which the casts are applied.
column_types = {
    "CreditCardID": "bigint",
    "CardType": "string",
    "CardNumber": "string",
    "ExpMonth": "tinyint",
    "ExpYear": "smallint",
    "ModifiedDate": "timestamp",
}

# Start from the Bronze frame and overwrite each listed column with its
# cast version; columns not listed above are carried through untouched.
df_cast = df_bronze
for column_name, spark_type in column_types.items():
    df_cast = df_cast.withColumn(column_name, col(column_name).cast(spark_type))

## Limpieza básica

In [0]:
# Discard rows that are missing any of the mandatory fields.
has_card_id = col("CreditCardID").isNotNull()
has_card_number = col("CardNumber").isNotNull()
has_card_type = col("CardType").isNotNull()
df_clean = df_cast.where(has_card_id & has_card_number & has_card_type)

# Keep a single row per CreditCardID.
# NOTE(review): no ordering is applied before the de-duplication, so which
# of several rows sharing a CreditCardID is kept is not controlled here.
dedup_keys = ["CreditCardID"]
df_nodup = df_clean.dropDuplicates(dedup_keys)

# Stamp both audit columns (creation and modification) with the current
# timestamp.
df_result = df_nodup
for audit_column in ("FechaAuditoriaCreacion", "FechaAuditoriaModificacion"):
    df_result = df_result.withColumn(audit_column, current_timestamp())

## Merge incremental

In [0]:
# Merge the prepared rows into the Silver table, using CreditCardID as the key.
# `merge` is not defined in this notebook -- presumably it comes from the
# `%run ../../../utils` cell and performs an upsert; TODO confirm.
# Its four return values are reported in the ingestion log below as
# RowsDropped, RowsUpdated, RowsInserted and FileBytes.
deleted_rows, updated_rows, inserted_rows, file_bytes = merge(silver_table_name, df_result, ["CreditCardID"])

## IngestionLog

In [0]:
# Finish time of the run, taken before anything else in this cell.
# NOTE(review): datetime.now() is naive local time; this assumes the
# StartTime widget is expressed in the same timezone -- confirm.
end_time = datetime.now()

# silver_table_name has the form "<layer>.<table>".
name_parts = silver_table_name.split('.')
layer, table = name_parts[0], name_parts[1]

# The start time arrives through the StartTime widget as text in
# 'YYYY-MM-DD HH:MM:SS' format.
start_time_text = dbutils.widgets.get('StartTime')
start_time = datetime.strptime(start_time_text, '%Y-%m-%d %H:%M:%S')

In [0]:
# Short run summary: source table, layer, finish time and elapsed seconds.
# The duration is measured from start_time (parsed from the StartTime widget)
# to end_time; subtracting end_time from itself would always report 0.
print(f"""

  TablaOrigen: {table}
  Capa: {layer}
  Endtime: {end_time}
  DurationSeconds: {int((end_time - start_time).total_seconds())}
  """)

In [0]:
# Job context supplied through notebook widgets, read in the order reported.
job_id = dbutils.widgets.get('JobId')
job_run_id = dbutils.widgets.get('JobRunId')
task_run_id = dbutils.widgets.get('TaskRunId')
start_time_label = dbutils.widgets.get('StartTime')

# Whole seconds elapsed between the job-supplied start time and end_time.
duration_seconds = int((end_time - start_time).total_seconds())

# Full ingestion log entry for this run.
# NOTE(review): RowsIn is reported as a fixed 0 rather than a measured
# input row count -- confirm this is intentional.
print(f"""
  JobId: {job_id}
  JobRunId: {job_run_id}
  TaskRunId: {task_run_id}
  TablaOrigen: {table}
  Capa: {layer}
  StartTime: {start_time_label}
  Endtime: {end_time}
  DurationSeconds: {duration_seconds}
  RowsIn: 0
  RowsInserted: {inserted_rows}
  RowsUpdated: {updated_rows}
  RowsDropped: {deleted_rows}
  FileBytes: {file_bytes}
  """)

In [0]:
# dbutils.jobs.taskValues.set(key = "JobId", value = JobId)
# dbutils.jobs.taskValues.set(key = "JobRunId", value = JobRunId)