In [0]:
%run ./00_Setup_Environment

In [0]:
# Volume folder that holds the raw sales CSV files to ingest.
source_path = "/Volumes/adventureworks/dataset/sales"
# Catalog-qualified schema made current before the bronze tables are written.
schema = "adventureworks.bronze"


In [0]:
# Collect the base name (".csv" stripped) of every CSV file in the source folder;
# entries with any other extension are ignored.
files_name = [
    name.removesuffix(".csv")
    for name in (info.name for info in dbutils.fs.ls(source_path))
    if name.endswith(".csv")
]
files_name


In [0]:
# Pair each source file name with the target table name returned by to_snake_case.
tables = [(file_name, to_snake_case(file_name)) for file_name in files_name]
tables

In [0]:
def ingest_table(source_table, target_table):
    """Ingest one CSV file from the source Volume into the bronze layer.

    Reads ``{source_path}/{source_table}.csv`` (header row, ``;`` separator,
    ISO-8859-1 encoding), appends the ingestion metadata columns, writes the
    result as a Delta dataset under ``{bronze_path}/{target_table}`` and
    registers it as the catalog table ``target_table`` in the current schema.
    The outcome is recorded through ``log_etl`` on success and on failure.

    Args:
        source_table: CSV file name without the ``.csv`` extension.
        target_table: Name of the bronze table / Delta folder to overwrite.

    Returns:
        Number of rows in the Delta data written for this table.

    Raises:
        Exception: Any error raised inside the ingestion is logged with
            status ``FAILED`` and then re-raised unchanged.
    """
    start = time.time()

    try:
        print(f"{source_table} -> bronze.{target_table}")

        # Read the raw CSV file.
        df = (
            spark.read
            .format("csv")
            .option("header", "true")
            .option("sep", ";")
            .option("encoding", "ISO-8859-1")
            .load(f"{source_path}/{source_table}.csv")
        )

        # Add ingestion metadata. The batch id is generated once on the driver,
        # so every row of this run carries the same literal value.
        batch_id = str(uuid.uuid4())[:8]
        df = (
            df
            .withColumn("_ingestion_timestamp", current_timestamp())
            .withColumn("_source_table", lit(source_table))
            .withColumn("_batch_id", lit(batch_id))
        )

        # Persist as Delta files under the bronze path.
        path = f"{bronze_path}/{target_table}"
        (
            df.write
            .mode("overwrite")
            .format("delta")
            .option("overwriteSchema", "true")
            .save(path)
        )

        # Register the catalog table from the Delta data that was just written
        # instead of from the lazy CSV DataFrame: evaluating `df` a second time
        # would parse the CSV again and stamp a different _ingestion_timestamp,
        # leaving the two copies of the same batch inconsistent.
        written = spark.read.format("delta").load(path)
        (
            written.write
            .mode("overwrite")
            .option("overwriteSchema", "true")
            .saveAsTable(target_table)
        )

        # Log run metadata. The row count is taken from the written Delta data,
        # so it reflects what was stored and avoids another scan of the CSV.
        duration = round(time.time() - start, 2)
        count = written.count()
        log_etl(target_table, "bronze", "SUCCESS", count, None, duration)

        return count

    except Exception as e:
        duration = round(time.time() - start, 2)
        log_etl(target_table, "bronze", "FAILED", 0, str(e), duration)
        print(f"Erro: {str(e)[:200]}...")
        # Bare raise re-raises the active exception with its original traceback.
        raise


In [0]:
# Make the bronze schema current so unqualified table names resolve to it.
spark.sql(f"USE {schema}")

# Ingest every table. Each entry of `results` ends up holding either the row
# count returned by ingest_table or a truncated error message, so a failure on
# one table does not stop the remaining ones.
results = {}
for source, target in tables:
    try:
        outcome = ingest_table(source, target)
    except Exception as exc:
        outcome = f"ERRO: {str(exc)[:50]}"
    results[target] = outcome

In [0]:
# Print one status line per table, framed by separator rules.
separator = "=" * 50
print(separator)
for tbl, outcome in results.items():
    # Integer outcomes are row counts; anything else is a stored error message.
    if isinstance(outcome, int):
        status = "Success"
    else:
        status = "Failed"
    print(f"{status} {tbl}: {outcome}")
print(separator)

In [0]:
%sql
-- List the tables that now exist in the bronze schema
SHOW TABLES IN bronze;