In [None]:
import duckdb
import os
from dotenv import load_dotenv, find_dotenv
from deltalake import DeltaTable, write_deltalake

load_dotenv(find_dotenv())

con = duckdb.connect()

AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_MINIO")
AWS_SECRET_KEY_ID = os.getenv("AWS_SECRET_KEY_MINIO")
HOST_MINIO = os.getenv("HOST_MINIO")
PORT_MINIO = os.getenv("PORT_MINIO")

con.sql(f""" CREATE SECRET secret1 (
            TYPE S3,
            KEY_ID '{AWS_ACCESS_KEY_ID}',
            SECRET '{AWS_SECRET_KEY_ID}',
            REGION 'us-east-1',
            ENDPOINT '{HOST_MINIO}:{PORT_MINIO}',
            URL_STYLE 'path',
            USE_SSL 'false'

        );
    """)

storage_options = {
    "AWS_ACCESS_KEY_ID": f"{AWS_ACCESS_KEY_ID}",
    "AWS_SECRET_ACCESS_KEY": f"{AWS_SECRET_KEY_ID}",
    "AWS_ENDPOINT_URL": f"http://{HOST_MINIO}:{PORT_MINIO}",
    "AWS_allow_http": "true",
    "AWS_REGION": "us-east-1",
    "AWS_S3_ALLOW_UNSAFE_RENAME": "true",
}

In [41]:
path_land = "s3://land/uff/projeto_comex"
path_bronze = "s3://bronze/uff/projeto_comex"

In [42]:
df = con.sql(f""" 
           SELECT
           CAST(CO_PAIS AS INT) AS CODIGO_PAIS,
           CAST(CO_BLOCO AS INT) AS CODIGO_BLOCO,
           NO_BLOCO AS NOME_BLOCO,
           FROM '{path_land}/PAIS_BLOCO.parquet'
           """).to_arrow_table()

In [None]:
write_deltalake(
    f"{path_bronze}/PAIS_BLOCO", df, mode="append", storage_options=storage_options
)

In [55]:
table_path = f"{path_bronze}/PAIS_BLOCO"
table = DeltaTable(table_path, storage_options=storage_options)

In [56]:
(
    table.merge(
        source=df,
        predicate="target.CODIGO_PAIS = source.CODIGO_PAIS",
        source_alias="source",
        target_alias="target",
    )
).when_matched_update_all().when_not_matched_insert_all().execute()

{'num_source_rows': 322,
 'num_target_rows_inserted': 0,
 'num_target_rows_updated': 424,
 'num_target_rows_deleted': 0,
 'num_target_rows_copied': 0,
 'num_output_rows': 424,
 'num_target_files_scanned': 1,
 'num_target_files_skipped_during_scan': 0,
 'num_target_files_added': 1,
 'num_target_files_removed': 1,
 'execution_time_ms': 86,
 'scan_time_ms': 0,
 'rewrite_time_ms': 21}

In [57]:
con.close()