In [1]:
import duckdb
import os
from dotenv import load_dotenv, find_dotenv
from deltalake import DeltaTable, write_deltalake

# Load variables from the nearest .env file into the process environment.
load_dotenv(find_dotenv())


def _require_env(name: str) -> str:
    """Return environment variable ``name``, raising if it is unset or empty.

    Without this check a missing variable would be interpolated below as the
    literal text "None", and the mistake would only surface later as a
    confusing authentication/connection error.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Required environment variable {name} is not set")
    return value


def _sql_quote(value: str) -> str:
    """Escape single quotes so ``value`` can sit inside a SQL string literal."""
    return value.replace("'", "''")


# DuckDB connection shared by every query cell in this notebook.
con = duckdb.connect()

# MinIO credentials and endpoint, read from the environment (never hardcoded).
AWS_ACCESS_KEY_ID = _require_env("AWS_ACCESS_KEY_MINIO")
AWS_SECRET_KEY_ID = _require_env("AWS_SECRET_KEY_MINIO")
HOST_MINIO = _require_env("HOST_MINIO")
PORT_MINIO = _require_env("PORT_MINIO")

# Register the S3 credentials with DuckDB so delta_scan('s3://...') can read
# from MinIO: path-style URLs and plain HTTP (USE_SSL false).
con.sql(f""" CREATE SECRET secret1 (
            TYPE S3,
            KEY_ID '{_sql_quote(AWS_ACCESS_KEY_ID)}',
            SECRET '{_sql_quote(AWS_SECRET_KEY_ID)}',
            REGION 'us-east-1',
            ENDPOINT '{_sql_quote(HOST_MINIO)}:{_sql_quote(PORT_MINIO)}',
            URL_STYLE 'path',
            USE_SSL 'false'

        );
    """)

# Equivalent settings for the deltalake writer, which does not see DuckDB
# secrets and needs its own storage options.
storage_options = {
    "AWS_ACCESS_KEY_ID": AWS_ACCESS_KEY_ID,
    "AWS_SECRET_ACCESS_KEY": AWS_SECRET_KEY_ID,
    "AWS_ENDPOINT_URL": f"http://{HOST_MINIO}:{PORT_MINIO}",
    "AWS_allow_http": "true",
    "AWS_REGION": "us-east-1",
    "AWS_S3_ALLOW_UNSAFE_RENAME": "true",
}

In [3]:
# Root locations of the project's Delta tables: silver is the source layer
# read by the queries, gold is the layer the dimension is written to.
path_silver, path_gold = (
    "s3://silver/uff/projeto_comex",
    "s3://gold/uff/projeto_comex",
)

In [15]:
# Describe the schema of the gold dim_pais table.
describe_dim_pais_sql = f"""
    DESCRIBE SELECT DISTINCT
    *
    FROM delta_scan('{path_gold}/dim_pais')
"""
description = con.sql(describe_dim_pais_sql)

# Display the description as the cell output.
description

┌─────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
│ column_name │ column_type │  null   │   key   │ default │  extra  │
│   varchar   │   varchar   │ varchar │ varchar │ varchar │ varchar │
├─────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
│ PK_PAIS     │ INTEGER     │ YES     │ NULL    │ NULL    │ NULL    │
│ NOME_PAIS   │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ NOME_BLOCO  │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
└─────────────┴─────────────┴─────────┴─────────┴─────────┴─────────┘

In [10]:
# Build the set of countries present in the silver EXP/IMP tables that are not
# yet in the gold dim_pais dimension, as an Arrow table ready to be appended.
#
# Notes:
# - NOT EXISTS is used instead of NOT IN because NOT IN is NULL-unsafe: a
#   single NULL PK_PAIS in gold makes `x NOT IN (...)` non-true for every
#   candidate row, so no new country would ever be appended again.
# - Rows with a NULL key are excluded explicitly so they can never be written
#   to the dimension (with NOT IN they were inserted only while gold was empty).
# - NOTE(review): UNION de-duplicates whole rows, so a country whose name or
#   bloc differs between EXP and IMP still yields two rows with the same
#   PK_PAIS. Confirm whether the silver layer guarantees consistency.
dim_pais = con.sql(f"""
    WITH silver_pais_ex AS (
        SELECT DISTINCT
            FK_PAIS AS PK_PAIS,
            NOME_PAIS,
            NOME_BLOCO
        FROM delta_scan('{path_silver}/EXP')
    ),
    silver_pais_im AS (
        SELECT DISTINCT
            FK_PAIS AS PK_PAIS,
            NOME_PAIS,
            NOME_BLOCO
        FROM delta_scan('{path_silver}/IMP')
    ),
    silver_pais AS (
        SELECT * FROM silver_pais_ex
        UNION
        SELECT * FROM silver_pais_im
    )
    SELECT A.*
    FROM silver_pais AS A
    WHERE A.PK_PAIS IS NOT NULL
      AND NOT EXISTS (
          SELECT 1
          FROM delta_scan('{path_gold}/dim_pais') AS G
          WHERE G.PK_PAIS = A.PK_PAIS
      )
""").to_arrow_table()



In [7]:
# Location of the gold dim_pais Delta table.
table_path = "/".join((path_gold, "dim_pais"))

In [9]:
# Append only when there is at least one row to write; an empty result means
# there is nothing to add and the Delta table is left untouched.
if len(dim_pais) > 0:
    write_deltalake(
        table_path,  # reuse the path defined above instead of rebuilding it
        dim_pais,
        mode="append",
        storage_options=storage_options,
    )