In [10]:
import duckdb
import os
from dotenv import load_dotenv, find_dotenv
from deltalake import DeltaTable, write_deltalake

load_dotenv(find_dotenv())

con = duckdb.connect()

AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_MINIO")
AWS_SECRET_KEY_ID = os.getenv("AWS_SECRET_KEY_MINIO")
HOST_MINIO = os.getenv("HOST_MINIO")
PORT_MINIO = os.getenv("PORT_MINIO")

con.sql(f""" CREATE SECRET secret1 (
            TYPE S3,
            KEY_ID '{AWS_ACCESS_KEY_ID}',
            SECRET '{AWS_SECRET_KEY_ID}',
            REGION 'us-east-1',
            ENDPOINT '{HOST_MINIO}:{PORT_MINIO}',
            URL_STYLE 'path',
            USE_SSL 'false'

        );
    """)

storage_options = {
    "AWS_ACCESS_KEY_ID": f"{AWS_ACCESS_KEY_ID}",
    "AWS_SECRET_ACCESS_KEY": f"{AWS_SECRET_KEY_ID}",
    "AWS_ENDPOINT_URL": f"http://{HOST_MINIO}:{PORT_MINIO}",
    "AWS_allow_http": "true",
    "AWS_REGION": "us-east-1",
    "AWS_S3_ALLOW_UNSAFE_RENAME": "true",
}

In [11]:
path_silver = "s3://silver/uff/projeto_comex"
path_gold = "s3://gold/uff/projeto_comex"

In [12]:
dim_tempo = con.sql("""
                WITH generate_date AS (
                    SELECT CAST(RANGE AS DATE) AS date_key 
                  FROM RANGE(DATE '1997-01-01', DATE '2030-12-31', INTERVAL 1 DAY)
                )

                SELECT date_key AS data,
                    YEAR(date_key) * 10000 + MONTH(date_key) * 100 + DAYOFMONTH(date_key) AS cod_date,
                    YEAR(date_key) *100 + MONTH(date_key) AS cod_ano_mes,
                    MONTH(date_key) AS mes,
                    MONTHNAME(date_key) AS mes_nome,
                    DAYOFMONTH(date_key) AS dia,
                    CAST(YEAR(date_key) AS INT) AS ano
                FROM generate_date

        """).to_arrow_table()

In [6]:
dim_tempo.to_pandas()

Unnamed: 0,data,cod_date,cod_ano_mes,mes,mes_nome,dia,ano
0,1997-01-01,19970101,199701,1,January,1,1997
1,1997-01-02,19970102,199701,1,January,2,1997
2,1997-01-03,19970103,199701,1,January,3,1997
3,1997-01-04,19970104,199701,1,January,4,1997
4,1997-01-05,19970105,199701,1,January,5,1997
...,...,...,...,...,...,...,...
12412,2030-12-26,20301226,203012,12,December,26,2030
12413,2030-12-27,20301227,203012,12,December,27,2030
12414,2030-12-28,20301228,203012,12,December,28,2030
12415,2030-12-29,20301229,203012,12,December,29,2030


In [15]:
write_deltalake(
        f'{path_gold}/dim_tempo',
        dim_tempo,
        mode='overwrite',
        storage_options=storage_options
)

In [16]:
con.close()