In [0]:
%run /Workspace/Users/valterlafuentejunior@gmail.com/ProjetoMedalhao/00.config/config

In [0]:
from pyspark.sql.functions import current_date, date_format, col
import requests
import pandas as pd
from datetime import datetime
from delta.tables import DeltaTable
from pyspark.sql.types import *

In [0]:
# Name of the destination table for this load; it is looked up and written as
# a Delta table by the load step at the end of the notebook.
tabela_destino = "workspace.bronze_etl.ipca"


In [0]:
# Central Bank of Brazil (BCB) time-series API, series 433.
# NOTE(review): series 433 is loaded downstream as the "ipca" column — confirm
# the series code against the BCB catalogue.
URL = 'https://api.bcb.gov.br/dados/serie/bcdata.sgs.433/dados'

# Query-string parameters: JSON output, a fixed start date and today's date as
# the end of the window.
# NOTE(review): datetime.now() uses the cluster's local clock/timezone — confirm
# that is the intended reference for "today".
params = {
    'formato': 'json',
    'dataInicial': '01/01/2024',
    'dataFinal': datetime.now().strftime('%d/%m/%Y')  # Current date as day/month/year
}

# Call the API.
# - timeout: requests waits forever by default; bound the wait so a stalled
#   connection fails the job instead of hanging it.
# - raise_for_status: fail here, with the HTTP status in the error, rather than
#   letting an error response flow into the DataFrame conversion.
response = requests.get(URL, params=params, timeout=30)
response.raise_for_status()
data = response.json()


In [0]:
# Convert the API payload into a Spark DataFrame.
# An explicit schema is used instead of schema inference for two reasons:
#   1. inference raises on an empty payload, which would prevent the
#      "no data" branch of the load step from ever running;
#   2. it pins the column order to ('data', 'valor'), which the positional
#      rename in the next step relies on.
# Both fields are read as strings; 'valor' is cast to double later on.
df = spark.createDataFrame(data, schema="data string, valor string")

In [0]:
# Rename the two columns positionally to ('data', 'ipca') and convert the
# 'ipca' values to double in a single chained expression.
df = (
    df.toDF('data', 'ipca')
    .withColumn("ipca", col("ipca").cast("double"))
)

In [0]:
# Stamp every row with the date of this run (Spark's current_date()).
# The load step below uses this column both as part of the merge key and as
# the partition column of the destination table.
df = df.withColumn("data_coleta", current_date())

In [0]:
# Load step: write the collected rows into the destination Delta table.
#   - no rows          -> only report that there is nothing to update
#   - table is missing -> create it from this batch, partitioned by data_coleta
#   - table exists     -> upsert the batch, matching on (data_coleta, data)
if df is None or df.limit(1).count() == 0:
    print("Nenhum dado para atualizar.")
else:
    print(f"Total de registros carregados: {df.count()}")

    if not spark.catalog.tableExists(tabela_destino):
        print(f"Tabela {tabela_destino} não existe — criando nova tabela...")
        (
            df.write
            .format("delta")
            .partitionBy("data_coleta")
            .mode("append")
            .saveAsTable(tabela_destino)
        )
    else:
        print(f"Tabela {tabela_destino} já existe — atualizando dados...")
        delta_table = DeltaTable.forName(spark, tabela_destino)

        # Matched rows are overwritten with the incoming values; rows without
        # a match are inserted.
        (
            delta_table.alias("t")
            .merge(df.alias("s"), "t.data_coleta = s.data_coleta AND t.data = s.data")
            .whenMatchedUpdateAll()
            .whenNotMatchedInsertAll()
            .execute()
        )