In [0]:
# Catalog and database names of the bronze layer. ingest_csv reads both to build
# the fully qualified target table name: {catalogo}.{bronze_db_name}.<table name>.
catalogo = "medalhao"
bronze_db_name = "bronze"

In [0]:
from pyspark.sql import functions as F

def ingest_csv(nome_arquivo, nome_tabela):
    """Load one CSV file from the landing volume into a bronze Delta table.

    The file is read with a header row and schema inference, gets an
    ``ingestion_timestamp`` column, and is appended to
    ``{catalogo}.{bronze_db_name}.{nome_tabela}``.

    Args:
        nome_arquivo: File name under ``/Volumes/medalhao/default/landing/``.
        nome_tabela: Name of the target table inside the bronze database.

    Returns:
        None. Success and failure are both reported with ``print``. Every
        exception, including the ``ValueError`` raised for an empty file, is
        caught here and does not propagate to the caller.
    """
    try:
        landing_path = f"/Volumes/medalhao/default/landing/{nome_arquivo}"

        # Read the CSV file.
        df = spark.read.csv(landing_path, header=True, inferSchema=True)

        # Validation: empty file. isEmpty() only needs to find a first row,
        # whereas count() scans the whole file just to compare against zero.
        if df.isEmpty():
            raise ValueError(f"O arquivo {nome_arquivo} está vazio ou não pôde ser lido.")

        # Add the ingestion timestamp.
        df_with_metadata = df.withColumn("ingestion_timestamp", F.current_timestamp())

        # Write in Delta format. mode("append") adds the rows to the table on
        # every call, so re-running the same file appends it again.
        full_table_name = f"{catalogo}.{bronze_db_name}.{nome_tabela}"
        df_with_metadata.write.format("delta").mode("append").saveAsTable(full_table_name)

        print(f"✅ Tabela bronze.{nome_tabela} criada com sucesso!\n")

    except Exception as e:
        print(f"Erro ao processar {nome_tabela}: {str(e)}")

In [0]:
# (landing file name, bronze table name) pairs, ingested in the order listed.
arquivos_para_tabelas = [
    ("olist_customers_dataset.csv", "ft_consumidores"),
    ("olist_geolocation_dataset.csv", "ft_geolocalizacao"),
    ("olist_order_items_dataset.csv", "ft_itens_pedidos"),
    ("olist_order_payments_dataset.csv", "ft_pagamentos_pedidos"),
    ("olist_order_reviews_dataset.csv", "ft_avaliacoes_pedidos"),
    ("olist_orders_dataset.csv", "ft_pedidos"),
    ("olist_products_dataset.csv", "ft_produtos"),
    ("olist_sellers_dataset.csv", "ft_vendedores"),
    ("product_category_name_translation.csv", "dm_categoria_produtos_traducao"),
]

for nome_arquivo, nome_tabela in arquivos_para_tabelas:
    ingest_csv(nome_arquivo, nome_tabela)

In [0]:
import requests
import json
from pyspark.sql.functions import udf, col
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

def call_api(url_param):
    """Fetch ``url_param`` with an HTTP GET and return the body as text.

    Args:
        url_param: URL to request.

    Returns:
        The response text on success. When ``requests`` raises a
        ``RequestException`` (which includes the error raised by
        ``raise_for_status`` for an HTTP error status), the string
        ``"Error: <exception message>"`` is returned instead of raising.
    """
    try:
        resp = requests.get(url_param, timeout=30)
        # Raise on HTTP error statuses so they take the error path below.
        resp.raise_for_status()
        resultado = resp.text
    except requests.exceptions.RequestException as exc:
        resultado = f"Error: {exc}"
    return resultado
        

In [0]:
# Period boundaries substituted into the query string below.
# NOTE(review): the values look like MM-DD-YYYY — confirm against the API contract.
data_inicio_formatada = "11-27-2017"
data_fim_formatada = "11-28-2017"

import requests

# CotacaoDolarPeriodo query: the two dates are bound to the @dataInicial and
# @dataFinalCotacao aliases, and only dataHoraCotacao and cotacaoCompra are
# selected, in JSON format.
url = (
    "https://olinda.bcb.gov.br/olinda/servico/PTAX/versao/v1/odata/CotacaoDolarPeriodo"
    "(dataInicial=@dataInicial,dataFinalCotacao=@dataFinalCotacao)"
    f"?@dataInicial='{data_inicio_formatada}'"
    f"&@dataFinalCotacao='{data_fim_formatada}'"
    "&$select=dataHoraCotacao,cotacaoCompra&$format=json"
)

# NOTE(review): hard-coded session cookie values; they look copied from a past
# browser/client session. Confirm whether the endpoint needs them at all and
# remove them if not.
headers = {
    'Cookie': 'BIGipServer~was_p_as3~was_p~pool_was_443_p=4275048876.47873.0000; JSESSIONID=0000HtHVCLHTsK-EWG0R60uBL2U:1dof89mke'
}

# A timeout keeps the cell from hanging forever on an unresponsive server
# (requests applies no timeout by default); 30 s matches call_api.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx instead of printing an error body as if it were data.
response.raise_for_status()

print(response.text)

In [0]:
import http.client

# Same CotacaoDolarPeriodo query issued through the standard library client.
# The timeout bounds connect and read so the cell cannot block indefinitely.
conn = http.client.HTTPSConnection("olinda.bcb.gov.br", timeout=30)
payload = ''
# NOTE(review): hard-coded session cookie values; confirm whether the endpoint
# needs them at all and remove them if not.
headers = {
  'Cookie': 'BIGipServer~was_p_as3~was_p~pool_was_443_p=4275048876.47873.0000; JSESSIONID=0000uF5a70eO9nroez4e7WKOia5:1cn7jtfnj'
}
try:
    conn.request("GET", "/olinda/servico/PTAX/versao/v1/odata/CotacaoDolarPeriodo(dataInicial=@dataInicial,dataFinalCotacao=@dataFinalCotacao)?@dataInicial='11-27-2017'&@dataFinalCotacao='12-04-2017'&$top=100&$format=json&$select=cotacaoCompra,dataHoraCotacao", payload, headers)
    res = conn.getresponse()
    # Read the whole body before the connection is closed below.
    data = res.read()
finally:
    # Release the socket even when the request or the read fails.
    conn.close()
print(data.decode("utf-8"))

In [0]:
from pyspark.sql import Row

# Single-row DataFrame with one column, data_cotacao_dolar, holding the literal 3.2.
# NOTE(review): the value is hard-coded rather than taken from the API responses
# fetched in the cells above — presumably a placeholder; confirm.
df_cotacao_dolar = spark.createDataFrame([Row(data_cotacao_dolar=3.2)])