In [0]:
# Make sure the bronze and silver databases exist before any table is written.
for layer in ("bronze", "silver"):
    spark.sql(f"CREATE DATABASE IF NOT EXISTS {layer}")

# Folder holding the raw CSV files. The trailing slash matters: file names are
# appended to this value by plain string concatenation.
path_base = "/Volumes/workspace/default/brazillian_e-commerce/"

In [0]:
from pyspark.sql import functions as F
import requests
from datetime import datetime

def load_bronze(file_name: str, table_name: str) -> None:
    """Load one CSV file into a table, replacing any previous contents.

    The file at ``path_base + file_name`` is read with a header row and
    schema inference, an ``ingestion_timestamp`` column holding the current
    timestamp is appended, and the result is saved in ``overwrite`` mode.
    A confirmation line with the stored row count is printed afterwards.

    Args:
        file_name: Name of the CSV file, appended to ``path_base``.
        table_name: Name of the table to (re)create, e.g.
            ``"bronze.ft_pedidos"``.
    """
    df = (spark.read
          .option("header", True)
          .option("inferSchema", True)
          .csv(path_base + file_name)
          .withColumn("ingestion_timestamp", F.current_timestamp()))

    df.write.mode("overwrite").saveAsTable(table_name)

    # Count the rows of the saved table instead of calling df.count(): the
    # DataFrame is lazy, so counting it would parse the whole CSV a second
    # time only to produce this log line.
    row_count = spark.table(table_name).count()
    print(f"Tabela criada: {table_name} ✅ ({row_count} linhas)")

# (source CSV file, destination table) pairs, loaded in the order listed.
_bronze_sources = [
    ("olist_customers_dataset.csv", "bronze.ft_consumidores"),
    ("olist_geolocation_dataset.csv", "bronze.ft_geolocalizacao"),
    ("olist_order_items_dataset.csv", "bronze.ft_itens_pedidos"),
    ("olist_order_payments_dataset.csv", "bronze.ft_pagamentos_pedidos"),
    ("olist_order_reviews_dataset.csv", "bronze.ft_avaliacoes_pedidos"),
    ("olist_orders_dataset.csv", "bronze.ft_pedidos"),
    ("olist_products_dataset.csv", "bronze.ft_produtos"),
    ("olist_sellers_dataset.csv", "bronze.ft_vendedores"),
    ("product_category_name_translation.csv", "bronze.dm_categoria_produtos_traducao"),
]

for csv_name, bronze_table in _bronze_sources:
    load_bronze(csv_name, bronze_table)

# Date range for the exchange-rate query, written as MM-DD-YYYY.
data_inicio = "01-01-2017"
data_fim = "12-31-2018"

# OData query against the BCB PTAX service, selecting only the quotation
# timestamp and the buy rate, returned as JSON.
url = (
    "https://olinda.bcb.gov.br/olinda/servico/PTAX/versao/v1/odata/"
    "CotacaoDolarPeriodo(dataInicial=@dataInicial,dataFinalCotacao=@dataFinalCotacao)?"
    f"@dataInicial='{data_inicio}'&@dataFinalCotacao='{data_fim}'"
    "&$select=dataHoraCotacao,cotacaoCompra&$format=json"
)

# A timeout keeps the notebook from hanging forever if the service stalls,
# and raise_for_status() surfaces HTTP errors before the JSON body is parsed.
response = requests.get(url, timeout=30)
response.raise_for_status()
dados = response.json()["value"]

# createDataFrame cannot infer a schema from an empty list; fail here with a
# message that points at the actual cause instead.
if not dados:
    raise ValueError(
        f"PTAX service returned no quotations between {data_inicio} and {data_fim}"
    )

df_dolar = spark.createDataFrame(dados)
df_dolar = df_dolar.withColumn("ingestion_timestamp", F.current_timestamp())

df_dolar.write.mode("overwrite").saveAsTable("bronze.dm_cotacao_dolar")

display(df_dolar.limit(5))

# List every table now present in the bronze database as a final check.
spark.sql("SHOW TABLES IN bronze").display()