#### Importação de Pacotes

In [0]:
from pyspark.sql.functions import *
import requests, time

#### Definição do Unity Catalog e Schema

In [0]:
%sql
-- Create the `bees` catalog on first run (no-op when it already exists),
-- then make it the current catalog for the statements that follow.
CREATE CATALOG IF NOT EXISTS bees;
USE CATALOG bees;

In [0]:
%sql
-- Create the `brewery` schema on first run (no-op when it already exists),
-- then make it the current schema so unqualified table names resolve to it.
CREATE SCHEMA IF NOT EXISTS brewery;
USE SCHEMA brewery;

In [0]:
%sql
CREATE TABLE IF NOT EXISTS brewery_bronze
COMMENT 'Tabela Bronze dos dados de cervejarias consumidos da API openbrewerydb.org'

#### Consumo da API

In [0]:
# Base endpoint for the brewery listing; page parameters are added by the caller.
url = "https://api.openbrewerydb.org/v1/breweries"

In [0]:
def get_data(url, page=1, per_page=200, retries=3, wait_seconds=5, timeout=30):
    """
    Fetch one page of JSON data from the API, retrying on failure.

    Args:
        url (str): API endpoint URL.
        page (int): Page number to request.
        per_page (int): Number of records requested per page.
        retries (int): Maximum number of attempts before giving up.
        wait_seconds (float): Pause between two consecutive attempts, in seconds.
        timeout (float): Timeout passed to each HTTP request, in seconds, so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        The decoded JSON body of the first successful response.

    Raises:
        RuntimeError: If every attempt fails. The message carries the HTTP
            status code when one is available, otherwise a description of the
            last error; the last error is chained as the cause.
    """
    last_error = None
    for attempt in range(1, retries + 1):
        try:
            # Let requests build the query string instead of concatenating it.
            response = requests.get(
                url,
                params={"page": page, "per_page": per_page},
                timeout=timeout,
            )
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as error:
            # ValueError covers a body that is not valid JSON. Anything else
            # (e.g. KeyboardInterrupt) is deliberately not swallowed.
            last_error = error
            if attempt < retries:
                # No point in waiting after the final attempt.
                time.sleep(wait_seconds)

    # Connection-level failures have no response object, so fall back to the
    # error description instead of dereferencing an unbound response.
    failed_response = getattr(last_error, "response", None)
    detail = getattr(failed_response, "status_code", None)
    if detail is None:
        detail = repr(last_error)
    raise RuntimeError(f"Erro durante requisicao: {url} - {detail}") from last_error

In [0]:
# Walk the paginated endpoint from page 1 and accumulate every record;
# the first empty page marks the end of the listing.
list_breweries = []
page = 1
data = get_data(url, page)
while data:
    list_breweries.extend(data)
    page += 1
    data = get_data(url, page)

In [0]:
# Explicit DDL schema for the bronze DataFrame: text columns plus the two
# coordinates as DOUBLE.
schema = """
    id STRING,
    name STRING,
    brewery_type STRING,
    address_1 STRING,
    address_2 STRING,
    address_3 STRING,
    city STRING,
    state_province STRING,
    postal_code STRING,
    country STRING,
    longitude DOUBLE,
    latitude DOUBLE,
    phone STRING,
    website_url STRING,
    state STRING,
    street STRING
"""
# Build the DataFrame from the collected records using that schema.
df = spark.createDataFrame(data=list_breweries, schema=schema)

#### Comparação da extração com a quantidade informada no metadata

In [0]:
# Sanity check: the number of extracted rows must match the total reported by
# the API's metadata endpoint, otherwise the extraction is incomplete.
total_df = df.count()
# Coerce to int so the comparison stays type-safe even if the API serialises
# the total as a string (a str would never compare equal to the int count).
total_metadata = int(get_data(f'{url}/meta')['total'])
if total_df != total_metadata:
    raise Exception(
        f"Total de registros coletados diferente do total do metadata: {total_df} != {total_metadata}"
    )

#### Output tabela bronze

In [0]:
# Configure a Delta writer that replaces the table contents on every run and
# has schema merging enabled, then persist it as the bronze table.
bronze_writer = df.write.format("delta")
bronze_writer = bronze_writer.mode("overwrite")
bronze_writer = bronze_writer.option("mergeSchema", True)
bronze_writer.saveAsTable("brewery_bronze")