In [None]:
import os
import time
from getpass import getpass

import pandas as pd
import requests
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType

# 1. Start (or reuse) the Spark session used by this notebook
spark = (
    SparkSession.builder
    .appName("OpenBreweryDB Ingestion")
    .getOrCreate()
)

In [206]:
# 2. Fetch one page from the API, retrying transient failures
def request2dataframe(page, per_page=200, max_attempts=3, backoff=1.0):
    """Request one page of breweries and return it as a pandas DataFrame.

    The paginating caller treats ``None`` as "no more pages", so a single
    transient failure would silently truncate the collected dataset. To avoid
    that, network/parsing exceptions, HTTP 429 and HTTP 5xx responses are
    retried with exponential backoff before giving up.

    Parameters
    ----------
    page : int
        Value sent as the ``page`` query parameter.
    per_page : int, optional
        Value sent as the ``per_page`` query parameter (default 200).
    max_attempts : int, optional
        Total number of requests to try for this page (at least one is made).
    backoff : float, optional
        Base delay in seconds between attempts; the wait doubles after each
        failed attempt. Use 0 to retry immediately.

    Returns
    -------
    pandas.DataFrame or None
        The page contents, or ``None`` when the page is empty, when the server
        answers with a non-retryable status, or when every attempt failed.
        This function never raises; failures are reported with ``print``.
    """
    url = "https://api.openbrewerydb.org/v1/breweries"
    params = {
        "page": page,
        "per_page": per_page
    }
    attempts = max(1, max_attempts)

    for attempt in range(1, attempts + 1):
        try:
            response = requests.get(url, params=params, timeout=10)
            status = response.status_code

            if status == 200:
                data = response.json()
                if not data:
                    return None
                return pd.DataFrame(data)

            print(f"[!] Erro na página {page}: {status}")
            # Client errors other than 429 (rate limit) will not succeed on retry.
            if status != 429 and status < 500:
                return None
        except Exception as e:
            # Deliberate best-effort: report the problem and fall through to retry.
            print(f"[!] Exceção na página {page}: {e}")

        if attempt < attempts:
            time.sleep(backoff * 2 ** (attempt - 1))

    return None

In [211]:
# 3. Collect all pages from the paginated endpoint
def coletar_dados():
    """Download pages one after another and return them as one pandas DataFrame.

    Pages are requested starting at page 1 through ``request2dataframe`` until
    a request yields ``None`` or an empty frame. Returns the concatenation of
    all collected pages with a fresh index, or an empty DataFrame when the
    very first request yields nothing.
    """
    paginas = []
    numero = 1
    pagina = request2dataframe(numero)

    while pagina is not None and not pagina.empty:
        paginas.append(pagina)
        numero += 1
        # Short pause between requests to avoid overloading the public API
        time.sleep(0.2)
        pagina = request2dataframe(numero)

    if not paginas:
        return pd.DataFrame()
    return pd.concat(paginas, ignore_index=True)

In [212]:
# 4. Collect every page into a single pandas DataFrame
dados_pd = coletar_dados()

# coletar_dados() returns an empty DataFrame when nothing could be fetched.
# Spark cannot infer a schema from an empty pandas DataFrame, so fail early
# with a clear message instead of an obscure schema-inference error.
if dados_pd.empty:
    raise RuntimeError("Nenhum registro foi coletado da API; nada a converter para Spark.")

# 5. Convert to a Spark DataFrame (no schema is passed, so Spark infers it)
df_spark = spark.createDataFrame(dados_pd)

# 6. Show the first 20 rows without truncating column values
df_spark.show(20, truncate=False)

# 7. (Optional) Save as CSV or Parquet
# df_spark.write.mode("overwrite").parquet("dados_breweries.parquet")

# 8. Stop the Spark session
spark.stop()

+------------------------------------+-------------------------------+------------+---------------------------+---------+---------+--------------+--------------+-----------+-------------+----------------+---------------+------------+----------------------------------+-------------+---------------------------+
|id                                  |name                           |brewery_type|address_1                  |address_2|address_3|city          |state_province|postal_code|country      |longitude       |latitude       |phone       |website_url                       |state        |street                     |
+------------------------------------+-------------------------------+------------+---------------------------+---------+---------+--------------+--------------+-----------+-------------+----------------+---------------+------------+----------------------------------+-------------+---------------------------+
|5128df48-79fc-4f0f-8b52-d06be54d0cec|(405) Brewing Co             

In [218]:
# Report how many rows were collected into the pandas DataFrame.
print(f"Total de registros no pandas DataFrame: {dados_pd.shape[0]}")


Total de registros no pandas DataFrame: 8408


In [226]:
# Set the global git identity (author name and e-mail) for this runtime.
!git config --global user.name "leticiaJFS"
!git config --global user.email "le-tyfagundes@hotmail.com"

In [225]:
!git clone https://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@github.com/leticiaJFS/BreweriesCase.git

Cloning into 'BreweriesCase'...


In [227]:
!git add .
!git commit -m "Adicionando camada Bronze"

fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git


In [228]:
# Make the cloned repository the working directory for subsequent commands.
%cd /content/BreweriesCase

/content/BreweriesCase


In [230]:

# List every entry of the current directory, including hidden ones.
!ls -a

.  ..  .git


In [232]:
# Push the local 'main' branch to the 'origin' remote.
# NOTE(review): the recorded output shows "src refspec main does not match any";
# presumably no commit exists on 'main' yet — confirm before re-running.
!git push origin main

error: src refspec main does not match any
[31merror: failed to push some refs to 'https://github.com/leticiaJFS/BreweriesCase.git'
[m

In [235]:
# Rename the current branch to 'main' (or use 'master' if you prefer that name)
!git branch -M main

# Do the initial push and set the remote branch as upstream
!git push -u origin main


error: src refspec main does not match any
[31merror: failed to push some refs to 'https://github.com/leticiaJFS/BreweriesCase.git'
[m

In [234]:
# Push using git's default remote and refspec for the current branch.
!git push


error: src refspec refs/heads/main does not match any
[31merror: failed to push some refs to 'https://github.com/leticiaJFS/BreweriesCase.git'
[m