#### Download and Process Copom Minutes

This notebook automates the process of extracting, processing, and downloading the minutes from the Brazilian Central Bank's Monetary Policy Committee (Copom).
 * It connects to the Brazilian Central Bank's API to retrieve information about the latest Copom minutes and saves it to an Excel file (`minutes_info.xlsx`).
 * It iterates through the extracted data to download the English version of each Copom minute's PDF and saves it to a local folder.

In [1]:
from pathlib import Path

import pandas as pd
import requests

In [4]:
# Root folder for the raw data this notebook writes; the output paths below are built from it.
FOLDER_RAW = "./data/raw"

In [5]:
# Fetch the metadata of the Copom minutes from the Brazilian Central Bank API and save it to Excel.
url = "https://www.bcb.gov.br/api/servico/sitebcb/copomminutes/ultimas?quantidade=1000&filtro="

# Without a timeout, requests waits indefinitely if the API stops responding.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()

# Parse the payload once; the records are stored under the "conteudo" key.
minutes_records = response.json()["conteudo"]

df_minutes_info = (
    pd.DataFrame(minutes_records)[["DataReferencia", "Titulo", "LinkPagina", "Url"]]
    # Exclude the entry titled "Changes in Copom meetings"
    # (presumably a notice rather than a minutes document -- confirm).
    .loc[lambda df: df["Titulo"] != "Changes in Copom meetings"]
)

# Create the output folder so the export also works on a fresh checkout.
Path(FOLDER_RAW).mkdir(parents=True, exist_ok=True)
df_minutes_info.to_excel(f"{FOLDER_RAW}/minutes_info.xlsx", index=False)

In [None]:
def download_pdf(url: str, save_folder: str, file_name: str, timeout: float = 30.0) -> None:
    """
    Download a PDF file from a URL and save it to a specified folder with a given file name.

    The download is best-effort: request failures are reported with a printed
    message instead of being raised, so a caller looping over many files keeps going.
    Filesystem errors (e.g. permission denied) are not caught and propagate.

    Parameters:
    - url (str): The URL of the PDF file to download.
    - save_folder (str): The folder where the PDF file will be saved. It is created if missing.
    - file_name (str): The name to save the PDF file as.
    - timeout (float): Seconds to wait for the server before giving up. Defaults to 30.
    """
    Path(save_folder).mkdir(parents=True, exist_ok=True)
    target_path = Path(f"{save_folder}/{file_name}")
    # Stream into a temporary ".part" file so an interrupted download never
    # leaves a truncated file under the final name.
    partial_path = Path(f"{save_folder}/{file_name}.part")

    try:
        # The context manager releases the streamed connection even on failure.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Only a fully written file is promoted to its final name.
        partial_path.replace(target_path)
        print(f"[INFO] Download successful: {file_name}!")

    except requests.exceptions.RequestException as e:
        # Discard whatever was written before the request failed.
        if partial_path.exists():
            partial_path.unlink()
        print(f"[ERROR] Error downloading {file_name}: {e}")

In [None]:
# Download the English version of every Copom minute listed in df_minutes_info from the Brazilian Central Bank website.

save_folder_path = f"{FOLDER_RAW}/copom_minutes_raw"
bcb_url = "https://www.bcb.gov.br"

# Make sure the destination folder exists before the first file is written.
Path(save_folder_path).mkdir(parents=True, exist_ok=True)

# "Url" is appended to the site root to form the full address of each PDF.
for title, relative_url in zip(df_minutes_info["Titulo"], df_minutes_info["Url"]):
    file_url = f"{bcb_url}{relative_url}"
    # A "/" in a title would be interpreted as a directory separator when the file is opened.
    safe_title = str(title).replace("/", "-")
    download_pdf(file_url, save_folder_path, f"{safe_title}.pdf")