## BACEN: USD exchange rate (PTAX) extraction and BigQuery load

In [1]:
import os
import requests
import pandas as pd
from datetime import date, timedelta
from pandas_gbq import to_gbq
from google.oauth2 import service_account
from dotenv import load_dotenv

# --- Configuration ---
# Load environment variables from .env file
load_dotenv()

# GCP project and table details
PROJECT_ID = 'menu-engineering-466520'
TABLE_ID = 'raw_external.bacen_dollar_exchange_rate'

# Date range for the data extraction (based on our inspection)
START_DATE = date(2010, 2, 27)
END_DATE = date(2023, 1, 15)

# The BACEN API is more stable with smaller date range chunks.
# Each request window runs from its start date to start + CHUNK_DAYS inclusive.
CHUNK_DAYS = 365

# Explicitly load credentials from the JSON file specified in .env.
# `credentials` stays None on any failure; the rest of the script checks it
# before doing any work.
credentials_path = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
credentials = None
if not credentials_path:
    # Without this guard, None is passed to from_service_account_file and the
    # only symptom is an opaque TypeError about path types.
    print(
        "Error loading credentials: GOOGLE_APPLICATION_CREDENTIALS is not set "
        "(check your .env file)."
    )
else:
    try:
        credentials = service_account.Credentials.from_service_account_file(credentials_path)
        print("Successfully loaded service account credentials.")
    except Exception as e:
        # Top-level script boundary: report the problem and continue with
        # credentials = None instead of crashing the notebook cell.
        print(f"Error loading credentials: {e}")
        credentials = None

# --- Main Execution ---
# Fetch the USD quotes from the BACEN PTAX API in date-range chunks, then
# load the combined result into BigQuery. The upload replaces the destination
# table, so it only runs when every chunk was fetched successfully.
if credentials:
    all_data = []
    # Periods whose request failed; a non-empty list blocks the upload below.
    failed_periods = []
    current_start = START_DATE

    print(f"\n--- Starting data extraction from BACEN API ({START_DATE} to {END_DATE}) ---")
    while current_start <= END_DATE:
        # Window is [current_start, current_end] inclusive, capped at END_DATE.
        current_end = min(current_start + timedelta(days=CHUNK_DAYS), END_DATE)

        # Format dates for the API URL: MM-DD-YYYY
        start_str = current_start.strftime('%m-%d-%Y')
        end_str = current_end.strftime('%m-%d-%Y')

        # Construct the API URL
        url = f"https://olinda.bcb.gov.br/olinda/servico/PTAX/versao/v1/odata/CotacaoDolarPeriodo(dataInicial=@dataInicial,dataFinalCotacao=@dataFinalCotacao)?@dataInicial='{start_str}'&@dataFinalCotacao='{end_str}'&$format=json"

        try:
            # Make the API request (timeout so a stalled connection cannot hang the run)
            response = requests.get(url, timeout=30)

            if response.status_code == 200:
                data = response.json().get('value', [])
                if data:
                    all_data.extend(data)
                    print(f"Successfully fetched {len(data)} records for period {start_str} to {end_str}")
                else:
                    print(f"No data returned for period {start_str} to {end_str}")
            else:
                print(f"Failed to fetch data for period {start_str} to {end_str}. Status code: {response.status_code}")
                failed_periods.append(f"{start_str} to {end_str}")

        except requests.exceptions.RequestException as e:
            print(f"An error occurred during API request for period {start_str} to {end_str}: {e}")
            failed_periods.append(f"{start_str} to {end_str}")
        except ValueError as e:
            # Older versions of requests raise a plain ValueError (not a
            # RequestException) when the response body is not valid JSON.
            print(f"Invalid JSON in API response for period {start_str} to {end_str}: {e}")
            failed_periods.append(f"{start_str} to {end_str}")

        # Move to the next period (the day after this window's inclusive end)
        current_start = current_end + timedelta(days=1)

    # --- Process and Load Data to BigQuery ---
    if failed_periods:
        # The load below uses if_exists='replace'; uploading now would
        # overwrite the existing table with a dataset that has gaps.
        print(
            f"\n❌ {len(failed_periods)} period(s) could not be fetched: "
            f"{', '.join(failed_periods)}. Skipping upload so the existing "
            f"table is not replaced with incomplete data."
        )
    elif all_data:
        # Convert the collected JSON data to a Pandas DataFrame, keeping only
        # the relevant columns under clearer names. Chaining a non-inplace
        # rename avoids mutating a column-subset (SettingWithCopyWarning).
        df = pd.DataFrame(all_data)[['cotacaoVenda', 'dataHoraCotacao']].rename(columns={
            'cotacaoVenda': 'usd_exchange_rate_sell',
            'dataHoraCotacao': 'exchange_date_time'
        })

        # Convert columns to the correct data types
        df['usd_exchange_rate_sell'] = pd.to_numeric(df['usd_exchange_rate_sell'])
        # NOTE(review): utc=True labels timezone-naive inputs as UTC. If the
        # API reports local (presumably Brasília) time, the stored instants
        # are offset - confirm against the BACEN documentation.
        df['exchange_date_time'] = pd.to_datetime(df['exchange_date_time'], utc=True)

        # Create a pure DATE column for easier joins
        df['exchange_date'] = df['exchange_date_time'].dt.date

        print(f"\n--- Preparing to upload {len(df)} rows to BigQuery table {TABLE_ID}... ---")

        try:
            # Load the DataFrame to BigQuery
            to_gbq(
                df,
                destination_table=TABLE_ID,
                project_id=PROJECT_ID,
                credentials=credentials,
                if_exists='replace' # Use 'replace' to overwrite the table on each run, making the script rerunnable
            )
            print("✅ Upload to BigQuery complete!")
        except Exception as e:
            # Top-level script boundary: report the failure instead of raising.
            print(f"❌ Failed to upload data to BigQuery: {e}")

    else:
        print("\nNo data was fetched from the API. Nothing to upload.")

Successfully loaded service account credentials.

--- Starting data extraction from BACEN API (2010-02-27 to 2023-01-15) ---
Successfully fetched 253 records for period 02-27-2010 to 02-27-2011
Successfully fetched 251 records for period 02-28-2011 to 02-28-2012
Successfully fetched 251 records for period 02-29-2012 to 02-28-2013
Successfully fetched 255 records for period 03-01-2013 to 03-01-2014
Successfully fetched 251 records for period 03-02-2014 to 03-02-2015
Successfully fetched 251 records for period 03-03-2015 to 03-02-2016
Successfully fetched 253 records for period 03-03-2016 to 03-03-2017
Successfully fetched 248 records for period 03-04-2017 to 03-04-2018
Successfully fetched 251 records for period 03-05-2018 to 03-05-2019
Successfully fetched 254 records for period 03-06-2019 to 03-05-2020
Successfully fetched 250 records for period 03-06-2020 to 03-06-2021
Successfully fetched 252 records for period 03-07-2021 to 03-07-2022
Successfully fetched 217 records for period 03-