In [15]:
# Fetch Brazilian Economic Data from BCB API
import pandas as pd
import requests
import numpy as np
from datetime import datetime, timedelta

# BCB API endpoint and the date window requested for every series.
# START_DATE / END_DATE are written dd/mm/YYYY because that is the format
# passed to the API's dataInicial / dataFinal query parameters.
BCB_BASE_URL = "https://api.bcb.gov.br/dados/serie/bcdata.sgs"
START_DATE = "04/09/2016"  # Your data start date
END_DATE = "17/10/2018"    # Your data end date


def _bcb_date_to_iso(bcb_date):
    """Convert a dd/mm/YYYY date string to YYYY-MM-DD for display."""
    return datetime.strptime(bcb_date, "%d/%m/%Y").strftime("%Y-%m-%d")


print("Fetching Brazilian Economic Data from Central Bank API...")
# Derive the banner from the constants so the printed window can never
# disagree with the window that is actually requested.
print(f"Time range: {_bcb_date_to_iso(START_DATE)} to {_bcb_date_to_iso(END_DATE)}")
print("=" * 60)

# Economic indicators with their BCB series codes.
# 'frequency' selects the fill strategy used when the daily lookup table is
# built: 'daily' series are merged directly onto the calendar, while
# 'irregular' and 'monthly' series are forward-filled.
economic_series = {
    'usd_brl_rate': {
        'code': 1,
        'name': 'USD/BRL Exchange Rate',
        'frequency': 'daily'
    },
    'selic_rate': {
        'code': 11,
        'name': 'SELIC Interest Rate',
        # NOTE(review): the fetch returned 530 rows for this series, the same
        # count as the daily FX series, so it looks like a business-day series
        # rather than one that is only updated on rate changes. 'irregular' is
        # kept because it selects forward-filling for the days in between.
        'frequency': 'irregular'
    },
    'selic_target': {
        'code': 432,
        'name': 'SELIC Target Rate',
        'frequency': 'irregular'
    },
    'ipca_inflation': {
        'code': 433,
        'name': 'IPCA Inflation (Monthly)',
        'frequency': 'monthly'
    }
}

def fetch_bcb_series(series_code, start_date, end_date, timeout=30):
    """Fetch one time series from the Brazilian Central Bank (BCB) API.

    Args:
        series_code: Numeric series identifier appended to BCB_BASE_URL.
        start_date: Value sent as ``dataInicial`` (dd/mm/YYYY string).
        end_date: Value sent as ``dataFinal`` (dd/mm/YYYY string).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame with exactly two columns, ``data`` (datetime64) and
        ``valor`` (float), with rows dropped where either could not be parsed.
        Returns None if the request fails, the payload cannot be parsed, or
        no usable rows remain. Failures are reported with ``print`` rather
        than raised, so a single bad series does not abort the whole run.
    """
    url = f"{BCB_BASE_URL}.{series_code}/dados"
    params = {
        'formato': 'json',
        'dataInicial': start_date,
        'dataFinal': end_date
    }

    try:
        print(f"Fetching series {series_code}...")
        # Without a timeout a stalled connection would hang the notebook.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"❌ Error fetching series {series_code}: {e}")
        return None

    if not data:
        print(f"❌ Series {series_code}: No data returned")
        return None

    try:
        raw = pd.DataFrame(data)
        # Build the result from the two expected fields only, so any extra
        # field in the payload can neither change the column layout nor cause
        # otherwise valid rows to be dropped.
        df = pd.DataFrame({
            'data': pd.to_datetime(raw['data'], format='%d/%m/%Y'),
            'valor': pd.to_numeric(raw['valor'], errors='coerce'),
        }).dropna()  # Remove any invalid values
    except (KeyError, ValueError, TypeError) as e:
        # Unexpected payload shape: missing field, bad date format, etc.
        print(f"❌ Error fetching series {series_code}: {e}")
        return None

    if df.empty:
        # Every row was invalid; treat it the same as an empty response so
        # callers never receive a frame with no dates in it.
        print(f"❌ Series {series_code}: No data returned")
        return None

    print(f"✅ Series {series_code}: {len(df)} observations")
    return df

# Fetch all economic series.
# economic_data maps each key of economic_series to a two-column frame
# ['date', <key>]; series that could not be fetched are simply absent.
economic_data = {}

for key, series_info in economic_series.items():
    fetched = fetch_bcb_series(series_info['code'], START_DATE, END_DATE)
    if fetched is None or fetched.empty:
        # Nothing usable came back; an empty frame would also break the
        # min()/max() date formatting below.
        continue

    # Rename by column name instead of assigning a positional list, which
    # raises if the frame does not have exactly two columns.
    df = fetched[['data', 'valor']].rename(columns={'data': 'date', 'valor': key})
    economic_data[key] = df

    print(f"   Range: {df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}")
    print(f"   Sample values: {df[key].head(3).tolist()}")

print(f"\n📊 Successfully fetched {len(economic_data)} economic series")

# =============================================================================
# CREATE COMPREHENSIVE ECONOMIC LOOKUP TABLE
# =============================================================================

print("\n" + "=" * 60)
print("CREATING DAILY ECONOMIC LOOKUP TABLE")
print("=" * 60)

# One row per calendar day of the order data. The bounds are parsed from the
# same START_DATE / END_DATE that were sent to the API, so the table and the
# fetched window cannot drift apart.
date_range = pd.date_range(
    start=pd.to_datetime(START_DATE, format='%d/%m/%Y'),
    end=pd.to_datetime(END_DATE, format='%d/%m/%Y'),
    freq='D',
)
economic_lookup = pd.DataFrame({'purchase_date': date_range})

print(f"Created date range: {len(economic_lookup)} days")

# Merge each economic series
for key, df in economic_data.items():
    series_info = economic_series[key]

    # One observation per date, in chronological order.
    observations = (
        df.sort_values('date', kind='stable')
          .drop_duplicates('date', keep='last')
    )

    # Forward-fill for indicators that don't change daily
    if series_info['frequency'] in ['irregular', 'monthly']:
        # As-of lookup: each calendar day takes the most recent observation
        # at or before it. Reindexing with method='ffill' searches the
        # observation dates themselves, so a value dated before the first
        # calendar day (e.g. a monthly figure stamped on the 1st when the
        # window opens on the 4th) is still used. A left-merge onto the
        # calendar followed by ffill would drop that observation.
        in_effect = observations.set_index('date')[key]
        economic_lookup[key] = in_effect.reindex(date_range, method='ffill').to_numpy()

        print(f"✅ {series_info['name']}: Forward-filled for daily coverage")

    else:
        # For daily series, merge directly; days without an observation stay
        # NaN here and are filled by the gap-filling pass below.
        economic_lookup = economic_lookup.merge(
            observations, left_on='purchase_date', right_on='date', how='left'
        ).drop(columns='date')

        print(f"✅ {series_info['name']}: Direct daily merge")

# Fill any remaining gaps
for key in economic_data:
    if key in economic_lookup.columns:
        # NOTE(review): linear interpolation uses the *next* observation, so
        # values filled for days without a quote depend on a later day.
        economic_lookup[key] = (
            economic_lookup[key]
            .interpolate(method='linear')  # interior gaps
            .ffill()                       # any NaNs left at the end
            .bfill()                       # NaNs at the start (no earlier value)
        )

# =============================================================================
# ADD DERIVED ECONOMIC FEATURES
# =============================================================================

print("\nAdding derived economic features...")

# Exchange rate features
if 'usd_brl_rate' in economic_lookup.columns:
    usd_brl = economic_lookup['usd_brl_rate']

    # Day-over-day change as a fraction (0.01 == 1%); NaN on the first row.
    economic_lookup['usd_brl_change'] = usd_brl.pct_change()

    # Volatility: 7-day rolling standard deviation (NaN for the first 6 rows).
    economic_lookup['usd_brl_volatility'] = usd_brl.rolling(7).std()

    # High/low flags: rate more than 5% above / below its 30-day rolling mean.
    # While the rolling mean is still NaN (first 29 rows) both comparisons are
    # False, so both flags are 0.
    usd_brl_30day_avg = usd_brl.rolling(30).mean()
    economic_lookup['usd_brl_30day_avg'] = usd_brl_30day_avg
    economic_lookup['usd_brl_high'] = (usd_brl > usd_brl_30day_avg * 1.05).astype(int)
    economic_lookup['usd_brl_low'] = (usd_brl < usd_brl_30day_avg * 0.95).astype(int)

# Interest rate features
if 'selic_target' in economic_lookup.columns:
    selic_target = economic_lookup['selic_target']

    # Rate change events. diff() is NaN on the first row and NaN != 0 is True,
    # which would flag the first day as a rate change; treat "no previous day"
    # as "no change" instead.
    economic_lookup['selic_changed'] = selic_target.diff().fillna(0).ne(0).astype(int)

    # High interest rate periods: above the median over the whole table.
    median_selic = selic_target.median()
    economic_lookup['high_interest_period'] = (selic_target > median_selic).astype(int)

# =============================================================================
# SUMMARY AND VALIDATION
# =============================================================================

print("\n" + "=" * 60)
print("ECONOMIC DATA SUMMARY")
print("=" * 60)

print(f"Economic lookup table shape: {economic_lookup.shape}")
print(f"Date range: {economic_lookup['purchase_date'].min()} to {economic_lookup['purchase_date'].max()}")

# Summary statistics for each fetched indicator that made it into the table
print("\nEconomic indicators summary:")
for key in economic_data:
    if key not in economic_lookup.columns:
        continue
    series = economic_lookup[key]
    print(f"{key}:")
    print(f"  Mean: {series.mean():.4f}")
    print(f"  Min: {series.min():.4f} | Max: {series.max():.4f}")
    print(f"  Missing values: {series.isna().sum()}")

# Show sample data: the date plus the first five other columns
print("\nSample of economic lookup table:")
sample_cols = ['purchase_date'] + [col for col in economic_lookup.columns if col != 'purchase_date'][:5]
print(economic_lookup[sample_cols].head(10).to_string(index=False))

# Save economic lookup table
economic_lookup.to_csv("../data/economic_indicators.csv", index=False)
print("\nEconomic lookup table saved to: economic_indicators.csv")

print("\nColumns in economic lookup table:")
for i, col in enumerate(economic_lookup.columns, 1):
    print(f"{i:2d}. {col}")

print("\nTo link with orders data:")
# The saved table's date column is named purchase_date; there is no `date`
# column, so the join hint must reference purchase_date on both sides.
print("JOIN orders ON orders.purchase_date = economic_indicators.purchase_date")

print("\n🎯 BUSINESS IMPACT:")
print("- USD/BRL rate affects purchasing power for imported goods")
print("- SELIC rate impacts credit purchases and consumer spending")
print("- Rate changes create economic uncertainty periods")
print("- High volatility periods may reduce consumer confidence")


Fetching Brazilian Economic Data from Central Bank API...
Time range: 2016-09-04 to 2018-10-17
Fetching series 1...
✅ Series 1: 530 observations
   Range: 2016-09-05 to 2018-10-17
   Sample values: [3.2721, 3.2452, 3.1934]
Fetching series 11...
✅ Series 11: 530 observations
   Range: 2016-09-05 to 2018-10-17
   Sample values: [0.052531, 0.052531, 0.052531]
Fetching series 432...
✅ Series 432: 774 observations
   Range: 2016-09-04 to 2018-10-17
   Sample values: [14.25, 14.25, 14.25]
Fetching series 433...
✅ Series 433: 26 observations
   Range: 2016-09-01 to 2018-10-01
   Sample values: [0.08, 0.26, 0.18]

📊 Successfully fetched 4 economic series

CREATING DAILY ECONOMIC LOOKUP TABLE
Created date range: 774 days
✅ USD/BRL Exchange Rate: Direct daily merge
✅ SELIC Interest Rate: Forward-filled for daily coverage
✅ SELIC Target Rate: Forward-filled for daily coverage
✅ IPCA Inflation (Monthly): Forward-filled for daily coverage

Adding derived economic features...

ECONOMIC DATA SUMMARY


  merged[key] = merged[key].fillna(method='ffill')  # Forward fill
  merged[key] = merged[key].fillna(method='ffill')  # Forward fill
  merged[key] = merged[key].fillna(method='ffill')  # Forward fill
  economic_lookup[key] = economic_lookup[key].fillna(method='ffill')
  economic_lookup[key] = economic_lookup[key].fillna(method='bfill')


In [16]:
# Reload the saved lookup table. Parse purchase_date while reading so the
# column comes back as datetime64 rather than plain strings; CSV does not
# preserve dtypes, and later cells use the .dt accessor on this column.
economic_df = pd.read_csv("../data/economic_indicators.csv", parse_dates=["purchase_date"])
economic_df.columns

Index(['purchase_date', 'usd_brl_rate', 'selic_rate', 'selic_target',
       'ipca_inflation', 'usd_brl_change', 'usd_brl_volatility',
       'usd_brl_30day_avg', 'usd_brl_high', 'usd_brl_low', 'selic_changed',
       'high_interest_period'],
      dtype='object')

In [18]:
# Load the saved lookup table and print an overview of its contents.
try:
    economic_df = pd.read_csv("../data/economic_indicators.csv", parse_dates=["purchase_date"])
    print(f"Loaded economic data: {economic_df.shape}")
except FileNotFoundError:
    print("Economic indicators file not found. Run the economic data collection code first.")
    # Re-raise instead of calling exit(): in a notebook exit() asks IPython to
    # shut the kernel down, whereas re-raising just stops this cell (and a
    # Run All) while keeping the kernel and its state alive.
    raise

print("\n" + "=" * 60)
print("ECONOMIC DATA OVERVIEW")
print("=" * 60)

print(f"Date range: {economic_df['purchase_date'].min()} to {economic_df['purchase_date'].max()}")
print(f"Total days: {len(economic_df)}")

print("\nColumns in dataset:")
for i, col in enumerate(economic_df.columns, 1):
    print(f"{i:2d}. {col}")

print("\nData types:")
print(economic_df.dtypes)

print("\n" + "=" * 60)
print("SAMPLE DATA")
print("=" * 60)

print("First 10 rows:")
print(economic_df.head(10).to_string(index=False))

print("\nLast 10 rows:")
print(economic_df.tail(10).to_string(index=False))

print("\nRandom sample of 10 rows:")
# Seed the sample so re-running the notebook shows the same rows, and cap the
# size so a table with fewer than 10 rows does not raise.
random_rows = economic_df.sample(min(10, len(economic_df)), random_state=42)
print(random_rows.sort_values('purchase_date').to_string(index=False))


Loaded economic data: (774, 12)

ECONOMIC DATA OVERVIEW
Date range: 2016-09-04 00:00:00 to 2018-10-17 00:00:00
Total days: 774

Columns in dataset:
 1. purchase_date
 2. usd_brl_rate
 3. selic_rate
 4. selic_target
 5. ipca_inflation
 6. usd_brl_change
 7. usd_brl_volatility
 8. usd_brl_30day_avg
 9. usd_brl_high
10. usd_brl_low
11. selic_changed
12. high_interest_period

Data types:
purchase_date           datetime64[ns]
usd_brl_rate                   float64
selic_rate                     float64
selic_target                   float64
ipca_inflation                 float64
usd_brl_change                 float64
usd_brl_volatility             float64
usd_brl_30day_avg              float64
usd_brl_high                     int64
usd_brl_low                      int64
selic_changed                    int64
high_interest_period             int64
dtype: object

SAMPLE DATA
First 10 rows:
purchase_date  usd_brl_rate  selic_rate  selic_target  ipca_inflation  usd_brl_change  usd_brl_volatili

In [19]:
# Data-quality report for economic_df: nulls per column, duplicated dates,
# and breaks in the daily calendar.
print("\n" + "=" * 60)
print("DATA QUALITY CHECK")
print("=" * 60)

total_rows = len(economic_df)
nulls_per_column = economic_df.isnull().sum()
columns_with_nulls = nulls_per_column[nulls_per_column > 0]

print("Missing values per column:")
if columns_with_nulls.empty:
    print("  No missing values found")
else:
    # Only columns that actually contain nulls are listed.
    for col, count in columns_with_nulls.items():
        print(f"  {col}: {count} missing ({count/total_rows*100:.1f}%)")

duplicate_dates = economic_df['purchase_date'].duplicated().sum()
print(f"\nDuplicate dates: {duplicate_dates}")

# A step of more than one day between consecutive rows is a calendar gap.
# (Requires purchase_date to be a datetime column, since .dt is used.)
day_steps = economic_df['purchase_date'].diff().dt.days
gap_count = len(day_steps[day_steps > 1])
if gap_count == 0:
    print("No date gaps found")
else:
    print(f"Date gaps found: {gap_count} instances")



DATA QUALITY CHECK
Missing values per column:
  usd_brl_change: 1 missing (0.1%)
  usd_brl_volatility: 6 missing (0.8%)
  usd_brl_30day_avg: 29 missing (3.7%)

Duplicate dates: 0
No date gaps found


Economic Indicators (Source: Brazilian Central Bank API)

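usd_brl_rate - Daily USD/BRL exchange rate (BCB series 1); days without a quote are filled by linear interpolation
selic_rate - SELIC interest rate (BCB series 11), forward-filled to every calendar day
selic_target - SELIC target rate (BCB series 432)
ipca_inflation - Monthly IPCA inflation rate (BCB series 433), forward-filled to every calendar day
usd_brl_change - Day-over-day change in usd_brl_rate, as a fraction (0.01 = 1%)
usd_brl_volatility - 7-day rolling standard deviation of usd_brl_rate
usd_brl_30day_avg - 30-day rolling mean of usd_brl_rate
usd_brl_high - 1 when usd_brl_rate is more than 5% above its 30-day rolling mean, otherwise 0
usd_brl_low - 1 when usd_brl_rate is more than 5% below its 30-day rolling mean, otherwise 0
selic_changed - 1 when selic_target differs from the previous day's value, otherwise 0
high_interest_period - 1 when selic_target is above its median over the full date range, otherwise 0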