## Step 1: Import Libraries

In [1]:
# Execute the helper script in this kernel. Presumably it defines
# make_clean_names, which a later cell calls — confirm against the script.
%run ../make_clean_names.py

In [2]:
import concurrent.futures
import logging
import os
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Dict, List, Optional

import polars as pl
import requests
from dotenv import load_dotenv
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# Load environment variables (python-dotenv); FMP_API_KEY is read from the
# environment further down in this cell.
load_dotenv()

# Show INFO-and-above log records. `logger` is used by the fetch helper below
# to report request errors.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def create_session():
    """Build a requests session whose HTTPS calls are retried automatically.

    The retry policy allows up to 3 retries with a backoff factor of 1 and is
    triggered by the status codes 429, 500, 502, 503 and 504. The adapter is
    mounted for the ``https://`` prefix only, with ``pool_maxsize=10``.

    Returns:
        The configured ``requests.Session``.
    """
    retry_policy = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    https_adapter = HTTPAdapter(max_retries=retry_policy, pool_maxsize=10)

    http_session = requests.Session()
    http_session.mount('https://', https_adapter)
    return http_session

# Read the FMP API key from the environment and stop immediately when it is
# missing or empty, before any request is attempted.
FMP_API_KEY = os.environ.get('FMP_API_KEY')
if FMP_API_KEY is None or FMP_API_KEY == '':
    raise ValueError("FMP_API_KEY not found in environment variables")

## Step 2: Import Forex Pairs

In [3]:
def load_pairs(file_path: str) -> List[str]:
    """Read pair symbols from a text file, one symbol per line.

    Surrounding whitespace is stripped from every line and lines that are
    empty after stripping are dropped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The symbols in file order. If anything goes wrong while reading, the
        error is printed and an empty list is returned instead of raising.
    """
    try:
        with open(file_path, 'r') as handle:
            stripped_lines = (raw_line.strip() for raw_line in handle)
            # filter(None, ...) discards the empty strings left by blank lines
            pairs = list(filter(None, stripped_lines))
    except Exception as e:
        print(f"Error loading pairs: {str(e)}")
        return []

    print(f"Loaded {len(pairs)} pairs from {file_path}")
    return pairs

# Location of the pair list, relative to this notebook
pairs_file = '../fx_pairs.txt'
pairs = load_pairs(pairs_file)

# Report what was loaded so an empty or missing file is visible in the output
if not pairs:
    print("No pairs loaded.")
else:
    print("pairs:", pairs)

Loaded 4 pairs from ../fx_pairs.txt
pairs: ['USDEUR', 'USDBRL', 'USDGBP', 'USDJPY']


## Step 2b: Extract Data from FMP into Polars

In [4]:
# Query window sent to the API: fixed start, ending on the day the cell runs
# (both formatted as YYYY-MM-DD strings)
start_date = '2020-01-01'
end_date = datetime.now().date().isoformat()

def fetch_forex_data(pair: str, api_key: str, session: requests.Session) -> Optional[List[Dict]]:
    """Fetch historical daily forex data for a single pair, tagging each record with the pair.

    Calls the Financial Modeling Prep ``historical-chart/1day`` endpoint for
    ``pair``, restricted to the module-level ``start_date``..``end_date`` window.

    Args:
        pair: Pair symbol placed in the URL path (e.g. ``"USDEUR"``).
        api_key: API key sent as the ``apikey`` query parameter.
        session: Session used to issue the GET request.

    Returns:
        A list of record dicts, each extended with a ``"pair"`` key, or ``None``
        when the request fails, the payload is empty, or the payload is not a
        list of records. Failures are logged rather than raised.
    """
    url = f"https://financialmodelingprep.com/api/v3/historical-chart/1day/{pair}"
    params = {
        "from": start_date,
        "to": end_date,
        "apikey": api_key
    }

    try:
        response = session.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
        if not data:
            return None
        if not isinstance(data, list):
            # A non-list body (presumably an API error object — confirm against
            # the FMP docs) carries no records; reject it explicitly instead of
            # relying on the unpacking below to fail.
            raise ValueError(f"unexpected response payload: {data!r}")
        # Add pair identifier to each record
        return [{"pair": pair, **record} for record in data]
    except Exception as e:
        # Request errors can quote the full URL, whose query string holds the
        # API key; mask it (best effort) so it never reaches notebook output.
        message = str(e)
        if api_key:
            message = message.replace(api_key, "***")
        logger.error(f"Error fetching {pair}: {message}")
        return None

def fetch_all_forex_data(pairs: List[str], api_key: str) -> List[Dict]:
    """Fetch and combine forex data for multiple pairs.

    Each pair is fetched with ``fetch_forex_data`` on a thread pool, all
    requests sharing one session from ``create_session``.

    Args:
        pairs: Pair symbols to fetch.
        api_key: API key forwarded to every request.

    Returns:
        One flat list with the records of every pair that returned data,
        grouped by pair in the order given in ``pairs``. Pairs whose fetch
        returned nothing are skipped; the list is empty when no pair
        produced data.
    """
    if not pairs:
        return []

    # The session is a context manager: leaving the block releases its pooled
    # connections. The executor is listed second so it shuts down (waiting for
    # all workers) before the session is closed.
    with create_session() as session, ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(fetch_forex_data, pair, api_key, session)
            for pair in pairs
        ]
        # Collect in submission order, not completion order, so the combined
        # row order is the same on every run regardless of network timing.
        results = [future.result() for future in futures]

    # Flatten results list and remove None values
    all_data = []
    for result in results:
        if result:
            all_data.extend(result)
    return all_data

# Execute fetching
fx_data = fetch_all_forex_data(pairs, FMP_API_KEY)

# Stop here when nothing was fetched. Otherwise `df` would stay undefined (or
# keep a stale value from an earlier run of this kernel) and the following
# cells would fail obscurely or write old data.
if not fx_data:
    raise RuntimeError(
        "No forex data fetched; check the pairs list, the API key and network access"
    )

# Convert to Polars DataFrame
df = pl.DataFrame(fx_data)

## Step 3: Clean Column Names

In [5]:
# make_clean_names is not defined in this notebook; presumably it comes from
# the make_clean_names.py script executed with %run in the first cell — confirm.
# NOTE: this rebinds `df`, so re-running the cell applies the cleaning again.
df = make_clean_names(df)

In [6]:
# Show the cleaned frame as the cell output
df

pair,date,open,low,high,close,volume
str,str,f64,f64,f64,f64,i64
"""USDEUR""","""2025-01-08 00:00:00""",0.9666,0.9652,0.96945,0.96913,25230
"""USDEUR""","""2025-01-07 00:00:00""",0.9623,0.9586,0.9668,0.9666,82064
"""USDEUR""","""2025-01-06 00:00:00""",0.9697,0.9585,0.9711,0.9622,0
"""USDEUR""","""2025-01-05 00:00:00""",0.9702,0.9701,0.9702,0.9702,0
"""USDEUR""","""2025-01-03 00:00:00""",0.9742,0.9698,0.9743,0.9698,68388
…,…,…,…,…,…,…
"""USDJPY""","""2020-01-17 00:00:00""",110.16,110.046,110.29,110.149,129811
"""USDJPY""","""2020-01-16 00:00:00""",109.897,109.85,110.179,110.16,129501
"""USDJPY""","""2020-01-15 00:00:00""",109.982,109.789,110.011,109.897,155661
"""USDJPY""","""2020-01-14 00:00:00""",109.944,109.852,110.214,109.982,186928


## Step 4: Write Polars to Parquet

In [7]:
# Define the output directory
output_dir = "../../../data/finance"

# Make sure the directory exists so the write below does not fail on a fresh
# checkout; exist_ok=True keeps re-runs of this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# Write the processed DataFrame to a Parquet file
df.write_parquet(f'{output_dir}/historical_fx_quotes.parquet')

## Step 5: Read Parquet (Validate)

In [8]:
# Validate the output: scan the Parquet file written in the previous step and
# collect only the leading rows (head() with its default row count) for display
pl.scan_parquet(f'{output_dir}/historical_fx_quotes.parquet').head().collect()

pair,date,open,low,high,close,volume
str,str,f64,f64,f64,f64,i64
"""USDEUR""","""2025-01-08 00:00:00""",0.9666,0.9652,0.96945,0.96913,25230
"""USDEUR""","""2025-01-07 00:00:00""",0.9623,0.9586,0.9668,0.9666,82064
"""USDEUR""","""2025-01-06 00:00:00""",0.9697,0.9585,0.9711,0.9622,0
"""USDEUR""","""2025-01-05 00:00:00""",0.9702,0.9701,0.9702,0.9702,0
"""USDEUR""","""2025-01-03 00:00:00""",0.9742,0.9698,0.9743,0.9698,68388
