## Step 1: Import Libraries

In [1]:
# Execute the helper script so that the names it defines become available in this
# notebook. Step 3 calls make_clean_names, which is not defined anywhere else in the
# notebook — presumably it comes from this script (TODO confirm).
%run ../make_clean_names.py

In [2]:
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import logging

import polars as pl
from typing import List, Dict, Any
from dotenv import load_dotenv
import os

# Load environment variables (python-dotenv); this has to run before FMP_API_KEY
# is read from the environment further down in this cell.
load_dotenv()

# Emit INFO-level records so the per-page progress messages are visible in the cell output.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by the functions defined in this notebook.
logger = logging.getLogger(__name__)

def create_session(
    total_retries: int = 3,
    backoff_factor: float = 1,
    status_forcelist=(429, 500, 502, 503, 504),
    pool_maxsize: int = 10,
) -> requests.Session:
    """
    Build a requests session that retries failed HTTPS requests.

    Called with no arguments, the session is configured exactly as before:
    3 retries, a backoff factor of 1, retry on 429/500/502/503/504 and a
    connection pool of 10.

    Args:
        total_retries: maximum number of retries per request
        backoff_factor: factor urllib3 uses to space out successive retries
        status_forcelist: HTTP status codes that trigger a retry
        pool_maxsize: maximum number of pooled connections for the adapter

    Returns:
        A ``requests.Session`` with the retrying adapter mounted for
        ``https://`` URLs.
    """
    session = requests.Session()
    retries = Retry(
        total=total_retries,
        backoff_factor=backoff_factor,
        status_forcelist=list(status_forcelist)
    )
    # Only the https:// prefix gets the retrying adapter; other schemes are left
    # with whatever adapter the session already has.
    session.mount('https://', HTTPAdapter(max_retries=retries, pool_maxsize=pool_maxsize))
    return session

# Read the FMP API key from the process environment and stop immediately when it
# is missing or empty, so later requests are never sent without credentials.
FMP_API_KEY = os.environ.get("FMP_API_KEY")
if FMP_API_KEY is None or FMP_API_KEY == "":
    raise ValueError("FMP_API_KEY not found in environment variables")

## Step 2: Extract Data from FMP into Polars

In [3]:
def fetch_ma_data(api_key: str, session: requests.Session, start_page: int = 0, end_page: int = 5) -> List[Dict]:
    """
    Fetch historical M&A data from the FMP API, one page at a time.

    Pages are requested in order from ``start_page`` to ``end_page``
    (inclusive). Fetching is best-effort: a page that fails (request error,
    HTTP error status, invalid JSON, or a payload that is not a JSON array)
    is logged and skipped, and the remaining pages are still requested.

    Args:
        api_key: FMP API key
        session: requests session used to issue the GET requests
        start_page: starting page number (default 0)
        end_page: ending page number, inclusive (default 5)

    Returns:
        The records of every successfully fetched page, concatenated in page
        order. The list is empty when no page could be fetched.
    """
    all_results = []
    url = "https://financialmodelingprep.com/api/v4/mergers-acquisitions-rss-feed"

    for page in range(start_page, end_page + 1):
        params = {
            "page": page,
            "apikey": api_key
        }

        try:
            logger.info("Fetching page %s", page)
            response = session.get(url, params=params, timeout=10)
            response.raise_for_status()
            page_data = response.json()
        except Exception as e:
            # Deliberately broad: one bad page must not abort the whole run.
            # The exception text can contain the full request URL, including the
            # apikey query parameter, so the key is masked before it is logged.
            message = str(e)
            if api_key:
                message = message.replace(api_key, "***REDACTED***")
            logger.error("Error fetching page %s: %s", page, message)
            continue

        # Only a JSON array is a page of records. Extending with any other
        # payload (e.g. an error object) would silently add its keys as rows.
        if not isinstance(page_data, list):
            logger.error(
                "Error fetching page %s: unexpected payload type %s",
                page,
                type(page_data).__name__,
            )
            continue

        all_results.extend(page_data)

    return all_results

session = create_session()

# Fetch the default page range (pages 0-5) from the M&A feed
ma_data = fetch_ma_data(FMP_API_KEY, session)

# Fail here, with a clear message, when nothing was fetched. Otherwise `df` would be
# left undefined (or stale from an earlier run) and the next cell would break instead.
if not ma_data:
    raise RuntimeError(
        "No M&A records were fetched; check the log output for request errors "
        "(e.g. rate limiting) before continuing"
    )

# Convert the fetched records to a Polars DataFrame
df = pl.DataFrame(ma_data)

INFO:__main__:Fetching page 0


ERROR:__main__:Error fetching page 0: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v4/mergers-acquisitions-rss-feed?page=0&apikey=***REDACTED*** (Caused by ResponseError('too many 429 error responses'))


INFO:__main__:Fetching page 1


INFO:__main__:Fetching page 2


INFO:__main__:Fetching page 3


INFO:__main__:Fetching page 4


INFO:__main__:Fetching page 5


## Step 3: Clean Column Names

In [4]:
# Clean the column names of the frame built in Step 2. make_clean_names is not
# defined in this notebook; presumably it is provided by the %run of
# ../make_clean_names.py in the first cell — TODO confirm.
# NOTE(review): this rebinds `df`, so re-running the cell feeds the helper its own
# previous output, and the cell needs `df` to have been created in Step 2.
df = make_clean_names(df)

## Step 4: Write Polars to Parquet

In [5]:
# Define the output directory (relative to the notebook's working directory)
output_dir = "../../../data/finance"

# Make sure the directory exists so the write below does not fail on a fresh checkout
os.makedirs(output_dir, exist_ok=True)

# Write the processed DataFrame to a Parquet file
df.write_parquet(f'{output_dir}/historical_ma_transactions.parquet')

## Step 5: Read Parquet (Validate)

In [6]:
# Validate the output by reading the Parquet file back and displaying the first few rows.
# The file is scanned lazily and head() is applied before collect(), so the preview is
# requested as part of the lazy query rather than by slicing an already-loaded frame.
pl.scan_parquet(f'{output_dir}/historical_ma_transactions.parquet').head().collect()

company_name,cik,symbol,targeted_company_name,targeted_cik,targeted_symbol,transaction_date,acceptance_time,url
str,str,str,str,str,str,str,str,str
"""SWIFTMERGE ACQUISITION CORP.""","""0001845123""","""IVCPU""","""SPAC AT""",,"""ANNA""","""2024-07-03""","""2024-07-03 21:26:35""","""https://www.sec.gov/Archives/e…"
"""SWIFTMERGE ACQUISITION CORP.""","""0001845123""","""IVCP""","""SPAC AT""",,"""ANNA""","""2024-07-03""","""2024-07-03 21:26:35""","""https://www.sec.gov/Archives/e…"
"""SWIFTMERGE ACQUISITION CORP.""","""0001845123""","""IVCPW""","""SPAC AT""",,"""ANNA""","""2024-07-03""","""2024-07-03 21:26:35""","""https://www.sec.gov/Archives/e…"
"""UNITED BANKSHARES INC/WV""","""0000729986""","""UBSI""","""PIEDMONT BANK""",,,"""2024-07-03""","""2024-07-03 14:12:13""","""https://www.sec.gov/Archives/e…"
"""ACRI CAPITAL MERGER SUB I INC.""","""0002013807""","""FOXX""","""ACRI CAPITAL ACQUISITION CORPO…","""0001914023""","""ACACU""","""2024-06-28""","""2024-06-28 17:29:32""","""https://www.sec.gov/Archives/e…"
