## Step 1: Import Libraries

In [1]:
%run ../make_clean_names.py

In [2]:
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import logging

import polars as pl
from datetime import datetime, timedelta
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def create_session():
    session = requests.Session()
    retries = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504]
    )
    session.mount('https://', HTTPAdapter(max_retries=retries, pool_maxsize=10))
    return session

# Get API key from environment variables
FMP_API_KEY = os.getenv('FMP_API_KEY')
if not FMP_API_KEY:
    raise ValueError("FMP_API_KEY not found in environment variables")

## Step 2: Import Symbols

In [3]:
def load_symbols(file_path: str) -> List[str]:
    """Load symbols from a text file"""
    try:
        with open(file_path, 'r') as f:
            symbols = [line.strip() for line in f if line.strip()]
        print(f"Loaded {len(symbols)} symbols from {file_path}")
        return symbols
    except Exception as e:
        print(f"Error loading symbols: {str(e)}")
        return []

symbols_file = '../tickers.txt'
symbols = load_symbols(symbols_file)

if symbols:
    print("Symbols:", symbols)
else:
    print("No symbols loaded.")

Loaded 60 symbols from ../tickers.txt
Symbols: ['AAPL', 'MSFT', 'GOOGL', 'GOOG', 'META', 'NVDA', 'AVGO', 'ORCL', 'CRM', 'ACN', 'ADBE', 'CSCO', 'INTC', 'NFLX', 'DIS', 'CMCSA', 'VZ', 'T', 'AMZN', 'TSLA', 'HD', 'MCD', 'NKE', 'SBUX', 'TGT', 'LOW', 'WMT', 'PG', 'KO', 'PEP', 'COST', 'BRK-B', 'JPM', 'BAC', 'WFC', 'GS', 'MS', 'BLK', 'UNH', 'JNJ', 'PFE', 'ABBV', 'MRK', 'LLY', 'CAT', 'BA', 'HON', 'UPS', 'RTX', 'GE', 'XOM', 'CVX', 'COP', 'SLB', 'LIN', 'APD', 'ECL', 'PLD', 'AMT', 'CCI']


## Step 3: Extract Data from FMP into Polars

In [4]:
def fetch_company_profile(symbol: str, api_key: str, session: requests.Session) -> Dict:
    """Fetch company profile data from FMP API"""
    url = f"https://financialmodelingprep.com/api/v3/profile/{symbol}"
    params = {"apikey": api_key}
    
    try:
        response = session.get(url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        logger.error(f"Error fetching {symbol}: {str(e)}")
        return None

def fetch_all_profiles(symbols: List[str], api_key: str) -> List[Dict]:
    session = create_session()
    
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(fetch_company_profile, symbol, api_key, session)
            for symbol in symbols
        ]
        results = [
            f.result() for f in concurrent.futures.as_completed(futures)
        ]
    
    return [r[0] for r in results if r and isinstance(r, list)]

# Execute fetching
company_profiles = fetch_all_profiles(symbols, FMP_API_KEY)

# Convert to Polars DataFrame more efficiently
if company_profiles:
    df = pl.DataFrame(company_profiles)

ERROR:__main__:Error fetching AAPL: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/AAPL?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching GOOG: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/GOOG?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching ORCL: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/ORCL?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching ACN: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/ACN?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching META: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/META?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching MSFT: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/MSFT?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching NVDA: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/NVDA?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching AVGO: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/AVGO?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching CRM: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/CRM?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching GOOGL: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/GOOGL?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching INTC: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/INTC?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching NFLX: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/NFLX?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching ADBE: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/ADBE?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching CSCO: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/CSCO?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching DIS: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/DIS?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching VZ: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/VZ?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching CMCSA: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/CMCSA?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching T: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/T?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching AMZN: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/AMZN?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching TSLA: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/TSLA?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching NKE: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/NKE?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching TGT: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/TGT?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching HD: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/profile/HD?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI (Caused by ResponseError('too many 429 error responses'))


## Step 4: Clean Column Names

In [5]:
df = make_clean_names(df)

## Step 5: Write Polars to Parquet

In [6]:
output_dir = "../../../data/finance"

# Write DataFrame to Parquet
df.write_parquet(f'{output_dir}/company_profile.parquet')

## Step 6: Read Parquet (Validate)

In [7]:
pl.scan_parquet(f'{output_dir}/company_profile.parquet').head().collect()

symbol,price,beta,vol_avg,mkt_cap,last_div,range,changes,company_name,currency,cik,isin,cusip,exchange,exchange_short_name,industry,website,description,ceo,sector,country,full_time_employees,phone,address,city,state,zip,dcf_diff,dcf,image,ipo_date,default_image,is_etf,is_actively_trading,is_adr,is_fund
str,f64,f64,i64,i64,f64,str,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,f64,str,str,bool,bool,bool,bool,bool
"""COST""",919.75,0.789,1912049,408276105250,4.5,"""675.96-1008.25""",-3.75,"""Costco Wholesale Corporation""","""USD""","""0000909832""","""US22160K1051""","""22160K105""","""NASDAQ Global Select""","""NASDAQ""","""Discount Stores""","""https://www.costco.com""","""Costco Wholesale Corporation, …","""Mr. Ron M. Vachris""","""Consumer Defensive""","""US""","""333000""","""425 313 8100""","""999 Lake Drive""","""Issaquah""","""WA""","""98027""",470.40803,449.341969,"""https://images.financialmodeli…","""1986-07-09""",False,False,True,False,False
"""BRK-B""",462.81,0.871,4006022,998414907743,0.0,"""358.3-491.67""",4.3,"""Berkshire Hathaway Inc.""","""USD""","""0001067983""","""US0846707026""","""084670702""","""New York Stock Exchange""","""NYSE""","""Insurance - Diversified""","""https://www.berkshirehathaway.…","""Berkshire Hathaway Inc., throu…","""Mr. Warren E. Buffett""","""Financial Services""","""US""","""396500""","""402 346 1400""","""3555 Farnam Street""","""Omaha""","""NE""","""68131""",255.2737,207.536301,"""https://images.financialmodeli…","""1996-05-09""",False,False,True,False,False
"""JPM""",254.27,1.091,8903742,715856501800,4.8,"""165.24-257.035""",1.92,"""JPMorgan Chase & Co.""","""USD""","""0000019617""","""US46625H1005""","""46625H100""","""New York Stock Exchange""","""NYSE""","""Banks - Diversified""","""https://www.jpmorganchase.com""","""JPMorgan Chase & Co. operates …","""Mr. James Dimon""","""Financial Services""","""US""","""316043""","""212 270 6000""","""383 Madison Avenue""","""New York""","""NY""","""10179""",72.85032,181.419675,"""https://images.financialmodeli…","""1980-03-17""",False,False,True,False,False
"""MCD""",279.74,0.735,3642327,200467278800,6.78,"""243.53-317.9""",-2.56,"""McDonald's Corporation""","""USD""","""0000063908""","""US5801351017""","""580135101""","""New York Stock Exchange""","""NYSE""","""Restaurants""","""https://corporate.mcdonalds.co…","""McDonald's Corporation operate…","""Mr. Christopher J. Kempczinski""","""Consumer Cyclical""","""US""","""100000""","""630 623 3000""","""110 North Carpenter Street""","""Chicago""","""IL""","""60607""",-4.76954,284.509539,"""https://images.financialmodeli…","""1965-04-21""",False,False,True,False,False
"""BAC""",46.64,1.325,34122449,357863123200,1.0,"""31.4-48.08""",-0.46,"""Bank of America Corporation""","""USD""","""0000070858""","""US0605051046""","""060505104""","""New York Stock Exchange""","""NYSE""","""Banks - Diversified""","""https://www.bankofamerica.com""","""Bank of America Corporation, t…","""Mr. Brian Thomas Moynihan""","""Financial Services""","""US""","""213000""","""704 386 5681""","""Bank of America Corporate Cent…","""Charlotte""","""NC""","""28255""",14.68456,31.955438,"""https://images.financialmodeli…","""1973-02-21""",False,False,True,False,False
