## Step 1: Import Libraries

In [1]:
%run ../make_clean_names.py

In [2]:
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import logging

import polars as pl
from datetime import datetime, timedelta
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def create_session():
    session = requests.Session()
    retries = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504]
    )
    session.mount('https://', HTTPAdapter(max_retries=retries, pool_maxsize=10))
    return session

# Get API key from environment variables
FMP_API_KEY = os.getenv('FMP_API_KEY')
if not FMP_API_KEY:
    raise ValueError("FMP_API_KEY not found in environment variables")

## Step 2: Import Symbols

In [3]:
def load_symbols(file_path: str) -> List[str]:
    """Load symbols from a text file"""
    try:
        with open(file_path, 'r') as f:
            symbols = [line.strip() for line in f if line.strip()]
        print(f"Loaded {len(symbols)} symbols from {file_path}")
        return symbols
    except Exception as e:
        print(f"Error loading symbols: {str(e)}")
        return []

symbols_file = '../tickers.txt'
symbols = load_symbols(symbols_file)

if symbols:
    print("Symbols:", symbols)
else:
    print("No symbols loaded.")

Loaded 60 symbols from ../tickers.txt
Symbols: ['AAPL', 'MSFT', 'GOOGL', 'GOOG', 'META', 'NVDA', 'AVGO', 'ORCL', 'CRM', 'ACN', 'ADBE', 'CSCO', 'INTC', 'NFLX', 'DIS', 'CMCSA', 'VZ', 'T', 'AMZN', 'TSLA', 'HD', 'MCD', 'NKE', 'SBUX', 'TGT', 'LOW', 'WMT', 'PG', 'KO', 'PEP', 'COST', 'BRK-B', 'JPM', 'BAC', 'WFC', 'GS', 'MS', 'BLK', 'UNH', 'JNJ', 'PFE', 'ABBV', 'MRK', 'LLY', 'CAT', 'BA', 'HON', 'UPS', 'RTX', 'GE', 'XOM', 'CVX', 'COP', 'SLB', 'LIN', 'APD', 'ECL', 'PLD', 'AMT', 'CCI']


## Step 3: Extract Data from FMP into Polars

In [4]:
def fetch_company_profile(symbol: str, api_key: str, session: requests.Session) -> Dict:
    """Fetch company profile data from FMP API"""
    url = f"https://financialmodelingprep.com/api/v3/profile/{symbol}"
    params = {"apikey": api_key}
    
    try:
        response = session.get(url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        logger.error(f"Error fetching {symbol}: {str(e)}")
        return None

def fetch_all_profiles(symbols: List[str], api_key: str) -> List[Dict]:
    session = create_session()
    
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(fetch_company_profile, symbol, api_key, session)
            for symbol in symbols
        ]
        results = [
            f.result() for f in concurrent.futures.as_completed(futures)
        ]
    
    return [r[0] for r in results if r and isinstance(r, list)]

# Execute fetching
company_profiles = fetch_all_profiles(symbols, FMP_API_KEY)

# Convert to Polars DataFrame more efficiently
if company_profiles:
    df = pl.DataFrame(company_profiles)

## Step 4: Clean Column Names

In [5]:
df = make_clean_names(df)

## Step 5: Write Polars to Parquet

In [6]:
output_dir = "../../../data/finance"

# Write DataFrame to Parquet
df.write_parquet(f'{output_dir}/company_profile.parquet')

## Step 6: Read Parquet (Validate)

In [7]:
pl.scan_parquet(f'{output_dir}/company_profile.parquet').head().collect()

symbol,price,beta,vol_avg,mkt_cap,last_div,range,changes,company_name,currency,cik,isin,cusip,exchange,exchange_short_name,industry,website,description,ceo,sector,country,full_time_employees,phone,address,city,state,zip,dcf_diff,dcf,image,ipo_date,default_image,is_etf,is_actively_trading,is_adr,is_fund
str,f64,f64,i64,i64,f64,str,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,f64,str,str,bool,bool,bool,bool,bool
"""CRM""",324.1379,1.288,6005034,310199970300,1.6,"""212.0-369.0""",4.1379,"""Salesforce, Inc.""","""USD""","""0001108524""","""US79466L3024""","""79466L302""","""New York Stock Exchange""","""NYSE""","""Software - Application""","""https://www.salesforce.com""","""Salesforce, Inc. provides cust…","""Mr. Marc R. Benioff""","""Technology""","""US""","""72682""","""415 901 7000""","""Salesforce Tower""","""San Francisco""","""CA""","""94105""",131.09183,188.908171,"""https://images.financialmodeli…","""2004-06-23""",False,False,True,False,False
"""ADBE""",428.16,1.299,3672226,186378048000,0.0,"""403.75-638.25""",1.23,"""Adobe Inc.""","""USD""","""0000796343""","""US00724F1012""","""00724F101""","""NASDAQ Global Select""","""NASDAQ""","""Software - Infrastructure""","""https://www.adobe.com""","""Adobe Inc. operates as a diver…","""Mr. Shantanu Narayen""","""Technology""","""US""","""29945""","""408 536 6000""","""345 Park Avenue""","""San Jose""","""CA""","""95110-2704""",-47.48005,474.410051,"""https://images.financialmodeli…","""1986-08-13""",False,False,True,False,False
"""CSCO""",60.49,0.819,19021724,240917152400,1.6,"""44.5-60.95""",0.67,"""Cisco Systems, Inc.""","""USD""","""0000858877""","""US17275R1023""","""17275R102""","""NASDAQ Global Select""","""NASDAQ""","""Communication Equipment""","""https://www.cisco.com""","""Cisco Systems, Inc. designs, m…","""Mr. Charles H. Robbins""","""Technology""","""US""","""90400""","""(408) 526-4000""","""170 West Tasman Drive""","""San Jose""","""CA""","""95134-1706""",0.15665,59.663353,"""https://images.financialmodeli…","""1990-02-16""",False,False,True,False,False
"""INTC""",21.125,1.026,69633322,91112125000,0.375,"""18.51-50.3""",1.455,"""Intel Corporation""","""USD""","""0000050863""","""US4581401001""","""458140100""","""NASDAQ Global Select""","""NASDAQ""","""Semiconductors""","""https://www.intel.com""","""Intel Corporation engages in t…","""Ms. Michelle C. Johnston Holth…","""Technology""","""US""","""124100""","""408 765 8080""","""2200 Mission College Boulevard""","""Santa Clara""","""CA""","""95054-1549""",19.43839,0.23161,"""https://images.financialmodeli…","""1980-03-17""",False,False,True,False,False
"""NFLX""",855.5499,1.25,3257490,365711649154,0.0,"""476.06-941.75""",13.1799,"""Netflix, Inc.""","""USD""","""0001065280""","""US64110L1061""","""64110L106""","""NASDAQ Global Select""","""NASDAQ""","""Entertainment""","""https://www.netflix.com""","""Netflix, Inc. provides enterta…","""Mr. Theodore A. Sarandos""","""Communication Services""","""US""","""13000""","""408 540 3700""","""100 Winchester Circle""","""Los Gatos""","""CA""","""95032""",791.36052,51.009481,"""https://images.financialmodeli…","""2002-05-23""",False,False,True,False,False
