# Financial Statements
- **Reference**: https://site.financialmodelingprep.com/developer/docs#income-statements-financial-statements

## Step 1: Import Libraries

In [1]:
%run ../make_clean_names.py

In [2]:
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import logging

import polars as pl
from datetime import datetime, timedelta
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def create_session():
    session = requests.Session()
    retries = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504]
    )
    session.mount('https://', HTTPAdapter(max_retries=retries, pool_maxsize=10))
    return session

# Get API key from environment variables
FMP_API_KEY = os.getenv('FMP_API_KEY')
if not FMP_API_KEY:
    raise ValueError("FMP_API_KEY not found in environment variables")

## Step 2: Import Symbols

In [3]:
def load_symbols(file_path: str) -> List[str]:
    """Load symbols from a text file"""
    try:
        with open(file_path, 'r') as f:
            symbols = [line.strip() for line in f if line.strip()]
        print(f"Loaded {len(symbols)} symbols from {file_path}")
        return symbols
    except Exception as e:
        print(f"Error loading symbols: {str(e)}")
        return []

symbols_file = '../tickers.txt'
symbols = load_symbols(symbols_file)

if symbols:
    print("Symbols:", symbols)
else:
    print("No symbols loaded.")

Loaded 61 symbols from ../tickers.txt
Symbols: ['AAPL', 'MSFT', 'GOOGL', 'GOOG', 'META', 'NVDA', 'AVGO', 'ORCL', 'CRM', 'ACN', 'ADBE', 'CSCO', 'INTC', 'NFLX', 'DIS', 'CMCSA', 'VZ', 'T', 'AMZN', 'TSLA', 'HD', 'MCD', 'NKE', 'SBUX', 'TGT', 'LOW', 'WMT', 'PG', 'KO', 'PEP', 'COST', 'BRK-B', 'JPM', 'BAC', 'WFC', 'GS', 'MS', 'BLK', 'UNH', 'JNJ', 'PFE', 'ABBV', 'MRK', 'LLY', 'CAT', 'BA', 'HON', 'UPS', 'RTX', 'GE', 'XOM', 'CVX', 'COP', 'SLB', 'LIN', 'APD', 'ECL', 'PLD', 'AMT', 'CCI', 'OSW']


## **Income Statement**

In [4]:
def fetch_data(symbol: str, api_key: str, session: requests.Session) -> Dict:
    """Fetch company profile data from FMP API"""
    url = f"https://financialmodelingprep.com/api/v3/income-statement/{symbol}"
    params = {"apikey": api_key,
              "period": "annual"}

    try:
        response = session.get(url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        logger.error(f"Error fetching {symbol}: {str(e)}")
        return None

def fetch_all_data(symbols: List[str], api_key: str) -> List[Dict]:
    session = create_session()
    
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(fetch_data, symbol, api_key, session)
            for symbol in symbols
        ]
        results = [
            f.result() for f in concurrent.futures.as_completed(futures)
        ]
    
    return [r[0] for r in results if r and isinstance(r, list)]

# Execute fetching
data = fetch_all_data(symbols, FMP_API_KEY)

# Convert to Polars DataFrame more efficiently
if data:
    df = pl.DataFrame(data)

In [5]:
df = make_clean_names(df)

In [6]:
output_dir = "../../../data/finance"

# Write DataFrame to Parquet
df.write_parquet(f'{output_dir}/company_income_statement.parquet')

In [7]:
pl.scan_parquet(f'{output_dir}/company_income_statement.parquet').head().collect()

date,symbol,reported_currency,cik,filling_date,accepted_date,calendar_year,period,revenue,cost_of_revenue,gross_profit,gross_profit_ratio,research_and_development_expenses,general_and_administrative_expenses,selling_and_marketing_expenses,selling_general_and_administrative_expenses,other_expenses,operating_expenses,cost_and_expenses,interest_income,interest_expense,depreciation_and_amortization,ebitda,ebitdaratio,operating_income,operating_income_ratio,total_other_income_expenses_net,income_before_tax,income_before_tax_ratio,income_tax_expense,net_income,net_income_ratio,eps,epsdiluted,weighted_average_shs_out,weighted_average_shs_out_dil,link,final_link
str,str,str,str,str,str,str,str,i64,i64,i64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,i64,f64,i64,i64,f64,i64,i64,f64,f64,f64,i64,i64,str,str
"""2024-11-03""","""AVGO""","""USD""","""0001730168""","""2024-12-20""","""2024-12-20 17:26:46""","""2024""","""FY""",51574000000,19065000000,32509000000,0.630337,9310000000,0,0,4959000000,4777000000,19046000000,38111000000,304000000,3796000000,10010000000,23722000000,0.45996,13463000000,0.261042,-3547000000,9916000000,0.192267,3748000000,5895000000,0.114302,1.33,1.29,4624000000,4778000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2023-12-31""","""GOOG""","""USD""","""0001652044""","""2024-01-31""","""2024-01-30 21:43:43""","""2023""","""FY""",307394000000,133332000000,174062000000,0.56625,45427000000,16425000000,27917000000,44342000000,0,89769000000,223101000000,3865000000,308000000,11946000000,97971000000,0.318715,84293000000,0.274218,1424000000,85717000000,0.278851,11922000000,73795000000,0.240066,5.84,5.8,12630000000,12722000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2024-06-30""","""MSFT""","""USD""","""0000789019""","""2024-07-30""","""2024-07-30 16:06:22""","""2024""","""FY""",245122000000,74114000000,171008000000,0.697644,29510000000,7609000000,24456000000,32065000000,0,61575000000,135689000000,3157000000,2935000000,22287000000,133009000000,0.542624,109433000000,0.446443,-1646000000,107787000000,0.439728,19651000000,88136000000,0.35956,11.86,11.8,7431000000,7469000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2024-09-28""","""AAPL""","""USD""","""0000320193""","""2024-11-01""","""2024-11-01 06:01:36""","""2024""","""FY""",391035000000,210352000000,180683000000,0.462063,31370000000,0,0,26097000000,0,57467000000,267819000000,0,0,11445000000,134661000000,0.344371,123216000000,0.315102,269000000,123485000000,0.31579,29749000000,93736000000,0.239713,6.11,6.08,15343783000,15408095000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2024-08-31""","""ACN""","""USD""","""0001467373""","""2024-10-10""","""2024-10-10 06:43:59""","""2024""","""FY""",64896464000,43734147000,21162317000,0.326094,1150430000,4281316000,6846714000,11128030000,-711990000,11566470000,55300617000,272256000,58969000,1430042000,11188334000,0.172403,9595847000,0.147864,103476000,9699323000,0.149458,2280126000,7264787000,0.111944,11.58,11.44,627852613,635940044,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"


## **Balance Sheet**

In [8]:
def fetch_data(symbol: str, api_key: str, session: requests.Session) -> Dict:
    """Fetch company profile data from FMP API"""
    url = f"https://financialmodelingprep.com/api/v3/balance-sheet-statement/{symbol}"
    params = {"apikey": api_key,
              "period": "annual"}

    try:
        response = session.get(url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        logger.error(f"Error fetching {symbol}: {str(e)}")
        return None

def fetch_all_data(symbols: List[str], api_key: str) -> List[Dict]:
    session = create_session()
    
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(fetch_data, symbol, api_key, session)
            for symbol in symbols
        ]
        results = [
            f.result() for f in concurrent.futures.as_completed(futures)
        ]
    
    return [r[0] for r in results if r and isinstance(r, list)]

# Execute fetching
data = fetch_all_data(symbols, FMP_API_KEY)

# Convert to Polars DataFrame more efficiently
if data:
    df = pl.DataFrame(data)

In [9]:
df = make_clean_names(df)

In [10]:
output_dir = "../../../data/finance"

# Write DataFrame to Parquet
df.write_parquet(f'{output_dir}/company_balance_sheet.parquet')

In [11]:
pl.scan_parquet(f'{output_dir}/company_balance_sheet.parquet').head().collect()

date,symbol,reported_currency,cik,filling_date,accepted_date,calendar_year,period,cash_and_cash_equivalents,short_term_investments,cash_and_short_term_investments,net_receivables,inventory,other_current_assets,total_current_assets,property_plant_equipment_net,goodwill,intangible_assets,goodwill_and_intangible_assets,long_term_investments,tax_assets,other_non_current_assets,total_non_current_assets,other_assets,total_assets,account_payables,short_term_debt,tax_payables,deferred_revenue,other_current_liabilities,total_current_liabilities,long_term_debt,deferred_revenue_non_current,deferred_tax_liabilities_non_current,other_non_current_liabilities,total_non_current_liabilities,other_liabilities,capital_lease_obligations,total_liabilities,preferred_stock,common_stock,retained_earnings,accumulated_other_comprehensive_income_loss,othertotal_stockholders_equity,total_stockholders_equity,total_equity,total_liabilities_and_stockholders_equity,minority_interest,total_liabilities_and_total_equity,total_investments,total_debt,net_debt,link,final_link
str,str,str,str,str,str,str,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,str
"""2023-12-31""","""GOOGL""","""USD""","""0001652044""","""2024-01-31""","""2024-01-30 21:43:43""","""2023""","""FY""",24048000000,86868000000,110916000000,47964000000,0,12650000000,171530000000,148436000000,29198000000,0,29198000000,31008000000,12169000000,10051000000,230862000000,0,402392000000,7493000000,2791000000,2748000000,4137000000,64645000000,81814000000,25350000000,911000000,485000000,10453000000,37199000000,0,16634000000,119013000000,0,76534000000,211247000000,-4402000000,0,283379000000,283379000000,402392000000,0,402392000000,117876000000,28504000000,4456000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2023-12-31""","""GOOG""","""USD""","""0001652044""","""2024-01-31""","""2024-01-30 21:43:43""","""2023""","""FY""",24048000000,86868000000,110916000000,47964000000,0,12650000000,171530000000,148436000000,29198000000,0,29198000000,31008000000,12169000000,10051000000,230862000000,0,402392000000,7493000000,2791000000,2748000000,4137000000,64645000000,81814000000,25350000000,911000000,485000000,10453000000,37199000000,0,16634000000,119013000000,0,76534000000,211247000000,-4402000000,0,283379000000,283379000000,402392000000,0,402392000000,117876000000,28504000000,4456000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2023-12-31""","""META""","""USD""","""0001326801""","""2024-02-02""","""2024-02-01 19:39:02""","""2023""","""FY""",41862000000,23541000000,65403000000,16169000000,0,3793000000,85365000000,109881000000,20654000000,788000000,21442000000,6141000000,0,6794000000,144258000000,0,229623000000,4849000000,1623000000,3655000000,0,21833000000,31960000000,35611000000,0,0,8884000000,44495000000,0,18849000000,76455000000,0,0,82070000000,-2155000000,73253000000,153168000000,153168000000,229623000000,0,229623000000,29682000000,37234000000,-4628000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2024-06-30""","""MSFT""","""USD""","""0000789019""","""2024-07-30""","""2024-07-30 16:06:22""","""2024""","""FY""",18315000000,57216000000,75531000000,56924000000,1246000000,26033000000,159734000000,154552000000,119220000000,27597000000,146817000000,14600000000,0,36460000000,352429000000,0,512163000000,21996000000,14871000000,5017000000,57582000000,25820000000,125286000000,82981000000,2602000000,2618000000,30199000000,118400000000,0,46222000000,243686000000,0,100923000000,173144000000,-5590000000,0,268477000000,268477000000,512163000000,0,512163000000,71816000000,97852000000,79537000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2024-11-03""","""AVGO""","""USD""","""0001730168""","""2024-12-20""","""2024-12-20 17:26:46""","""2024""","""FY""",9348000000,0,9348000000,4416000000,1760000000,4071000000,19595000000,2521000000,97873000000,40583000000,138456000000,0,0,5073000000,146050000000,0,165645000000,1662000000,1271000000,0,0,13764000000,16697000000,66295000000,0,0,14975000000,81270000000,0,0,97967000000,0,5000000,0,207000000,67466000000,67678000000,67678000000,165645000000,0,165645000000,0,67566000000,58218000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"


## **Cash Flow Statement**

In [12]:
def fetch_data(symbol: str, api_key: str, session: requests.Session) -> Dict:
    """Fetch company profile data from FMP API"""
    url = f"https://financialmodelingprep.com/api/v3/cash-flow-statement/{symbol}"
    params = {"apikey": api_key,
              "period": "annual"}

    try:
        response = session.get(url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        logger.error(f"Error fetching {symbol}: {str(e)}")
        return None

def fetch_all_data(symbols: List[str], api_key: str) -> List[Dict]:
    session = create_session()
    
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(fetch_data, symbol, api_key, session)
            for symbol in symbols
        ]
        results = [
            f.result() for f in concurrent.futures.as_completed(futures)
        ]
    
    return [r[0] for r in results if r and isinstance(r, list)]

# Execute fetching
data = fetch_all_data(symbols, FMP_API_KEY)

# Convert to Polars DataFrame more efficiently
if data:
    df = pl.DataFrame(data)

ERROR:__main__:Error fetching SLB: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/cash-flow-statement/SLB?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI&period=annual (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching OSW: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/cash-flow-statement/OSW?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI&period=annual (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching AMT: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/cash-flow-statement/AMT?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI&period=annual (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching ECL: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/cash-flow-statement/ECL?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI&period=annual (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching PLD: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/cash-flow-statement/PLD?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI&period=annual (Caused by ResponseError('too many 429 error responses'))


ERROR:__main__:Error fetching CCI: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Max retries exceeded with url: /api/v3/cash-flow-statement/CCI?apikey=PBkrv7HEcbYqq5BmXIRgFjfxzbLzuKmI&period=annual (Caused by ResponseError('too many 429 error responses'))


In [13]:
df = make_clean_names(df)

In [14]:
output_dir = "../../../data/finance"

# Write DataFrame to Parquet
df.write_parquet(f'{output_dir}/company_cash_flow_statement.parquet')

In [15]:
pl.scan_parquet(f'{output_dir}/company_cash_flow_statement.parquet').head().collect()

date,symbol,reported_currency,cik,filling_date,accepted_date,calendar_year,period,net_income,depreciation_and_amortization,deferred_income_tax,stock_based_compensation,change_in_working_capital,accounts_receivables,inventory,accounts_payables,other_working_capital,other_non_cash_items,net_cash_provided_by_operating_activities,investments_in_property_plant_and_equipment,acquisitions_net,purchases_of_investments,sales_maturities_of_investments,other_investing_activites,net_cash_used_for_investing_activites,debt_repayment,common_stock_issued,common_stock_repurchased,dividends_paid,other_financing_activites,net_cash_used_provided_by_financing_activities,effect_of_forex_changes_on_cash,net_change_in_cash,cash_at_end_of_period,cash_at_beginning_of_period,operating_cash_flow,capital_expenditure,free_cash_flow,link,final_link
str,str,str,str,str,str,str,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,str
"""2024-01-31""","""CRM""","""USD""","""0001108524""","""2024-03-06""","""2024-03-06 16:32:48""","""2024""","""FY""",4136000000,5884000000,0,2787000000,-2850000000,-659000000,0,-478000000,-1713000000,277000000,10234000000,-736000000,-82000000,-4257000000,3748000000,0,-1327000000,-1811000000,0,-7620000000,0,1954000000,-7477000000,26000000,1456000000,8472000000,7016000000,10234000000,-736000000,9498000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2024-09-28""","""AAPL""","""USD""","""0000320193""","""2024-11-01""","""2024-11-01 06:01:36""","""2024""","""FY""",93736000000,11445000000,0,11688000000,3651000000,-5144000000,-1046000000,6020000000,3821000000,-2266000000,118254000000,-9447000000,0,-48656000000,62346000000,-1308000000,2935000000,-5998000000,0,-94949000000,-15234000000,-5802000000,-121983000000,0,-794000000,29943000000,30737000000,118254000000,-9447000000,108807000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2023-12-31""","""GOOG""","""USD""","""0001652044""","""2024-01-31""","""2024-01-30 21:43:43""","""2023""","""FY""",73795000000,11946000000,-7763000000,22460000000,-3845000000,-7833000000,2276000000,664000000,1048000000,5153000000,101746000000,-32251000000,-495000000,-80885000000,87619000000,-1051000000,-27063000000,-760000000,0,-61504000000,0,-9829000000,-72093000000,-421000000,2169000000,24048000000,21879000000,101746000000,-32251000000,69495000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2024-05-31""","""ORCL""","""USD""","""0001341439""","""2024-06-20""","""2024-06-20 16:21:35""","""2024""","""FY""",10467000000,6139000000,-2139000000,3974000000,-488000000,-965000000,0,-594000000,1071000000,720000000,18673000000,-6866000000,-63000000,-1003000000,572000000,0,-7360000000,-3667000000,742000000,-3242000000,-4391000000,4000000,-10554000000,-70000000,689000000,10454000000,9765000000,18673000000,-6866000000,11807000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
"""2024-01-28""","""NVDA""","""USD""","""0001045810""","""2024-02-21""","""2024-02-21 16:36:57""","""2024""","""FY""",29760000000,1508000000,-2489000000,3549000000,-3722000000,-6172000000,-98000000,1531000000,1017000000,-516000000,28090000000,-1069000000,-83000000,-18211000000,9782000000,-985000000,-10566000000,-1250000000,0,-9533000000,-395000000,-2455000000,-13633000000,0,3891000000,7280000000,3389000000,28090000000,-1069000000,27021000000,"""https://www.sec.gov/Archives/e…","""https://www.sec.gov/Archives/e…"
