In [8]:
import requests
import pandas as pd
import numpy as np
import time
import os
from datetime import datetime
import math
import yfinance as yf
from datetime import datetime, timedelta
import pandas_ta as ta

# Helper Functions

In [9]:
def save_dataframe_to_csv(df: pd.DataFrame, ticker: str, filename: str = None) -> bool:
    """
    Saves a pandas DataFrame to a CSV file.

    Args:
        df (pd.DataFrame): The DataFrame to save.
        ticker (str): The stock ticker symbol, used for the default filename.
        filename (str, optional): The desired filename. If None, a default
                                  filename is generated (e.g., 'AAPL_combined_data_YYYY-MM-DD.csv').
                                  Defaults to None.

    Returns:
        bool: True if the file was saved successfully, False otherwise.
    """
    if not isinstance(df, pd.DataFrame) or df.empty:
        print("Error: Input is not a valid or non-empty DataFrame. Nothing to save.")
        return False

    if filename is None:
        # Create a dynamic default filename if one isn't provided.
        # This prevents overwriting previous files.
        timestamp = datetime.now().strftime('%Y-%m-%d')
        filename = f"{ticker}_combined_data_{timestamp}.csv"

    try:
        # Save the DataFrame to a CSV file.
        # The index (which is the date) is crucial, so we ensure it's saved.
        df.to_csv(filename, index=True)
        print(f"\nDataFrame successfully saved to '{filename}'")
        return True
    except IOError as e:
        # Handle specific file system errors
        print(f"\nAn I/O error occurred while saving the file: {e}")
        return False
    except Exception as e:
        # Handle any other unexpected errors
        print(f"\nAn unexpected error occurred: {e}")
        return False

# Streaming Historical Fundamental Data

## Used APIs

1.  `/api/v3/ratios/{ticker}`: This provides most of the profitability, per-share, and valuation ratios.
2.  `/api/v3/financial-growth/{ticker}`: This provides the year-over-year growth rates.

In [10]:
def get_comprehensive_historical_ratios(api_key: str, ticker: str, period: str = "quarter", limit: int = 15) -> pd.DataFrame | None:
    # ... (code from your prompt) - Omitted for brevity
    print(f"\n--- Fetching Comprehensive Historical Data for {ticker} ---")
    base_url = 'https://financialmodelingprep.com/api/v3'
    params = {'apikey': api_key, 'period': period, 'limit': limit}
    try:
        ratios_endpoint = f'/ratios/{ticker}'
        ratios_response = requests.get(f"{base_url}{ratios_endpoint}", params=params)
        ratios_response.raise_for_status()
        ratios_data = ratios_response.json()
        if not ratios_data or 'Error Message' in ratios_data: return None
        ratios_df = pd.DataFrame(ratios_data).set_index('date')

        growth_endpoint = f'/financial-growth/{ticker}'
        growth_response = requests.get(f"{base_url}{growth_endpoint}", params=params)
        growth_response.raise_for_status()
        growth_data = growth_response.json()
        if not growth_data or 'Error Message' in growth_data: return None
        growth_df = pd.DataFrame(growth_data).set_index('date')

        combined_df = ratios_df.join(growth_df, lsuffix='_ratio', rsuffix='_growth', how='inner')

        column_map = {
            'returnOnEquity': 'ROE', 'returnOnAssets': 'ROA', 'grossProfitMargin': 'Gross Profit Margin',
            'eps': 'EPS', 'dividendYield': 'Dividend Yield', 'freeCashFlowPerShare': 'FCF per Share',
            'priceEarningsRatio': 'P/E Ratio', 'priceToBookRatio': 'P/B Ratio', 'priceToSalesRatio': 'P/S Ratio',
            'revenueGrowth': 'Revenue Growth', 'netIncomeGrowth': 'Net Income Growth',
            'freeCashFlowGrowth': 'FCF Growth', 'epsgrowth': 'EPS Growth'
        }
        final_columns = {api_name: new_name for api_name, new_name in column_map.items() if api_name in combined_df.columns}
        final_df = combined_df[list(final_columns.keys())].copy()
        final_df.rename(columns=final_columns, inplace=True)
        final_df.index = pd.to_datetime(final_df.index)
        final_df.sort_index(ascending=False, inplace=True)
        return final_df
    except Exception: return None

# Stream recent fundamental data

In [11]:
import requests
import pandas as pd
import time
import os
from datetime import datetime

def get_comprehensive_current_data(api_key: str, ticker: str) -> pd.DataFrame | None:
    """
    Fetches a snapshot of current fundamental and market data from multiple FMP
    endpoints and returns it as a single pandas DataFrame.

    Args:
        api_key (str): Your FMP API key.
        ticker (str): The stock ticker symbol.

    Returns:
        pd.DataFrame: A DataFrame containing the comprehensive current data,
                      with metrics as the index. Returns None if an error occurs.
    """
    print(f"\n--- Fetching Snapshot of Current Data for {ticker} ---")
    base_url = 'https://financialmodelingprep.com/api/v3'

    try:
        # --- 1. Fetch Data from 3 Separate Endpoints ---
        params = {'apikey': api_key}
        quote_resp = requests.get(f"{base_url}/quote/{ticker}", params=params)
        ttm_resp = requests.get(f"{base_url}/ratios-ttm/{ticker}", params=params)
        growth_params = {'apikey': api_key, 'period': 'annual', 'limit': 1}
        growth_resp = requests.get(f"{base_url}/financial-growth/{ticker}", params=growth_params)

        # Check for any request errors
        for resp in [quote_resp, ttm_resp, growth_resp]:
            resp.raise_for_status()

        quote_data = quote_resp.json()
        ttm_data = ttm_resp.json()
        growth_data = growth_resp.json()

        # --- 2. Validate and Extract Data ---
        if not all([quote_data, ttm_data, growth_data]):
            print(f"Incomplete data received for {ticker}.")
            return None

        quote = quote_data[0]
        ttm = ttm_data[0]
        growth = growth_data[0]

        # --- 3. Aggregate Data into a Dictionary ---
        data_dict = {
            # Market Data
            "Company Name": quote.get('name'),
            "Symbol": quote.get('symbol'),
            "Current Price": quote.get('price'),
            "Change": quote.get('change'),
            "Change %": quote.get('changesPercentage'),
            "Data Timestamp": datetime.fromtimestamp(quote.get('timestamp')).strftime('%Y-%m-%d %H:%M:%S'),

            # Valuation Ratios (TTM)
            "P/E Ratio": ttm.get('peRatioTTM'),
            "P/B Ratio": ttm.get('priceToBookRatioTTM'),
            "P/S Ratio": ttm.get('priceToSalesRatioTTM'),

            # Profitability & Per-Share (TTM)
            "Return on Equity (ROE)": ttm.get('returnOnEquityTTM'),
            "Return on Assets (ROA)": ttm.get('returnOnAssetsTTM'),
            "Gross Profit Margin": ttm.get('grossProfitMarginTTM'),
            "EPS (TTM)": ttm.get('epsTTM'),
            "Dividend Yield": ttm.get('dividendYieldTTM'),
            "FCF per Share": ttm.get('freeCashFlowPerShareTTM'),

            # Latest Reported Growth Rates (from last annual report)
            "Growth Report Date": growth.get('date'),
            "Revenue Growth": growth.get('revenueGrowth'),
            "Net Income Growth": growth.get('netIncomeGrowth'),
            "FCF Growth": growth.get('freeCashFlowGrowth'),
            "EPS Growth": growth.get('epsgrowth')
        }

        # --- 4. Convert to DataFrame and Return ---
        df = pd.DataFrame.from_dict(data_dict, orient='index', columns=['Value'])
        return df

    except requests.exceptions.RequestException as e:
        print(f"A network error occurred: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None

# Generate historical technical indicators

In [12]:
def calculate_historical_technical_indicators(
    api_key: str,
    ticker: str,
    period: str,
    limit: int,
    vol_window_size: int = 21,  # <-- MODIFIED: Renamed for clarity
    # --- ADDED: Parameters for new indicators ---
    rsi_window: int = 14,
    cci_window: int = 20,
    atr_window: int = 14,
    ema_short_window: int = 10,
    ema_medium_window: int = 50,
    ema_long_window: int = 200
) -> pd.DataFrame | None:
    """
    Calculates annualized historical volatility and other technical indicators
    (RSI, CCI, ATR, EMAs) using price data from FMP.
    """
    print(f"\n--- Calculating Technical Indicators for {ticker} using FMP data ---")
    
    # --- 1. Translate period and limit to a start/end date for FMP ---
    end_date = datetime.now()
    if period.lower() == 'annual':
        years_needed = limit
    elif period.lower() in ['quarter', 'quarterly']:
        years_needed = math.ceil(limit / 4)
    else:
        print(f"Error: Invalid period '{period}'. Please use 'annual' or 'quarterly'.")
        return None
        
    start_date = end_date - timedelta(days=years_needed * 365)
    
    start_date_str = start_date.strftime('%Y-%m-%d')
    end_date_str = end_date.strftime('%Y-%m-%d')
    print(f"Input: {limit} '{period}' periods -> Fetching price data from {start_date_str} to {end_date_str}.")

    # --- 2. Fetch Price Data from FMP ---
    base_url = 'https://financialmodelingprep.com/api/v3'
    endpoint = f'/historical-price-full/{ticker}'
    params = {
        'apikey': api_key,
        'from': start_date_str,
        'to': end_date_str
    }
    
    try:
        response = requests.get(f"{base_url}{endpoint}", params=params)
        response.raise_for_status()
        data = response.json()

        if not data or 'historical' not in data or not data['historical']:
            print(f"No historical price data found for {ticker} from FMP.")
            return None
        
        # --- 3. Process FMP Data into a DataFrame ---
        stock_data = pd.DataFrame(data['historical'])
        stock_data.set_index('date', inplace=True)
        stock_data.index = pd.to_datetime(stock_data.index)
        stock_data.sort_index(ascending=True, inplace=True)
        #save_dataframe_to_csv(stock_data, ticker, filename=f"cache/{ticker}_historical_prices.csv")
        
        # --- MODIFIED: Rename columns for pandas-ta compatibility (e.g., 'high' -> 'High') ---
        stock_data.rename(columns={
            'open': 'Open',
            'high': 'High',
            'low': 'Low',
            'close': 'Close',
            'volume': 'Volume'
        }, inplace=True)
        
        # --- 4. Calculate Technical Indicators ---
        print("Calculating Volatility, RSI, CCI, ATR, and EMAs...")

        # a) Historical Volatility (existing calculation)
        stock_data['LogReturn'] = np.log(stock_data['Close'] / stock_data['Close'].shift(1))
        rolling_std = stock_data['LogReturn'].rolling(window=vol_window_size).std()
        trading_days = 252
        stock_data['AnnualizedVolatility'] = rolling_std * np.sqrt(trading_days)
        
        # --- ADDED: Use pandas_ta to calculate new indicators ---
        # The 'append=True' argument adds the results as new columns to the stock_data DataFrame
        stock_data.ta.rsi(length=rsi_window, append=True)
        stock_data.ta.cci(length=cci_window, append=True)
        stock_data.ta.atr(length=atr_window, append=True)
        stock_data.ta.ema(length=ema_short_window, append=True)
        stock_data.ta.ema(length=ema_medium_window, append=True)
        stock_data.ta.ema(length=ema_long_window, append=True)
        
        # --- 5. Prepare and Return Final DataFrame ---
        
        # --- MODIFIED: Select all desired columns for the final output ---
        # Note: pandas-ta creates column names like 'RSI_14', 'EMA_10', etc.
        columns_to_return = [
            'Close', 'AnnualizedVolatility',
            f'RSI_{rsi_window}', 
            f'CCI_{cci_window}_D',  # pandas-ta adds '_D' for the default calculation
            f'ATRr_{atr_window}',   # pandas-ta adds 'r' for the default True Range method
            f'EMA_{ema_short_window}',
            f'EMA_{ema_medium_window}',
            f'EMA_{ema_long_window}'
        ]
        
        # Filter for columns that actually exist in the dataframe to avoid errors
        # This is robust in case a calculation failed or a column name is slightly different
        existing_columns = [col for col in columns_to_return if col in stock_data.columns]
        
        result_df = stock_data[existing_columns].copy()
        result_df.dropna(inplace=True) # Remove rows with NaN values (from rolling calculations)
        
        print(f"Successfully calculated indicators. Returning DataFrame with {len(result_df)} rows.")
        return result_df
        
    except requests.exceptions.RequestException as e:
        print(f"A network or API error occurred while fetching price data: {e}")
        return None
    except Exception as ex:
      print(f"An unexpected error occurred during calculations: {ex}")
      return None

In [None]:
def get_combined_historical_data(api_key: str, ticker: str, period: str = "quarter", limit: int = 20) -> pd.DataFrame | None:
    """
    Combines historical fundamental ratios with daily price and volatility data.

    This master function calls the helper functions to get low-frequency (ratios)
    and high-frequency (volatility) data, then merges them into a single,
    daily-indexed DataFrame using a forward-fill strategy.
    """
    print(f"\n{'='*20} STARTING COMBINED DATA FETCH FOR {ticker} {'='*20}")

    # 1. Get the low-frequency fundamental data (e.g., quarterly)
    ratios_df = get_comprehensive_historical_ratios(api_key, ticker, period, limit)
    if ratios_df is None:
        print("Failed to get historical ratios. Aborting combination.")
        return None
    
    # 2. Get the high-frequency daily price and volatility data
    volatility_df = calculate_historical_technical_indicators(api_key, ticker, period, limit)    
    if volatility_df is None:
        print("Failed to get historical volatility. Aborting combination.")
        return None
    
    print("\n--- Combining daily volatility with periodic fundamental data ---")

    # Merge on the 'date' column using an outer join to preserve all rows
    combined_df = pd.merge(ratios_df, volatility_df, on="date", how="outer")

    # Sort the merged dataframe by date
    combined_df.sort_values(by="date", inplace=True)

    # Perform linear interpolation
    combined_df.interpolate(method='linear', inplace=True)
    combined_df.bfill(inplace=True)

    # Optional: reset index if needed
    #combined_df.reset_index(drop=True, inplace=True)    
    
    save_dataframe_to_csv(combined_df, "cache/" + ticker)

    # 5. Drop any rows at the very beginning of the DataFrame that are still NaN.
    # This happens if the price history starts before the first financial report.
    combined_df.dropna(inplace=True)

    print(f"--- Combination complete. Final DataFrame has {len(combined_df)} rows. ---")
    print(f"{'='*20}   FINISHED COMBINED DATA FETCH FOR {ticker}   {'='*20}\n")

    return combined_df


In [None]:
# --- Main Execution ---
if __name__ == "__main__":
    # IMPORTANT: Replace with your FMP API Key
    FMP_API_KEY = "v0Y7rqjEfz0nBiixKBqJwLLgyFYbOUGA"
    TICKER_SYMBOL = "AAPL"
    DATA_PERIOD = "quarter"
    DATA_LIMIT = 30  # Fetch 30 quarters (10 years) of data

    master_df = get_combined_historical_data(
        api_key=FMP_API_KEY,
        ticker=TICKER_SYMBOL,
        period=DATA_PERIOD,
        limit=DATA_LIMIT
    )

    if master_df is not None:
        print("\n--- Master DataFrame Sample ---")

        # Display the last 5 rows to see the most recent data
        print("\nDataFrame Tail:")
        print(master_df)

        # To prove the forward-fill works, let's view data around a quarter-end date
        # Find the last report date in the data
        if not master_df.empty:
            last_date = master_df.index[-1]
            # Try to find a date like '2023-12-31' or '2024-03-31' near the end
            # This is a sample verification, the exact date might differ
            try:
                print("\n--- Verifying the forward-fill around a report date ---")
                print("Notice how ROE stays constant until the report date, then might change.")
                print(master_df.loc['2024-03-27':'2024-04-02'])
            except KeyError:
                print("\n(Could not find specific dates for verification, but tail shows data.)")



--- Fetching Comprehensive Historical Data for AAPL ---

--- Calculating Technical Indicators for AAPL using FMP data ---
Input: 12 'quarter' periods -> Fetching price data from 2022-07-11 to 2025-07-10.
Calculating Volatility, RSI, CCI, ATR, and EMAs...
Successfully calculated indicators. Returning DataFrame with 553 rows.

--- Combining daily volatility with periodic fundamental data ---

DataFrame successfully saved to 'cache/AAPL_combined_data_2025-07-10.csv'
--- Combination complete. Final DataFrame has 561 rows. ---


--- Master DataFrame Sample ---

DataFrame Tail:
                 ROE       ROA  Gross Profit Margin  Dividend Yield  \
date                                                                  
2023-04-25  0.387461  0.072457             0.442670        0.001399   
2023-04-26  0.386235  0.072178             0.442723        0.001396   
2023-04-27  0.385009  0.071899             0.442776        0.001393   
2023-04-28  0.383783  0.071620             0.442829        0.001