In [17]:
import os
import json
from dotenv import load_dotenv
from typing import Dict, Any
from datetime import datetime

import requests
import pandas as pd
import yfinance as yf

# Load variables from a local .env file (when one exists) into the process
# environment, then read the API key that the fetch helpers send as `apikey`.
# API_KEY is None when VANTAGE_API_KEY is not set.
load_dotenv()
API_KEY = os.environ.get("VANTAGE_API_KEY")

## Data Fetching and Caching

In [18]:
class APIRateLimitError(Exception):
    """Raised by the fetch helpers when the API response reports a rate limit."""

# Top-level keys that mark a payload as a real financial report (the same keys
# merge_reports_df reads). Anything else is treated as a diagnostic response.
_REPORT_KEYS = ('annualReports', 'quarterlyReports')

# Upper bound, in seconds, for a single HTTP request so a stalled connection
# cannot hang the notebook indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30


def _fetch_alpha_vantage_report(function: str, file_suffix: str, symbol: str,
                                api_key: str, folder: str) -> dict:
    """Return one Alpha Vantage report, reading from / writing to a JSON cache.

    Args:
        function: Value of the ``function`` query parameter
            (e.g. ``'INCOME_STATEMENT'``).
        file_suffix: Suffix of the cache file, ``<folder>/<symbol>_<file_suffix>.json``.
        symbol: Ticker symbol to query.
        api_key: API key sent as the ``apikey`` query parameter.
        folder: Directory holding the cache files; created on first write.

    Returns:
        The decoded JSON payload, either from the cache file or from the API.

    Raises:
        APIRateLimitError: If the payload's ``Information`` message mentions a
            rate limit.
        requests.HTTPError: If the server answers with an HTTP error status.
    """
    file_path = os.path.join(folder, f'{symbol}_{file_suffix}.json')
    if os.path.exists(file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    # Let requests build and escape the query string instead of interpolating
    # raw values into the URL.
    response = requests.get(
        'https://www.alphavantage.co/query',
        params={'function': function, 'symbol': symbol, 'apikey': api_key},
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    data = response.json()

    if isinstance(data, dict):
        info = data.get('Information')
        if isinstance(info, str) and 'rate limit' in info.lower():
            raise APIRateLimitError(info)

    # Only cache payloads that actually contain report data. Caching an error
    # or empty response would make every later call return it from disk.
    if isinstance(data, dict) and any(key in data for key in _REPORT_KEYS):
        os.makedirs(folder, exist_ok=True)
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f)
    return data


def fetch_income_statement(symbol: str, api_key: str, folder: str = 'data') -> dict:
    """Fetch (or load from cache) the INCOME_STATEMENT report for ``symbol``.

    The cache file is ``<folder>/<symbol>_income_statement.json``.
    Raises APIRateLimitError when the API reports a rate limit.
    """
    return _fetch_alpha_vantage_report('INCOME_STATEMENT', 'income_statement',
                                       symbol, api_key, folder)


def fetch_balance_sheet(symbol: str, api_key: str, folder: str = 'data') -> dict:
    """Fetch (or load from cache) the BALANCE_SHEET report for ``symbol``.

    The cache file is ``<folder>/<symbol>_balance_sheet.json``.
    Raises APIRateLimitError when the API reports a rate limit.
    """
    return _fetch_alpha_vantage_report('BALANCE_SHEET', 'balance_sheet',
                                       symbol, api_key, folder)


def fetch_cash_flow(symbol: str, api_key: str, folder: str = 'data') -> dict:
    """Fetch (or load from cache) the CASH_FLOW report for ``symbol``.

    The cache file is ``<folder>/<symbol>_cash_flow.json``.
    Raises APIRateLimitError when the API reports a rate limit.
    """
    return _fetch_alpha_vantage_report('CASH_FLOW', 'cash_flow',
                                       symbol, api_key, folder)

## Merge Reports

In [19]:
def convert_columns_to_numeric(df):
    """
    Coerce the known financial columns of ``df`` to numeric dtype, in place.

    Only columns that exist in ``df`` are touched; values that cannot be parsed
    become NaN. The frame is modified directly and nothing is returned.
    """
    candidate_columns = (
        'netIncome',
        'totalRevenue',
        'operatingCashflow',
        'capitalExpenditures',
        'cashAndCashEquivalentsAtCarryingValue',
        'totalAssets',
        'totalCurrentLiabilities',
        'currentDebt',
        'cashAndShortTermInvestments',
        'longTermDebt',
        'commonStockSharesOutstanding',
    )
    present = [name for name in candidate_columns if name in df.columns]
    for name in present:
        df[name] = pd.to_numeric(df[name], errors='coerce')


def _merge_statement_frames(income: pd.DataFrame,
                            balance: pd.DataFrame,
                            cash: pd.DataFrame,
                            key: str = 'fiscalDateEnding') -> pd.DataFrame:
    """Outer-join the three statement frames on ``key``, skipping frames without it."""
    # Columns removed from the later frames so the merge does not produce
    # suffixed duplicates of columns the income statement already carries.
    drop_plan = (
        (income, []),
        (balance, ['reportedCurrency']),
        (cash, ['reportedCurrency', 'netIncome']),
    )

    prepared = []
    for frame, duplicate_cols in drop_plan:
        # An empty or malformed statement has no key column; merging on it
        # would raise KeyError, so leave that statement out instead.
        if key not in frame.columns:
            continue
        frame = frame.drop(columns=duplicate_cols, errors='ignore')
        frame[key] = pd.to_datetime(frame[key])
        prepared.append(frame.sort_values(key).reset_index(drop=True))

    if not prepared:
        return pd.DataFrame()

    merged = prepared[0]
    for frame in prepared[1:]:
        merged = pd.merge(merged, frame, on=key, how='outer')
    convert_columns_to_numeric(merged)
    return merged


def merge_reports_df(income_statement: dict,
                     balance_sheet: dict,
                     cash_flow: dict) -> Dict[str, Any]:
    """
    Merge annual and quarterly reports from three API responses into two DataFrames.

    Each response may carry 'annualReports' and 'quarterlyReports' lists. The
    statements are outer-joined on 'fiscalDateEnding' (parsed to datetime) and
    the known financial columns are converted to numeric dtype. A statement
    that is missing or empty is skipped rather than failing the merge; if all
    three are missing the corresponding frame is empty.

    Returns:
        A dict with 'symbol' (taken from the income statement, 'N/A' when
        absent), 'annual' DataFrame, and 'quarterly' DataFrame.
    """
    def frames_for(report_key: str) -> pd.DataFrame:
        return _merge_statement_frames(
            pd.DataFrame(income_statement.get(report_key, [])),
            pd.DataFrame(balance_sheet.get(report_key, [])),
            pd.DataFrame(cash_flow.get(report_key, [])),
        )

    return {
        'symbol': income_statement.get('symbol', 'N/A'),
        'annual': frames_for('annualReports'),
        'quarterly': frames_for('quarterlyReports'),
    }

## Price Retrieval

In [20]:
def get_yearly_close_prices(ticker_symbol: str,
                            start_year: int,
                            end_year: int) -> pd.DataFrame:
    """
    Retrieve and return a DataFrame with columns:
        'year' and 'close' (the last close price of that year).

    Args:
        ticker_symbol: Symbol passed to ``yf.Ticker``.
        start_year: First calendar year to include.
        end_year: Last calendar year to include.

    Returns:
        One row per year that has at least one non-null close, ordered by
        year. An empty frame with the same two columns is returned when no
        price history is available.
    """
    start_date = f"{start_year}-01-01"
    # yfinance treats `end` as exclusive, so requesting "<end_year>-12-31"
    # would drop the Dec 31 session. Ask up to Jan 1 of the following year.
    end_date = f"{end_year + 1}-01-01"

    ticker = yf.Ticker(ticker_symbol)
    hist = ticker.history(start=start_date, end=end_date)

    if hist.empty:
        return pd.DataFrame(columns=['year', 'close'])

    if not isinstance(hist.index, pd.DatetimeIndex):
        hist.index = pd.to_datetime(hist.index)

    # Last non-null close per calendar year. Grouping on the index year avoids
    # relying on the index being named 'Date' or on a pandas-version-specific
    # resample alias.
    closes = hist['Close']
    yearly = closes.groupby(closes.index.year).last().dropna()
    # Guard against rows that spill past the requested range.
    yearly = yearly[yearly.index <= end_year]

    return pd.DataFrame({
        'year': yearly.index.to_numpy(),
        'close': yearly.to_numpy(),
    })


def get_latest_price(ticker_symbol: str) -> float:
    """
    Get the latest market price (attempt from 'regularMarketPrice'
    or fallback to 'previousClose').
    """
    ticker = yf.Ticker(ticker_symbol)
    latest_price = ticker.info.get('previousClose')
    return float(latest_price) if latest_price else None

## Analysis

In [21]:
def safe_float(value):
    """Return a float when possible; otherwise None.

    None, NaN (including values that only become NaN after conversion, such as
    the string 'nan'), pandas missing markers, and anything ``float()`` rejects
    (non-numeric strings, multi-element sequences, timestamps) all map to None.
    """
    if value is None:
        return None
    try:
        result = float(value)
    except (TypeError, ValueError):
        return None
    # Check the converted value: testing the raw input would miss 'nan' strings
    # and would raise on array-likes whose truth value is ambiguous.
    if pd.isna(result):
        return None
    return result


def safe_divide(numerator, denominator):
    """Divide two values, returning None instead of failing.

    None is returned when either operand is None, cannot be converted to a
    float, is NaN, or when the denominator equals zero.
    """
    if any(operand is None for operand in (numerator, denominator)):
        return None
    try:
        top, bottom = float(numerator), float(denominator)
    except (TypeError, ValueError):
        return None
    unusable = bottom == 0 or pd.isna(top) or pd.isna(bottom)
    return None if unusable else top / bottom


def series_mean(series: pd.Series) -> float | None:
    if series is None or series.empty:
        return None
    return safe_float(series.mean())


WINDOW_PERIODS = (1, 2, 5, 10)

def windowed_means(series: pd.Series, windows: tuple[int, ...] = WINDOW_PERIODS) -> dict[str, float | None]:
    """Return trailing means for the requested window sizes."""
    results: dict[str, float | None] = {}
    if series is None:
        return {f"{years}Y": None for years in windows}
    cleaned = series.dropna()
    for years in windows:
        label = f"{years}Y"
        if len(cleaned) >= years:
            window_slice = cleaned.tail(years)
            results[label] = safe_float(window_slice.mean())
        else:
            results[label] = None
    return results


def prepare_annual_frame(raw: pd.DataFrame) -> pd.DataFrame:
    """Copy the annual reports and add 'fcf' and 'year' columns.

    'fcf' is operatingCashflow minus capitalExpenditures and 'year' is the
    calendar year of fiscalDateEnding. An empty input comes back as an empty
    copy; ``raw`` itself is never modified.
    """
    if raw.empty:
        return raw.copy()
    return raw.assign(
        fcf=raw['operatingCashflow'] - raw['capitalExpenditures'],
        year=raw['fiscalDateEnding'].dt.year,
    )


def prepare_quarterly_frame(raw: pd.DataFrame) -> pd.DataFrame:
    """Copy the quarterly reports and add FCF, 4-row rolling sums and per-share values.

    Added columns: 'fcf', 'ttm_fcf', 'ttm_net_income' (sums over the current
    and three preceding rows), 'shares' (share count with gaps filled forward
    then backward), 'ttm_eps' and 'ttm_fcf_ps'. An empty input is returned as
    an empty copy.
    """
    enriched = raw.copy()
    if enriched.empty:
        return enriched

    quarter_fcf = enriched['operatingCashflow'] - enriched['capitalExpenditures']
    enriched['fcf'] = quarter_fcf
    enriched['ttm_fcf'] = quarter_fcf.rolling(4).sum()
    enriched['ttm_net_income'] = enriched['netIncome'].rolling(4).sum()
    enriched['shares'] = enriched['commonStockSharesOutstanding'].ffill().bfill()

    def per_share(total_column):
        # Row-wise so that safe_divide can turn unusable pairs into None.
        return enriched.apply(
            lambda rec: safe_divide(rec[total_column], rec['shares']),
            axis=1,
        )

    enriched['ttm_eps'] = per_share('ttm_net_income')
    enriched['ttm_fcf_ps'] = per_share('ttm_fcf')
    return enriched


def extract_ttm_metrics(quarterly_df: pd.DataFrame) -> dict[str, Any]:
    """Summarise the last row of the enriched quarterly frame.

    Returns a dict with 'latest_quarter_eps' (netIncome / shares of the last
    row), 'ttm_eps', 'ttm_fcf_ps' and 'fiscal_date_ending' (formatted as
    YYYY-MM-DD). Every value is None when the frame is empty.
    """
    if quarterly_df.empty:
        return dict.fromkeys(
            ('latest_quarter_eps', 'ttm_eps', 'ttm_fcf_ps', 'fiscal_date_ending')
        )

    newest = quarterly_df.iloc[-1]
    quarter_eps = safe_divide(newest['netIncome'], newest['shares'])
    trailing_eps = safe_float(newest.get('ttm_eps'))
    trailing_fcf_ps = safe_float(newest.get('ttm_fcf_ps'))

    report_date = newest['fiscalDateEnding']
    if pd.notnull(report_date):
        formatted_date = report_date.strftime('%Y-%m-%d')
    else:
        formatted_date = None

    return {
        'latest_quarter_eps': quarter_eps,
        'ttm_eps': trailing_eps,
        'ttm_fcf_ps': trailing_fcf_ps,
        'fiscal_date_ending': formatted_date,
    }


def compute_growth_metrics(annual_df: pd.DataFrame, num_periods: int) -> tuple[dict[str, Any], pd.DataFrame, dict[str, dict[str, float | None]]]:
    """Compute revenue/FCF/share growth plus leverage and margin details."""
    df = annual_df.copy()
    if df.empty:
        empty_metrics = {
            'avg_turnover_growth': None,
            'avg_FCF_growth': None,
            'avg_ROCE_using_FCF': None,
            'latest_net_debt_to_FCF': None,
            'avg_stock_increase': None,
            'avg_FCF_margin': None,
        }
        return empty_metrics, df, {
            'revenue_growth': windowed_means(None),
            'fcf_growth': windowed_means(None),
            'roce_fcf': windowed_means(None),
            'shares_growth': windowed_means(None),
        }

    df['revenue_growth'] = df['totalRevenue'].pct_change()
    df['fcf_growth'] = df['fcf'].pct_change()
    df['shares_growth'] = df['commonStockSharesOutstanding'].pct_change()

    df['capital_employed'] = df['totalAssets'] - df['totalCurrentLiabilities']

    cash_series = df.get('cashAndShortTermInvestments')
    if cash_series is None or cash_series.empty:
        cash_series = df.get('cashAndCashEquivalentsAtCarryingValue')
    df['cash'] = cash_series.fillna(0) if cash_series is not None else 0.0

    for debt_col in ('currentDebt', 'longTermDebt'):
        if debt_col in df:
            df[debt_col] = df[debt_col].fillna(0)
        else:
            df[debt_col] = 0.0

    df['debt'] = df['currentDebt'] + df['longTermDebt']
    df['net_debt'] = df['debt'] - df['cash']

    df['roce_fcf'] = df.apply(lambda row: safe_divide(row['fcf'], row['capital_employed']), axis=1)
    df['net_debt_to_fcf'] = df.apply(lambda row: safe_divide(row['net_debt'], row['fcf']), axis=1)
    df['fcf_margin'] = df.apply(lambda row: safe_divide(row['fcf'], row['totalRevenue']), axis=1)

    recent_subset = df.tail(num_periods)
    averages = {
        'avg_turnover_growth': series_mean(recent_subset['revenue_growth']),
        'avg_FCF_growth': series_mean(recent_subset['fcf_growth']),
        'avg_ROCE_using_FCF': series_mean(recent_subset['roce_fcf']),
        'avg_stock_increase': series_mean(recent_subset['shares_growth']),
        'avg_FCF_margin': series_mean(recent_subset['fcf_margin']),
    }

    net_debt_series = df['net_debt_to_fcf'].dropna()
    averages['latest_net_debt_to_FCF'] = safe_float(net_debt_series.iloc[-1]) if not net_debt_series.empty else None

    trend_windows = {
        'revenue_growth': windowed_means(df['revenue_growth']),
        'fcf_growth': windowed_means(df['fcf_growth']),
        'roce_fcf': windowed_means(df['roce_fcf']),
        'shares_growth': windowed_means(df['shares_growth']),
    }

    return averages, df, trend_windows


def build_historical_records(df: pd.DataFrame) -> list[dict[str, Any]]:
    """Turn each annual row into a plain dict of year, turnover, FCF, ROCE, shares and margin.

    Values go through safe_float (missing values become None) and the year is
    cast to int. An empty frame yields an empty list.
    """
    if df.empty:
        return []

    fields = ['year', 'totalRevenue', 'fcf', 'roce_fcf', 'commonStockSharesOutstanding', 'fcf_margin']
    return [
        {
            'year': int(year) if pd.notnull(year) else None,
            'turnover': safe_float(revenue),
            'FCF': safe_float(fcf),
            'ROCE_using_FCF': safe_float(roce),
            'shares': safe_float(shares),
            'FCF_margin': safe_float(margin),
        }
        for year, revenue, fcf, roce, shares, margin
        in df[fields].itertuples(index=False, name=None)
    ]


def compute_price_ratios(symbol: str,
                         annual_df: pd.DataFrame,
                         ttm_metrics: dict[str, Any],
                         num_periods: int) -> dict[str, Any]:
    """Compute valuation ratios from annual figures and market prices."""
    year_values = annual_df['year'].dropna() if 'year' in annual_df else pd.Series(dtype=int)
    end_year = int(year_values.max()) if not year_values.empty else datetime.today().year - 1
    start_year = end_year - 10

    df_price = get_yearly_close_prices(symbol, start_year, end_year)
    annual_price_df = pd.merge(annual_df, df_price, on='year', how='left')

    annual_price_df['eps'] = annual_price_df.apply(
        lambda row: safe_divide(row['netIncome'], row['commonStockSharesOutstanding']),
        axis=1,
    )
    annual_price_df['p_e'] = annual_price_df.apply(
        lambda row: safe_divide(row['close'], row['eps']),
        axis=1,
    )
    annual_price_df['fcf_per_share'] = annual_price_df.apply(
        lambda row: safe_divide(row['fcf'], row['commonStockSharesOutstanding']),
        axis=1,
    )
    annual_price_df['p_fcf'] = annual_price_df.apply(
        lambda row: safe_divide(row['close'], row['fcf_per_share']),
        axis=1,
    )

    valid_pe = annual_price_df['p_e'].dropna()
    valid_pfcf = annual_price_df['p_fcf'].dropna()

    average_pe_5y = series_mean(valid_pe.tail(5)) if len(valid_pe) >= 5 else None
    average_pe_10y = series_mean(valid_pe.tail(10)) if len(valid_pe) >= 10 else None
    average_pfcf_5y = series_mean(valid_pfcf.tail(5)) if len(valid_pfcf) >= 5 else None
    average_pfcf_10y = series_mean(valid_pfcf.tail(10)) if len(valid_pfcf) >= 10 else None

    average_per = series_mean(valid_pe.tail(num_periods)) if not valid_pe.empty else None

    latest_price = safe_float(get_latest_price(symbol))
    ttm_fcf_ps = safe_float(ttm_metrics.get('ttm_fcf_ps'))
    ttm_eps = safe_float(ttm_metrics.get('ttm_eps'))
    latest_quarter_eps = safe_float(ttm_metrics.get('latest_quarter_eps'))

    ttm_pfcf = safe_divide(latest_price, ttm_fcf_ps) if latest_price is not None else None
    ttm_per = safe_divide(latest_price, ttm_eps) if latest_price is not None else None
    latest_per = safe_divide(latest_price, latest_quarter_eps * 4) if (latest_price is not None and latest_quarter_eps is not None) else None

    return {
        'latest_price': latest_price,
        'ttm_per': ttm_per,
        'latest_per': latest_per,
        'average_per': average_per,
        'average_pe_5y': average_pe_5y,
        'average_pe_10y': average_pe_10y,
        'ttm_pfcf': ttm_pfcf,
        'average_pfcf_5y': average_pfcf_5y,
        'average_pfcf_10y': average_pfcf_10y,
    }


def build_dcf_inputs(annual_df: pd.DataFrame, ttm_fcf_ps: float | None) -> dict[str, Any]:
    """Extract the latest balance sheet inputs for the DCF form."""
    if annual_df.empty:
        return {
            'fcf_ps': safe_float(ttm_fcf_ps),
            'cash': None,
            'debt': None,
            'shares': None,
        }

    latest = annual_df.iloc[-1]
    cash_val = latest.get('cashAndShortTermInvestments')
    if not cash_val:
        cash_val = latest.get('cashAndCashEquivalentsAtCarryingValue')

    current_debt = latest.get('currentDebt') or 0
    long_term_debt = latest.get('longTermDebt') or 0

    return {
        'fcf_ps': safe_float(ttm_fcf_ps),
        'cash': safe_float(cash_val),
        'debt': safe_float(current_debt + long_term_debt),
        'shares': safe_float(latest.get('commonStockSharesOutstanding')),
    }


def analyze_dataframes(symbol: str,
                       annual_df: pd.DataFrame,
                       quarterly_df: pd.DataFrame,
                       num_periods: int = 5) -> Dict[str, Any]:
    """Run every analysis step on the merged frames and collect one result dict.

    The annual and quarterly frames are prepared first. Then TTM metrics,
    growth metrics, historical records, price ratios and DCF inputs are
    computed and combined with the symbol and today's date ('analysis_date').
    """
    annual_prepared = prepare_annual_frame(annual_df)
    quarterly_prepared = prepare_quarterly_frame(quarterly_df)

    ttm = extract_ttm_metrics(quarterly_prepared)

    growth_summary, annual_with_growth, windows = compute_growth_metrics(annual_prepared, num_periods)
    history = build_historical_records(annual_with_growth)

    valuation = compute_price_ratios(symbol, annual_prepared, ttm, num_periods)
    dcf = build_dcf_inputs(annual_prepared, ttm.get('ttm_fcf_ps'))

    # Assemble in a fixed order: growth figures, history, valuation, the rest.
    report = dict(growth_summary)
    report['historical_metrics'] = history
    report.update(valuation)
    report.update({
        'trend_windows': windows,
        'latest_quarterly_eps': ttm.get('latest_quarter_eps'),
        'ttm_eps': ttm.get('ttm_eps'),
        'ttm_fcf_ps': ttm.get('ttm_fcf_ps'),
        'fiscal_date_ending': ttm.get('fiscal_date_ending'),
        'dcf_inputs': dcf,
        'analysis_date': datetime.now().strftime('%Y-%m-%d'),
        'symbol': symbol,
    })
    return report




## High-Level Function

In [22]:
def analyze_stock(ticker: str, api_key: str, num_periods: int = 5) -> Dict[str, Any]:
    """
    Fetch raw data, merge, run the all-in-DataFrame analysis, and return
    a dictionary of results (no printing or saving).

    Args:
        ticker: Symbol to analyse.
        api_key: API key forwarded to the three fetch helpers.
        num_periods: Number of trailing annual periods used for the averages.

    Returns:
        The result dictionary produced by ``analyze_dataframes``.
    """
    # Fetch data
    income_statement = fetch_income_statement(ticker, api_key)
    balance_sheet_data = fetch_balance_sheet(ticker, api_key)
    cash_flow_data = fetch_cash_flow(ticker, api_key)

    # Merge into DataFrames
    merged_data = merge_reports_df(income_statement, balance_sheet_data, cash_flow_data)
    annual_df = merged_data['annual']
    quarterly_df = merged_data['quarterly']

    # merge_reports_df always sets 'symbol' and uses 'N/A' when the income
    # statement carries none, so a plain .get(..., ticker) default never fires.
    # Fall back to the requested ticker explicitly so price lookups do not
    # run against the placeholder.
    symbol = merged_data.get('symbol')
    if not symbol or symbol == 'N/A':
        symbol = ticker

    # Perform the big analysis
    return analyze_dataframes(symbol, annual_df, quarterly_df, num_periods=num_periods)

## Usage

In [23]:
# Symbol to analyse; it is passed to analyze_stock and names the output JSON file.
ticker = 'ASML'

In [24]:
# Run the full pipeline for the selected ticker.
metrics_dict = analyze_stock(ticker, API_KEY, num_periods=5)

# Results are stored as analysis/<ticker>.json; create the folder on demand.
output_dir = 'analysis'
os.makedirs(output_dir, exist_ok=True)
file_path = os.path.join(output_dir, f'{ticker}.json')

# Write the metrics as indented JSON.
with open(file_path, 'w') as f:
    json.dump(metrics_dict, f, indent=4)

# Show the same JSON in the cell output.
print(json.dumps(metrics_dict, indent=4))

{
    "avg_turnover_growth": 0.19576410689450405,
    "avg_FCF_growth": 0.6301749756447773,
    "avg_ROCE_using_FCF": 0.3059943567695744,
    "avg_stock_increase": -0.013594138702447944,
    "avg_FCF_margin": 0.31533086770520197,
    "latest_net_debt_to_FCF": -0.9955599516430377,
    "historical_metrics": [
        {
            "year": 2005,
            "turnover": 2527046847.0,
            "FCF": 638348174.17,
            "ROCE_using_FCF": 0.2734684339819501,
            "shares": 371639006.0,
            "FCF_margin": 0.252606387146253
        },
        {
            "year": 2006,
            "turnover": 3581773383.0,
            "FCF": 406768148.90999997,
            "ROCE_using_FCF": 0.1468678621242473,
            "shares": 388066910.0,
            "FCF_margin": 0.11356613202851532
        },
        {
            "year": 2007,
            "turnover": 3774123817.0,
            "FCF": 491916937.08000004,
            "ROCE_using_FCF": 0.17775817020295248,
            "shares": 373