In [2]:
!pip install "websockets>=13.0" finnhub-python sec-edgar-downloader langchain_google_genai langchain-community python-dotenv ta langchain langgraph pandas requests google-cloud-aiplatform markdown2 pdfkit -q

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.3/155.3 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m866.0 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.2/44.2 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.0/50.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m

In [3]:
!apt-get install wkhtmltopdf -q

Reading package lists...
Building dependency tree...
Reading state information...
The following additional packages will be installed:
  avahi-daemon geoclue-2.0 glib-networking glib-networking-common
  glib-networking-services gsettings-desktop-schemas iio-sensor-proxy
  libavahi-core7 libavahi-glib1 libdaemon0 libevdev2 libgudev-1.0-0 libhyphen0
  libinput-bin libinput10 libjson-glib-1.0-0 libjson-glib-1.0-common
  libmbim-glib4 libmbim-proxy libmd4c0 libmm-glib0 libmtdev1 libnl-genl-3-200
  libnotify4 libnss-mdns libproxy1v5 libqmi-glib5 libqmi-proxy libqt5core5a
  libqt5dbus5 libqt5gui5 libqt5network5 libqt5positioning5 libqt5printsupport5
  libqt5qml5 libqt5qmlmodels5 libqt5quick5 libqt5sensors5 libqt5svg5
  libqt5webchannel5 libqt5webkit5 libqt5widgets5 libsoup2.4-1
  libsoup2.4-common libudev1 libwacom-bin libwacom-common libwacom9 libwoff1
  libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-render-util0 libxcb-util1
  libxcb-xinerama0 libxcb-xinput0 libxcb-xkb1 libxkbcommon-x1

In [None]:
import os
import re
import json
import glob
import html
import shutil
from time import sleep
import datetime as dt
from datetime import date, datetime, timedelta
from typing import Dict, List, Union, TypedDict, Annotated


import pytz
import requests
import pdfkit
import finnhub
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from markdown import markdown
from dateutil.parser import parse as parse_date
from dateutil.relativedelta import relativedelta
from sec_edgar_downloader import Downloader


from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.graph import StateGraph, START, END
from openai import OpenAI


ALPACA_API_KEY = os.getenv('ALPACA_API_KEY')
ALPACA_SECRET_KEY = os.getenv('ALPACA_SECRET_KEY')
FINHUB_API_KEY = os.getenv('FINHUB_API_KEY')
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')


# Initialize LLM clients
client = OpenAI(
    api_key=GOOGLE_API_KEY,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

HEADERS = {
    'accept': 'application/json',
    'APCA-API-KEY-ID': ALPACA_API_KEY,
    'APCA-API-SECRET-KEY': ALPACA_SECRET_KEY
}
eastern = pytz.timezone('America/Toronto')
DFKIT_CONFIG = pdfkit.configuration(wkhtmltopdf='/usr/bin/wkhtmltopdf')

# --- Tools ---
@tool
def get_competitors(ticker: str) -> Union[List[str], str]:
    """Fetch peer tickers using Finnhub API."""
    finnhub_client = finnhub.Client(api_key=FINHUB_API_KEY)
    data = finnhub_client.company_peers(ticker)
    return data[:2] if isinstance(data, list) else []

@tool
def get_historical_stock_price(ticker: str) -> Dict:
    """Fetch one-week stock price."""
    start_ts = (dt.datetime.now(eastern).date() - dt.timedelta(days=8)).isoformat()
    end_ts = (dt.datetime.now(eastern).date() - dt.timedelta(days=1)).isoformat()
    url = (
        f"https://data.alpaca.markets/v2/stocks/bars?symbols={ticker}"
        f"&timeframe=1D&start={start_ts}&end={end_ts}"
        f"&limit=1000&adjustment=raw&feed=sip&sort=asc"
    )
    resp = requests.get(url, headers=HEADERS)
    if resp.status_code != 200:
        return {'status': f"error {resp.status_code}", 'bars': []}
    bars = resp.json().get('bars', {}).get(ticker, [])
    return {'status': 'success', 'bars': bars}

@tool
def get_financial_metrics(ticker: str) -> Dict:
    """Fetch and extract extended financial metrics for comprehensive valuation analysis."""
    resp = requests.get(
        "https://finnhub.io/api/v1/stock/metric",
        params={'symbol': ticker, 'metric': 'all', 'token': FINHUB_API_KEY}
    )

    if resp.status_code != 200:
        return {}

    metric = resp.json().get('metric', {})

    return {
        # Valuation
        'pe_ratio': metric.get('peBasicExclExtraTTM'),
        'price_to_book': metric.get('pbQuarterly'),
        'price_to_free_cash_flow': metric.get('pfcfShareTTM'),
        'ev_to_free_cash_flow': metric.get('currentEv/freeCashFlowTTM'),

        # Profitability
        'net_profit_margin_ttm': metric.get('netProfitMarginTTM'),
        'net_profit_margin_annual': metric.get('netProfitMarginAnnual'),
        'net_profit_margin_5y': metric.get('netProfitMargin5Y'),
        'gross_margin_ttm': metric.get('grossMarginTTM'),
        'operating_margin_ttm': metric.get('operatingMarginTTM'),
        'return_on_equity_5y': metric.get('roe5Y'),
        'return_on_assets_5y': metric.get('roa5Y'),
        'return_on_investment_5y': metric.get('roi5Y'),

        # Financial Health
        'current_ratio_ttm': metric.get('currentRatioQuarterly'),
        'quick_ratio_annual': metric.get('quickRatioAnnual'),
        'debt_to_equity': metric.get('totalDebt/totalEquityQuarterly'),
        'long_term_debt_to_equity': metric.get('longTermDebt/equityQuarterly'),
        'net_interest_coverage': metric.get('netInterestCoverageTTM'),

        # Growth
        'eps_growth_5y': metric.get('epsGrowth5Y'),
        'eps_growth_ttm_yoy': metric.get('epsGrowthTTMYoy'),
        'revenue_growth_5y': metric.get('revenueGrowth5Y'),
        'revenue_growth_ttm_yoy': metric.get('revenueGrowthTTMYoy'),
        'ebitda_cagr_5y': metric.get('ebitdaCagr5Y'),
        'focf_cagr_5y': metric.get('focfCagr5Y'),

        # Cash Flow Quality
        'cash_flow_per_share_ttm': metric.get('cashFlowPerShareTTM'),
        'cash_per_share_quarterly': metric.get('cashPerSharePerShareQuarterly'),

        # Efficiency & Leverage
        'asset_turnover_ttm': metric.get('assetTurnoverTTM'),
        'inventory_turnover_ttm': metric.get('inventoryTurnoverTTM'),

        # Risk Profile
        'beta': metric.get('beta'),

        # Capital Allocation
        'dividend_yield_ttm': metric.get('currentDividendYieldTTM'),
        'dividend_growth_5y': metric.get('dividendGrowthRate5Y'),
        'payout_ratio_ttm': metric.get('payoutRatioTTM'),

        # Size
        'market_cap': metric.get('marketCapitalization'),
        'enterprise_value': metric.get('enterpriseValue'),
    }


@tool
def get_sec_filings(ticker: str) -> Dict:
    """get sec filings"""
    today = date.today()
    start = today - relativedelta(years=5)
    after, before = start.isoformat(), today.isoformat()
    results = {}
    dl = Downloader('sec-edgar-downloader', 'noreply@example.com')
    try:
      dl.get("10-K", ticker, after=after, before=before)
      base_dir = os.path.join("./sec-edgar-filings", ticker, "10-K")
      submissions = glob.glob(os.path.join(base_dir, "*", "full-submission.txt"))

      for path in submissions:
          fid = os.path.basename(os.path.dirname(path))
          raw = open(path, encoding='utf-8').read()
          html_match = re.search(r'<DOCUMENT>\s*<TYPE>10-K.*?<TEXT>(.*?)</TEXT>', raw, re.DOTALL | re.IGNORECASE)

          if html_match:
              html_content = html_match.group(1)
              sections = extract_html_sections(html_content)
              results[fid] = sections
      status = 'success'
    except Exception as e:
        results = {}
        status = f'error: {e}'
    return {'filings': results, 'status': status}


@tool
def get_news(ticker: str, count: int = 20) -> List[Dict]:
    """Fetch news for exactly one ticker."""
    start_ts = (dt.datetime.utcnow().date() - dt.timedelta(days=8)).isoformat()
    end_ts = (dt.datetime.utcnow().date() - dt.timedelta(days=1)).isoformat()
    try:
        url = f"https://data.alpaca.markets/v1beta1/news?start={start_ts}&end={end_ts}&sort=desc&symbols={ticker}"
        resp = requests.get(url, headers=HEADERS)
        items = resp.json() if resp.status_code == 200 else []
        news_list = []
        print(items['news'])
        for item in items.get('news', []):
            if ticker in item.get('symbols', []):
                news_list.append({
                    'headline': item.get('headline'),
                    'summary': item.get('summary'),
                    'date': item.get('created_at'),
                    'url': item.get('url'),
                    'source': item.get('source')
                })
        print(news_list)
        return news_list
    except:
        return []

# Helper function to extract HTML sections from filing
def extract_html_sections(html_content: str) -> Dict[str, str]:
    soup = BeautifulSoup(html_content, 'html.parser')
    text = soup.get_text(separator='\n', strip=True)
    parts = re.split(r'(ITEM\s+\d+[A-Z]?\.)', text, flags=re.IGNORECASE)
    sections = {}
    for i in range(1, len(parts), 2):
        key = parts[i].strip().upper()
        sections[key] = parts[i+1].strip()
    return sections


def summarize_news(state: Dict) -> Dict:
    """Summarize news articles related to each ticker, grouped into a markdown table per ticker."""
    new_data = {}
    for ticker, info in state['data'].items():
        news_items = info.get('news', [])
        news_summary = ""

        if news_items:
            # Sort news by date descending
            try:
                news_items.sort(key=lambda x: parse_date(x['date']), reverse=True)
            except Exception:
                pass  # Fallback in case of parsing issue, continue unsorted

            cleaned_news = []
            for item in news_items:
                if not item.get('summary', '').strip():
                    continue
                cleaned_news.append({
                    'date': item['date'],
                    'headline': html.unescape(item['headline']),
                    'summary': html.unescape(item['summary']),
                    'url': item['url'],
                    'source': item['source']
                })
            news_block = json.dumps(cleaned_news, indent=2)


            prompt = f"""
            You are a financial analyst preparing a clean news impact summary for an equity research report.
            You will receive a list of company news entries in JSON format. Each entry contains the following fields:
            - `date`: ISO timestamp
            - `headline`: The article headline
            - `summary`: A brief summary of the article
            - `source`: Source name
            - `url`: The link to the article

            Your task is to produce a **Markdown table** with these columns:

            | Date | Headline | Impact Area | Sentiment | Summary | Relevance | Level of Relevance | Implication |
            |------|----------|--------------|-----------|---------|-----------|--------------------|-------------|

            ### Column Definitions:
            - **Impact Area**: What business unit, product, geography, or partner is directly affected?
            - **Sentiment**: Classify as Positive, Neutral, or Negative.
            - **Relevance**: What category does this fall into? E.g., Strategic, Regulatory, Financial, Operational, Legal.
            - **Level of Relevance**: High, Medium, or Low.
            - **Implication**: What does this news suggest about future performance, valuation, or risks?

            ### Instructions:
            - Sort the table by `Date` (most recent first).
            - Only include entries with a meaningful `summary`.
            - Keep headlines and summaries concise.
            - Do not include any extra commentary or prose — only return the completed Markdown table.
            """

            try:
                resp = client.chat.completions.create(
                  model="gemini-2.0-flash",
                  messages=[
                      {"role": "system", "content": prompt},
                      {"role": "user", "content": news_block}
                  ]
              )
                news_summary = resp.choices[0].message.content
            except Exception as e:
                news_summary = f"### News Summary\n\nError extracting news data: {str(e)}"
        else:
            news_summary = "### News Summary\n\nNo recent news available."

        new_data[ticker] = {**info, 'news_summary': news_summary}

    return {'data': new_data}



def summarize_filings_earnings(state: Dict) -> Dict:
    """Summarize earnings from each filing individually into markdown tables per fiscal year."""
    new_data = {}

    for ticker, info in state['data'].items():
        filings = info.get('filings', {}).get('filings', {})
        earnings_summary = []


        for fid, sec_dict in filings.items():
            print('summarize earning')
            sec_text = "\n\n".join([f"{section_title}:\n{section_text}" for section_title, section_text in sec_dict.items()])
            prompt = (
                f"You are a financial analyst. From the following 10-K filing (ID: {fid}), extract the following financial metrics "
                "and present them in markdown table format. Assume all financials are for the fiscal year reported in this filing.\n\n"
                "If any assumptions were made, put it as a NOTE underneath the table.\n"
                "Clearly label the unit, i.e. Millions USD or USD"

                "### Financial Performance Metrics\n"
                "| Fiscal Year | Total Revenue (USD) | Gross Margin (USD) | Gross Margin (%) | Operating Income (USD) | Net Income (USD) | EPS (Basic) | EPS (Diluted) | Free Cash Flow (USD) |\n"
                "|-------------|----------------------|---------------------|------------------|------------------------|------------------|-------------|----------------|-----------------------|\n"

                "### Segment Revenue Breakdown\n"
                "| Fiscal Year | Segment         | Revenue (USD) |\n"
                "|-------------|------------------|----------------|\n"
                "|             | Product Revenue  |                |\n"
                "|             | Services Revenue |                |\n"

                "### Geographical Revenue Breakdown\n"
                "| Fiscal Year | Segment         | Revenue (USD) |\n"
                "|-------------|-----------------|----------------|\n"
                "|             | Central America |                |\n"
                "|             | Europe          |                |\n"


                "### Product-Level Revenue Analysis\n"
                "Break down individual products or product lines by fiscal year, with revenue and share of total revenue.\n"
                "| Fiscal Year | Product / Line         | Revenue (USD) | % of Total Revenue |\n"
                "|-------------|------------------------|---------------|---------------------|\n"

                "### Capital Allocation & Balance Sheet Highlights\n"
                "| Fiscal Year | Cash & Marketable Securities (USD) | Total Debt (USD) | Share Repurchases (USD) | Dividends Paid (USD) | Capital Expenditures (USD) |\n"
                "|-------------|-------------------------------------|------------------|--------------------------|----------------------|-----------------------------|\n"

                "### Operational Efficiency Metrics\n"
                "| Fiscal Year | Inventory Levels (USD) | Accounts Receivable (USD) | Accounts Payable (USD) |\n"
                "|-------------|------------------------|----------------------------|-------------------------|\n"

                "### Strategic Investments & R&D\n"
                "| Fiscal Year | R&D Expenses (USD) |\n"
                "|-------------|---------------------|\n"

                "### Tax Metrics\n"
                "| Fiscal Year | Effective Tax Rate (%) | Income Before Tax (USD) | Income Tax Expense (USD) | Deferred Tax Assets (USD) | Deferred Tax Liabilities (USD) | Notes on Tax Strategy or Exposure |\n"
                "|-------------|-------------------------|---------------------------|---------------------------|-----------------------------|-------------------------------|----------------------------------|\n"
                "|             |                         |                           |                           |                             |                               |                                  |\n"
                "- Also summarize any geographic tax exposure: how much tax is paid or income is earned in domestic vs. international jurisdictions\n"

                "Present each category in a **separate markdown table** with clear headers. Include values, appropriate units (e.g., USD, %), and add brief notes if relevant data is mentioned in the filing (e.g., jurisdictional tax structure, regulatory exposure, tax credits, loss carryforwards)."
            )

            try:
                resp = client.chat.completions.create(
                    model="gemini-2.0-flash",
                    messages=[
                        {"role": "system", "content": prompt},
                        {"role": "user", "content": sec_text}
                    ]
                )
                filing_summary = f"### Filing ID: {fid}\n\n" + resp.choices[0].message.content
            except Exception as e:
                filing_summary = f"### Filing ID: {fid}\n\nError extracting data: {str(e)}"
            earnings_summary.append(filing_summary)
            sleep(30)  # Avoid rate limit

        summary_text = "\n\n---\n\n".join(earnings_summary) if earnings_summary else "No earnings data found"
        new_data[ticker] = {**info, 'filings-earnings': summary_text}

    return {'data': new_data}


def summarize_filings_guidance(state: Dict) -> Dict:
    """Summarize Guidance from each filing individually."""
    new_data = {}

    for ticker, info in state['data'].items():
        filings = info.get('filings', {}).get('filings', {})
        guidance_summary = []

        for fid, sec_dict in filings.items():
            # Convert the dict of sections (e.g., ITEM 7, ITEM 7A) into a readable string
            sec_text = "\n\n".join([f"{section_title}:\n{section_text}" for section_title, section_text in sec_dict.items()])

            prompt = (
                f"You are a financial analyst. From the following SEC filing (ID: {fid}), extract detailed summarized guidance, forward-looking commentary, and disclosed risk factors.\n\n"
                "Format the output using **markdown tables** with the following structures. Use as many rows as needed. If a section is not discussed in the filing, omit that table.\n\n"

                "### Guidance & Risk Summary\n"
                "| Fiscal Year | Category                        | Summary |\n"
                "|-------------|----------------------------------|---------|\n"
                "- GENERAL GUIDANCE SUMMARY\n"
                "- FUTURE GUIDANCE\n"
                "- PRODUCT PERFORMANCE\n"
                "- ONGOING RISKS\n"
                "- ONGOING ISSUES\n"
                "- NEW OR EMERGING RISKS\n\n"
                "- GEOPOLITICAL RISK\n"
                "- MACROECONOMIC RISK\n"
                "- SUPPLY CHAIN RISK\n"
                "- INFLATION RISK\n"
                "- ECONOMIC DOWNTURN RISK\n"
                "- INTEREST RATE RISK\n"
                "- FX / CURRENCY RISK\n"
                "- REGULATORY / LEGAL RISK\n"
                "- NATURAL DISASTER RISK\n"


                "### Product Launch and Innovation\n"
                "| Fiscal Year | Product / Service | Launch Year | Purpose / Market | Growth Expectation | Timeliness | Launch Risk | Expected Market Response |\n"
                "|-------------|-------------------|-------------|------------------|---------------------|------------|--------------|---------------------------|\n"
                "- Include all announced or discussed product/service initiatives\n"
                "- Estimate feasibility of launch date, risk of delay, and expected market impact\n\n"

                "### Management Positioning and Strategic Outlook\n"
                "| Fiscal Year | Management Viewpoint | Market Role (Leader/Follower/etc) | Strategic Tone (Confident/Cautious/etc) | Execution Evidence |\n"
                "|-------------|----------------------|-----------------------------------|-----------------------------------------|--------------------|\n"
                "- Summarize management's view of market direction and their strategic role\n"
                "- Capture tone and any alignment/mismatch with actual actions (e.g. M&A, hiring, investment)\n\n"

                "### Business Growth Commentary\n"
                "| Fiscal Year | Metric / Theme                 | Forward-Looking Statement |\n"
                "|-------------|--------------------------------|----------------------------|\n"
                "- List all future growth-related commentary (e.g. revenue CAGR targets, market share goals, TAM expansion, product-driven growth)\n"
                "- Include geography or segment-specific growth statements"
            )

            try:
                resp = client.chat.completions.create(
                    model="gemini-2.0-flash",
                    messages=[
                        {"role": "system", "content": prompt},
                        {"role": "user", "content": sec_text}
                    ]
                )
                filing_summary = f"### Filing ID: {fid}\n\n" + resp.choices[0].message.content
            except Exception as e:
                filing_summary = f"### Filing ID: {fid}\n\nError extracting guidance: {str(e)}"

            guidance_summary.append(filing_summary)
            sleep(30)  # Rate limiting

        summary_text = "\n\n---\n\n".join(guidance_summary) if guidance_summary else "No guidance data found"
        new_data[ticker] = {**info, 'filings-guidance': summary_text}

    return {'data': new_data}



@tool
def generate_report(data: Dict[str, Dict]) -> str:
    """Generate raw report sections for each ticker with Markdown tables for fundamental metrics."""
    sections = []

    for ticker, m in data.items():
        fundamentals = m.get('fundamentals', {})
        sec = [f"## {ticker}", f"Status: {m.get('overall_status', 'N/A')}"]

        bars = m.get('prices', {}).get('bars', [])
        if bars:
            sec.append("### Recent Stock Prices\n\n")
            sec.append("| Date | Closing Price |")
            sec.append("|------|----------------|")
            for b in bars:
                sec.append(f"| {b['t'][:10]} | {b['c']} |")
            sec.append(f"\nLast Close: {bars[-1]['c']}")
        else:
            sec.append("Last 7 Days Close Price: N/A")
            sec.append("Last 7 Trading Dates: N/A")
            sec.append("Last Close: N/A")

        sec.append(f"Query Date: {date.today()}")

        if fundamentals:
            sec.append("\n### Fundamental Metrics\n\n")
            sec.append("| Metric | Value |")
            sec.append("|--------|-------|")

            metric_map = {
                "P/E Ratio": "pe_ratio",
                "Price-to-Book": "price_to_book",
                "Price/Free Cash Flow": "price_to_free_cash_flow",
                "EV/Free Cash Flow": "ev_to_free_cash_flow",
                "Net Profit Margin (TTM)": "net_profit_margin_ttm",
                "Net Profit Margin (Annual)": "net_profit_margin_annual",
                "Net Profit Margin (5Y)": "net_profit_margin_5y",
                "Gross Margin (TTM)": "gross_margin_ttm",
                "Operating Margin (TTM)": "operating_margin_ttm",
                "Return on Equity (5Y)": "return_on_equity_5y",
                "Return on Assets (5Y)": "return_on_assets_5y",
                "Return on Investment (5Y)": "return_on_investment_5y",
                "Current Ratio (TTM)": "current_ratio_ttm",
                "Quick Ratio (Annual)": "quick_ratio_annual",
                "Debt to Equity": "debt_to_equity",
                "Long-Term Debt to Equity": "long_term_debt_to_equity",
                "Net Interest Coverage": "net_interest_coverage",
                "EPS Growth (5Y)": "eps_growth_5y",
                "EPS Growth TTM YoY": "eps_growth_ttm_yoy",
                "Revenue Growth (5Y)": "revenue_growth_5y",
                "Revenue Growth TTM YoY": "revenue_growth_ttm_yoy",
                "Free Cash Flow Growth (5Y)": "focf_cagr_5y",
                "Cash Flow per Share (TTM)": "cash_flow_per_share_ttm",
                "Cash per Share (Quarterly)": "cash_per_share_quarterly",
                "Asset Turnover (TTM)": "asset_turnover_ttm",
                "Inventory Turnover (TTM)": "inventory_turnover_ttm",
                "Beta": "beta",
                "Dividend Yield (TTM)": "dividend_yield_ttm",
                "Dividend Growth (5Y)": "dividend_growth_5y",
                "Payout Ratio (TTM)": "payout_ratio_ttm",
                "Market Capitalization": "market_cap",
                "Enterprise Value": "enterprise_value"
            }

            for label, key in metric_map.items():
                val = fundamentals.get(key, "N/A")
                sec.append(f"| {label} | {val} |")

        # Earnings & Guidance
        if m.get('filings-earnings'):
            sec.append("\n### Earnings (Past 5-Year Summary)\n\n")
            sec.append(m.get('filings-earnings'))

        if m.get('filings-guidance'):
            sec.append("\n### Guidance (Past 5-Year Summary)\n\n")
            sec.append(m.get('filings-guidance'))

        if m.get('consolidated_filings_summary'):
            sec.append("\n### Consolidated Sec Filings with Earnings and Guidance from Past Few Years\n\n")
            sec.append(m.get('consolidated_filings_summary'))

        if m.get('valuation_forecast'):
            sec.append("\n### 1 Year and 5 Year Valuation Forecast\n\n")
            sec.append(m.get('valuation_forecast'))

        if m.get('news_summary'):
            sec.append("\n### Recent News Summary\n\n")
            sec.append(m.get('news_summary'))


        sections.append("\n".join(sec))

    return "\n---\n".join(sections)



# --- Graph State ---
class State(TypedDict):
    messages: Annotated[list, None]
    company: str
    competitors: List[str]
    data: Dict[str, Union[Dict, str]]

# Graph nodes
def lookup_competitors(state: State) -> Dict:
    peers = get_competitors.invoke(state['company'])
    return {'competitors': peers or []}

def analyze_all(state: State) -> Dict:
    tickers = [state['company']] + state.get('competitors', [])
    tickers = list(dict.fromkeys(tickers))
    data_map = {}
    for t in tickers:
        prices = get_historical_stock_price.invoke(t)
        fundamentals = get_financial_metrics.invoke(t) or {}
        filings = get_sec_filings.invoke(t)
        news = get_news.invoke(t)
        data_map[t] = {
            'prices': prices,
            'fundamentals': fundamentals,
            'filings': filings,
        }
    # Store raw data for summaries
    state['data'] = data_map
    # Summarize earnings then guidance
    state = summarize_filings_earnings(state)
    sleep(20)
    state = summarize_filings_guidance(state)

    # Build final output with summaries only
    final_map = {}
    for t in tickers:
        info = state['data'][t]
        final_map[t] = {
            'prices': info['prices'],
            'fundamentals': info['fundamentals'],
            'filings-earnings': info['filings-earnings'],
            'filings-guidance': info['filings-guidance'],
        }
    return {'data': final_map}


def consolidate_filings_node(state: State) -> Dict:
  updated_data = {}
  for ticker, info in state["data"].items():
    raw = generate_report.invoke({'data': {ticker: info}})
    prompt = """
      You are the author of this filing consolidation report.
      You are NOT reviewing or commenting on an existing report — your job is to generate a **new**, original, and complete equity research consolidation using the data provided.

      You are a buyside equity research analyst. Create a consolidated summary of the past years’ 10-K filings using the provided markdown data tables and strategic disclosures.

      ---

      ## 🔍 Data Handling Instructions

      You are given:
      - A series of markdown tables from multiple 10-K filings (up to 10 per company)
      - Tables covering different categories (e.g., revenue, capital allocation, tax, risks)
      - Pre-extracted guidance, risks, and strategic commentary

      Your tasks:

      1. If a section includes multiple tables (e.g., "Financial Performance" across 5 years), **combine them into one clean markdown table**, using the following rules:
        - Keep only one table per section
        - Merge rows by fiscal year
        - If multiple values exist for the same year, use the **most recent filing**
        - Remove duplicate rows or repeated headers
        - Clarify in footnotes if you make judgment calls (e.g., conflicting numbers)

      2. You must not invent or fabricate any data. Only use what is provided.

      3. Follow consistent table formats across all sections.

      ---

      ## 📊 Final Consolidated Report Structure

      ### 1. Historical Financial Data (Last 5 Years)

      Use 10-K data and merge tables into one per category:

      - Financial Performance
      - Product Segment Revenue
      - Geographical Segment Revenue (grouped by in the order of Geographical Segment, Year)
      - Product Revenue (grouped by in the order of Product, Year)
      - Capital Allocation
      - Operational Efficiency
      - R&D
      - Tax Position

      Format: **markdown tables only** (no prose summaries)

      ---

      ### 2. Strategic Commentary & Guidance (Last 5 Years)

      Use 10-K data and merge all guidance-related summaries into the following:

      - Management outlook and execution (grouped by year)
      - New product launches, expectation of new products and market expansion ** clearly write out all product names **
      - TAM growth or product-specific projections
      - R&D expansion or focus areas
      - Key disclosed risks (ongoing and new)
      - Macro, regulatory, or competitive themes
      - Risk mitigations disclosed by management
      - If tables are available, preserve them; otherwise, summarize in **structured markdown sections**
      Format: **markdown tables only** (no prose summaries)

    """
    resp = client.chat.completions.create(
            model="gemini-2.0-flash",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": raw}
            ]
        )

    consolidated_filings_summary  = resp.choices[0].message.content
    updated_data[ticker] = {
        **info,
        'consolidated_filings_summary': consolidated_filings_summary
    }

  return {'data': updated_data}


def summarize_news_node(state: State) -> Dict:
    """Fetch and summarize news for each ticker."""
    new_data = {}
    for ticker, info in state['data'].items():
        news_items = get_news.invoke(ticker)

        if not news_items:
            new_data[ticker] = {**info, "news_summary": "### News Summary\n\nNo recent news available."}
            continue
        valid_news = [
            {
                'date': item['date'],
                'headline': html.unescape(item['headline']),
                'summary': html.unescape(item['summary']),
                'url': item['url'],
                'source': item['source'],
                'stock-ticker': ticker,
            }
            for item in news_items if item.get("summary", "").strip()
        ]

        try:
            valid_news.sort(key=lambda x: parse_date(x['date']), reverse=True)
        except Exception:
            pass  # fallback if date sort fails

        news_json = json.dumps(valid_news, indent=2)

        # Step 3: Prompt LLM to summarize
        prompt = """
        You are a financial analyst preparing a clean news impact summary for an equity research report for the stock-ticker provided.

        You will receive a list of company news entries in JSON format. Your task is to output a **Markdown table** with the following columns:

        | Date | Headline | Impact Area | Sentiment | Summary | Relevance | Level of Relevance | Implication |
        |------|----------|--------------|-----------|---------|-----------|--------------------|-------------|

        ### Column Definitions:
        - **Impact Area**: What business unit, product, geography, or partner is directly affected?
        - **Sentiment**: Classify as Positive, Neutral, or Negative to the stock-ticker provided.
        - **Relevance**: Strategic, Regulatory, Financial, Operational, Legal, etc.
        - **Level of Relevance**: High, Medium, or Low.
        - **Implication**: What does this suggest for valuation, performance, or risks?

        ### Instructions:
        - Sort the table by `Date` (most recent first).
        - Only include entries with meaningful `summary`.
        - Do not return any prose or commentary outside the table.
        - Do **not** wrap the table in triple backticks or markdown code blocks — just return the table directly.
        """


        try:
            resp = client.chat.completions.create(
                model="gemini-2.0-flash",
                messages=[
                    {"role": "system", "content": prompt},
                    {"role": "user", "content": news_json}
                ]
            )
            summary = resp.choices[0].message.content
        except Exception as e:
            summary = f"### News Summary\n\nError: {str(e)}"

        new_data[ticker] = {**info, "news": news_items, "news_summary": summary}
        print(f"[News Summary for {ticker}]\n{summary}")

    return {"data": new_data}


def valuation_node(state: State) -> Dict:
    updated_data = {}
    for ticker, info in state["data"].items():
        summary_input = {
          "prices": info.get("prices", {}),
          "fundamentals": info.get("fundamentals", {}),
          "consolidated_filings_summary": info.get("consolidated_filings_summary", ""),
        }
        raw = generate_report.invoke({'data': {ticker: summary_input}})
        prompt = """
        You are a valuation analyst. Generate a 1-year and 5-year price forecast for a stock using a comprehensive financial and strategic assessment based on provided data.

        You must incorporate and clearly reference the following:

        - Historical stock performance and volatility
        - Key fundamental metrics (e.g., P/E, P/B, FCF yield, ROE, ROIC, margins): state which metrics you use and their values.
        - Financial statements: cash flow, income, and balance sheet items
        - Debt structure and liquidity (e.g., current ratio, interest coverage, D/E)
        - Revenue growth trends and free cash flow sustainability
        - Management's track record in executing guidance and resolving disclosed risks
        - Forward guidance (from management or filings)
        - Recent News
        - Sales performance trends (e.g., unit economics, seasonality, retention)
        - Competitive position, market share, and TAM
        - Capital allocation strategy (e.g., buybacks, dividends, R&D, acquisitions)
        - Risks and uncertainties (e.g., regulatory, operational, macroeconomic)
        - Peer valuation comparison and relative positioning
        - Beta and macro sensitivity (e.g., interest rate or inflation exposure)

        ---

        ### Product Revenue Analysis

        1. Extract and summarize historical **product-level revenue** from the provided filings:

        | Fiscal Year | Product / Line | Revenue (USD) | % Change from Prior Year |

        2. Based on the actual data:
        - Estimate realistic CAGR per product
        - Forecast each product’s revenue for Year 1 and Year 5

        Present in:

        | Product | Revenue (Year 1) | Revenue (Year 5) | CAGR | Notes |

        > Use actual historical growth rates where possible. Do **not** assume arbitrary numbers. If a forecast requires extrapolation, explain the logic.

        ---

        ### 🧮 Valuation Forecast Table

        | Scenario       | Target Price (Year 1) | Target Price (Year 5) | CAGR from Current Price | Key Assumptions |
        |----------------|-----------------------|------------------------|--------------------------|------------------|
        | Bullish Case   |                       |                        |                          |                  |
        | Nominal Case   |                       |                        |                          |                  |
        | Bearish Case   |                       |                        |                          |                  |

        ---

        ### Required: Explicit Formula Breakdown

        You must clearly write out:
        1. The valuation methodology used (e.g., DCF, EV/EBITDA, P/E-based model)
        2. The formulas used
        3. All variables in the formulas, and:
          - Their source (e.g., from data, assumed)
          - The actual value used

        For example:

        - **Formula**: Target Price = EPS × Forward P/E
        - **Variables**:
          - EPS (Year 1) = 6.40 (from fundamentals)
          - Forward P/E = 25 (assumed based on peer average)

        Another example:

        - **Formula**: Enterprise Value = EBITDA × EV/EBITDA Multiple
        - EBITDA = $9.1B (TTM)
        - EV/EBITDA = 14x (peer group avg from provided data)

        This level of transparency is required for each scenario forecast.

        ---

        ### Important Rules

        - Do not fabricate any data. Use only what is provided or clearly derived from it.
        - If a value must be assumed, state that it is an assumption and explain why it's reasonable.
        - Do not include comparisons unless peer metrics are available.

        ---

        ### Finally, summarize:

        - The valuation methodology used and why it fits this company
        - Key sensitivities (e.g., to margins, growth, valuation multiple)
        - Your most likely scenario and rationale

        """
        resp = client.chat.completions.create(
            model="gemini-2.0-flash",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": raw}
            ]
        )

        valuation_summary = resp.choices[0].message.content
        updated_data[ticker] = {
            **info,
            'valuation_forecast': valuation_summary
        }

    return {'data': updated_data}


def report_node(state: State) -> Dict:
    from langchain_core.messages import AIMessage
    from datetime import date

    messages = state.get('messages', [])
    data = state.get('data', {})
    final_messages = []
    compiled_markdown = ""
    light_summaries = []

    report_prompt = """
    You are the author of this equity research report.
    You are NOT reviewing or commenting on an existing report — your job is to generate a **new**, original, and complete equity research report based on the data provided.
    Follow the structure below exactly. Populate each section with real data, tables, calculations, and strategic analysis.

    You are a buyside equity research analyst. Create a final version of a professional equity research report using:
    - Historical market data
    - Fundamental metrics
    - Valuation forecasts
    - A fully pre-merged, markdown-based summary from multiple 10-K filings, management outlook, risk disclosures

    ## Data Provided:
    - 7-day stock price history
    - Fundamental financial metrics (in markdown table)
    - Valuation forecast (markdown table with assumptions)
    - Consolidated 10-K summary (`consolidated_filings_summary`)
    - Includes tables for Financial Performance, Revenue by Product/Geo, Capital Allocation, R&D, Tax, Risk Disclosures

    ## Final Report Structure (markdown only):

    ⚠️ DO NOT include the first three sections (1–3). They have already been generated and shown.
    Begin writing from Section 4 onward:
    ### 4. Comments and Insights on Consolidated Filing Summary
    - Write a short paragraph for each table, no need to show the table
    - Evaluate management’s effectiveness in mitigating risks and challenges
    - Evaluate the impact on on-going and emerging risks


    ### 5. Product & Growth Outlook
    - Identify growth drivers, new launches, performance on new products, TAM outlook

    ### 6. Detailed Fundamental Analysis
    - Analyze margins, efficiency, leverage, capex, debts, liquidity, execution

    ### 7. Valuation Forecast
    - Paste markdown table exactly
    - List valuation assumptions and calculation breakdown

    ### 8. Investment Recommendation
    - Buy / Hold / Sell
    - Justify based on valuation, strategy, and financials

    ## Rules:
    - DO NOT invent or modify tables
    - DO NOT return prose-only output
    """

    for ticker, info in data.items():
        report_input_metrics= {
          "prices": info.get("prices", {}),
          "fundamentals": info.get("fundamentals", {}),
          "consolidated_filings_summary": info.get("consolidated_filings_summary", ""),
          "news_summary": info.get("news_summary", ""),
        }
        report_input = generate_report.invoke({'data': {ticker: report_input_metrics}})

        # Compile initial content
        compiled_markdown += f"\n\n# Report: {ticker}\n\n---\n"
        compiled_markdown += report_input

        # Compose raw input for LLM
        prices = info.get("prices", "")
        fundamentals = info.get("fundamentals", "")
        consolidated_filings_summary = info.get("consolidated_filings_summary", "")
        valuation_forecast = info.get("valuation_forecast", "")
        news_summary = info.get("news_summary", "")
        raw_input = f"""
        ### 1. Last 7 Days Stock Close Price\n{prices}
        ### 2. Stock Fundamental Metrics\n{fundamentals}
        ### 3. Consolidated Filings Summary\n{consolidated_filings_summary}
        ### 4. Valuation Forecast\n{valuation_forecast}
        ### 5. Recent News\n{news_summary}
        """

        # Call LLM for full report generation
        resp = client.chat.completions.create(
            model="gemini-2.0-flash",
            messages=[
                {"role": "system", "content": report_prompt},
                {"role": "user", "content": raw_input}
            ]
        )
        report_summary = resp.choices[0].message.content
        compiled_markdown += f"\n\n# Summary: {ticker}\n\n{report_summary}"
        final_messages.append(AIMessage(content=report_summary))

        # Extract short investment summary
        extract_prompt = """
        You are a portfolio assistant. Extract a brief investment summary from the following detailed equity report.
        Include:
        - Summary of valuation outlook (bullish/nominal/bearish)
        - Debt, liquidity, asset base, margin trend, revenue growth
        - Key product and geographic performance
        - Risk themes and mitigations
        - Forward-looking growth commentary
        Return a markdown section under the ticker.
        """

        extract_resp = client.chat.completions.create(
            model="gemini-2.0-flash",
            messages=[
                {"role": "system", "content": extract_prompt},
                {"role": "user", "content": report_summary}
            ]
        )
        light_summaries.append(extract_resp.choices[0].message.content)

    # Build final ranking
    combined_summary = "\n".join(light_summaries)
    ranking_prompt = """
    You are a portfolio manager reviewing multiple equity research reports.
    Rank the companies below by expected return vs. risk profile.

    ### Format:
    | Rank | Company | Recommendation | Rationale |
    |------|---------|----------------|-----------|

    Then explain your top pick.
    """

    ranking_resp = client.chat.completions.create(
        model="gemini-2.0-flash",
        messages=[
            {"role": "system", "content": ranking_prompt},
            {"role": "user", "content": combined_summary}
        ]
    )
    portfolio_ranking = ranking_resp.choices[0].message.content
    final_messages.append(AIMessage(content=portfolio_ranking))
    compiled_markdown += f"\n\n# Comparison Ranking\n\n{portfolio_ranking}"

    # Save report
    save_markdown_to_pdf(compiled_markdown, output_path="/content/final_equity_report.pdf")
    return {
        "messages": messages + final_messages,
        "pdf_markdown": compiled_markdown
    }


# Build the graph
graph_builder = StateGraph(State)
graph_builder.add_node('lookup_competitors', lookup_competitors)
graph_builder.add_node('analyze_all', analyze_all)
graph_builder.add_node('consolidate_filings_node', consolidate_filings_node)
graph_builder.add_node(' summarize_news_node', summarize_news_node)
graph_builder.add_node('valuation_node', valuation_node)
graph_builder.add_node('report_node', report_node)
graph_builder.add_edge(START, 'lookup_competitors')
graph_builder.add_edge('lookup_competitors', 'analyze_all')
graph_builder.add_edge('analyze_all', 'consolidate_filings_node')
graph_builder.add_edge('consolidate_filings_node', ' summarize_news_node')
graph_builder.add_edge(' summarize_news_node', 'valuation_node')
graph_builder.add_edge('valuation_node', 'report_node')
graph_builder.add_edge('report_node', END)

def save_markdown_to_pdf(markdown_text: str, output_path: str = "/content/equity_research.pdf"):
    import shutil
    import pdfkit
    from markdown import markdown

    # Locate wkhtmltopdf binary
    wkhtmltopdf_path = shutil.which("wkhtmltopdf")

    # Convert Markdown to HTML with GitHub-style table support
    html_body = markdown(markdown_text, extensions=['tables'])

    # HTML template with fixed table layout and word wrapping
    html = f"""
    <html>
    <head>
      <style>
        body {{
          font-family: 'Arial', sans-serif;
          padding: 20px;
          font-size: 12px;
        }}
        table {{
          width: 100%;
          border-collapse: collapse;
          margin-bottom: 20px;
          table-layout: fixed;
          word-wrap: break-word;
        }}
        th, td {{
          border: 1px solid #999;
          padding: 6px;
          text-align: left;
          vertical-align: top;
          word-break: break-word;
          white-space: pre-wrap;
        }}
        th {{
          background-color: #f2f2f2;
        }}
        h1, h2, h3 {{
          color: #2e6c80;
        }}
      </style>
    </head>
    <body>
      {html_body}
    </body>
    </html>
    """

    # PDF config
    config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_path)
    options = {
        'enable-local-file-access': '',
        'page-size': 'A4',
        'encoding': 'UTF-8',
        'quiet': ''
    }

    # Generate PDF
    pdfkit.from_string(html, output_path, options=options, configuration=config)
    print(f"✅ PDF generated: {output_path}")


if __name__ == '__main__':
    init_state = {
        'messages': [HumanMessage(content='Start analysis')],
        'company': 'AAPL',
        'competitors': [],
        'data': {}
    }
    graph = graph_builder.compile()
    final_state = None

    for evt in graph.stream(init_state, stream_mode='values'):
        final_state = evt
        if 'messages' in evt:
            msg = evt['messages'][-1]
            if isinstance(msg, AIMessage):
                print(msg.content)
            else:
                print(msg)

    from google.colab import files
    pdf_path = "/content/final_equity_report.pdf"
    files.download(pdf_path)

content='Start analysis' additional_kwargs={} response_metadata={}
content='Start analysis' additional_kwargs={} response_metadata={}
[{'author': 'Adam Eckert', 'content': '', 'created_at': '2025-05-21T17:57:24Z', 'headline': 'OpenAI Teams Up With iPhone Designer Jony Ive As ChatGPT Maker Makes Push Into Hardware, Apple Shares Slide', 'id': 45550033, 'images': [{'size': 'large', 'url': 'https://cdn.benzinga.com/files/imagecache/2048x1536xUP/images/story/2025/05/21/OpenAI-shutterstock-2525341257.jpeg'}, {'size': 'small', 'url': 'https://cdn.benzinga.com/files/imagecache/1024x768xUP/images/story/2025/05/21/OpenAI-shutterstock-2525341257.jpeg'}, {'size': 'thumb', 'url': 'https://cdn.benzinga.com/files/imagecache/250x187xUP/images/story/2025/05/21/OpenAI-shutterstock-2525341257.jpeg'}], 'source': 'benzinga', 'summary': 'Apple Inc (NASDAQ:AAPL) shares are moving lower Wednesday afternoon following multiple reports that OpenAI is set to acquire Apple Veteran Jony Ive&#39;s AI device startup.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>