<a href="https://colab.research.google.com/github/shimonsant2/FinAppV3/blob/main/FinappV3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
!pip install yfinance pandas matplotlib requests plotly beautifulsoup4 lxml --quiet

import os
import re
import requests
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import plotly.graph_objects as go
from bs4 import BeautifulSoup
from IPython.display import display

%matplotlib inline

# ============================================================
# CONFIG: SEC headers (MUST be real name + email in production)
# ============================================================
SEC_HEADERS = {
    "User-Agent": "Your Name your.email@example.com",  # TODO: change this
    "Accept-Encoding": "gzip, deflate"
}

# ============================================================
# STEP 1 – Find ticker from company name (SEC)
# ============================================================
def find_symbol_from_company_name(company_name: str):
    """
    Find symbol by company name using SEC official company list.
    """
    url = "https://www.sec.gov/files/company_tickers.json"
    resp = requests.get(url, headers=SEC_HEADERS)
    resp.raise_for_status()
    data = resp.json()

    company_upper = company_name.upper()
    best_match = None

    for entry in data.values():
        name = entry["title"].upper()
        if company_upper in name:
            best_match = entry["ticker"]
            break

    return best_match

# ---- INPUT: main company & competitors ----
company_name = "servicenow"   # <--- change this

# You can use tickers OR company names here.
# Examples for ServiceNow:
COMPETITOR_INPUTS = [
    "Atlassian",     # TEAM
    "Salesforce",    # CRM
    "Workday",       # WDAY
    "Microsoft",     # MSFT
    "Adobe",         # ADBE
    "Freshworks",    # FRSH
]

ticker_symbol = find_symbol_from_company_name(company_name)
print("Found symbol:", ticker_symbol)

# ============================================================
# STEP 2 – Download history (company, S&P500, competitors, FX)
# ============================================================
def download_price_history(symbol: str, period: str = "10y"):
    """
    Download up to 'period' of historical data for a symbol using yfinance.
    Returns a DataFrame or None.
    """
    try:
        ticker = yf.Ticker(symbol)
        hist = ticker.history(period=period, auto_adjust=False)
    except Exception as e:
        print(f"Error downloading history for {symbol}:", e)
        return None

    if hist is None or hist.empty:
        print("No history returned for", symbol)
        return None

    hist.index = pd.to_datetime(hist.index)
    return hist

def resolve_competitor_symbol(raw_input: str):
    """
    Try to resolve competitor input (ticker or company name) to a ticker.
    Logic:
      - If looks like a ticker (no spaces, <=5 chars) -> assume as-is.
      - Else, try SEC company name lookup.
      - If fails -> return None and skip.
    """
    s = raw_input.strip()
    if " " not in s and len(s) <= 5:
        return s.upper()

    try:
        ticker = find_symbol_from_company_name(s)
        return ticker
    except Exception:
        return None

def build_competitor_close_df(competitor_histories: dict):
    """
    competitor_histories: dict[ticker -> hist_df]
    Returns a DataFrame of close prices with columns=tickers.
    """
    frames = []
    for tck, df in competitor_histories.items():
        if df is None or df.empty:
            continue
        if "Close" not in df.columns:
            continue
        frames.append(df["Close"].rename(tck))

    if not frames:
        return None
    combo = pd.concat(frames, axis=1)
    combo = combo.dropna(how="all")
    return combo

# ---- Download main, benchmark, competitors, FX ----
if ticker_symbol is None:
    print("Ticker symbol not found. Check company_name.")
    hist_10y = None
    spx_10y = None
    competitor_close_df = None
    fx_10y = None
else:
    # main stock
    hist_10y = download_price_history(ticker_symbol, period="10y")
    # S&P 500 benchmark
    spx_10y = download_price_history("^GSPC", period="10y")
    # USD → ILS FX (ILS=X: USD/ILS rate)
    fx_10y = download_price_history("ILS=X", period="10y")

    # competitors
    competitor_histories = {}
    for raw in COMPETITOR_INPUTS:
        tck = resolve_competitor_symbol(raw)
        if not tck:
            print(f"Could not resolve competitor '{raw}' – skipping.")
            continue
        if tck.upper() == ticker_symbol.upper():
            continue
        if tck in competitor_histories:
            continue
        print("Resolved competitor:", raw, "→", tck)
        competitor_histories[tck] = download_price_history(tck, period="10y")

    competitor_close_df = build_competitor_close_df(competitor_histories)
    if competitor_close_df is not None:
        print("\nCompetitor close-price DataFrame shape:", competitor_close_df.shape)
    else:
        print("\nNo competitor price data available.")

# ======================================================
# NEW STEP 2a – Single-stock graphs (USD + ILS in hover)
# ======================================================
def plot_time_ranges_single_stock_usd_ils(hist, symbol, fx_hist=None):
    """
    Plot charts only for the tested stock for:
      1D, 1W, 1M, 3M, 6M, 1Y, 2Y, 3Y, 5Y, 10Y.

    - Y-axis: USD Close price.
    - Hover: full OHLCV + Close in ILS (using FX = ILS per 1 USD).
    """
    if hist is None or hist.empty:
        print("No history to plot for", symbol)
        return

    hist = hist.copy()
    end = hist.index.max()

    if fx_hist is None or fx_hist.empty:
        print("WARNING: No FX data (ILS=X). Hover will not show ILS prices.")
        fx_series = None
    else:
        fx_series = fx_hist["Close"].sort_index()

    def slice_range(days):
        if days is None:
            return hist
        start = end - pd.Timedelta(days=days)
        return hist.loc[hist.index >= start]

    ranges = {
        "1 Day": 1,
        "1 Week": 7,
        "1 Month": 30,
        "3 Months": 90,
        "6 Months": 180,
        "1 Year": 365,
        "2 Years": 365 * 2,
        "3 Years": 365 * 3,
        "5 Years": 365 * 5,
        "10 Years": None,
    }

    for label, days in ranges.items():
        sliced = slice_range(days)
        if sliced is None or sliced.empty:
            print(f"{label}: Not enough data to plot for {symbol}.")
            continue

        required_cols = ["Open", "High", "Low", "Close", "Volume"]
        if not all(col in sliced.columns for col in required_cols):
            print(f"{label}: Missing OHLCV columns in data for {symbol}.")
            continue

        close_series = sliced["Close"].dropna()
        if close_series.empty:
            print(f"{label}: No valid close prices for {symbol}.")
            continue

        # % change in USD over full period
        start_price = close_series.iloc[0]
        end_price = close_series.iloc[-1]
        if pd.notna(start_price) and pd.notna(end_price) and start_price != 0:
            pct_change_full = (end_price / start_price - 1) * 100
            pct_text_full = f"{pct_change_full:+.2f}%"
        else:
            pct_text_full = "N/A"

        title = f"{symbol} – Close Price (USD + ILS) – {label} (Δ {pct_text_full})"

        # FX aligned with stock dates
        if fx_series is not None:
            fx_aligned = fx_series.reindex(sliced.index, method="ffill")
            close_ils = sliced["Close"] * fx_aligned
        else:
            fx_aligned = pd.Series(index=sliced.index, data=float('nan'))
            close_ils = pd.Series(index=sliced.index, data=float('nan'))

        # customdata: Open, High, Low, Close, Volume, Close_ILS
        tmp_df = sliced.copy()
        tmp_df["Close_ILS"] = close_ils
        customdata = tmp_df[["Open", "High", "Low", "Close", "Volume", "Close_ILS"]].values

        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=sliced.index,
                y=sliced["Close"],
                mode="lines",
                name=f"{symbol} (USD)",
                customdata=customdata,
                hovertemplate=(
                    "Date: %{x}<br>"
                    "Open (USD): %{customdata[0]:.2f}<br>"
                    "High (USD): %{customdata[1]:.2f}<br>"
                    "Low (USD): %{customdata[2]:.2f}<br>"
                    "Close (USD): %{customdata[3]:.2f}<br>"
                    "Close (ILS): %{customdata[5]:.2f}<br>"
                    "Volume: %{customdata[4]:,.0f}<extra></extra>"
                ),
            )
        )

        fig.update_layout(
            title=title,
            xaxis_title="Date",
            yaxis_title="Price (USD)",
            hovermode="x unified",
            height=400,
            xaxis=dict(
                rangeslider=dict(visible=True),
                type="date"
            ),
            updatemenus=[
                dict(
                    type="buttons",
                    direction="left",
                    buttons=[
                        dict(
                            label="Reset View",
                            method="relayout",
                            args=[{"xaxis.autorange": True, "yaxis.autorange": True}],
                        )
                    ],
                    x=1,
                    xanchor="right",
                    y=1.15,
                    yanchor="top",
                    showactive=False,
                )
            ],
        )

        print(f"\n=== {label} – Single Stock (USD + ILS) ===")
        print("Hover: shows OHLCV in USD and Close in ILS.")
        fig.show()

# ======================================================
# STEP 2b – Comparison graphs: stock vs S&P 500 vs competitor index
# ======================================================
def plot_time_ranges_interactive(
    hist,
    symbol,
    benchmark_hist=None,
    benchmark_symbol="^GSPC",
    competitor_close_df=None,
):
    """
    Plot interactive Plotly charts for:
      1D, 1W, 1M, 3M, 6M, 1Y, 2Y, 3Y, 5Y, 10Y.

    Lines (all indexed to 100 at start of each period):
      - Company
      - Benchmark (e.g. S&P 500)
      - Virtual competitor index (avg of competitors' Close)
    """
    if hist is None or hist.empty:
        print("No history to plot for", symbol)
        return

    hist = hist.copy()
    end = hist.index.max()

    def slice_range_company(days):
        if days is None:
            return hist
        start = end - pd.Timedelta(days=days)
        return hist.loc[hist.index >= start]

    def slice_range_benchmark(days):
        if benchmark_hist is None or benchmark_hist.empty:
            return None
        bh = benchmark_hist
        if days is None:
            start = bh.index.min()
        else:
            start = end - pd.Timedelta(days=days)
        return bh.loc[(bh.index >= start) & (bh.index <= end)]

    def slice_range_competitors(days):
        if competitor_close_df is None or competitor_close_df.empty:
            return None
        df = competitor_close_df
        if days is None:
            start = df.index.min()
        else:
            start = end - pd.Timedelta(days=days)
        return df.loc[(df.index >= start) & (df.index <= end)]

    ranges = {
        "1 Day": 1,
        "1 Week": 7,
        "1 Month": 30,
        "3 Months": 90,
        "6 Months": 180,
        "1 Year": 365,
        "2 Years": 365 * 2,
        "3 Years": 365 * 3,
        "5 Years": 365 * 5,
        "10 Years": None,
    }

    for label, days in ranges.items():
        sliced = slice_range_company(days)
        if sliced is None or sliced.empty:
            print(f"{label}: Not enough data to plot for {symbol}.")
            continue

        required_cols = ["Open", "High", "Low", "Close", "Volume"]
        if not all(col in sliced.columns for col in required_cols):
            print(f"{label}: Missing OHLCV columns in data for {symbol}.")
            continue

        close_series = sliced["Close"].dropna()
        if close_series.empty:
            print(f"{label}: No valid close prices for {symbol}.")
            continue

        # Company % change over full period (indexed view)
        start_price = close_series.iloc[0]
        end_price = close_series.iloc[-1]
        if pd.notna(start_price) and pd.notna(end_price) and start_price != 0:
            pct_change_full = (end_price / start_price - 1) * 100
            pct_text_full = f"{pct_change_full:+.2f}%"
        else:
            pct_text_full = "N/A"

        title = f"{symbol} vs S&P 500 vs Competitor Index – {label} (Δ {pct_text_full})"

        comp_norm = (close_series / close_series.iloc[0]) * 100
        customdata = sliced[["Open", "High", "Low", "Close", "Volume"]].values

        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=close_series.index,
                y=comp_norm,
                mode="lines",
                name=f"{symbol} (Indexed)",
                customdata=customdata,
                hovertemplate=(
                    "Date: %{x}<br>"
                    "Open: %{customdata[0]:.2f}<br>"
                    "High: %{customdata[1]:.2f}<br>"
                    "Low: %{customdata[2]:.2f}<br>"
                    "Close: %{customdata[3]:.2f}<br>"
                    "Volume: %{customdata[4]:,.0f}<br>"
                    "Indexed: %{y:.2f}<extra></extra>"
                ),
            )
        )

        # Benchmark
        if benchmark_hist is not None and not benchmark_hist.empty:
            bench_sliced = slice_range_benchmark(days)
            if bench_sliced is not None and not bench_sliced.empty:
                bench_close = bench_sliced["Close"].dropna()
                if not bench_close.empty:
                    bench_norm = (bench_close / bench_close.iloc[0]) * 100
                    fig.add_trace(
                        go.Scatter(
                            x=bench_norm.index,
                            y=bench_norm.values,
                            mode="lines",
                            name="S&P 500 (Indexed)",
                            hovertemplate=(
                                "Date: %{x}<br>"
                                "Indexed: %{y:.2f}<extra></extra>"
                            ),
                        )
                    )

        # Competitor virtual index
        comp_basket = slice_range_competitors(days)
        if comp_basket is not None and not comp_basket.empty:
            basket_close = comp_basket.mean(axis=1, skipna=True).dropna()
            if not basket_close.empty:
                basket_norm = (basket_close / basket_close.iloc[0]) * 100
                fig.add_trace(
                    go.Scatter(
                        x=basket_norm.index,
                        y=basket_norm.values,
                        mode="lines",
                        name="Competitor Basket (Indexed)",
                        hovertemplate=(
                            "Date: %{x}<br>"
                            "Indexed basket: %{y:.2f}<extra></extra>"
                        ),
                    )
                )

        fig.update_layout(
            title=title,
            xaxis_title="Date",
            yaxis_title="Indexed Price (100 = start)",
            hovermode="x unified",
            height=400,
            xaxis=dict(
                rangeslider=dict(visible=True),
                type="date"
            ),
            updatemenus=[
                dict(
                    type="buttons",
                    direction="left",
                    buttons=[
                        dict(
                            label="Reset View",
                            method="relayout",
                            args=[{"xaxis.autorange": True, "yaxis.autorange": True}],
                        )
                    ],
                    x=1,
                    xanchor="right",
                    y=1.15,
                    yanchor="top",
                    showactive=False,
                )
            ],
        )

        print(f"\n=== {label} – Comparison (Stock vs S&P vs Competitor Index) ===")
        fig.show()

# ---- RUN GRAPHS (single-stock first, then comparisons) ----
if ticker_symbol is not None and hist_10y is not None:
    print("\n############################")
    print("# SINGLE-STOCK GRAPHS (USD + ILS)")
    print("############################")
    plot_time_ranges_single_stock_usd_ils(hist_10y, ticker_symbol, fx_hist=fx_10y)

    print("\n############################")
    print("# COMPARISON GRAPHS (STOCK vs S&P vs COMP INDEX)")
    print("############################")
    plot_time_ranges_interactive(
        hist_10y,
        ticker_symbol,
        benchmark_hist=spx_10y,
        benchmark_symbol="^GSPC",
        competitor_close_df=competitor_close_df,
    )
else:
    print("Skipping graphs – no main history.")

# =====================================================
# STEP 3 – SEC filings fetch & download (2 x 10-K, 4 x 10-Q)
# =====================================================
def get_cik_for_ticker(ticker: str):
    url = "https://www.sec.gov/files/company_tickers.json"
    try:
        resp = requests.get(url, headers=SEC_HEADERS, timeout=30)
        resp.raise_for_status()
    except Exception as e:
        print("Error fetching SEC ticker list:", e)
        return None

    data = resp.json()
    t_upper = ticker.upper()

    for entry in data.values():
        if entry.get("ticker", "").upper() == t_upper:
            cik_int = int(entry["cik_str"])
            return f"{cik_int:010d}"

    print("CIK not found for ticker:", ticker)
    return None

def get_company_submissions(cik: str):
    url = f"https://data.sec.gov/submissions/CIK{cik}.json"
    try:
        resp = requests.get(url, headers=SEC_HEADERS, timeout=30)
        resp.raise_for_status()
        return resp.json()
    except Exception as e:
        print("Error fetching company submissions:", e)
        return None

def pick_filings(submissions: dict, form_type: str, limit: int):
    if submissions is None:
        return []

    recent = submissions.get("filings", {}).get("recent", {})
    forms = recent.get("form", [])
    accessions = recent.get("accessionNumber", [])
    dates = recent.get("filingDate", [])
    docs = recent.get("primaryDocument", [])

    rows = []
    for f, acc, d, doc in zip(forms, accessions, dates, docs):
        if f == form_type:
            rows.append({
                "form": f,
                "accession": acc,
                "date": d,
                "doc": doc,
            })

    return rows[:limit]

def build_filing_url(cik: str, accession: str, primary_doc: str):
    cik_nolead = cik.lstrip("0")
    acc_nodash = accession.replace("-", "")
    return f"https://www.sec.gov/Archives/edgar/data/{cik_nolead}/{acc_nodash}/{primary_doc}"

def download_filings(cik: str, filings: list, dest_dir: str):
    os.makedirs(dest_dir, exist_ok=True)
    downloaded = []

    for f in filings:
        url = build_filing_url(cik, f["accession"], f["doc"])
        filename = f"{f['form']}_{f['accession'].replace('-', '')}_{f['doc'].replace('/', '_')}"
        path = os.path.join(dest_dir, filename)

        print("Downloading:", url)
        try:
            r = requests.get(url, headers=SEC_HEADERS, timeout=60)
            r.raise_for_status()
            with open(path, "wb") as fp:
                fp.write(r.content)

            downloaded.append({
                "form": f["form"],
                "date": f["date"],
                "accession": f["accession"],
                "url": url,
                "path": path,
            })
        except Exception as e:
            print("Error downloading filing:", url, "-", e)

    return downloaded

# ---- Run SEC step for current ticker ----
if ticker_symbol is None:
    print("No ticker symbol – cannot fetch SEC filings.")
else:
    cik = get_cik_for_ticker(ticker_symbol)
    print("Ticker:", ticker_symbol, "| CIK:", cik)

    if cik is not None:
        submissions = get_company_submissions(cik)

        latest_10k = pick_filings(submissions, "10-K", limit=2)
        latest_10q = pick_filings(submissions, "10-Q", limit=4)

        print("\nLatest 10-K metadata:")
        display(pd.DataFrame(latest_10k))

        print("\nLatest 10-Q metadata:")
        display(pd.DataFrame(latest_10q))

        downloaded_10k = download_filings(cik, latest_10k, dest_dir="sec_10k")
        downloaded_10q = download_filings(cik, latest_10q, dest_dir="sec_10q")

        print("\nSaved 10-K files:")
        for d in downloaded_10k:
            print(d["date"], d["form"], "→", d["path"])

        print("\nSaved 10-Q files:")
        for d in downloaded_10q:
            print(d["date"], d["form"], "→", d["path"])
    else:
        latest_10k = []
        latest_10q = []
        downloaded_10k = []
        downloaded_10q = []
        print("CIK not found – skipping SEC download.")

# =====================================================
# STEP 4 – Parse 10-K / 10-Q HTML & extract sections
# =====================================================
def load_filing_html(path: str) -> str:
    with open(path, "rb") as f:
        raw = f.read()

    for enc in ("utf-8", "latin-1", "cp1252"):
        try:
            return raw.decode(enc, errors="ignore")
        except Exception:
            continue
    return raw.decode("utf-8", errors="ignore")

def html_to_clean_text(html: str) -> str:
    soup = BeautifulSoup(html, "lxml")
    for tag in soup(["script", "style"]):
        tag.decompose()

    text = soup.get_text("\n")
    text = re.sub(r"\n\s*\n+", "\n\n", text)
    return text.strip()

def extract_section_regex(text: str, start_pattern: str, end_patterns: list) -> str:
    flags = re.IGNORECASE | re.DOTALL

    m_start = re.search(start_pattern, text, flags)
    if not m_start:
        return None

    start_idx = m_start.end()
    end_idx = len(text)

    for ep in end_patterns:
        m_end = re.search(ep, text[start_idx:], flags)
        if m_end:
            candidate = start_idx + m_end.start()
            if candidate < end_idx:
                end_idx = candidate

    section = text[start_idx:end_idx].strip()
    return section if section else None

def extract_10k_sections(text: str) -> dict:
    sections = {}

    sections["Business"] = extract_section_regex(
        text,
        start_pattern=r"ITEM\s+1[\.\-–—]?\s*BUSINESS",
        end_patterns=[
            r"ITEM\s+1A[\.\-–—]?\s*RISK\s+FACTORS",
            r"ITEM\s+1B[\.\-–—]?",
            r"ITEM\s+2[\.\-–—]?",
        ],
    )

    sections["Risk Factors"] = extract_section_regex(
        text,
        start_pattern=r"ITEM\s+1A[\.\-–—]?\s*RISK\s+FACTORS",
        end_patterns=[
            r"ITEM\s+1B[\.\-–—]?",
            r"ITEM\s+2[\.\-–—]?",
        ],
    )

    sections["MD&A"] = extract_section_regex(
        text,
        start_pattern=r"ITEM\s+7[\.\-–—]?\s*MANAGEMENT[’']?S\s+DISCUSSION",
        end_patterns=[
            r"ITEM\s+7A[\.\-–—]?",
            r"ITEM\s+8[\.\-–—]?",
        ],
    )

    return sections

def extract_10q_sections(text: str) -> dict:
    sections = {}

    sections["Risk Factors"] = extract_section_regex(
        text,
        start_pattern=r"ITEM\s+1A[\.\-–—]?\s*RISK\s+FACTORS",
        end_patterns=[
            r"ITEM\s+2[\.\-–—]?\s*MANAGEMENT[’']?S\s+DISCUSSION",
            r"ITEM\s+2[\.\-–—]?",
        ],
    )

    sections["MD&A"] = extract_section_regex(
        text,
        start_pattern=r"ITEM\s+2[\.\-–—]?\s*MANAGEMENT[’']?S\s+DISCUSSION",
        end_patterns=[
            r"ITEM\s+3[\.\-–—]?",
            r"ITEM\s+4[\.\-–—]?",
        ],
    )

    return sections

# =====================================================
# STEP 5 – Basic financial metrics from yfinance
# =====================================================
def get_basic_metrics(ticker: str) -> dict:
    yf_ticker = yf.Ticker(ticker)

    info = yf_ticker.info or {}
    financials = yf_ticker.financials
    balance = yf_ticker.balance_sheet
    cashflow = yf_ticker.cashflow

    metrics = {}
    metrics["ticker"] = ticker
    metrics["longName"] = info.get("longName")
    metrics["sector"] = info.get("sector")
    metrics["industry"] = info.get("industry")
    metrics["marketCap"] = info.get("marketCap")

    # Income statement
    try:
        revenue = float(financials.loc["Total Revenue"].iloc[0])
        op_income = float(financials.loc["Operating Income"].iloc[0])
        net_income = float(financials.loc["Net Income"].iloc[0])
    except Exception:
        revenue = op_income = net_income = None

    metrics["revenue"] = revenue
    metrics["operatingIncome"] = op_income
    metrics["netIncome"] = net_income

    if revenue and revenue != 0:
        metrics["operatingMargin"] = op_income / revenue if op_income is not None else None
        metrics["netMargin"] = net_income / revenue if net_income is not None else None
    else:
        metrics["operatingMargin"] = None
        metrics["netMargin"] = None

    # Balance sheet
    try:
        total_assets = float(balance.loc["Total Assets"].iloc[0])
    except Exception:
        total_assets = None
    try:
        total_equity = float(balance.loc["Total Stockholder Equity"].iloc[0])
    except Exception:
        total_equity = None
    try:
        total_debt = float(balance.loc["Total Debt"].iloc[0])
    except Exception:
        total_debt = None
    try:
        cash_row = balance.loc["Cash And Cash Equivalents"] if "Cash And Cash Equivalents" in balance.index else balance.loc["Cash"]
        cash = float(cash_row.iloc[0])
    except Exception:
        cash = None

    metrics["totalAssets"] = total_assets
    metrics["totalEquity"] = total_equity
    metrics["totalDebt"] = total_debt
    metrics["cash"] = cash

    if total_equity and total_equity != 0 and net_income is not None:
        metrics["ROE"] = net_income / total_equity
    else:
        metrics["ROE"] = None

    if total_equity and total_equity != 0 and total_debt is not None:
        metrics["debtToEquity"] = total_debt / total_equity
    else:
        metrics["debtToEquity"] = None

    # Cash flow
    try:
        op_cf = float(cashflow.loc["Total Cash From Operating Activities"].iloc[0])
    except Exception:
        op_cf = None
    try:
        capex = float(cashflow.loc["Capital Expenditures"].iloc[0])
    except Exception:
        capex = None

    metrics["operatingCashFlow"] = op_cf
    metrics["capex"] = capex
    if op_cf is not None and capex is not None:
        metrics["freeCashFlow"] = op_cf + capex  # capex is negative
    else:
        metrics["freeCashFlow"] = None

    return metrics

# =====================================================
# STEP 6 – Memo builder
# =====================================================
def shorten(text, max_chars: int = 1200) -> str:
    if not text:
        return "[Not found / empty]"
    text = text.strip()
    if len(text) <= max_chars:
        return text
    return text[:max_chars] + "\n\n...[truncated for memo draft]..."

def build_investment_memo(
    company_name: str,
    ticker: str,
    metrics: dict,
    sec_10k_meta: dict,
    sections_10k: dict,
    sec_10q_meta: dict,
    sections_10q: dict
) -> str:
    name = metrics.get("longName") or company_name
    sector = metrics.get("sector")
    industry = metrics.get("industry")
    mcap = metrics.get("marketCap")

    def fmt_pct(x):
        return f"{x*100:,.1f}%" if x is not None else "N/A"

    def fmt_money(x):
        return f"${x:,.0f}" if x is not None else "N/A"

    k_date = sec_10k_meta["date"] if sec_10k_meta else "N/A"
    k_url = sec_10k_meta["url"] if sec_10k_meta else "N/A"
    q_date = sec_10q_meta["date"] if sec_10q_meta else "N/A"
    q_url = sec_10q_meta["url"] if sec_10q_meta else "N/A"

    memo = []

    memo.append(f"# Investment Memo – {name} ({ticker})\n")

    memo.append("## 1. Snapshot")
    memo.append(f"- **Company:** {name}")
    memo.append(f"- **Ticker:** {ticker}")
    memo.append(f"- **Sector / Industry:** {sector} / {industry}")
    memo.append(f"- **Market Cap (approx):** {fmt_money(mcap)}")
    memo.append(f"- **Latest 10-K:** {k_date} – [SEC link]({k_url})")
    memo.append(f"- **Latest 10-Q:** {q_date} – [SEC link]({q_url})\n")

    memo.append("## 2. Business & Moat (from 10-K Item 1 + your interpretation)")
    memo.append("**Auto-extracted BUSINESS section (truncated):**\n")
    memo.append("```text\n" + shorten(sections_10k.get("Business") if sections_10k else None, 1500) + "\n```")
    memo.append(
        "> **Your notes:** Describe the business model, main products, customer segments, and moat "
        "(switching costs, network effects, brand, regulation)."
    )

    memo.append("\n## 3. Financial Snapshot & Unit Economics")
    memo.append("- **Revenue (last FY):** " + fmt_money(metrics.get("revenue")))
    memo.append("- **Operating Income (last FY):** " + fmt_money(metrics.get("operatingIncome")))
    memo.append("- **Net Income (last FY):** " + fmt_money(metrics.get("netIncome")))
    memo.append("- **Operating Margin:** " + fmt_pct(metrics.get("operatingMargin")))
    memo.append("- **Net Margin:** " + fmt_pct(metrics.get("netMargin")))
    memo.append("- **ROE:** " + fmt_pct(metrics.get("ROE")))
    memo.append("- **Debt / Equity:** " + (f"{metrics.get('debtToEquity'):,.2f}" if metrics.get("debtToEquity") is not None else "N/A"))
    memo.append("- **Free Cash Flow (last FY):** " + fmt_money(metrics.get("freeCashFlow")))
    memo.append(
        "\n> **Your notes:** Comment on margin quality, capital intensity, and ability to "
        "generate cash relative to revenue and invested capital."
    )

    memo.append("\n## 4. Risks (from 10-K Item 1A and 10-Q Item 1A)")
    memo.append("**Auto-extracted 10-K RISK FACTORS (truncated):**\n")
    memo.append("```text\n" + shorten(sections_10k.get("Risk Factors") if sections_10k else None, 1000) + "\n```")

    memo.append("**Auto-extracted 10-Q RISK FACTORS (truncated, if any):**\n")
    memo.append("```text\n" + shorten(sections_10q.get("Risk Factors") if sections_10q else None, 800) + "\n```")
    memo.append(
        "> **Your notes:** Identify the *few* critical risks that matter: "
        "customer concentration, regulation, tech disruption, macro, leverage, key suppliers."
    )

    memo.append("\n## 5. Management, Strategy & MD&A Highlights")
    memo.append("**Auto-extracted 10-K MD&A (truncated):**\n")
    memo.append("```text\n" + shorten(sections_10k.get("MD&A") if sections_10k else None, 1500) + "\n```")

    memo.append("**Auto-extracted 10-Q MD&A (truncated, latest quarter):**\n")
    memo.append("```text\n" + shorten(sections_10q.get("MD&A") if sections_10q else None, 1000) + "\n```")
    memo.append(
        "> **Your notes:** How does management talk about capital allocation, competition, "
        "macro environment, and long-term strategy? Honest and specific, or vague and promotional?"
    )

    memo.append("\n## 6. Thesis, Valuation & Decision (for you to fill)")
    memo.append("- **Qualitative thesis:**")
    memo.append("- **Key drivers (3–5 bullets):**")
    memo.append("- **Base-case valuation view (range):**")
    memo.append("- **Upside / downside scenarios:**")
    memo.append("- **Position sizing / risk controls:**")

    memo.append("\n---\n*This memo is auto-generated from the latest 10-K / 10-Q and yfinance data. "
                "Edit and refine before using for real decisions.*\n")

    return "\n".join(memo)

# =====================================================
# STEP 7 – Driver: create memo for current company
# =====================================================
def create_memo_for_current_company():
    if ticker_symbol is None:
        print("No ticker symbol – cannot create memo.")
        return

    if "downloaded_10k" not in globals() or "downloaded_10q" not in globals():
        print("No downloaded filings – run SEC download step first.")
        return

    latest_10k_meta = downloaded_10k[0] if downloaded_10k else None
    latest_10q_meta = downloaded_10q[0] if downloaded_10q else None

    if latest_10k_meta:
        print("Using 10-K:", latest_10k_meta["date"], latest_10k_meta["path"])
        k_html = load_filing_html(latest_10k_meta["path"])
        k_text = html_to_clean_text(k_html)
        sections_10k = extract_10k_sections(k_text)
    else:
        sections_10k = None

    if latest_10q_meta:
        print("Using 10-Q:", latest_10q_meta["date"], latest_10q_meta["path"])
        q_html = load_filing_html(latest_10q_meta["path"])
        q_text = html_to_clean_text(q_html)
        sections_10q = extract_10q_sections(q_text)
    else:
        sections_10q = None

    if latest_10k_meta and not latest_10k_meta.get("url"):
        latest_10k_meta["url"] = build_filing_url(
            cik, latest_10k_meta["accession"], os.path.basename(latest_10k_meta["path"])
        )
    if latest_10q_meta and not latest_10q_meta.get("url"):
        latest_10q_meta["url"] = build_filing_url(
            cik, latest_10q_meta["accession"], os.path.basename(latest_10q_meta["path"])
        )

    print("Fetching basic financial metrics from yfinance...")
    metrics = get_basic_metrics(ticker_symbol)

    print("Building memo...")
    memo_md = build_investment_memo(
        company_name=company_name,
        ticker=ticker_symbol,
        metrics=metrics,
        sec_10k_meta=latest_10k_meta,
        sections_10k=sections_10k,
        sec_10q_meta=latest_10q_meta,
        sections_10q=sections_10q,
    )

    print("\n" + "="*80)
    print("INVESTMENT MEMO DRAFT (Markdown)")
    print("="*80 + "\n")
    print(memo_md)

    filename = f"investment_memo_{ticker_symbol}.md"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(memo_md)
    print(f"\nSaved memo to: {filename}")

# ---- run memo builder ----
create_memo_for_current_company()


Found symbol: NOW
Resolved competitor: Atlassian → TEAM
Resolved competitor: Salesforce → CRM
Resolved competitor: Workday → WDAY
Resolved competitor: Microsoft → MSFT
Resolved competitor: Adobe → ADOBE


ERROR:yfinance:HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: ADOBE"}}}
ERROR:yfinance:$ADOBE: possibly delisted; no price data found  (period=10y) (Yahoo error = "No data found, symbol may be delisted")


No history returned for ADOBE
Resolved competitor: Freshworks → FRSH

Competitor close-price DataFrame shape: (2515, 5)

############################
# SINGLE-STOCK GRAPHS (USD + ILS)
############################

=== 1 Day – Single Stock (USD + ILS) ===
Hover: shows OHLCV in USD and Close in ILS.



=== 1 Week – Single Stock (USD + ILS) ===
Hover: shows OHLCV in USD and Close in ILS.



=== 1 Month – Single Stock (USD + ILS) ===
Hover: shows OHLCV in USD and Close in ILS.



=== 3 Months – Single Stock (USD + ILS) ===
Hover: shows OHLCV in USD and Close in ILS.



=== 6 Months – Single Stock (USD + ILS) ===
Hover: shows OHLCV in USD and Close in ILS.



=== 1 Year – Single Stock (USD + ILS) ===
Hover: shows OHLCV in USD and Close in ILS.



=== 2 Years – Single Stock (USD + ILS) ===
Hover: shows OHLCV in USD and Close in ILS.



=== 3 Years – Single Stock (USD + ILS) ===
Hover: shows OHLCV in USD and Close in ILS.



=== 5 Years – Single Stock (USD + ILS) ===
Hover: shows OHLCV in USD and Close in ILS.



=== 10 Years – Single Stock (USD + ILS) ===
Hover: shows OHLCV in USD and Close in ILS.



############################
# COMPARISON GRAPHS (STOCK vs S&P vs COMP INDEX)
############################

=== 1 Day – Comparison (Stock vs S&P vs Competitor Index) ===



=== 1 Week – Comparison (Stock vs S&P vs Competitor Index) ===



=== 1 Month – Comparison (Stock vs S&P vs Competitor Index) ===



=== 3 Months – Comparison (Stock vs S&P vs Competitor Index) ===



=== 6 Months – Comparison (Stock vs S&P vs Competitor Index) ===



=== 1 Year – Comparison (Stock vs S&P vs Competitor Index) ===



=== 2 Years – Comparison (Stock vs S&P vs Competitor Index) ===



=== 3 Years – Comparison (Stock vs S&P vs Competitor Index) ===



=== 5 Years – Comparison (Stock vs S&P vs Competitor Index) ===



=== 10 Years – Comparison (Stock vs S&P vs Competitor Index) ===


Ticker: NOW | CIK: 0001373715

Latest 10-K metadata:


Unnamed: 0,form,accession,date,doc
0,10-K,0001373715-25-000010,2025-01-30,now-20241231.htm
1,10-K,0001373715-24-000030,2024-01-25,now-20231231.htm



Latest 10-Q metadata:


Unnamed: 0,form,accession,date,doc
0,10-Q,0001373715-25-000309,2025-10-30,now-20250930.htm
1,10-Q,0001373715-25-000276,2025-07-24,now-20250630.htm
2,10-Q,0001373715-25-000126,2025-04-23,now-20250331.htm
3,10-Q,0001373715-24-000344,2024-10-24,now-20240930.htm


Downloading: https://www.sec.gov/Archives/edgar/data/1373715/000137371525000010/now-20241231.htm
Downloading: https://www.sec.gov/Archives/edgar/data/1373715/000137371524000030/now-20231231.htm
Downloading: https://www.sec.gov/Archives/edgar/data/1373715/000137371525000309/now-20250930.htm
Downloading: https://www.sec.gov/Archives/edgar/data/1373715/000137371525000276/now-20250630.htm
Downloading: https://www.sec.gov/Archives/edgar/data/1373715/000137371525000126/now-20250331.htm
Downloading: https://www.sec.gov/Archives/edgar/data/1373715/000137371524000344/now-20240930.htm

Saved 10-K files:
2025-01-30 10-K → sec_10k/10-K_000137371525000010_now-20241231.htm
2024-01-25 10-K → sec_10k/10-K_000137371524000030_now-20231231.htm

Saved 10-Q files:
2025-10-30 10-Q → sec_10q/10-Q_000137371525000309_now-20250930.htm
2025-07-24 10-Q → sec_10q/10-Q_000137371525000276_now-20250630.htm
2025-04-23 10-Q → sec_10q/10-Q_000137371525000126_now-20250331.htm
2024-10-24 10-Q → sec_10q/10-Q_00013737152400


It looks like you're using an HTML parser to parse an XML document.

Assuming this really is an XML document, what you're doing might work, but you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the Python package 'lxml' installed, and pass the keyword argument `features="xml"` into the BeautifulSoup constructor.







Using 10-Q: 2025-10-30 sec_10q/10-Q_000137371525000309_now-20250930.htm
Fetching basic financial metrics from yfinance...
Building memo...

INVESTMENT MEMO DRAFT (Markdown)

# Investment Memo – ServiceNow, Inc. (NOW)

## 1. Snapshot
- **Company:** ServiceNow, Inc.
- **Ticker:** NOW
- **Sector / Industry:** Technology / Software - Application
- **Market Cap (approx):** $169,316,270,080
- **Latest 10-K:** 2025-01-30 – [SEC link](https://www.sec.gov/Archives/edgar/data/1373715/000137371525000010/now-20241231.htm)
- **Latest 10-Q:** 2025-10-30 – [SEC link](https://www.sec.gov/Archives/edgar/data/1373715/000137371525000309/now-20250930.htm)

## 2. Business & Moat (from 10-K Item 1 + your interpretation)
**Auto-extracted BUSINESS section (truncated):**

```text
1
```
> **Your notes:** Describe the business model, main products, customer segments, and moat (switching costs, network effects, brand, regulation).

## 3. Financial Snapshot & Unit Economics
- **Revenue (last FY):** $10,984,000,000