In [1]:
# Inspect FMP raw folders and compare file coverage across statements
# Works with standard library only.

from pathlib import Path
import json
from collections import defaultdict

# ----- CONFIG: point PROJECT at your own checkout if it lives elsewhere -----
PROJECT = Path("/Users/martingobbo/stock-dashboard")

# Root of the raw FMP data; each dataset has its own sub-folder below it.
RAW = PROJECT.joinpath("app", "data", "raw", "fmp")

# Highlights export that the on-disk symbols are compared against.
PUBLIC_EXPORT = PROJECT.joinpath("public", "data", "fundamentals_highlights.json")

# Dataset name -> folder. Every folder is named after its dataset, so the
# mapping is derived from the names (insertion order is the report order).
DIRS = {
    dataset: RAW / dataset
    for dataset in ("income_statement", "balance_sheet", "cash_flow", "ratios")
}

def list_jsonl_stems(folder: Path):
    """List the stems of the ``*.jsonl`` files directly inside *folder*, sorted.

    Only the trailing ``.jsonl`` suffix is dropped, so a dotted symbol such as
    ``BRK.B`` (file ``BRK.B.jsonl``) keeps its full name. A folder that does
    not exist yields an empty list.
    """
    stems = []
    if folder.exists():
        stems = [entry.stem for entry in folder.glob("*.jsonl")]
        stems.sort()
    return stems

def show_folder_contents():
    """Print the ``*.jsonl`` stems found in every folder listed in ``DIRS``.

    For each entry this prints the name and path, the number of files, and
    the comma-separated stems (or ``(none)`` when there are no stems).

    Returns:
        dict mapping each ``DIRS`` key to the set of stems found in its folder.
    """
    print("=== Folder contents (exact stems) ===")
    found = {}
    for label in DIRS:
        directory = DIRS[label]
        names = list_jsonl_stems(directory)
        found[label] = set(names)
        print(f"\n[{label}]  {directory}")
        print(f"Total files: {len(names)}")
        # Full listing; swap for a slice if the output gets too long.
        print(", ".join(names) if names else "(none)")
    return found

def summarize_coverage(coverage):
    """Print a coverage report and return the union of all symbols.

    Args:
        coverage: mapping of folder name -> set of symbols found in it.

    The report contains, in order:
      * the number of distinct symbols across all folders;
      * for every folder lacking symbols that exist in some other folder,
        the count plus the first 50 of them in sorted order (followed by
        " ..." when there are more);
      * one line per symbol naming the folders that contain it.

    Returns:
        The set of symbols that appear in at least one folder.
    """
    print("\n=== Summary and diffs ===")
    universe = set()
    for symbols in coverage.values():
        universe.update(symbols)
    print(f"Union of symbols across all folders: {len(universe)}")

    # Each folder is diffed against the union, not against each other folder.
    for folder, symbols in coverage.items():
        absent = universe - symbols
        if not absent:
            continue
        print(f"\nSymbols missing from [{folder}] (present elsewhere): {len(absent)}")
        # Only a preview of at most 50 symbols is shown.
        shown = sorted(absent)[:50]
        tail = " ..." if len(absent) > 50 else ""
        print(", ".join(shown) + tail)

    print("\n=== Per-symbol presence map (where each symbol appears) ===")
    # Prints every symbol (comment out if too long).
    for symbol in sorted(universe):
        holders = sorted(
            folder for folder, symbols in coverage.items() if symbol in symbols
        )
        print(f"{symbol}: {', '.join(holders)}")

    return universe

def _join_preview(symbols, limit=100):
    """Join at most *limit* symbols with ", ", adding " ..." when truncated."""
    return ", ".join(symbols[:limit]) + (" ..." if len(symbols) > limit else "")


def compare_to_highlights(all_syms, path=None):
    """Compare the symbols found on disk with those in the highlights export.

    Args:
        all_syms: set of symbols found in the raw folders.
        path: optional location of the highlights JSON file; when omitted,
            ``PUBLIC_EXPORT`` is used.

    The file is expected to hold a JSON list of records shaped like
    ``{"symbol": "<TICKER>", "FY": [...], "Q": [...]}``. Entries that are not
    dicts, or whose "symbol" is missing, empty or not a string, are ignored.

    The result is printed; nothing is returned. A missing, unreadable or
    malformed file is reported on stdout instead of raising.
    """
    export = PUBLIC_EXPORT if path is None else Path(path)

    print("\n=== Compare against fundamentals_highlights.json (if present) ===")
    if not export.exists():
        print(f"(skip) {export} not found.")
        return
    try:
        # Read as UTF-8 explicitly rather than relying on the locale default.
        data = json.loads(export.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"(error) Could not read JSON: {e}")
        return

    # Anything but a list cannot be scanned for records: a scalar would raise
    # on iteration and a dict would silently yield zero symbols.
    if not isinstance(data, list):
        print(f"(error) Expected a JSON list of records, got {type(data).__name__}.")
        return

    # Keep string symbols only, so the set build and the sorts below cannot
    # fail on unhashable or mixed-type values.
    highlight_syms = set()
    for rec in data:
        if not isinstance(rec, dict):
            continue
        sym = rec.get("symbol")
        if isinstance(sym, str) and sym:
            highlight_syms.add(sym)
    print(f"Symbols in highlights: {len(highlight_syms)}")

    only_on_disk = sorted(all_syms - highlight_syms)
    only_in_highlights = sorted(highlight_syms - all_syms)

    print(f"\nOn disk but missing in highlights: {len(only_on_disk)}")
    if only_on_disk:
        print(_join_preview(only_on_disk))

    print(f"\nIn highlights but not found on disk: {len(only_in_highlights)}")
    if only_in_highlights:
        print(_join_preview(only_in_highlights))

# ----- Run the checks -----
# Each step feeds the next: the per-folder stem sets (cov) go into the summary,
# and the union of symbols it returns (universe) is compared against the
# highlights export.
cov = show_folder_contents()
universe = summarize_coverage(cov)
compare_to_highlights(universe)


=== Folder contents (exact stems) ===

[income_statement]  /Users/martingobbo/stock-dashboard/app/data/raw/fmp/income_statement
Total files: 35
A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, ADI, ADM, ADP, ADSK, AEE, AEP, AES, AFL, AIG, AIZ, AJG, AKAM, ALB, ALGN, ALL, ALLE, AMAT, AMCR, AMD, AME, AMGN, AMP, AMT, AMZN, ANET, AON, AOS

[balance_sheet]  /Users/martingobbo/stock-dashboard/app/data/raw/fmp/balance_sheet
Total files: 34
A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, ADI, ADM, ADP, ADSK, AEE, AEP, AES, AFL, AIG, AIZ, AJG, AKAM, ALB, ALGN, ALL, ALLE, AMAT, AMCR, AMD, AME, AMGN, AMP, AMT, AMZN, ANET, AON

[cash_flow]  /Users/martingobbo/stock-dashboard/app/data/raw/fmp/cash_flow
Total files: 34
A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, ADI, ADM, ADP, ADSK, AEE, AEP, AES, AFL, AIG, AIZ, AJG, AKAM, ALB, ALGN, ALL, ALLE, AMAT, AMCR, AMD, AME, AMGN, AMP, AMT, AMZN, ANET, AON

[ratios]  /Users/martingobbo/stock-dashboard/app/data/raw/fmp/ratios
Total files: 34
A, AAPL, ABBV, ABNB, ABT, ACGL