# Operator 1 -- Company Analysis Pipeline

**Guarantees:**
- No look-ahead bias: all statement data aligned via as-of logic
- Ratio safety: all divisions guarded against zero/null/tiny denominators
- Missing data tracked: every feature has an `is_missing_*` companion flag
- Observed values never overwritten by estimation
- Secrets never printed or logged
- Idempotent cells: cached data reused unless `FORCE_REBUILD=True`

**Pipeline Phases:**
1. Foundation (secrets, config, HTTP, clients)
2. Ingestion (verification, macro, entity discovery, extraction)
3. Cache & Features (daily cache, derived variables, linked aggregates)
4. Analysis (survival mode, vanity, hierarchy weights, data quality)
5. Estimation (deterministic identity fill + regime-weighted imputation)
6. Temporal Modeling (regimes, forecasting, Monte Carlo, predictions)
7. Report (profile JSON, Gemini narrative, charts)

In [None]:
# Cell 2: Inputs
# ============================================================
# The two identifiers below are the only values that must be supplied by
# hand. Company name and country are intentionally NOT set here: they are
# meant to come from the Eulerpool profile, so do not hardcode them.

fmp_symbol = "AAPL"             # FMP ticker for OHLCV
target_isin = "US0378331005"    # Example: Apple Inc.

# Echo both inputs (one per line) so the run output records what was used.
print(
    f"Target ISIN : {target_isin}",
    f"FMP Symbol  : {fmp_symbol}",
    sep="\n",
)

In [None]:
# Cell 3: Load secrets
# ============================================================
from operator1.secrets_loader import get_secret

# One lookup per provider; each result is kept in its own module-level name.
EULERPOOL_API_KEY = get_secret("EULERPOOL_API_KEY")
EOD_API_KEY       = get_secret("EOD_API_KEY")
FMP_API_KEY       = get_secret("FMP_API_KEY")
GEMINI_API_KEY    = get_secret("GEMINI_API_KEY")

# Report only whether each key is present -- the values themselves must
# never reach the output.
_loaded_keys = {
    "EULERPOOL": EULERPOOL_API_KEY,
    "EOD": EOD_API_KEY,
    "FMP": FMP_API_KEY,
    "GEMINI": GEMINI_API_KEY,
}
for provider, secret in _loaded_keys.items():
    # Any falsy value (None, empty string, ...) is reported as MISSING.
    print(f"{provider}_API_KEY: {'OK' if secret else 'MISSING'}")

In [None]:
# Cell 4: Global config
# ============================================================
import os

from operator1.config_loader import get_global_config, load_config
from operator1.constants import DATE_START, DATE_END, CACHE_DIR

cfg = get_global_config()

# Rebuild switch read by the later cells; off unless the config sets it.
FORCE_REBUILD = cfg.get("FORCE_REBUILD", False)

# Summarise the effective settings. The fallbacks shown (30 s, 3 retries)
# are only what this cell displays when the config has no such entry.
timeout_s = cfg.get("timeout_s", 30)
max_retries = cfg.get("max_retries", 3)
print(f"Date range  : {DATE_START} to {DATE_END}")
print(f"Timeout     : {timeout_s}s")
print(f"Max retries : {max_retries}")
print(f"FORCE_REBUILD: {FORCE_REBUILD}")

# Create the cache directory if needed; an existing one is not an error.
os.makedirs(CACHE_DIR, exist_ok=True)

In [None]:
# Cell 5: HTTP utilities
# ============================================================
# Bring the shared HTTP helpers into the notebook namespace.
from operator1.http_utils import cached_get, get_request_log

# Confirm the import and show which cache directory is configured.
print(
    "HTTP utilities loaded.",
    f"Cache directory: {CACHE_DIR}",
    sep="\n",
)

In [None]:
# Cell 6: Eulerpool client
# ============================================================
# The equity client is produced by a factory that receives the whole
# secrets object returned by load_secrets().
from operator1.secrets_loader import load_secrets
from operator1.clients.equity_provider import create_equity_provider

secrets = load_secrets()
equity_client = create_equity_provider(secrets)

print("Equity data client initialised.")


In [None]:
# Cell 7: FMP client
# ============================================================
# Construct the FMP client with the key loaded in the secrets cell.
from operator1.clients.fmp import FMPClient

fmp = FMPClient(
    api_key=FMP_API_KEY,
)

print("FMP client initialised.")

In [None]:
# Cell 8: Identifier verification + country extraction
# ============================================================
# Intended to stop the run early when the ISIN or FMP symbol is invalid.
# There is no try/except here, so any exception raised by the verification
# step propagates and halts the cell.
from operator1.steps.verify_identifiers import verify_identifiers

identifier_args = {
    "target_isin": target_isin,
    "fmp_symbol": fmp_symbol,
    "eulerpool_client": equity_client,
    "fmp_client": fmp,
}
verified = verify_identifiers(**identifier_args)

# Show the fields of the verified record that later cells rely on.
print(f"Verified: {verified.name} ({verified.ticker})")
print(f"Country : {verified.country}")
print(f"Sector  : {verified.sector}")
print(f"Exchange: {verified.exchange}")

In [None]:
# Cell 9: World Bank macro module
# ============================================================
from operator1.clients.world_bank import WorldBankClient
from operator1.steps.macro_mapping import fetch_macro_data

wb_client = WorldBankClient()

# The verified country is passed as the `country_iso2` argument.
# NOTE(review): presumably an ISO-2 code -- confirm against verify_identifiers.
macro_data = fetch_macro_data(
    country_iso2=verified.country,
    wb_client=wb_client,
    force_rebuild=FORCE_REBUILD,
)

indicator_count = len(macro_data)
print(f"Macro variables fetched: {indicator_count} indicators")

In [None]:
# Cell 10: Linked entity discovery (Gemini)
# ============================================================
from operator1.steps.entity_discovery import discover_linked_entities
from operator1.clients.gemini import GeminiClient

# The client is created outside the try block: a constructor error stops
# the cell, while a discovery error is only reported.
gemini = GeminiClient(api_key=GEMINI_API_KEY)

try:
    linked_entities = discover_linked_entities(
        verified_target=verified,
        gemini_client=gemini,
        eulerpool_client=equity_client,
        force_rebuild=FORCE_REBUILD,
    )
    # The result is used as a mapping of group -> collection of entities.
    total_entities = sum(map(len, linked_entities.values()))
    print(f"Linked entities discovered: {total_entities}")
    for relation, members in linked_entities.items():
        print(f"  {relation}: {len(members)} entities")
except Exception as exc:
    # Best-effort step: report the problem and continue with no linked
    # entities so the cells that follow still have a mapping to work with.
    print(f"Entity discovery failed (non-fatal): {exc}")
    linked_entities = {}

In [None]:
# Cell 11: Linked entity resolution + checkpointing
# ============================================================
# Progress is checkpointed to cache/progress.json.
# On rerun, already-resolved entities are loaded from cache.
#
# A checkpoint that cannot be read or parsed (for example a file left
# half-written by an interrupted run) is treated as "no checkpoint" so a
# rerun starts fresh instead of crashing until the file is removed by hand.

import json
from pathlib import Path

progress_path = Path(CACHE_DIR) / "progress.json"

progress = None
if progress_path.exists() and not FORCE_REBUILD:
    try:
        with open(progress_path, "r", encoding="utf-8") as f:
            progress = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Progress checkpoint unreadable, ignoring it: {exc}")
        progress = None
    else:
        if not isinstance(progress, dict):
            # Valid JSON but not the expected object (e.g. a list or null).
            print("Progress checkpoint malformed, ignoring it.")
            progress = None

if progress is None:
    progress = {"resolved": {}, "failed": []}
    print("Starting fresh entity resolution.")
else:
    # Give a loaded checkpoint the same keys a fresh one starts with.
    progress.setdefault("resolved", {})
    progress.setdefault("failed", [])
    print(f"Loaded progress checkpoint: {len(progress.get('resolved', {}))} entities")

# Save progress
# Write to a sibling temp file first and rename it over the checkpoint, so
# an interruption during the write cannot leave a truncated progress.json.
tmp_progress_path = progress_path.with_name(progress_path.name + ".tmp")
with open(tmp_progress_path, "w", encoding="utf-8") as f:
    json.dump(progress, f, indent=2)
tmp_progress_path.replace(progress_path)
print("Progress checkpointed.")

In [None]:
# Cell 12: Target OHLCV build (FMP authoritative)
# ============================================================
# Price history for the target is requested through the FMP client only,
# for the configured DATE_START..DATE_END window.
from operator1.steps.data_extraction import extract_target_ohlcv

ohlcv_request = {
    "fmp_client": fmp,
    "fmp_symbol": fmp_symbol,
    "date_start": DATE_START,
    "date_end": DATE_END,
    "force_rebuild": FORCE_REBUILD,
}
target_ohlcv = extract_target_ohlcv(**ohlcv_request)

# Row count first, then the span actually covered by the returned index.
row_count = len(target_ohlcv)
print(f"Target OHLCV: {row_count} rows")
ohlcv_index = target_ohlcv.index
print(f"Date range  : {ohlcv_index.min()} to {ohlcv_index.max()}")

In [None]:
# Cell 13: Target fundamentals as-of table
# ============================================================
# Builds the target's daily cache from the verified record, the OHLCV
# table from the previous cell and the equity data client.
from operator1.steps.cache_builder import build_target_cache

cache_request = {
    "verified_target": verified,
    "ohlcv": target_ohlcv,
    "eulerpool_client": equity_client,
    "date_start": DATE_START,
    "date_end": DATE_END,
    "force_rebuild": FORCE_REBUILD,
}
target_cache = build_target_cache(**cache_request)

# The first shape entry is reported as days, the second as columns.
n_days, n_columns = target_cache.shape[:2]
print(f"Target cache: {n_days} days x {n_columns} columns")

In [None]:
# Cell 14: Feature engineering + safety flags
# ============================================================
from operator1.quality.data_quality import run_quality_audit
from operator1.features.macro_alignment import align_macro_to_daily
from operator1.features.derived_variables import compute_derived_variables

# Step 1 -- derived variables (returns, solvency, liquidity, etc.).
# Not guarded: an error here stops the cell.
target_cache = compute_derived_variables(target_cache)
derived_width = target_cache.shape[1]
print(f"After derived vars: {derived_width} columns")

# Step 2 -- macro alignment onto the daily table. Best-effort: on failure
# target_cache keeps the value it had after step 1.
try:
    target_cache = align_macro_to_daily(target_cache, macro_data)
    print(f"After macro alignment: {target_cache.shape[1]} columns")
except Exception as exc:
    print(f"Macro alignment failed (non-fatal): {exc}")

# Step 3 -- data quality audit (described as failing on a look-ahead
# violation). Not guarded, so an exception raised by the audit propagates.
quality_report = run_quality_audit(target_cache)
audit_status = quality_report.get("status", "unknown")
print(f"Quality audit: {audit_status}")

In [None]:
# Cell 15: Linked caches in batches
# ============================================================
from operator1.steps.cache_builder import build_linked_caches

try:
    linked_cache = build_linked_caches(
        linked_entities=linked_entities,
        eulerpool_client=equity_client,
        date_start=DATE_START,
        date_end=DATE_END,
        force_rebuild=FORCE_REBUILD,
    )
    cache_rows, cache_cols = linked_cache.shape[:2]
    print(f"Linked cache: {cache_rows} rows x {cache_cols} columns")
except Exception as exc:
    # Best-effort step: report the error and fall back to an empty frame so
    # the `.empty` checks made when saving artifacts still work.
    print(f"Linked cache build failed (non-fatal): {exc}")
    import pandas as pd
    linked_cache = pd.DataFrame()

In [None]:
# Cell 16: Linked aggregates join
# ============================================================
from operator1.features.linked_aggregates import compute_linked_aggregates

try:
    linked_aggregates = compute_linked_aggregates(
        target_cache=target_cache,
        linked_cache=linked_cache,
        linked_entities=linked_entities,
    )
    aggregate_width = linked_aggregates.shape[1]
    print(f"Linked aggregates: {aggregate_width} columns")
except Exception as exc:
    # Best-effort step: an empty frame stands in for the missing result so
    # later `.empty` checks can skip it.
    print(f"Linked aggregates failed (non-fatal): {exc}")
    import pandas as pd
    linked_aggregates = pd.DataFrame()

In [None]:
# Cell 17: Survival mode + hierarchy weights + vanity
# ============================================================
# Three transformations are applied to target_cache in sequence; each one
# returns the table that the next one receives. None of them is guarded,
# so an exception in any step stops the cell.
from operator1.analysis.hierarchy_weights import compute_hierarchy_weights
from operator1.analysis.vanity import compute_vanity_percentage
from operator1.analysis.survival_mode import compute_survival_flags

# 1) Survival flags, given the verified country and sector.
target_cache = compute_survival_flags(
    target_cache, country=verified.country, sector=verified.sector
)
print("Survival flags computed.")

# 2) Vanity percentage; the last row's value is shown with one decimal.
target_cache = compute_vanity_percentage(target_cache)
latest_vanity = target_cache["vanity_percentage"].iloc[-1]
print(f"Vanity % (latest): {latest_vanity:.1f}%")

# 3) Hierarchy weights; afterwards the last row's `survival_regime` is shown.
target_cache = compute_hierarchy_weights(target_cache)
latest_regime = target_cache["survival_regime"].iloc[-1]
print(f"Survival regime (latest): {latest_regime}")

In [None]:
# Cell 18: Save cache artifacts
# ============================================================
import json
from dataclasses import asdict
from pathlib import Path
from operator1.http_utils import get_request_log

cache_dir = Path(CACHE_DIR)

# --- Parquet outputs ---
# The target table is always written.
target_cache.to_parquet(cache_dir / "target_company_daily.parquet")
print("Saved: target_company_daily.parquet")

# The two linked tables are written only when they contain data.
optional_tables = (
    (linked_cache, "linked_entities_daily.parquet"),
    (linked_aggregates, "linked_aggregates_daily.parquet"),
)
for table, filename in optional_tables:
    if table.empty:
        continue
    table.to_parquet(cache_dir / filename)
    print(f"Saved: {filename}")

# The full feature table is written from the same target_cache object.
target_cache.to_parquet(cache_dir / "full_feature_table.parquet")
print("Saved: full_feature_table.parquet")

# --- Metadata ---
# The verified record is flattened to a dict with `raw_profile` left out.
verified_fields = asdict(verified)
verified_fields.pop("raw_profile", None)

metadata = {
    "target_isin": target_isin,
    "fmp_symbol": fmp_symbol,
    "verified_target": verified_fields,
    "date_start": str(DATE_START),
    "date_end": str(DATE_END),
    "n_linked_entities": sum(map(len, linked_entities.values())),
    "request_log_count": len(get_request_log()),
}

# default=str stringifies any value the JSON encoder cannot handle itself.
with open(cache_dir / "metadata.json", "w") as fh:
    json.dump(metadata, fh, indent=2, default=str)
print("Saved: metadata.json")

In [None]:
# Cell 19: Modeling / prediction cells
# ============================================================
# Each model wrapped in try/except -- failures are non-fatal.
# Every result variable is set to None before its stage runs, so code that
# executes afterwards can check for None to see which stages produced output.

from operator1.estimation.estimator import run_estimation
from operator1.models.regime_detector import detect_regimes_and_breaks
from operator1.models.forecasting import run_forecasting
from operator1.models.monte_carlo import run_monte_carlo
from operator1.models.prediction_aggregator import run_prediction_aggregation

# --- Estimation ---
# On failure target_cache keeps the value it had before this stage.
try:
    target_cache = run_estimation(target_cache)
    print("Estimation: OK")
except Exception as exc:
    print(f"Estimation failed (non-fatal): {exc}")

# --- Regime detection ---
regime_result = None
try:
    regime_result = detect_regimes_and_breaks(target_cache)
    # Show the last `regime_label` value only when the column exists and
    # the table has rows; otherwise fall back to "N/A". Without the row
    # check an empty table would raise here and a successful detection
    # would be reported as a failure.
    if "regime_label" in target_cache.columns and len(target_cache) > 0:
        current_regime = target_cache["regime_label"].iloc[-1]
    else:
        current_regime = "N/A"
    print(f"Regime detection: OK (current regime: {current_regime})")
except Exception as exc:
    print(f"Regime detection failed (non-fatal): {exc}")

# --- Forecasting ---
forecast_result = None
try:
    forecast_result = run_forecasting(target_cache)
    print(f"Forecasting: OK ({len(forecast_result.forecasts)} variables)")
except Exception as exc:
    print(f"Forecasting failed (non-fatal): {exc}")

# --- Monte Carlo ---
mc_result = None
try:
    mc_result = run_monte_carlo(target_cache)
    print(f"Monte Carlo: OK (survival prob = {mc_result.survival_probability_mean:.2%})")
except Exception as exc:
    print(f"Monte Carlo failed (non-fatal): {exc}")

# --- Prediction aggregation ---
# Receives the forecasting and Monte Carlo results, either of which may
# still be None if its stage failed above.
prediction_result = None
try:
    prediction_result = run_prediction_aggregation(
        cache=target_cache,
        forecast_result=forecast_result,
        mc_result=mc_result,
    )
    print(f"Prediction aggregation: OK ({prediction_result.n_models_available} models)")
except Exception as exc:
    print(f"Prediction aggregation failed (non-fatal): {exc}")

In [None]:
# Cell 20: Report generation + LIMITATIONS
# ============================================================
from dataclasses import asdict
from operator1.report.report_generator import generate_report
from operator1.report.profile_builder import build_company_profile

# --- Company profile ---
# Each model output is flattened to a plain dict when it is truthy; a None
# (or otherwise falsy) output is forwarded as None.
model_outputs = {
    "regime_result": regime_result,
    "forecast_result": forecast_result,
    "mc_result": mc_result,
    "prediction_result": prediction_result,
}
serialised_outputs = {
    label: (asdict(result) if result else None)
    for label, result in model_outputs.items()
}

# An empty aggregates frame is forwarded as None instead of the frame.
aggregates_arg = None if linked_aggregates.empty else linked_aggregates

profile = build_company_profile(
    verified_target=asdict(verified),
    cache=target_cache,
    linked_aggregates=aggregates_arg,
    **serialised_outputs,
)
print(f"Profile built: {len(profile)} sections")

# --- Report ---
# Described as using Gemini when available with a fallback otherwise; this
# call asks for chart images and skips the PDF.
report_options = {"generate_pdf": False, "generate_chart_images": True}
report_output = generate_report(
    profile=profile,
    gemini_client=gemini,
    cache=target_cache,
    **report_options,
)

print(f"Report saved to: {report_output['markdown_path']}")
chart_count = len(report_output["chart_paths"])
print(f"Charts generated: {chart_count}")

# --- Inline display ---
# IPython is imported only at this point, after the report has been built.
from IPython.display import Markdown, display
display(Markdown(report_output["markdown"]))