In [2]:
from pathlib import Path
import json

# 1) import the debug extractor
from hydraedge.extractor.cli import extract_sentence_debug

# 2) locate the sample resources two directories above the working directory
#    (assumes this notebook lives in notebooks/Tuple extraction/)
base      = Path.cwd().parent.parent
gaz_path  = base.joinpath("data", "sample", "syn_dict.tsv")
role_path = base.joinpath("data", "sample", "roles.tsv")

# fail fast, before any extraction work, if either input file is missing
assert gaz_path.exists(),  f"Gazetteer not found: {gaz_path}"
assert role_path.exists(), f"Roles file not found: {role_path}"

# 3) two longer test sentences; each literal is split across source lines
#    purely for readability (adjacent string literals are concatenated)
sentences = [
    (
        "After a decade of research and development, the startup finally "
        "unveiled its flagship quantum-secure communications platform to "
        "an audience of industry leaders and venture capitalists."
    ),
    (
        "While the committee debated the proposed amendments late into the "
        "night, a coalition of independent experts published a comprehensive "
        "white paper highlighting both the strengths and the potential risks "
        "of the new regulatory framework."
    ),
]

# 4) run the pipeline in debug mode on every sentence and dump each stage
for sent in sentences:
    print(f"\n\n========== PROCESSING SENTENCE ==========\n{sent}\n")

    # the extractor is handed plain string paths rather than Path objects
    debug = extract_sentence_debug(sent, gaz_path=str(gaz_path), role_path=str(role_path))

    # intermediate stages: a stage absent from the result is printed as []
    for stage in ("cap", "tokens", "deps", "frames", "raw", "tuples", "hulls"):
        print(f"\n=== {stage.upper()} ===")
        print(json.dumps(debug.get(stage, []), indent=2))

    # the payload is looked up strictly: a missing key raises KeyError
    print("\n=== FINAL PAYLOAD ===")
    print(json.dumps(debug["payload"], indent=2))




After a decade of research and development, the startup finally unveiled its flagship quantum-secure communications platform to an audience of industry leaders and venture capitalists.


=== CAP ===
{
  "text": "After a decade of research and development, the startup finally unveiled its flagship quantum-secure communications platform to an audience of industry leaders and venture capitalists.",
  "stub": null
}

=== TOKENS ===
[
  [
    "After",
    "ADP",
    0
  ],
  [
    "a",
    "DET",
    6
  ],
  [
    "decade",
    "NOUN",
    8
  ],
  [
    "of",
    "ADP",
    15
  ],
  [
    "research",
    "NOUN",
    18
  ],
  [
    "and",
    "CCONJ",
    27
  ],
  [
    "development",
    "NOUN",
    31
  ],
  [
    ",",
    "PUNCT",
    42
  ],
  [
    "the",
    "DET",
    44
  ],
  [
    "startup",
    "NOUN",
    48
  ],
  [
    "finally",
    "ADV",
    56
  ],
  [
    "unveiled",
    "VERB",
    64
  ],
  [
    "its",
    "PRON",
    73
  ],
  [
    "flagship",
    "NOUN",
    7