# LangGraph translation demo

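This notebook loads the enhanced Serbian↔English dataset, sends one synset through `LangGraphTranslationPipeline`, and prints the pipeline output alongside the curated Serbian entry stored in the JSON export so the two can be compared by eye. It also writes a compact summary of the result to `simple_translation_output.json`.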

In [31]:
from pathlib import Path
import json
from pprint import pprint

# Location of the enhanced export, relative to the notebook's working directory.
DATA_PATH = Path("../examples/serbian_english_synset_pairs_enhanced.json")
with DATA_PATH.open("r", encoding="utf-8") as f:
    dataset = json.load(f)

# "pairs" is required; "metadata" is optional and defaults to an empty dict.
pairs = dataset["pairs"]
metadata = dataset.get("metadata", {})

print(f"Loaded {len(pairs)} pairs from {DATA_PATH}")
print("Metadata snapshot:")
pprint(metadata)

# The following cells work on the first pair, so fail here with a clear
# message instead of a bare IndexError when the export holds no pairs.
if not pairs:
    raise ValueError(f"{DATA_PATH} contains no synset pairs; nothing to translate.")

sample_pair = pairs[0]
print("\nFirst pair keys:", list(sample_pair.keys()))

Loaded 27 pairs from ..\examples\serbian_english_synset_pairs_enhanced.json
Metadata snapshot:
{'created_by': 'Serbian WordNet Synset Browser',
 'description': 'Enhanced export with Serbian and English relations for '
                'translation context',
 'export_timestamp': '2025-07-29T13:57:13.380980',
 'format_version': '2.0',
 'includes_metadata': True,
 'includes_relations': True,
 'total_pairs': 27}

First pair keys: ['serbian_id', 'serbian_synonyms', 'serbian_definition', 'serbian_usage', 'serbian_pos', 'serbian_domain', 'serbian_relations', 'english_id', 'english_definition', 'english_lemmas', 'english_examples', 'english_pos', 'english_name', 'english_relations', 'pairing_metadata']


In [32]:
# Fields of the sample pair worth eyeballing: the English source side and the
# curated Serbian side. pprint sorts dict keys by default, so the order of this
# tuple does not influence the printed output.
preview_fields = (
    "english_id",
    "english_lemmas",
    "english_definition",
    "english_examples",
    "serbian_synonyms",
    "serbian_definition",
    "serbian_usage",
)
pprint({field: sample_pair.get(field) for field in preview_fields})

{'english_definition': 'an establishment consisting of a building or complex '
                       'of buildings where an organization for the promotion '
                       'of some cause is situated',
 'english_examples': [],
 'english_id': 'ENG30-03574555-n',
 'english_lemmas': ['institution'],
 'serbian_definition': 'zgrada u kojoj se nalazi organizaciona jedinica neke '
                       'grane javnog poslovanja',
 'serbian_synonyms': ['ustanova'],
 'serbian_usage': ''}


In [33]:
import importlib
import ollama
import wordnet_autotranslate.pipelines.langgraph_translation_pipeline as lg_module

# Reload the pipeline module so edits made on disk are picked up without
# restarting the kernel, then re-bind the class from the fresh module object.
lg_module = importlib.reload(lg_module)
LangGraphTranslationPipeline = lg_module.LangGraphTranslationPipeline

PREFERRED_OLLAMA_MODEL = "gpt-oss:120b"
OLLAMA_TIMEOUT = 180  # seconds
OLLAMA_TEMPERATURE = 0.0

# Only the daemon call sits inside the try block: a failure while *parsing*
# the response (e.g. an unexpected response shape) should surface as itself
# rather than being reported as "daemon unreachable".
try:
    model_list_response = ollama.list()
except Exception as exc:  # pragma: no cover - depends on local runtime
    raise RuntimeError(
        "Could not reach the local Ollama daemon. Start it with `ollama serve`."
    ) from exc

# Skip entries without a model name so the sorted() fallback below never has
# to compare None with str.
available_models = {item.model for item in model_list_response.models if item.model}

if not available_models:
    raise RuntimeError(
        "No Ollama models are installed. Pull one with `ollama pull <model>` before running this cell."
    )

if PREFERRED_OLLAMA_MODEL in available_models:
    ollama_model = PREFERRED_OLLAMA_MODEL
else:
    # Deterministic fallback: the alphabetically first installed model.
    ollama_model = sorted(available_models)[0]
    print(
        f"Preferred model '{PREFERRED_OLLAMA_MODEL}' not found. "
        f"Falling back to '{ollama_model}'."
    )

# English → Serbian pipeline backed by the selected local model.
pipeline = LangGraphTranslationPipeline(
    source_lang="en",
    target_lang="sr",
    model=ollama_model,
    temperature=OLLAMA_TEMPERATURE,
    timeout=OLLAMA_TIMEOUT,
)

print(f"Using Ollama model: {ollama_model}")

Using Ollama model: gpt-oss:120b


In [34]:
preview_limit = 200


def preview_text(text: object, limit: int = preview_limit) -> str:
    """Return at most ``limit`` characters of ``text`` for compact display.

    Args:
        text: Value to preview. Falsy values (``None``, ``""``, empty
            containers) yield an empty string. Non-string values are rendered
            with ``str()`` first, so a list or dict coming back from the
            pipeline does not raise ``TypeError``.
        limit: Maximum number of characters kept before the truncation marker.

    Returns:
        The (possibly shortened) text, followed by ``"… [truncated]"`` when
        the rendered text is longer than ``limit``.
    """
    if not text:
        return ""
    rendered = text if isinstance(text, str) else str(text)
    return rendered[:limit] + ("… [truncated]" if len(rendered) > limit else "")


# Synset record handed to the pipeline, built from the English side of the
# sample pair.
synset_input = {
    "id": sample_pair.get("english_id"),
    "english_id": sample_pair.get("english_id"),
    "lemmas": sample_pair.get("english_lemmas", []),
    "definition": sample_pair.get("english_definition", ""),
    "examples": sample_pair.get("english_examples", []),
    "pos": sample_pair.get("english_pos"),
}

result = pipeline.translate_synset(synset_input)

# `or` fallbacks (rather than .get defaults) also cover keys that are present
# but hold None, which would otherwise break len(), iteration, or the chained
# .get() lookups below.
translation = result.get("translation", "")
definition_translation = result.get("definition_translation") or ""
translated_synonyms = result.get("translated_synonyms") or []
examples = result.get("examples") or []
notes = result.get("notes")
curator_summary = result.get("curator_summary", "")
raw_response = result.get("raw_response") or ""

payload = result.get("payload") or {}
synonym_entries = (payload.get("synonyms") or {}).get("synonyms") or []
logs = payload.get("logs") or {}

print(f"Primary translation headword: {translation}")
print(f"Translated synonyms: {translated_synonyms}")
print(f"Example count: {len(examples)}")
print(f"Definition translation length: {len(definition_translation)} characters")
print(f"Raw response length: {len(raw_response)} characters")

print("\nCurator summary:")
print(curator_summary)

print("\nDefinition translation:")
print(definition_translation)

if translated_synonyms:
    print("\nSynonym table:")
    # Prefer the detailed payload entries; when the payload carries none, list
    # the plain synonyms so the header is never followed by an empty table.
    for entry in synonym_entries or translated_synonyms:
        if isinstance(entry, dict):
            original = entry.get("original")
            translation_value = entry.get("translation")
            confidence = entry.get("confidence")
            example = preview_text(entry.get("example"))
            print(f"- {original} → {translation_value} (confidence: {confidence})")
            if example:
                print(f"  example: {example}")
        else:
            print(f"- {entry}")
else:
    print("\nNo synonym candidates returned.")

if examples:
    print("\nExamples preview:")
    # Show at most three examples, then the total count if there are more.
    for ex in examples[:3]:
        print(f"- {preview_text(ex)}")
    if len(examples) > 3:
        print(f"... ({len(examples)} total)")
else:
    print("\nNo examples returned.")

print("\nNotes preview:")
print(preview_text(notes))

print("\nStage logs (preview):")
for stage, log in logs.items():
    # Stages without a log entry are skipped.
    if not log:
        continue
    print(f"[{stage}] prompt preview: {preview_text(log.get('prompt'))}")
    print(f"[{stage}] raw response preview: {preview_text(log.get('raw_response_preview'))}")

print("\nFull raw response preview:")
print(preview_text(raw_response))

Primary translation headword: ustanova
Translated synonyms: ['ustanova', 'institucija', 'objekat', 'zgrada', 'sedište', 'kompleks']
Example count: 8
Definition translation length: 139 characters
Raw response length: 2391 characters

Curator summary:
Headword (sr): ustanova
Definition translation: objekat koji se sastoji od jedne zgrade ili kompleksa zgrada, u kojem se nalazi organizacija posvećena promociji određenog cilja ili uzroka
Synonym candidates:
  • ustanova
  • institucija
  • objekat
  • zgrada
  • sedište
  (+1 more candidates)
Example sentences: 8 (showing first)
  “Ustanova za zaštitu životne sredine organizuje radionice za lokalnu zajednicu.”
Notes: U srpskom jeziku se za fizičko mesto organizacije najčešće koristi termin „objekat“ ili „zgrada“, kako bi se izbegla konfuzija sa apstraktnim značenjem reči „institucija“.

Definition translation:
objekat koji se sastoji od jedne zgrade ili kompleksa zgrada, u kojem se nalazi organizacija posvećena promociji određenog cilja il

In [35]:
# Collect every candidate spelling: first from the detailed payload entries,
# then from the flat synonym list returned by the pipeline.
synonym_entries_full = result.get("payload", {}).get("synonyms", {}).get("synonyms", [])

simple_synonym_candidates: list[str] = []
for entry in synonym_entries_full:
    if isinstance(entry, str):
        simple_synonym_candidates.append(entry.strip())
        continue
    if not isinstance(entry, dict):
        # Entries of any other type carry no usable text.
        continue
    candidate = entry.get("translation")
    if candidate:
        simple_synonym_candidates.append(candidate.strip())

simple_synonym_candidates.extend(
    candidate.strip() for candidate in translated_synonyms if candidate
)

# Drop blanks and duplicates, then order alphabetically for a stable export.
sorted_synonyms = sorted(set(filter(None, simple_synonym_candidates)))

simple_output = {
    "english_id": synset_input.get("id"),
    "preferred_headword": translation,
    "definition_translation": definition_translation,
    "synonyms": sorted_synonyms,
}

# Serialize once and reuse the same text for the file and for stdout.
serialized_output = json.dumps(simple_output, ensure_ascii=False, indent=2)

output_path = Path("simple_translation_output.json")
output_path.write_text(serialized_output, encoding="utf-8")

print(f"Simple JSON saved to {output_path.resolve()}")
print(serialized_output)

Simple JSON saved to E:\Github\wordnet_autotranslate\notebooks\simple_translation_output.json
{
  "english_id": "ENG30-03574555-n",
  "preferred_headword": "ustanova",
  "definition_translation": "objekat koji se sastoji od jedne zgrade ili kompleksa zgrada, u kojem se nalazi organizacija posvećena promociji određenog cilja ili uzroka",
  "synonyms": [
    "institucija",
    "kompleks",
    "objekat",
    "sedište",
    "ustanova",
    "zgrada"
  ]
}


## Next steps

- This notebook already runs against a local Ollama model; if you started from the `DemoLLM` stub instead, swap it for a real `ChatOllama` instance once your local model is running (see the project README for setup instructions).
- Translate multiple synsets by calling `pipeline.translate(list_of_synsets)` or the streaming variant for large batches.