In [1]:
import textwrap

import langextract as lx
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file into the process environment.
# The bare call is left as the cell's last expression, so its return value
# is what the cell displays.
# NOTE(review): presumably this is where the Google/Gemini API key used by the
# extraction cell comes from — confirm against the local .env.
load_dotenv()

True

In [3]:
# 1. Task instructions for the model: three sentences, one per line,
#    joined with newlines and with no trailing newline.
prompt = "\n".join(
    (
        "Extract characters, emotions, and relationships in order of appearance.",
        "Use exact text for extractions. Do not paraphrase or overlap entities.",
        "Provide meaningful attributes for each entity to add context.",
    )
)

# 2. A single worked example showing the model the expected output shape.
#    Each row below is (extraction_class, extraction_text, attributes); every
#    extraction_text is a verbatim substring of the example passage, and the
#    rows are listed in the order those substrings appear in it.
examples = [
    lx.data.ExampleData(
        text=(
            "ROMEO. But soft! What light through yonder window breaks? "
            "It is the east, and Juliet is the sun."
        ),
        extractions=[
            lx.data.Extraction(
                extraction_class=label,
                extraction_text=span,
                attributes=attrs,
            )
            for label, span, attrs in (
                ("character", "ROMEO", {"emotional_state": "wonder"}),
                ("emotion", "But soft!", {"feeling": "gentle awe"}),
                ("relationship", "Juliet is the sun", {"type": "metaphor"}),
            )
        ],
    )
]

In [4]:
# 3. Extract entities from a new sentence, steered by the prompt and the
#    worked example defined above. `result` is consumed by the cells below.
input_text = (
    "Lady Juliet gazed longingly at the stars, "
    "her heart aching for Romeo"
)
result = lx.extract(
    model_id="gemini-2.5-pro",
    examples=examples,
    prompt_description=prompt,
    text_or_documents=input_text,
)

Both GOOGLE_API_KEY and GEMINI_API_KEY are set. Using GOOGLE_API_KEY.
LangExtract: model=gemini-2.5-pro, current=68 chars, processed=68 chars:  [00:20]


✓ Extraction processing complete
✓ Extracted 3 entities (3 unique types)
  • Time: 20.56s
  • Speed: 3 chars/sec
  • Chunks: 1


In [8]:
# Dump each extraction as four lines: class, matched text, attributes, and a
# 100-character rule separating it from the next one.
for extraction in result.extractions:
    print(
        extraction.extraction_class,
        extraction.extraction_text,
        extraction.attributes,
        "-" * 100,
        sep="\n",
    )

character
Lady Juliet
{'emotional_state': 'longing'}
----------------------------------------------------------------------------------------------------
emotion
longingly
{'feeling': 'yearning'}
----------------------------------------------------------------------------------------------------
relationship
her heart aching for Romeo
{'type': 'romantic longing'}
----------------------------------------------------------------------------------------------------


In [20]:
# Save the results to a JSONL file.
# The output directory is spelled out rather than left to the library default
# so that it visibly matches the "test_output/extraction_results.jsonl" path
# the visualization cell reads from.
lx.io.save_annotated_documents(
    [result],
    output_name="extraction_results.jsonl",
    output_dir="test_output",
)

[94m[1mLangExtract[0m: Saving to [92mextraction_results.jsonl[0m: 1 docs [00:00, 948.72 docs/s]


[92m✓[0m Saved [1m1[0m documents to [92mextraction_results.jsonl[0m


In [22]:
# Generate the interactive visualization from the saved JSONL file.
html_display_object = lx.visualize("test_output/extraction_results.jsonl")
# Per the langextract docs, visualize() returns an IPython HTML display object
# (markup in `.data`) when IPython is available, and the markup as a plain str
# otherwise. Accept both so this cell also works when the notebook is exported
# and run as a script, instead of failing with AttributeError on `.data`.
html_content = getattr(html_display_object, "data", html_display_object)
# Write the markup as UTF-8 next to the JSONL it was generated from.
with open("test_output/outputvisualization.html", "w", encoding="utf-8") as f:
    f.write(html_content)

[94m[1mLangExtract[0m: Loading [92mextraction_results.jsonl[0m: 100%|██████████| 922/922 [00:00<00:00, 2.23MB/s]


[92m✓[0m Loaded [1m1[0m documents from [92mextraction_results.jsonl[0m
