In [10]:
import pymupdf
import langextract as lx
import textwrap
from dotenv import load_dotenv
import os

In [11]:
#Setting up Google Gemini API Key 

load_dotenv()

api_key = os.getenv('LANGEXTRACT_API_KEY')

In [12]:
# 1. Define a concise prompt
prompt = textwrap.dedent("""\
Extract characters, emotions, and relationships in order of appearance.
Use exact text for extractions. Do not paraphrase or overlap entities.
Provide meaningful attributes for each entity to add context.""")

# 2. Provide a high-quality example to guide the model
examples = [
    lx.data.ExampleData(
        text=(
            "ROMEO. But soft! What light through yonder window breaks? It is"
            " the east, and Juliet is the sun."
        ),
        extractions=[
            lx.data.Extraction(
                extraction_class="character",
                extraction_text="ROMEO",
                attributes={"emotional_state": "wonder"},
            ),
            lx.data.Extraction(
                extraction_class="emotion",
                extraction_text="But soft!",
                attributes={"feeling": "gentle awe"},
            ),
            lx.data.Extraction(
                extraction_class="relationship",
                extraction_text="Juliet is the sun",
                attributes={"type": "metaphor"},
            ),
        ],
    )
]

# 3. Run the extraction on your input text
input_text = (
    "Lady Juliet gazed longingly at the stars, her heart aching for Romeo"
)



result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    model_id="gemini-2.5-flash",
    api_key= api_key,
)
   

In [14]:
# Save the results to a JSONL file if extraction succeeded
if result is not None:
    try:
        output_name = "extraction_results.jsonl"
        lx.io.save_annotated_documents([result], output_name=output_name)
        print(f"Saved extraction results to {output_name}")
    except Exception as e:
        print(f"Failed to save extraction results: {e}")

    # Generate the interactive visualization from the file
    try:
        html_content = lx.visualize(output_name)
        vis_path = "visualization.html"
        with open(vis_path, "w", encoding="utf-8") as f:
            f.write(html_content)
        print(f"Saved visualization to {vis_path}")
    except Exception as e:
        print(f"Visualization generation failed: {e}")
else:
    print("No result to save or visualize.")


[94m[1mLangExtract[0m: Saving to [92mtest_output\extraction_results.jsonl[0m: 1 docs [00:00, 249.10 docs/s]

[92m✓[0m Saved [1m1[0m documents to [92mtest_output\extraction_results.jsonl[0m
Saved extraction results to extraction_results.jsonl
Visualization generation failed: JSONL file not found: extraction_results.jsonl
Saved extraction results to extraction_results.jsonl
Visualization generation failed: JSONL file not found: extraction_results.jsonl



