In [None]:
import pymupdf
import langextract as lx
import textwrap
from dotenv import load_dotenv
import os

In [None]:
# Google Gemini API key setup.
# load_dotenv() pulls variables from a .env file (if one is found) into the
# process environment; the key is then read from LANGEXTRACT_API_KEY.
# api_key is None when that variable is not set.
load_dotenv()
api_key = os.environ.get('LANGEXTRACT_API_KEY')

In [None]:
# --- Step 1: the instruction given to the model -----------------------------
prompt = textwrap.dedent("""\
Extract characters, emotions, and relationships in order of appearance.
Use exact text for extractions. Do not paraphrase or overlap entities.
Provide meaningful attributes for each entity to add context.""")

# --- Step 2: one worked example that shows the expected output --------------
# Each row is (extraction_class, extraction_text, attributes); every
# extraction_text is an exact substring of the example passage below.
_example_rows = [
    ("character", "ROMEO", {"emotional_state": "wonder"}),
    ("emotion", "But soft!", {"feeling": "gentle awe"}),
    ("relationship", "Juliet is the sun", {"type": "metaphor"}),
]

_example_passage = (
    "ROMEO. But soft! What light through yonder window breaks? It is"
    " the east, and Juliet is the sun."
)

examples = [
    lx.data.ExampleData(
        text=_example_passage,
        extractions=[
            lx.data.Extraction(
                extraction_class=row_class,
                extraction_text=row_text,
                attributes=row_attributes,
            )
            for row_class, row_text, row_attributes in _example_rows
        ],
    )
]

# --- Step 3: run the extraction on the input text ---------------------------
input_text = (
    "Lady Juliet gazed longingly at the stars, her heart aching for Romeo"
)

# Calls the model named by model_id using the prompt and example above;
# `result` is consumed by the save/visualize cell that follows.
result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    model_id="gemini-2.5-flash",
    api_key=api_key,
)
   

In [None]:
# Save the annotated result as JSONL, render it to an interactive HTML page,
# and report where the page was written.
#
# The output locations are named once so the save call, the visualize call,
# the file write and the final message cannot drift apart (the message
# previously named a file that was never written).
output_dir = "."
jsonl_name = "romeo_juliet_extraction.jsonl"
html_path = "romeo_juliet_extraction.html"

lx.io.save_annotated_documents([result], output_name=jsonl_name, output_dir=output_dir)

# Generate the interactive visualization from the JSONL file saved above.
html_content = lx.visualize(os.path.join(output_dir, jsonl_name))

# The value returned by lx.visualize is either an object exposing the markup
# on a `.data` attribute or the markup itself; resolve it to text before
# opening the output file so a failure here does not leave an empty file.
html_text = html_content.data if hasattr(html_content, 'data') else html_content

with open(html_path, "w", encoding='utf-8') as f:
    f.write(html_text)

print(f"Interactive visualization saved to {html_path}")
