# Romeo and Juliet Text Extraction with LangExtract

This notebook demonstrates extracting characters, emotions, and relationships from Shakespeare's Romeo and Juliet using LangExtract.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/google/langextract/blob/main/examples/notebooks/romeo_juliet_extraction.ipynb)

## Setup

In [1]:
# Install LangExtract
# `%pip` (rather than `!pip`) installs into the environment of the running
# kernel; `-q` keeps the installer output quiet.
%pip install -q langextract

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/85.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.7/76.7 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Set up your Gemini API key
# Get your key from: https://aistudio.google.com/app/apikey
import os
from getpass import getpass

# Ask for the key interactively only when the environment does not already
# provide one; getpass keeps the typed value from being echoed.
if os.environ.get('GEMINI_API_KEY') is None:
    entered_key = getpass('Enter your Gemini API key: ')
    os.environ['GEMINI_API_KEY'] = entered_key

··········


## Define Extraction Task

In [3]:
import langextract as lx
import textwrap

# Task description handed to `lx.extract` as `prompt_description`.
prompt = textwrap.dedent("""\
    Extract key entities and attributes from an identification document.
    Use exact text where possible. Do not paraphrase names or IDs.
    Include: person, document, id_number, address, dates (dob, issue, expiry),
    classifications (class, endorsements, restrictions), and flags (organ_donor).
""")

# Text of the worked example. It is defined in this cell, ahead of `examples`,
# so the cell runs on a fresh kernel (Restart & Run All) instead of relying on
# a variable that is only assigned in a later cell.
# Every `extraction_text` below is copied verbatim from this text.
example_license_text = textwrap.dedent("""\
    STATE OF PACIFICA • DRIVER LICENSE
    DLN: P123-456-789   Class: C   Endorsements: None   Restr: Corrective Lenses
    Name: RIVERA, ALEX J
    Address: 1420 Beacon Ave, Apt 5B, Northport, PC 94021
    DOB: 1991-07-18   Issue: 2024-06-15   Exp: 2032-07-18
    Sex: M   Hgt: 5'11"   Wgt: 178 lb   Eyes: BRN
    Organ Donor: YES
    Notes: Temporary address valid until 2025-01-31.
    Barcode: |||:::|||:::
""")

# One annotated example: each Extraction pairs a class label with the exact
# span from the example text plus a small dict of attributes.
examples = [
    lx.data.ExampleData(
        text=example_license_text,
        extractions=[
            lx.data.Extraction(
                extraction_class="document",
                extraction_text="DRIVER LICENSE",
                attributes={"jurisdiction": "STATE OF PACIFICA"}
            ),
            lx.data.Extraction(
                extraction_class="id_number",
                extraction_text="P123-456-789",
                attributes={"label": "DLN"}
            ),
            lx.data.Extraction(
                extraction_class="person",
                extraction_text="RIVERA, ALEX J",
                attributes={"given_name": "ALEX", "family_name": "RIVERA", "middle_initial": "J"}
            ),
            lx.data.Extraction(
                extraction_class="address",
                extraction_text="1420 Beacon Ave, Apt 5B, Northport, PC 94021",
                attributes={"city": "Northport", "postal_code": "94021", "region": "PC"}
            ),
            lx.data.Extraction(
                extraction_class="date",
                extraction_text="1991-07-18",
                attributes={"type": "DOB"}
            ),
            lx.data.Extraction(
                extraction_class="date",
                extraction_text="2024-06-15",
                attributes={"type": "Issue"}
            ),
            lx.data.Extraction(
                extraction_class="date",
                extraction_text="2032-07-18",
                attributes={"type": "Exp"}
            ),
            lx.data.Extraction(
                extraction_class="classification",
                extraction_text="Class: C",
                attributes={"class": "C"}
            ),
            lx.data.Extraction(
                extraction_class="endorsement",
                extraction_text="Endorsements: None",
                attributes={"endorsements": "None"}
            ),
            lx.data.Extraction(
                extraction_class="restriction",
                extraction_text="Restr: Corrective Lenses",
                attributes={"restriction": "Corrective Lenses"}
            ),
            lx.data.Extraction(
                extraction_class="flag",
                extraction_text="Organ Donor: YES",
                attributes={"organ_donor": "YES"}
            ),
            lx.data.Extraction(
                extraction_class="note",
                extraction_text="Temporary address valid until 2025-01-31.",
                attributes={"type": "address_validity"}
            ),
        ]
    )
]

## Extract from Sample Text

In [None]:
# Input document for this run: a fictional driver license as plain text
fake_license = textwrap.dedent("""\
    STATE OF PACIFICA • DRIVER LICENSE
    DLN: P123-456-789   Class: C   Endorsements: None   Restr: Corrective Lenses
    Name: RIVERA, ALEX J
    Address: 1420 Beacon Ave, Apt 5B, Northport, PC 94021
    DOB: 1991-07-18   Issue: 2024-06-15   Exp: 2032-07-18
    Sex: M   Hgt: 5'11"   Wgt: 178 lb   Eyes: BRN
    Organ Donor: YES
    Notes: Temporary address valid until 2025-01-31.
    Barcode: |||:::|||:::
""")

# Run the extraction with the prompt and examples defined earlier
result = lx.extract(
    model_id="gemini-2.5-flash",
    text_or_documents=fake_license,
    prompt_description=prompt,
    examples=examples,
)

# Report the entity count, then one bullet per entity with its attributes
print(f"Extracted {len(result.extractions)} entities:\n")
for entity in result.extractions:
    print(f"• {entity.extraction_class}: '{entity.extraction_text}'")
    # An entity without attributes contributes no attribute lines
    for attr_name, attr_value in (entity.attributes or {}).items():
        print(f"  - {attr_name}: {attr_value}")

[94m[1mLangExtract[0m: model=[92mgemini-2.5-flash[0m, current=[92m68[0m chars, processed=[92m68[0m chars:  [00:01]

[92m✓[0m Extraction processing complete
[92m✓[0m Extracted [1m3[0m entities ([1m3[0m unique types)
  [96m•[0m Time: [1m1.96s[0m
  [96m•[0m Speed: [1m35[0m chars/sec
  [96m•[0m Chunks: [1m1[0m
Extracted 3 entities:

• character: 'Lady Juliet'
  - emotional_state: longing
• emotion: 'gazed longingly at the stars, her heart aching'
  - feeling: melancholy longing
• relationship: 'her heart aching for Romeo'
  - type: romantic





## Interactive Visualization

In [None]:
# The same JSONL file is written first and then read back by the visualizer
jsonl_name = "romeo_juliet.jsonl"

# Persist the annotated document in the current directory
lx.io.save_annotated_documents(
    [result],
    output_name=jsonl_name,
    output_dir=".",
)

# Build the interactive visualization from the saved file
html_content = lx.visualize(jsonl_name)

# Leaving `html_content` as the cell's last expression renders it in the notebook
print("Interactive visualization (hover over highlights to see attributes):")
html_content

[94m[1mLangExtract[0m: Saving to [92mromeo_juliet.jsonl[0m: 1 docs [00:00, 995.33 docs/s]

[92m✓[0m Saved [1m1[0m documents to [92mromeo_juliet.jsonl[0m



[94m[1mLangExtract[0m: Loading [92mromeo_juliet.jsonl[0m: 100%|██████████| 961/961 [00:00<00:00, 2.49MB/s]

[92m✓[0m Loaded [1m1[0m documents from [92mromeo_juliet.jsonl[0m
Interactive visualization (hover over highlights to see attributes):





In [None]:
# Save visualization to file (for downloading)
# Handle both Jupyter (HTML object exposing the markup as `.data`) and
# non-Jupyter (plain string) environments.
html_markup = html_content.data if hasattr(html_content, 'data') else html_content

# Write as UTF-8 explicitly: without `encoding`, open() uses the platform's
# default text encoding, which can fail on non-ASCII characters in the markup.
with open("romeo_juliet_visualization.html", "w", encoding="utf-8") as f:
    f.write(html_markup)

print("✓ Visualization saved to romeo_juliet_visualization.html")
print("You can download this file from the Files panel on the left.")

✓ Visualization saved to romeo_juliet_visualization.html
You can download this file from the Files panel on the left.


## Try Your Own Text

Experiment with your own Shakespeare quotes or any literary text!

In [None]:
# Replace the passage below with any text you want to run through the extractor
your_text = """
JULIET: O Romeo, Romeo! wherefore art thou Romeo?
Deny thy father and refuse thy name;
Or, if thou wilt not, be but sworn my love,
And I'll no longer be a Capulet.
"""

# Reuses the prompt and examples defined earlier in the notebook
custom_result = lx.extract(
    model_id="gemini-2.5-flash",
    text_or_documents=your_text,
    prompt_description=prompt,
    examples=examples,
)

# One bullet per extraction, followed by its attributes when it has any
print("Extractions from your text:\n")
for item in custom_result.extractions:
    print(f"• {item.extraction_class}: '{item.extraction_text}'")
    for attr_name, attr_value in (item.attributes or {}).items():
        print(f"  - {attr_name}: {attr_value}")

[94m[1mLangExtract[0m: model=[92mgemini-2.5-flash[0m, current=[92m163[0m chars, processed=[92m163[0m chars:  [00:05]

[92m✓[0m Extraction processing complete
[92m✓[0m Extracted [1m6[0m entities ([1m3[0m unique types)
  [96m•[0m Time: [1m5.84s[0m
  [96m•[0m Speed: [1m28[0m chars/sec
  [96m•[0m Chunks: [1m1[0m
Extractions from your text:

• character: 'JULIET'
  - emotional_state: longing
• emotion: 'O Romeo, Romeo! wherefore art thou Romeo?'
  - feeling: desperate questioning
• relationship: 'thy father'
  - type: familial
• relationship: 'thy name'
  - type: lineage
• relationship: 'my love'
  - type: romantic bond
• relationship: 'Capulet'
  - type: family affiliation



