# LangExtract Integration – All-in-One Demo

This notebook demonstrates an end-to-end extraction workflow optimized for PDFs and free-form text:

- Load environment variables from `.env` and route model calls through OpenRouter
- Confirm the project venv is active (prints the Python executable and `VIRTUAL_ENV`)
- Upload/select a PDF or paste text; parse via `src/processors/pdf_parser.py`
- Edit the LangExtract system prompt and few-shot examples
- Run extraction with `src/langextract_integration`
- Review raw results, normalized table, and LangExtract visualizations
- Save outputs (JSON, JSONL, HTML, CSV)


In [14]:
# Environment: load .env, configure OpenRouter, show venv
import os, sys
from dotenv import load_dotenv

# Load .env from the repository root, taken to be the parent of the current working directory
repo_root = os.path.abspath('..')
env_file = os.path.join(repo_root, '.env')
if os.path.exists(env_file):
    load_dotenv(env_file)
    print("✅ Loaded .env from:", env_file)
else:
    print("⚠️ .env not found at:", env_file)

# Show interpreter and venv so it is obvious which environment the kernel runs in
print("🧪 Python:", sys.executable)
print("🪺 VIRTUAL_ENV:", os.environ.get('VIRTUAL_ENV', 'not set'))

# Configure OpenRouter: expose the key under the OPENAI_* names and point the
# OpenAI base URL at the OpenRouter endpoint
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
if OPENROUTER_API_KEY:
    os.environ['OPENAI_BASE_URL'] = 'https://openrouter.ai/api/v1'
    os.environ['OPENAI_API_KEY'] = OPENROUTER_API_KEY
    os.environ['OPENROUTER_API_KEY'] = OPENROUTER_API_KEY
    print("🔑 OpenRouter API key detected and configured")
else:
    print("⚠️ OPENROUTER_API_KEY not set; cloud extraction will fail")

# Ensure nbformat for Plotly inline rendering
try:
    import nbformat  # noqa: F401
    print("✅ nbformat available")
except ImportError:
    # Only a missing package justifies an install attempt; any other failure
    # should surface instead of being papered over by pip
    import subprocess
    print("⬇️ Installing nbformat for inline rendering...")
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'nbformat>=4.2.0'])
    import nbformat
    print("✅ nbformat installed")


✅ Loaded .env from: /Users/Mitya/Desktop/working/biomedicalmedical_text_agent/.env
🧪 Python: /Users/Mitya/Desktop/working/biomedicalmedical_text_agent/venv/bin/python
🪺 VIRTUAL_ENV: /Users/Mitya/Desktop/working/biomedicalmedical_text_agent/venv
🔑 OpenRouter API key detected and configured
✅ nbformat available


In [15]:
# Imports and project modules
import json
import pandas as pd
from IPython.display import display, HTML

# Ensure src is importable. The directory is registered as an absolute path so
# that imports keep resolving even if the working directory changes later.
src_dir = os.path.abspath(os.path.join('..', 'src'))
if src_dir not in sys.path:
    sys.path.append(src_dir)

# LangExtract integration
from langextract_integration import (
    LangExtractEngine,
    BiomedicExtractionClasses,
    BiomedicNormalizer,
    ExtractionVisualizer,
)

# Processors
from processors.pdf_parser import PDFParser

print("✅ Imports ready")


✅ Imports ready


In [16]:
# Config controls: each setting comes from the environment, with a demo default
LOCAL_MODEL_URL = os.environ.get('LOCAL_MODEL_URL', 'http://localhost:11434')
MODEL_ID = os.environ.get('LANGEXTRACT_MODEL_ID', 'gpt-4o-mini')
# The flag is on only for the (case-insensitive) values 1 / true / yes
USE_LOCAL_MODEL = os.environ.get('USE_LOCAL_MODEL', 'false').lower() in ['1', 'true', 'yes']

print('🤖 Model:', MODEL_ID)
print('🖥️ Local model enabled:', USE_LOCAL_MODEL)
print('🌐 Local URL:', LOCAL_MODEL_URL)


🤖 Model: gpt-4o-mini
🖥️ Local model enabled: False
🌐 Local URL: http://localhost:11434


In [17]:
# Configuration
# Per the original note: an OpenAI-compatible id is preferred so that LangExtract
# goes through OpenRouter rather than Ollama
MODEL_ID = os.environ.get("LANGEXTRACT_MODEL_ID", "gpt-4o-mini")

# Settings for the optional local (Ollama) route
USE_LOCAL_MODEL = os.environ.get("USE_LOCAL_MODEL", "false").lower() in ("1", "true", "yes")
LOCAL_MODEL_ID = os.environ.get("LOCAL_MODEL_ID", "llama3")
LOCAL_MODEL_URL = os.environ.get("LOCAL_MODEL_URL", "http://localhost:11434")

# The cloud route needs the API key, which is read from the environment only
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not USE_LOCAL_MODEL and OPENROUTER_API_KEY:
    print("🔑 API Key detected in environment")
    # Point the OpenAI-style environment variables at OpenRouter
    os.environ.update({
        "OPENAI_BASE_URL": "https://openrouter.ai/api/v1",
        "OPENAI_API_KEY": OPENROUTER_API_KEY,
        "OPENROUTER_API_KEY": OPENROUTER_API_KEY,
    })
elif not USE_LOCAL_MODEL:
    print("⚠️ OPENROUTER_API_KEY is not set. Set it in your environment to run extraction.")

# Pick the model for the active route, then echo the route
EFFECTIVE_MODEL_ID = LOCAL_MODEL_ID if USE_LOCAL_MODEL else MODEL_ID
if not USE_LOCAL_MODEL:
    print(f"☁️ Using OpenRouter (OpenAI-compatible): {MODEL_ID}")
else:
    print(f"🖥️ Using local model via Ollama: {LOCAL_MODEL_ID} @ {LOCAL_MODEL_URL}")

print(f"🤖 Effective model: {EFFECTIVE_MODEL_ID}")

🔑 API Key detected in environment
☁️ Using OpenRouter (OpenAI-compatible): gpt-4o-mini
🤖 Effective model: gpt-4o-mini


## Input: Upload PDF or Paste Text


In [26]:
# File input helpers
from pathlib import Path
from ipywidgets import VBox, HBox, Text, Textarea, FileUpload, Button, Label

# Input widgets: a PDF upload control, an editable path, and a box for pasted text
pdf_uploader = FileUpload(accept='.pdf', multiple=False)
# The default path is relative to the notebook's working directory (the repo
# root is its parent), so the demo is not tied to one developer's home directory
path_field = Text(value='../data/input/PMID32679198.pdf', description='Path:')
text_area = Textarea(value='', description='Text:', layout=dict(width='100%', height='200px'))
status = Label(value='')

load_btn = Button(description='Load PDF', button_style='primary')
use_text_btn = Button(description='Use Text', button_style='success')

# Shared holder for the current input text; the button handlers write its 'text' key
loaded_text = {'text': ''}

def _first_uploaded_file(upload_value):
    """Return the first uploaded file record, or None when nothing was uploaded.

    ipywidgets 7 exposes ``FileUpload.value`` as a dict keyed by filename, while
    ipywidgets 8 exposes a tuple of records. Both shapes are accepted here.
    """
    if not upload_value:
        return None
    if isinstance(upload_value, dict):
        return next(iter(upload_value.values()))
    return upload_value[0]


def _parse_pdf_into_state(pdf_path, source_label):
    """Parse ``pdf_path`` with PDFParser and record the outcome in ``loaded_text`` / ``status``."""
    res = PDFParser().process(str(pdf_path))
    if res.success:
        loaded_text['text'] = res.data.content
        status.value = f"✅ Loaded from {source_label} ({len(loaded_text['text'])} chars)"
    else:
        status.value = f"❌ PDF parsing failed: {res.error}"


def on_load_pdf(_):
    """Button handler: load PDF text from the upload widget, else from the path field.

    An uploaded file takes priority and is written to ``uploaded.pdf`` in the
    working directory before parsing. Without an upload, the path field is used
    if it points to an existing ``.pdf`` file. The result is reported through
    ``status``; the click-event argument is ignored.
    """
    # Priority 1: file uploader
    up_item = _first_uploaded_file(pdf_uploader.value)
    if up_item is not None:
        tmp_path = Path('uploaded.pdf')
        with open(tmp_path, 'wb') as f:
            f.write(up_item['content'])
        _parse_pdf_into_state(tmp_path, 'upload')
        return
    # Priority 2: path field
    p = Path(path_field.value)
    if p.exists() and p.suffix.lower() == '.pdf':
        _parse_pdf_into_state(p, 'path')
    else:
        status.value = '⚠️ Provide a valid .pdf path or upload a file'

def on_use_text(_):
    """Button handler: adopt the pasted text as the input, or warn when the box is blank."""
    pasted = text_area.value.strip()
    if not pasted:
        status.value = '⚠️ Paste some text first'
        return
    loaded_text['text'] = pasted
    status.value = f"✅ Using pasted text ({len(loaded_text['text'])} chars)"

# Attach each button to its handler
use_text_btn.on_click(on_use_text)
load_btn.on_click(on_load_pdf)

# Layout, top to bottom: upload row, path + buttons row, paste box, status line
ui = VBox(children=[
    HBox(children=[Label('Upload:'), pdf_uploader]),
    HBox(children=[path_field, load_btn, use_text_btn]),
    text_area,
    status,
])
ui


VBox(children=(HBox(children=(Label(value='Upload:'), FileUpload(value=(), accept='.pdf', description='Upload'…

## Prompt and Examples (Editable)


In [34]:
# Load default system prompt and examples; allow editing
from langextract_integration.schema_classes import BIOMEDICAL_SYSTEM_PROMPT, BiomedicExtractionClasses
from ipywidgets import Accordion, Textarea

# The patient_record entry of the default classes supplies the seed few-shot examples
extraction_classes = BiomedicExtractionClasses()
seed_examples = extraction_classes.patient_record.few_shot_examples or []

# Editable system prompt, pre-filled with the stripped default prompt
prompt_area = Textarea(
    description='System Prompt:',
    value=BIOMEDICAL_SYSTEM_PROMPT.strip(),
    layout={'width': '100%', 'height': '220px'},
)

# Editable few-shot examples, shown as indented JSON
examples_area = Textarea(
    description='Examples (JSON):',
    value=json.dumps(seed_examples, indent=2),
    layout={'width': '100%', 'height': '260px'},
)

# Both editors go into an accordion, one titled section each
acc = Accordion(children=[prompt_area, examples_area])
acc.set_title(1, 'Few-shot Examples')
acc.set_title(0, 'System Prompt')
acc


Accordion(children=(Textarea(value='You are a biomedical information extraction agent. Extract only facts that…

## Run Extraction


In [35]:
# Controls
from ipywidgets import IntSlider, Checkbox, VBox

# Extraction options; the run handler reads their current values on click
visual_chk = Checkbox(value=True, description='Include visualization')
segment_chk = Checkbox(value=True, description='Segment patients')
workers_slider = IntSlider(value=4, min=1, max=8, description='Workers')
passes_slider = IntSlider(value=2, min=1, max=3, description='Passes')

VBox(children=[passes_slider, workers_slider, segment_chk, visual_chk])


VBox(children=(IntSlider(value=2, description='Passes', max=3, min=1), IntSlider(value=4, description='Workers…

In [36]:
# Initialize engine
# Reset the results holder so that result cells do not show a previous run
results = None
engine = LangExtractEngine(
    # EFFECTIVE_MODEL_ID is LOCAL_MODEL_ID on the local route and MODEL_ID otherwise,
    # so the engine gets the same model the configuration cell reports as effective
    model_id=EFFECTIVE_MODEL_ID,
    openrouter_api_key=os.getenv('OPENROUTER_API_KEY'),
    use_local_model=USE_LOCAL_MODEL,
    local_model_url=LOCAL_MODEL_URL
)

print('🚀 Ready to extract')


HPO data file not found: data/ontologies/hpo/hp.json


🚀 Ready to extract


In [None]:
# Run extraction
from ipywidgets import Output, Button

out = Output()
run_btn = Button(description='Run Extraction', button_style='primary')

def run_extraction(_):
    """Button handler: run the engine on the loaded text and store the outcome in ``results``.

    Output is printed inside the ``out`` widget, which is cleared first. The
    handler reads the input text from ``loaded_text``, the options from the
    control widgets, the prompt from ``prompt_area`` and an optional JSON
    examples override from ``examples_area``. Invalid examples JSON is reported
    and ignored. Any exception from the engine is caught and printed rather
    than raised. The click-event argument is ignored.
    """
    global results
    out.clear_output()
    with out:
        text = loaded_text.get('text') or ''
        if not text.strip():
            print('⚠️ No input text loaded. Load a PDF or paste text.')
            return
        try:
            # Parse examples JSON; a parse error disables the override
            examples_override = None
            if examples_area.value.strip():
                try:
                    examples_override = json.loads(examples_area.value)
                except ValueError as je:
                    print('⚠️ Examples JSON invalid, ignoring override:', je)
                    examples_override = None
            print('🚀 Running...')
            res = engine.extract_from_text(
                text=text,
                extraction_passes=passes_slider.value,
                max_workers=workers_slider.value,
                segment_patients=segment_chk.value,
                include_visualization=visual_chk.value,
                prompt_description=prompt_area.value,
                examples_override=examples_override,
            )
            results = res
            total = len(res.get('extractions') or res.get('original_extractions') or [])
            # `or []` also covers a key that is present but None, so the summary
            # cannot fail after the results have been stored
            patients = len(res.get('normalized_data') or [])
            print(f'✅ Done. Extractions: {total}; Patients: {patients}')
        except Exception as e:
            print('❌ Extraction failed:', e)

run_btn.on_click(run_extraction)
VBox([run_btn, out])


VBox(children=(Button(button_style='primary', description='Run Extraction', style=ButtonStyle()), Output()))

## Results: Raw and Normalized


In [38]:
# Raw: pretty-print at most the first three extractions
if not results:
    print('❌ No results yet')
else:
    extractions = results.get('extractions') or results.get('original_extractions') or []
    print('🔍 Raw extraction sample:')
    for i, ex in enumerate(extractions[:3]):
        print(f'\nExtraction {i+1}:')
        print(json.dumps(ex, indent=2, default=str))

# Normalized: tabulate the records and show the first ten rows
if not results or 'normalized_data' not in results:
    df = None
    print('❌ No normalized data')
else:
    df = pd.DataFrame(results['normalized_data'])
    print('\n📋 Normalized Patient Records:')
    print('Rows:', len(df))
    display(df.head(10))


❌ No results yet
❌ No normalized data


# LangExtract Integration – All-in-One Demo

This notebook demonstrates an end-to-end extraction workflow optimized for PDFs and free-form text:

- Load environment variables from `.env` and route model calls through OpenRouter
- Confirm the project venv is active (prints the Python executable and `VIRTUAL_ENV`)
- Upload/select a PDF or paste text; parse via `src/processors/pdf_parser.py`
- Edit the LangExtract system prompt and few-shot examples
- Run extraction with `src/langextract_integration`
- Review raw results, normalized table, and LangExtract visualizations
- Save outputs (JSON, JSONL, HTML, CSV)


In [None]:
# Environment: load .env, configure OpenRouter, show venv
import os, sys
from dotenv import load_dotenv

# Load .env from the repository root, taken to be the parent of the current working directory
repo_root = os.path.abspath('..')
env_file = os.path.join(repo_root, '.env')
if os.path.exists(env_file):
    load_dotenv(env_file)
    print("✅ Loaded .env from:", env_file)
else:
    print("⚠️ .env not found at:", env_file)

# Show interpreter and venv so it is obvious which environment the kernel runs in
print("🧪 Python:", sys.executable)
print("🪺 VIRTUAL_ENV:", os.environ.get('VIRTUAL_ENV', 'not set'))

# Configure OpenRouter: expose the key under the OPENAI_* names and point the
# OpenAI base URL at the OpenRouter endpoint
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
if OPENROUTER_API_KEY:
    os.environ['OPENAI_BASE_URL'] = 'https://openrouter.ai/api/v1'
    os.environ['OPENAI_API_KEY'] = OPENROUTER_API_KEY
    os.environ['OPENROUTER_API_KEY'] = OPENROUTER_API_KEY
    print("🔑 OpenRouter API key detected and configured")
else:
    print("⚠️ OPENROUTER_API_KEY not set; cloud extraction will fail")

# Ensure nbformat for Plotly inline rendering
try:
    import nbformat  # noqa: F401
    print("✅ nbformat available")
except ImportError:
    # Only a missing package justifies an install attempt; any other failure
    # should surface instead of being papered over by pip
    import subprocess
    print("⬇️ Installing nbformat for inline rendering...")
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'nbformat>=4.2.0'])
    import nbformat
    print("✅ nbformat installed")


✅ Loaded .env from: /Users/Mitya/Desktop/working/biomedicalmedical_text_agent/.env
🧪 Python: /Users/Mitya/Desktop/working/biomedicalmedical_text_agent/venv/bin/python
🪺 VIRTUAL_ENV: /Users/Mitya/Desktop/working/biomedicalmedical_text_agent/venv
🔑 OpenRouter API key detected and configured
✅ nbformat available


In [None]:
# Imports and project modules
import json
import pandas as pd
from IPython.display import display, HTML

# Ensure src is importable. The directory is registered as an absolute path so
# that imports keep resolving even if the working directory changes later.
src_dir = os.path.abspath(os.path.join('..', 'src'))
if src_dir not in sys.path:
    sys.path.append(src_dir)

# LangExtract integration
from langextract_integration import (
    LangExtractEngine,
    BiomedicExtractionClasses,
    BiomedicNormalizer,
    ExtractionVisualizer,
)

# Processors
from processors.pdf_parser import PDFParser

print("✅ Imports ready")


✅ Imports ready


In [None]:
# Config controls: each setting comes from the environment, with a demo default
LOCAL_MODEL_URL = os.environ.get('LOCAL_MODEL_URL', 'http://localhost:11434')
MODEL_ID = os.environ.get('LANGEXTRACT_MODEL_ID', 'gpt-4o-mini')
# The flag is on only for the (case-insensitive) values 1 / true / yes
USE_LOCAL_MODEL = os.environ.get('USE_LOCAL_MODEL', 'false').lower() in ['1', 'true', 'yes']

print('🤖 Model:', MODEL_ID)
print('🖥️ Local model enabled:', USE_LOCAL_MODEL)
print('🌐 Local URL:', LOCAL_MODEL_URL)


🤖 Model: gpt-4o-mini
🖥️ Local model enabled: False
🌐 Local URL: http://localhost:11434


In [None]:
# Configuration
# Per the original note: an OpenAI-compatible id is preferred so that LangExtract
# goes through OpenRouter rather than Ollama
MODEL_ID = os.environ.get("LANGEXTRACT_MODEL_ID", "gpt-4o-mini")

# Settings for the optional local (Ollama) route
USE_LOCAL_MODEL = os.environ.get("USE_LOCAL_MODEL", "false").lower() in ("1", "true", "yes")
LOCAL_MODEL_ID = os.environ.get("LOCAL_MODEL_ID", "llama3")
LOCAL_MODEL_URL = os.environ.get("LOCAL_MODEL_URL", "http://localhost:11434")

# The cloud route needs the API key, which is read from the environment only
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not USE_LOCAL_MODEL and OPENROUTER_API_KEY:
    print("🔑 API Key detected in environment")
    # Point the OpenAI-style environment variables at OpenRouter
    os.environ.update({
        "OPENAI_BASE_URL": "https://openrouter.ai/api/v1",
        "OPENAI_API_KEY": OPENROUTER_API_KEY,
        "OPENROUTER_API_KEY": OPENROUTER_API_KEY,
    })
elif not USE_LOCAL_MODEL:
    print("⚠️ OPENROUTER_API_KEY is not set. Set it in your environment to run extraction.")

# Pick the model for the active route, then echo the route
EFFECTIVE_MODEL_ID = LOCAL_MODEL_ID if USE_LOCAL_MODEL else MODEL_ID
if not USE_LOCAL_MODEL:
    print(f"☁️ Using OpenRouter (OpenAI-compatible): {MODEL_ID}")
else:
    print(f"🖥️ Using local model via Ollama: {LOCAL_MODEL_ID} @ {LOCAL_MODEL_URL}")

print(f"🤖 Effective model: {EFFECTIVE_MODEL_ID}")

## Input: Upload PDF or Paste Text


In [None]:
# File input helpers
from pathlib import Path
from ipywidgets import VBox, HBox, Text, Textarea, FileUpload, Button, Label

# Shared holder for the current input text; the button handlers write its 'text' key
loaded_text = dict(text='')

# Feedback line plus the three input sources: pasted text, a path, an upload
status = Label(value='')
text_area = Textarea(value='', description='Text:', layout={'width': '100%', 'height': '200px'})
path_field = Text(description='Path:', value='../data/input/example.pdf')
pdf_uploader = FileUpload(multiple=False, accept='.pdf')

# Action buttons
use_text_btn = Button(button_style='success', description='Use Text')
load_btn = Button(button_style='primary', description='Load PDF')

def _first_uploaded_file(upload_value):
    """Return the first uploaded file record, or None when nothing was uploaded.

    ipywidgets 7 exposes ``FileUpload.value`` as a dict keyed by filename, while
    ipywidgets 8 exposes a tuple of records. Both shapes are accepted here.
    """
    if not upload_value:
        return None
    if isinstance(upload_value, dict):
        return next(iter(upload_value.values()))
    return upload_value[0]


def _parse_pdf_into_state(pdf_path, source_label):
    """Parse ``pdf_path`` with PDFParser and record the outcome in ``loaded_text`` / ``status``."""
    res = PDFParser().process(str(pdf_path))
    if res.success:
        loaded_text['text'] = res.data.content
        status.value = f"✅ Loaded from {source_label} ({len(loaded_text['text'])} chars)"
    else:
        status.value = f"❌ PDF parsing failed: {res.error}"


def on_load_pdf(_):
    """Button handler: load PDF text from the upload widget, else from the path field.

    An uploaded file takes priority and is written to ``uploaded.pdf`` in the
    working directory before parsing. Without an upload, the path field is used
    if it points to an existing ``.pdf`` file. The result is reported through
    ``status``; the click-event argument is ignored.
    """
    # Priority 1: file uploader
    up_item = _first_uploaded_file(pdf_uploader.value)
    if up_item is not None:
        tmp_path = Path('uploaded.pdf')
        with open(tmp_path, 'wb') as f:
            f.write(up_item['content'])
        _parse_pdf_into_state(tmp_path, 'upload')
        return
    # Priority 2: path field
    p = Path(path_field.value)
    if p.exists() and p.suffix.lower() == '.pdf':
        _parse_pdf_into_state(p, 'path')
    else:
        status.value = '⚠️ Provide a valid .pdf path or upload a file'

def on_use_text(_):
    """Button handler: adopt the pasted text as the input, or warn when the box is blank."""
    pasted = text_area.value.strip()
    if not pasted:
        status.value = '⚠️ Paste some text first'
        return
    loaded_text['text'] = pasted
    status.value = f"✅ Using pasted text ({len(loaded_text['text'])} chars)"

# Attach each button to its handler
use_text_btn.on_click(on_use_text)
load_btn.on_click(on_load_pdf)

# Layout, top to bottom: upload row, path + buttons row, paste box, status line
ui = VBox(children=[
    HBox(children=[Label('Upload:'), pdf_uploader]),
    HBox(children=[path_field, load_btn, use_text_btn]),
    text_area,
    status,
])
ui


VBox(children=(HBox(children=(Label(value='Upload:'), FileUpload(value=(), accept='.pdf', description='Upload'…

## Prompt and Examples (Editable)


In [None]:
# Load default system prompt and examples; allow editing
from langextract_integration.schema_classes import BIOMEDICAL_SYSTEM_PROMPT, BiomedicExtractionClasses
from ipywidgets import Accordion, Textarea

# The patient_record entry of the default classes supplies the seed few-shot examples
extraction_classes = BiomedicExtractionClasses()
seed_examples = extraction_classes.patient_record.few_shot_examples or []

# Editable system prompt, pre-filled with the stripped default prompt
prompt_area = Textarea(
    description='System Prompt:',
    value=BIOMEDICAL_SYSTEM_PROMPT.strip(),
    layout={'width': '100%', 'height': '220px'},
)

# Editable few-shot examples, shown as indented JSON
examples_area = Textarea(
    description='Examples (JSON):',
    value=json.dumps(seed_examples, indent=2),
    layout={'width': '100%', 'height': '260px'},
)

# Both editors go into an accordion, one titled section each
acc = Accordion(children=[prompt_area, examples_area])
acc.set_title(1, 'Few-shot Examples')
acc.set_title(0, 'System Prompt')
acc


Accordion(children=(Textarea(value='You are a biomedical information extraction agent. Extract only facts that…

## Run Extraction


In [None]:
# Controls
from ipywidgets import IntSlider, Checkbox, VBox

# Extraction options; the run handler reads their current values on click
visual_chk = Checkbox(value=True, description='Include visualization')
segment_chk = Checkbox(value=True, description='Segment patients')
workers_slider = IntSlider(value=4, min=1, max=8, description='Workers')
passes_slider = IntSlider(value=2, min=1, max=3, description='Passes')

VBox(children=[passes_slider, workers_slider, segment_chk, visual_chk])


VBox(children=(IntSlider(value=2, description='Passes', max=3, min=1), IntSlider(value=4, description='Workers…

In [None]:
# Initialize engine
# Reset the results holder so that result cells do not show a previous run
results = None
engine = LangExtractEngine(
    # EFFECTIVE_MODEL_ID is LOCAL_MODEL_ID on the local route and MODEL_ID otherwise,
    # so the engine gets the same model the configuration cell reports as effective
    model_id=EFFECTIVE_MODEL_ID,
    openrouter_api_key=os.getenv('OPENROUTER_API_KEY'),
    use_local_model=USE_LOCAL_MODEL,
    local_model_url=LOCAL_MODEL_URL
)

print('🚀 Ready to extract')


HPO data file not found: data/ontologies/hpo/hp.json


🚀 Ready to extract


In [None]:
# Run extraction
from ipywidgets import Output, Button

out = Output()
run_btn = Button(description='Run Extraction', button_style='primary')

def run_extraction(_):
    """Button handler: run the engine on the loaded text and store the outcome in ``results``.

    Output is printed inside the ``out`` widget, which is cleared first. The
    handler reads the input text from ``loaded_text``, the options from the
    control widgets, the prompt from ``prompt_area`` and an optional JSON
    examples override from ``examples_area``. Invalid examples JSON is reported
    and ignored. Any exception from the engine is caught and printed rather
    than raised. The click-event argument is ignored.
    """
    global results
    out.clear_output()
    with out:
        text = loaded_text.get('text') or ''
        if not text.strip():
            print('⚠️ No input text loaded. Load a PDF or paste text.')
            return
        try:
            # Parse examples JSON; a parse error disables the override
            examples_override = None
            if examples_area.value.strip():
                try:
                    examples_override = json.loads(examples_area.value)
                except ValueError as je:
                    print('⚠️ Examples JSON invalid, ignoring override:', je)
                    examples_override = None
            print('🚀 Running...')
            res = engine.extract_from_text(
                text=text,
                extraction_passes=passes_slider.value,
                max_workers=workers_slider.value,
                segment_patients=segment_chk.value,
                include_visualization=visual_chk.value,
                prompt_description=prompt_area.value,
                examples_override=examples_override,
            )
            results = res
            total = len(res.get('extractions') or res.get('original_extractions') or [])
            # `or []` also covers a key that is present but None, so the summary
            # cannot fail after the results have been stored
            patients = len(res.get('normalized_data') or [])
            print(f'✅ Done. Extractions: {total}; Patients: {patients}')
        except Exception as e:
            print('❌ Extraction failed:', e)

run_btn.on_click(run_extraction)
VBox([run_btn, out])


VBox(children=(Button(button_style='primary', description='Run Extraction', style=ButtonStyle()), Output()))

## Results: Raw and Normalized


In [None]:
# Raw: pretty-print at most the first three extractions
if not results:
    print('❌ No results yet')
else:
    extractions = results.get('extractions') or results.get('original_extractions') or []
    print('🔍 Raw extraction sample:')
    for i, ex in enumerate(extractions[:3]):
        print(f'\nExtraction {i+1}:')
        print(json.dumps(ex, indent=2, default=str))

# Normalized: tabulate the records and show the first ten rows
if not results or 'normalized_data' not in results:
    df = None
    print('❌ No normalized data')
else:
    df = pd.DataFrame(results['normalized_data'])
    print('\n📋 Normalized Patient Records:')
    print('Rows:', len(df))
    display(df.head(10))


❌ No results yet
❌ No normalized data


## Visualization


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Built-in LangExtract HTML (if available)
if results and results.get('visualization_html'):
    html_content = results['visualization_html']
    # Write UTF-8 explicitly: the platform default encoding may be unable to
    # represent non-ASCII characters in the extracted text
    with open('langextract_visualization.html', 'w', encoding='utf-8') as f:
        f.write(html_content)
    print("🎨 Saved LangExtract visualization → langextract_visualization.html")
    # Render the whole document: a fixed-length slice of HTML can end in the
    # middle of a tag and display as broken markup
    display(HTML(html_content))
else:
    print('ℹ️ No LangExtract HTML in results or visualization disabled')

# Fallback overview dashboard, built from the normalized records
if results and results.get('normalized_data'):
    viz = ExtractionVisualizer()
    fig = viz.create_overview_dashboard(results)
    fig.show()
else:
    print('❌ No normalized data for charts')


ℹ️ No LangExtract HTML in results or visualization disabled
❌ No normalized data for charts


## Save Outputs


In [None]:
from datetime import datetime

# Persist the results through the engine and list every file it reports
if not results:
    print('❌ No results to save')
else:
    prefix = 'langextract_demo'
    out_dir = './output'
    saved = engine.save_results(results, out_dir, prefix)
    print('💾 Saved:')
    for k, v in saved.items():
        print(' -', k.upper(), ':', v)


❌ No results to save


## Configuration

## Visualization


In [32]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Built-in LangExtract HTML (if available)
if results and results.get('visualization_html'):
    html_content = results['visualization_html']
    # Write UTF-8 explicitly: the platform default encoding may be unable to
    # represent non-ASCII characters in the extracted text
    with open('langextract_visualization.html', 'w', encoding='utf-8') as f:
        f.write(html_content)
    print("🎨 Saved LangExtract visualization → langextract_visualization.html")
    # Render the whole document: a fixed-length slice of HTML can end in the
    # middle of a tag and display as broken markup
    display(HTML(html_content))
else:
    print('ℹ️ No LangExtract HTML in results or visualization disabled')

# Fallback overview dashboard, built from the normalized records
if results and results.get('normalized_data'):
    viz = ExtractionVisualizer()
    fig = viz.create_overview_dashboard(results)
    fig.show()
else:
    print('❌ No normalized data for charts')


ℹ️ No LangExtract HTML in results or visualization disabled
❌ No normalized data for charts


## Save Outputs


In [33]:
from datetime import datetime

# Persist the results through the engine and list every file it reports
if not results:
    print('❌ No results to save')
else:
    prefix = 'langextract_demo'
    out_dir = './output'
    saved = engine.save_results(results, out_dir, prefix)
    print('💾 Saved:')
    for k, v in saved.items():
        print(' -', k.upper(), ':', v)


❌ No results to save


## Configuration