# LangExtract

Para rodar localmente e sem custos, instale o Ollama (https://ollama.com/download) e baixe um modelo local (p. ex., Llama 3) com `ollama pull llama3`.

Para testar o servidor, digite no terminal:

`ollama run llama3 "say hello"`

Se o comando acima não conseguir se conectar, inicie o servidor manualmente em outro terminal:

`ollama serve`

In [None]:
from langextract.factory import ModelConfig
import langextract as lx
import textwrap

# 1. Build the extraction prompt: one instruction per line, each newline-terminated.
_prompt_lines = (
    "Extract structured product information from the text.",
    "Identify product name, brand, model, category, color, size, material, and any key attributes.",
    "Use the exact text for extractions — do not paraphrase.",
    "Return relevant attributes that describe each product clearly.",
)
prompt = "".join(f"{line}\n" for line in _prompt_lines)

# 2. Few-shot examples that show the model the expected output.
# Each spec is (source text, exact product span copied from that text,
# attributes describing the product).
_example_specs = [
    (
        "Camiseta PoloTech masculina de algodão, cor azul marinho, disponível nos tamanhos M, G e GG.",
        "Camiseta PoloTech masculina",
        {
            "brand": "PoloTech",
            "category": "camiseta",
            "material": "algodão",
            "color": "azul marinho",
            "sizes": ["M", "G", "GG"],
        },
    ),
    (
        "Tênis esportivo Nike Air Zoom branco, ideal para corrida.",
        "Tênis esportivo Nike Air Zoom branco",
        {
            "brand": "Nike",
            "category": "tênis esportivo",
            "color": "branco",
            "intended_use": "corrida",
        },
    ),
]

# Every example carries exactly one "product" extraction.
examples = [
    lx.data.ExampleData(
        text=source_text,
        extractions=[
            lx.data.Extraction(
                extraction_class="product",
                extraction_text=product_span,
                attributes=product_attributes,
            ),
        ],
    )
    for source_text, product_span, product_attributes in _example_specs
]

# 3. Run the extraction on the input text using a local Ollama model.
input_text = "Bolsa feminina de couro sintético da marca Vizzano, cor bege, com alça ajustável e fechamento magnético."

# Point LangExtract at the Ollama server listening on localhost:11434.
model_config = ModelConfig(
    model_id="llama3",
    provider="ollama",
    provider_kwargs={"base_url": "http://localhost:11434"},
)

result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    config=model_config,
)

# Print each extraction, followed by its attributes when it has any.
for extraction in result.extractions:
    print(f"{extraction.extraction_class}: {extraction.extraction_text}")
    if extraction.attributes:
        # Label spelling fixed (was "Atributes").
        print(f"Attributes: {extraction.attributes}")

[94m[1mLangExtract[0m: Processing [00:20]

product: Bolsa feminina de couro sintético
Atributes: {'brand': 'Vizzano', 'category': 'bolsa', 'material': 'couro sintético', 'color': 'bege', 'attributes': ['alça ajustável', 'fechamento magnético']}





In [None]:
import langextract as lx

# Sample clinical sentence containing a single medication mention.
input_text = "Patient took 400 mg PO Ibuprofen q4h for two days."

# Instruction given to the model; adjacent literals are concatenated into
# one string at compile time.
prompt_description = (
    "Extract medication information including medication name, dosage, "
    "route, frequency, and duration in the order they appear in the text."
)

# One worked example; the labelled spans are listed in the order in which
# they occur in the example sentence.
_example_text = "Patient was given 250 mg IV Cefazolin TID for one week."
_labelled_spans = [
    ("dosage", "250 mg"),
    ("route", "IV"),
    ("medication", "Cefazolin"),
    ("frequency", "TID"),  # TID = three times a day
    ("duration", "for one week"),
]

examples = [
    lx.data.ExampleData(
        text=_example_text,
        extractions=[
            lx.data.Extraction(extraction_class=label, extraction_text=span)
            for label, span in _labelled_spans
        ],
    )
]

import os

# Run the extraction with Gemini.
# The key is read from the LANGEXTRACT_API_KEY environment variable instead of
# being hard-coded: a literal placeholder passed here would be sent as the key
# even when the variable is set, and real keys should not be committed in a
# notebook. When the variable is unset this passes None.
# NOTE(review): None is assumed to be lx.extract's default for api_key, so
# LangExtract falls back to its own key lookup — confirm against the installed
# version.
result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt_description,
    examples=examples,
    model_id="gemini-2.5-pro",
    api_key=os.environ.get("LANGEXTRACT_API_KEY"),
)

# Show the input, then one bullet per extracted entity. The character span is
# appended only when the extraction has a char_interval.
print(f"Input: {input_text}\n")
print("Extracted entities:")
for extraction in result.extractions:
    span = extraction.char_interval
    suffix = f" (pos: {span.start_pos}-{span.end_pos})" if span else ""
    label = extraction.extraction_class.capitalize()
    print(f"• {label}: {extraction.extraction_text}{suffix}")

# Save the annotated result as JSONL in the current directory.
lx.io.save_annotated_documents([result], output_name="medical_ner_extraction.jsonl", output_dir=".")

# Generate the interactive visualization from the file just written.
html_content = lx.visualize("medical_ner_extraction.jsonl")

# Write the HTML explicitly as UTF-8. Without an encoding, open() uses the
# platform default, which on non-UTF-8 locales can raise UnicodeEncodeError or
# garble non-ASCII characters in the page.
with open("medical_ner_visualization.html", "w", encoding="utf-8") as f:
    if hasattr(html_content, 'data'):
        f.write(html_content.data)  # For Jupyter/Colab: object exposing the markup as .data
    else:
        f.write(html_content)  # Otherwise the value is written as-is

print("Interactive visualization saved to medical_ner_visualization.html")