In [None]:
# Install the Python libraries used by the extraction cells (-q keeps pip quiet)
!pip install pymupdf4llm pdfplumber pandas requests -q

# Download the official Ollama install script and run it with sh
!curl -fsSL https://ollama.com/install.sh | sh

import subprocess
import time

# Start the ollama server process in the background.
# Both stdout and stderr are redirected to ollama_server.log.
# NOTE(review): the log file object opened inline below is never closed
# explicitly — confirm that keeping it open for the session is intended.
server_process = subprocess.Popen(
    ["ollama", "serve"],
    stdout=open("ollama_server.log", "w"),
    stderr=subprocess.STDOUT
)

print("✅ Ollama server started in the background.")
time.sleep(5) # Fixed 5-second pause to give the server a moment to initialize

# Pull the model tag llama3.1:8b so it is available locally.
# NOTE(review): GraphEntityExtractor defaults to "llama3:8b" while LLMClient
# defaults to "llama3.1:8b" — confirm which tag the notebook should use.
print("📥 Pulling the Llama 3 8B model. This may take a few minutes...")
!ollama pull llama3.1:8b
print("✅ Model download complete.")

>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
✅ Ollama server started in the background.
📥 Pulling the Llama 3 8B model. This may take a few minutes...
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?20

In [None]:
#!/usr/bin/env python3
"""
Graph Database Entity-Relation-Value Extractor for Pharmaceutical Documents
Extracts structured data from bula PDFs in a format optimized for graph databases
"""

import json
import re
import subprocess
import shlex
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from datetime import datetime
import pymupdf4llm
import pdfplumber
from dataclasses import dataclass
from enum import Enum
import uuid

@dataclass
class GraphTriple:
    """Represents an entity-relation-value triple for graph database.

    A triple is a directed edge ``source_entity --relation--> target_entity``
    together with the type label of each endpoint.

    ``properties`` may be omitted (or passed as ``None``); it is then replaced
    by a fresh empty dict so every consumer can treat it as a mapping.
    """
    source_entity: str
    source_type: str
    relation: str
    target_entity: str
    target_type: str
    properties: Optional[Dict[str, Any]] = None
    confidence: float = 0.8
    source_section: str = ""

    def __post_init__(self):
        # A mutable default cannot be declared directly on a dataclass field,
        # so None is the sentinel and each instance gets its own dict here.
        if self.properties is None:
            self.properties = {}

    def to_dict(self):
        """Return the triple as a plain dict suitable for JSON serialization."""
        return {
            'source_entity': self.source_entity,
            'source_type': self.source_type,
            'relation': self.relation,
            'target_entity': self.target_entity,
            'target_type': self.target_type,
            # Guard kept in case properties is reset to None after creation.
            'properties': self.properties or {},
            'confidence': self.confidence,
            'source_section': self.source_section
        }

class GraphEntityExtractor:
    def __init__(self, model_name: str = "llama3:8b"):
        """Build the extraction schema and make sure the Ollama model is usable.

        Args:
            model_name: Ollama model tag used for ``ollama pull`` and
                ``ollama run``.

        Raises:
            RuntimeError: propagated from ``setup_ollama`` when the Ollama CLI
                cannot be executed.
        """
        self.model_name = model_name

        # Graph schema for pharmaceutical documents.
        # The keys are the only entity types validate_triple accepts; the
        # descriptions are listed in the extraction prompt.
        self.entity_types = {
            'MEDICATION': 'Main pharmaceutical product',
            'ACTIVE_INGREDIENT': 'Chemical compound that provides therapeutic effect',
            'MANUFACTURER': 'Company that produces the medication',
            'INDICATION': 'Medical condition treated by the medication',
            'CONTRAINDICATION': 'Condition where medication should not be used',
            'SIDE_EFFECT': 'Unwanted reaction caused by medication',
            'DOSAGE': 'Amount and frequency of medication administration',
            'INTERACTION': 'Other substances that affect medication behavior',
            'STORAGE_CONDITION': 'Requirements for proper medication storage',
            'PRESENTATION': 'Physical form and packaging of medication',
            'MECHANISM': 'How the medication works in the body',
            'PATIENT_GROUP': 'Specific population affected by medication use'
        }

        # Relation types for pharmaceutical graph.
        # As with entity types, keys are validated and descriptions go into
        # the prompt.
        self.relation_types = {
            'CONTAINS': 'medication contains active ingredient',
            'MANUFACTURED_BY': 'medication is made by company',
            'TREATS': 'medication treats condition',
            'CONTRAINDICATED_FOR': 'medication should not be used for condition',
            'CAUSES': 'medication may cause side effect',
            'HAS_DOSAGE': 'medication has specific dosage',
            'INTERACTS_WITH': 'medication interacts with substance',
            'STORED_AS': 'medication requires storage condition',
            'AVAILABLE_AS': 'medication comes in presentation form',
            'WORKS_BY': 'medication functions through mechanism',
            'AFFECTS': 'medication specifically affects patient group',
            'HAS_FREQUENCY': 'side effect occurs with specific frequency'
        }

        # Section patterns for Brazilian pharmaceutical documents.
        # Maps a heading regex to a section label. detect_section tries the
        # patterns in this order with re.match and re.IGNORECASE, and the
        # first match wins. Accented and unaccented spellings are both listed.
        self.section_patterns = {
            r'^\s*I+\)\s*(.+)$': 'primary_section',
            r'^\s*\d+\.\s*(.+)$': 'numbered_section',
            r'^\s*(IDENTIFICAÇÃO|IDENTIFICACAO)': 'identification',
            r'^\s*(INFORMAÇÕES|INFORMACOES).*PACIENTE': 'patient_info',
            r'^\s*(COMPOSIÇÃO|COMPOSICAO)': 'composition',
            r'^\s*(APRESENTAÇÕES|APRESENTACOES)': 'presentations',
            r'^\s*(INDICAÇÕES|INDICACOES)': 'indications',
            r'^\s*(CONTRAINDICAÇÕES|CONTRAINDICACOES)': 'contraindications',
            r'^\s*(PRECAUÇÕES|PRECAUCOES)': 'precautions',
            r'^\s*(REAÇÕES.*ADVERSAS|REACOES.*ADVERSAS|EFEITOS.*ADVERSOS)': 'adverse_effects',
            r'^\s*(INTERAÇÕES|INTERACOES)': 'drug_interactions',
            r'^\s*(POSOLOGIA|DOSAGEM)': 'dosage',
            r'^\s*(SUPERDOSAGEM|SUPERDOSE)': 'overdose',
            r'^\s*ARMAZENAMENTO': 'storage',
            r'^\s*DIZERES.*LEGAIS': 'legal_info'
        }

        # Checks the CLI and pulls the model as a side effect of construction.
        self.setup_ollama()

    def setup_ollama(self):
        """Verify the Ollama CLI is runnable and try to pull ``self.model_name``.

        The pull is best-effort: a timeout (300 s), a launch error or a
        non-zero exit status is printed but not raised, so a model that is
        already present locally can still be used.

        Raises:
            RuntimeError: if the ``ollama`` executable is missing or
                ``ollama --version`` exits with a non-zero status.
        """
        print(f"Setting up Ollama model: {self.model_name}")
        try:
            subprocess.run(["ollama", "--version"], capture_output=True, check=True)
        except (FileNotFoundError, subprocess.CalledProcessError) as err:
            # Chain the original error so the traceback shows the real cause.
            raise RuntimeError("Ollama CLI not found. Please install Ollama first.") from err
        print("Ollama CLI found")

        print(f"Pulling model {self.model_name}...")
        try:
            result = subprocess.run(
                ["ollama", "pull", self.model_name],
                capture_output=True, text=True, timeout=300
            )
        except Exception as e:
            print(f"Error with model setup: {e}")
            return

        if result.returncode == 0:
            print(f"Model {self.model_name} ready")
        else:
            # Previously a failed pull printed nothing at all; surface it.
            detail = (result.stderr or result.stdout or "").strip()
            print(
                f"Error with model setup: 'ollama pull {self.model_name}' "
                f"exited with code {result.returncode}: {detail}"
            )

    def call_ollama_raw(self, prompt: str) -> str:
        """Pipe ``prompt`` into ``ollama run <model>`` and return its reply.

        Returns:
            The stripped stdout of the command, or the stripped stderr when
            stdout is empty.

        Raises:
            RuntimeError: if the call exceeds 120 seconds or fails for any
                other reason.
        """
        command = ["ollama", "run", self.model_name]
        try:
            completed = subprocess.run(
                command,
                input=prompt,
                text=True,
                capture_output=True,
                timeout=120,
            )
            reply = completed.stdout.strip()
            if reply:
                return reply
            # Nothing on stdout: hand back whatever the CLI wrote to stderr.
            return completed.stderr.strip()
        except subprocess.TimeoutExpired:
            raise RuntimeError("Ollama call timed out")
        except Exception as e:
            raise RuntimeError(f"Error calling ollama: {e}")

    def extract_pdf_content(self, pdf_path: str) -> str:
        """Return the text of ``pdf_path``, trying pdfplumber then pymupdf4llm.

        pdfplumber is used first; pages with no text are skipped and the rest
        are joined with blank lines. If it fails or finds no text at all,
        the file is converted with ``pymupdf4llm.to_markdown`` instead.

        Raises:
            Exception: when the pymupdf4llm fallback also fails.
        """
        # First attempt: page-by-page text via pdfplumber.
        try:
            with pdfplumber.open(pdf_path) as document:
                page_texts = [page.extract_text() for page in document.pages]
                non_empty = [chunk for chunk in page_texts if chunk]
                if non_empty:
                    return "\n\n".join(non_empty)
        except Exception as e:
            print(f"pdfplumber failed: {e}")

        # Fallback: Markdown conversion via pymupdf4llm.
        try:
            markdown = pymupdf4llm.to_markdown(pdf_path)
        except Exception as e:
            print(f"pymupdf4llm failed: {e}")
            raise Exception("All extraction methods failed")
        return markdown

    def detect_section(self, text: str) -> Tuple[Optional[str], Optional[str]]:
        """Detect document section"""
        text_clean = text.strip()
        if len(text_clean) < 3:
            return None, None

        for pattern, section_type in self.section_patterns.items():
            if re.match(pattern, text_clean, re.IGNORECASE):
                return section_type, text_clean

        if (text_clean.isupper() and 5 < len(text_clean) < 100
            and not re.search(r'\d{3,}', text_clean)):
            return 'caps_header', text_clean

        return None, None

    def split_into_sections(self, text: str) -> List[Dict[str, str]]:
        """Split document into sections with content"""
        lines = text.split('\n')
        sections = []
        current_section = 'document_start'
        current_content = []

        for line in lines:
            line = line.strip()
            if not line:
                continue

            section_type, section_title = self.detect_section(line)

            if section_type and section_title:
                # Save previous section
                if current_content:
                    sections.append({
                        'section': current_section,
                        'content': '\n'.join(current_content)
                    })

                current_section = section_title
                current_content = []
                print(f"Section detected: {section_title}")
            else:
                current_content.append(line)

        # Add final section
        if current_content:
            sections.append({
                'section': current_section,
                'content': '\n'.join(current_content)
            })

        return sections

    def create_extraction_prompt(self, section_data: Dict[str, str]) -> str:
        """Create prompt for extracting graph triples from section"""
        section = section_data['section']
        content = section_data['content']

        entity_types_str = '\n'.join([f"- {k}: {v}" for k, v in self.entity_types.items()])
        relation_types_str = '\n'.join([f"- {k}: {v}" for k, v in self.relation_types.items()])

        prompt = f"""Extract pharmaceutical information as graph triples (Entity-Relation-Value) from this Brazilian pharmaceutical document section.

SECTION: {section}

ENTITY TYPES:
{entity_types_str}

RELATION TYPES:
{relation_types_str}

EXTRACTION RULES:
1. Extract ONLY information explicitly stated in the text
2. Each triple must have: source_entity, relation, target_entity
3. Use entity types from the list above
4. Use relation types from the list above
5. Extract exact values, don't paraphrase
6. Include confidence (0.1-1.0) based on clarity

RESPOND WITH VALID JSON ONLY:
{{
  "triples": [
    {{
      "source_entity": "exact_name_from_text",
      "source_type": "ENTITY_TYPE",
      "relation": "RELATION_TYPE",
      "target_entity": "exact_value_from_text",
      "target_type": "ENTITY_TYPE",
      "confidence": 0.9,
      "properties": {{"frequency": "common", "severity": "mild"}}
    }}
  ]
}}

TEXT TO ANALYZE:
{content[:2000]}

JSON:"""

        return prompt

    def parse_extraction_response(self, response: str) -> List[Dict]:
        """Parse LLM response into structured triples"""
        if not response or not response.strip():
            return []

        cleaned = response.strip()

        # Remove markdown blocks
        if "```json" in cleaned:
            start = cleaned.find("```json") + 7
            end = cleaned.rfind("```")
            if start < end:
                cleaned = cleaned[start:end].strip()
        elif "```" in cleaned:
            start = cleaned.find("```") + 3
            end = cleaned.rfind("```")
            if start < end:
                cleaned = cleaned[start:end].strip()

        # Extract JSON object
        json_start = cleaned.find('{')
        json_end = cleaned.rfind('}') + 1

        if json_start != -1 and json_end > json_start:
            cleaned = cleaned[json_start:json_end]

        try:
            parsed = json.loads(cleaned)
            if isinstance(parsed, dict) and 'triples' in parsed:
                return parsed['triples']
        except json.JSONDecodeError:
            pass

        return []

    def validate_triple(self, triple_dict: Dict) -> Optional[GraphTriple]:
        """Validate and create GraphTriple from dictionary"""
        try:
            required_fields = ['source_entity', 'source_type', 'relation', 'target_entity', 'target_type']

            # Check required fields
            for field in required_fields:
                if field not in triple_dict or not str(triple_dict[field]).strip():
                    return None

            # Validate entity types
            source_type = triple_dict['source_type'].strip().upper()
            target_type = triple_dict['target_type'].strip().upper()

            if source_type not in self.entity_types:
                return None
            if target_type not in self.entity_types:
                return None

            # Validate relation type
            relation = triple_dict['relation'].strip().upper()
            if relation not in self.relation_types:
                return None

            # Clean values
            source_entity = str(triple_dict['source_entity']).strip()
            target_entity = str(triple_dict['target_entity']).strip()

            if len(source_entity) < 2 or len(target_entity) < 2:
                return None

            confidence = float(triple_dict.get('confidence', 0.8))
            if not (0.0 <= confidence <= 1.0):
                confidence = 0.8

            return GraphTriple(
                source_entity=source_entity,
                source_type=source_type,
                relation=relation,
                target_entity=target_entity,
                target_type=target_type,
                properties=triple_dict.get('properties', {}),
                confidence=confidence
            )

        except Exception as e:
            print(f"Error validating triple {triple_dict}: {e}")
            return None

    def extract_graph_triples_from_section(self, section_data: Dict[str, str]) -> List[GraphTriple]:
        """Run the prompt -> LLM -> parse -> validate pipeline for one section.

        Args:
            section_data: dict with the keys ``'section'`` and ``'content'``.

        Returns:
            The validated triples, each tagged with the section heading.
            Any error in the pipeline is printed and yields an empty list.
        """
        print(f"Extracting from section: {section_data['section'][:50]}...")

        try:
            llm_reply = self.call_ollama_raw(self.create_extraction_prompt(section_data))

            accepted = []
            for candidate in self.parse_extraction_response(llm_reply):
                checked = self.validate_triple(candidate)
                if not checked:
                    continue
                # Record which section the triple came from.
                checked.source_section = section_data['section']
                accepted.append(checked)

            print(f"  Extracted {len(accepted)} valid triples")
            return accepted

        except Exception as e:
            print(f"Error extracting from section: {e}")
            return []

    def process_document(self, pdf_path: str) -> Dict[str, Any]:
        """Main processing function - extracts all graph data from PDF"""
        print(f"Processing pharmaceutical document: {pdf_path}")

        if not Path(pdf_path).exists():
            raise FileNotFoundError(f"File not found: {pdf_path}")

        # Extract text content
        print("Extracting text from PDF...")
        raw_text = self.extract_pdf_content(pdf_path)

        if not raw_text:
            raise ValueError("No text content extracted from PDF")

        print(f"Extracted {len(raw_text)} characters")

        # Split into sections
        print("Splitting document into sections...")
        sections = self.split_into_sections(raw_text)
        print(f"Found {len(sections)} sections")

        # Extract triples from each section
        all_triples = []
        for section_data in sections:
            section_triples = self.extract_graph_triples_from_section(section_data)
            all_triples.extend(section_triples)

        # Generate unique entities and relations
        entities = self.generate_entity_list(all_triples)
        relations = self.generate_relation_list(all_triples)

        # Compile results
        result = {
            'metadata': {
                'file_path': pdf_path,
                'file_name': Path(pdf_path).name,
                'processing_date': datetime.now().isoformat(),
                'total_text_length': len(raw_text),
                'sections_processed': len(sections),
                'model_used': self.model_name
            },
            'graph_data': {
                'entities': entities,
                'relations': relations,
                'triples': [triple.to_dict() for triple in all_triples]
            },
            'statistics': {
                'total_triples': len(all_triples),
                'unique_entities': len(entities),
                'unique_relations': len(relations),
                'entity_type_distribution': self.get_entity_type_stats(all_triples),
                'relation_type_distribution': self.get_relation_type_stats(all_triples)
            }
        }

        print(f"Extraction completed: {len(all_triples)} triples, {len(entities)} entities, {len(relations)} relations")
        return result

    def generate_entity_list(self, triples: List[GraphTriple]) -> List[Dict[str, Any]]:
        """Generate unique entity list from triples"""
        entity_dict = {}

        for triple in triples:
            # Add source entity
            if triple.source_entity not in entity_dict:
                entity_dict[triple.source_entity] = {
                    'id': str(uuid.uuid4()),
                    'name': triple.source_entity,
                    'type': triple.source_type,
                    'mentioned_in_sections': set()
                }
            entity_dict[triple.source_entity]['mentioned_in_sections'].add(triple.source_section)

            # Add target entity
            if triple.target_entity not in entity_dict:
                entity_dict[triple.target_entity] = {
                    'id': str(uuid.uuid4()),
                    'name': triple.target_entity,
                    'type': triple.target_type,
                    'mentioned_in_sections': set()
                }
            entity_dict[triple.target_entity]['mentioned_in_sections'].add(triple.source_section)

        # Convert sets to lists for JSON serialization
        entities = []
        for entity_data in entity_dict.values():
            entity_data['mentioned_in_sections'] = list(entity_data['mentioned_in_sections'])
            entities.append(entity_data)

        return entities

    def generate_relation_list(self, triples: List[GraphTriple]) -> List[Dict[str, Any]]:
        """Generate unique relation list from triples"""
        relation_dict = {}

        for triple in triples:
            relation_key = f"{triple.source_entity}_{triple.relation}_{triple.target_entity}"

            if relation_key not in relation_dict:
                relation_dict[relation_key] = {
                    'id': str(uuid.uuid4()),
                    'source_entity': triple.source_entity,
                    'relation_type': triple.relation,
                    'target_entity': triple.target_entity,
                    'properties': triple.properties,
                    'confidence': triple.confidence,
                    'source_section': triple.source_section
                }

        return list(relation_dict.values())

    def get_entity_type_stats(self, triples: List[GraphTriple]) -> Dict[str, int]:
        """Get statistics on entity types"""
        stats = {}
        for triple in triples:
            stats[triple.source_type] = stats.get(triple.source_type, 0) + 1
            stats[triple.target_type] = stats.get(triple.target_type, 0) + 1
        return stats

    def get_relation_type_stats(self, triples: List[GraphTriple]) -> Dict[str, int]:
        """Get statistics on relation types"""
        stats = {}
        for triple in triples:
            stats[triple.relation] = stats.get(triple.relation, 0) + 1
        return stats

    def save_results(self, results: Dict[str, Any], output_path: Optional[str] = None) -> str:
        """Save extraction results to JSON file"""
        if not output_path:
            file_name = results['metadata']['file_name']
            pdf_name = Path(file_name).stem
            output_path = f"{pdf_name}_graph_data.json"

        output_file = Path(output_path)
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)

        print(f"Results saved to: {output_file}")
        print(f"File size: {output_file.stat().st_size:,} bytes")
        return str(output_file)

    def export_for_neo4j(self, results: Dict[str, Any], output_dir: str = "neo4j_import") -> Dict[str, str]:
        """Export data in Neo4j import format"""
        output_path = Path(output_dir)
        output_path.mkdir(exist_ok=True)

        files_created = {}

        # Export entities
        entities_file = output_path / "entities.csv"
        with open(entities_file, 'w', encoding='utf-8') as f:
            f.write("id,name,type,sections\n")
            for entity in results['graph_data']['entities']:
                sections = '|'.join(entity['mentioned_in_sections'])
                f.write(f"{entity['id']},{entity['name']},{entity['type']},{sections}\n")
        files_created['entities'] = str(entities_file)

        # Export relations
        relations_file = output_path / "relations.csv"
        with open(relations_file, 'w', encoding='utf-8') as f:
            f.write("id,source_entity,relation_type,target_entity,confidence,section,properties\n")
            for relation in results['graph_data']['relations']:
                props = json.dumps(relation.get('properties', {}))
                f.write(f"{relation['id']},{relation['source_entity']},{relation['relation_type']},{relation['target_entity']},{relation['confidence']},{relation['source_section']},{props}\n")
        files_created['relations'] = str(relations_file)

        print(f"Neo4j import files created in: {output_dir}")
        return files_created

def main():
    """Run the extractor on a sample PDF, save the outputs and print a summary.

    Errors raised while processing are printed together with a traceback
    instead of propagating.
    """
    extractor = GraphEntityExtractor(model_name="llama3:8b")

    pdf_path = "bula_1755192077396.pdf"  # Update with your PDF path

    print("Graph Database Entity-Relation-Value Extractor")
    print("=" * 60)

    try:
        # Extract, then persist as JSON and as Neo4j CSV files.
        results = extractor.process_document(pdf_path)
        json_file = extractor.save_results(results)
        neo4j_files = extractor.export_for_neo4j(results)

        # Headline numbers
        stats = results['statistics']
        print("\nEXTRACTION SUMMARY:")
        print(f"Total triples: {stats['total_triples']}")
        print(f"Unique entities: {stats['unique_entities']}")
        print(f"Unique relations: {stats['unique_relations']}")

        # Per-type breakdowns share the same layout.
        breakdowns = (
            ("\nEntity types found:", 'entity_type_distribution'),
            ("\nRelation types found:", 'relation_type_distribution'),
        )
        for heading, stats_key in breakdowns:
            print(heading)
            for type_name, count in stats[stats_key].items():
                print(f"  {type_name}: {count}")

        print(f"\nFiles created:")
        print(f"  JSON: {json_file}")
        print(f"  Neo4j entities: {neo4j_files['entities']}")
        print(f"  Neo4j relations: {neo4j_files['relations']}")

        # Preview of the first five triples
        print("\nSample triples:")
        preview = results['graph_data']['triples'][:5]
        for position, triple in enumerate(preview, start=1):
            print(f"  {position}. {triple['source_entity']} --{triple['relation']}--> {triple['target_entity']}")

    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()

Setting up Ollama model: llama3:8b
Ollama CLI found
Pulling model llama3:8b...
Model llama3:8b ready
Graph Database Entity-Relation-Value Extractor
Processing pharmaceutical document: bula_1755192077396.pdf
Extracting text from PDF...
Extracted 11936 characters
Splitting document into sections...
Section detected: I) IDENTIFICAÇÃO DO MEDICAMENTO
Section detected: APRESENTAÇÕES
Section detected: USO ORAL
Section detected: USO ADULTO E PEDIÁTRICO ACIMA DE 6 ANOS DE IDADE
Section detected: COMPOSIÇÃO
Section detected: II) INFORMAÇÕES AO PACIENTE
Section detected: 1. PARA QUE ESTE MEDICAMENTO É INDICADO?
Section detected: 2. COMO ESTE MEDICAMENTO FUNCIONA?
Section detected: 3. QUANDO NÃO DEVO USAR ESTE MEDICAMENTO?
Section detected: 4. O QUE DEVO SABER ANTES DE USAR ESTE MEDICAMENTO?
Section detected: QUE ESTE MEDICAMENTO PODE ME CAUSAR? ”).
Section detected: Interações Medicamentosas: Você deve sempre informar seu médico sobre todos os medicamentos que estiver
Section detected: 5. ONDE, C

In [None]:
#!/usr/bin/env python3
"""
LLM-Enhanced Bula Entity-Relation Extractor (Portuguese Optimized)
Optimized for small LLMs (8B parameters) processing Brazilian medical texts
"""

import json
import requests
import re
from pathlib import Path
from typing import Dict, List, Any, Optional, Union
from dataclasses import dataclass, field, asdict
from datetime import datetime
import hashlib
import logging
import time
from enum import Enum
from tqdm import tqdm

# Setup logging: INFO level, each record formatted as
# "<timestamp> - <level name> - <message>"
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

class ConfidenceLevel(Enum):
    """Confidence levels for extracted entities.

    The member values are the lowercase strings that ``Entity.to_dict`` and
    ``Relation.to_dict`` emit for the ``confidence`` field.
    """
    LOW = "low"
    MEDIUM = "medium"  # default confidence of Entity and Relation
    HIGH = "high"
    VERY_HIGH = "very_high"

@dataclass
class Entity:
    """A graph node plus the context recorded when it was extracted.

    Only ``id``, ``type`` and ``properties`` are required; the remaining
    fields default to empty values and ``ConfidenceLevel.MEDIUM``.
    """
    id: str
    type: str
    properties: Dict[str, Any]
    source_text: str = ""
    source_section: str = ""
    confidence: ConfidenceLevel = ConfidenceLevel.MEDIUM
    llm_reasoning: str = ""
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dict, with ``confidence`` as its string value."""
        # asdict leaves the enum member in place; swap in its value.
        return {**asdict(self), 'confidence': self.confidence.value}

@dataclass
class Relation:
    """A directed edge between two entity IDs plus extraction context.

    ``source_entity_id``, ``target_entity_id`` and ``relation_type`` are
    required; the other fields default to empty values and
    ``ConfidenceLevel.MEDIUM``.
    """
    source_entity_id: str
    target_entity_id: str
    relation_type: str
    properties: Dict[str, Any] = field(default_factory=dict)
    confidence: ConfidenceLevel = ConfidenceLevel.MEDIUM
    llm_reasoning: str = ""
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dict, with ``confidence`` as its string value."""
        payload = asdict(self)
        # Replace the enum member with its string value.
        payload.update(confidence=self.confidence.value)
        return payload

class LLMClient:
    """Thin HTTP client for the ``/api/generate`` endpoint of an Ollama server."""

    def __init__(self, model_name: str = "llama3.1:8b", base_url: str = "http://localhost:11434"):
        """Remember the model tag and server URL, and open a reusable HTTP session."""
        self.model_name = model_name
        self.base_url = base_url
        self.session = requests.Session()

    def generate(self, prompt: str, max_tokens: int = 800, temperature: float = 0.1) -> str:
        """Send ``prompt`` to the model and return the generated text.

        Args:
            prompt: text sent as the ``prompt`` field.
            max_tokens: sent as the ``num_predict`` option.
            temperature: sent as the ``temperature`` option.

        Returns:
            The ``"response"`` field of the JSON reply.

        Raises:
            requests.exceptions.RequestException: on connection, timeout (60 s)
                or HTTP status errors; logged and re-raised.
            Exception: any other failure; logged and re-raised.
        """
        try:
            endpoint = f"{self.base_url}/api/generate"
            generation_options = {
                "temperature": temperature,
                "num_predict": max_tokens
            }
            payload = {
                "model": self.model_name,
                "prompt": prompt,
                "stream": False,  # ask for a single complete reply
                "options": generation_options
            }
            reply = self.session.post(endpoint, json=payload, timeout=60)
            reply.raise_for_status()
            return reply.json()["response"]
        except requests.exceptions.RequestException as e:
            logger.error(f"Erro de conexão LLM: {e}")
            raise
        except Exception as e:
            logger.error(f"Erro na API LLM: {e}")
            raise

class OptimizedBulaExtractor:
    """Extrator otimizado para LLMs pequenos processando textos médicos brasileiros"""

    def __init__(self, model_name: str = "llama3.1:8b", base_url: str = "http://localhost:11434"):
        """Create the LLM client and reset all per-run state.

        Args:
            model_name: Model tag forwarded to ``LLMClient``.
            base_url: Server URL forwarded to ``LLMClient``.
        """
        self.llm = LLMClient(model_name, base_url)

        # Run counters; every one starts at zero.
        self.processing_stats = dict.fromkeys(
            (
                'frases_processadas',
                'chamadas_llm',
                'entidades_criadas',
                'relacoes_criadas',
                'tempo_processamento',
            ),
            0,
        )

        self.document_metadata = {}
        self.medication_entity_id: Optional[str] = None
        self.relations: List[Relation] = []
        # Entities keyed by their generated id.
        self.entities: Dict[str, Entity] = {}

    def generate_entity_id(self, entity_type: str, value: str) -> str:
        """Build a reproducible id of the form ``<type>_<slug>_<hash8>``.

        ``<slug>`` is *value* lower-cased, stripped of everything except word
        characters, whitespace and hyphens, with whitespace runs turned into
        underscores. ``<hash8>`` is the first eight hex digits of the SHA-256
        of ``"<entity_type>:<value>"`` using the unmodified arguments.
        """
        digest = hashlib.sha256(f"{entity_type}:{value}".encode('utf-8')).hexdigest()
        slug = re.sub(r'[^\w\s-]', '', value.lower()).strip()
        slug = re.sub(r'\s+', '_', slug)
        return '_'.join((entity_type.lower(), slug, digest[:8]))

    def extract_entities_and_relations_from_sentence(self, sentence: str, section: str) -> Dict[str, Any]:
        """Extrai entidades E relações de uma frase - otimizado para LLM pequeno"""

        # Prompt em português, focado e conciso
        prompt = f"""Você é um especialista em medicamentos brasileiros. Analise esta frase de bula médica e extraia informações importantes:

FRASE: "{sentence}"
SEÇÃO: {section}

Extraia apenas informações médicas relevantes e suas relações. Responda APENAS em JSON:

{{
  "entidades": [
    {{"tipo": "MEDICAMENTO|DOSAGEM|INDICACAO|CONTRAINDICACAO|EFEITO_ADVERSO|POPULACAO|FABRICANTE", "valor": "texto exato", "confianca": "alta|media|baixa"}}
  ],
  "relacoes": [
    {{"origem": "valor_entidade1", "destino": "valor_entidade2", "tipo": "TRATA|TEM_DOSE|CONTRAINDICADO_PARA|CAUSA|ADEQUADO_PARA", "confianca": "alta|media|baixa"}}
  ]
}}

Regras:
- Só extraia termos médicos específicos (nomes de medicamentos, doenças, doses)
- Não extraia palavras genéricas como "tratamento", "paciente"
- Seja preciso com dosagens (ex: "10 mg", "uma vez ao dia")
- Crie relações lógicas entre entidades
- Máximo 5 entidades e 3 relações por frase"""

        try:
            self.processing_stats['chamadas_llm'] += 1
            response = self.llm.generate(prompt, max_tokens=600, temperature=0.1)

            # Limpa e extrai JSON
            json_start = response.find('{')
            json_end = response.rfind('}') + 1

            if json_start == -1 or json_end == 0:
                return {"entidades": [], "relacoes": []}

            json_str = response[json_start:json_end]
            result = json.loads(json_str)

            return {
                "entidades": result.get("entidades", []),
                "relacoes": result.get("relacoes", [])
            }

        except json.JSONDecodeError as e:
            logger.warning(f"Erro ao parsear JSON do LLM: {e}")
            return {"entidades": [], "relacoes": []}
        except Exception as e:
            logger.warning(f"Erro na extração: {e}")
            return {"entidades": [], "relacoes": []}

    def split_into_sentences(self, text: str, keywords: Optional[List[str]] = None,
                             min_length: int = 20, max_length: int = 200) -> List[str]:
        """Split *text* into sentences worth sending to the LLM.

        Whitespace is collapsed, the text is split after ``.``, ``!``, ``?``
        or ``;`` followed by whitespace, and a sentence is kept only when its
        length is within ``[min_length, max_length]`` and it contains at least
        one keyword (case-insensitive substring match).

        Args:
            text: Raw section text.
            keywords: Terms that mark a sentence as relevant. Defaults to the
                built-in list, so existing callers see unchanged behavior;
                pass the name of another drug to process a different leaflet.
            min_length: Minimum sentence length in characters.
            max_length: Maximum sentence length in characters.

        Returns:
            The retained sentences, in document order.
        """
        if not text:
            return []

        if keywords is None:
            keywords = ['ezetimiba', 'mg', 'dose', 'indicado', 'contraindicado', 'efeito', 'paciente']
        # Lower-case once; empty keywords are dropped because "" matches everything.
        needles = tuple(keyword.lower() for keyword in keywords if keyword)

        # Normalize whitespace before splitting
        text = re.sub(r'\s+', ' ', text.strip())

        meaningful_sentences = []
        for sentence in re.split(r'[.!?;]\s+', text):
            if not min_length <= len(sentence) <= max_length:
                continue
            lowered = sentence.lower()
            if any(needle in lowered for needle in needles):
                meaningful_sentences.append(sentence.strip())

        return meaningful_sentences

    def create_entity_from_llm(self, entity_data: Dict, source_text: str, section: str) -> Optional[Entity]:
        """Turn one LLM entity record into an ``Entity``.

        Args:
            entity_data: Record with the keys ``valor``, ``tipo`` and
                ``confianca`` as requested in the prompt.
            source_text: Sentence the record came from; at most the first
                300 characters are stored on the entity.
            section: Section title stored on the entity.

        Returns:
            A new ``Entity``, or ``None`` when the value is shorter than three
            characters, when an entity with the same id already exists in
            ``self.entities``, or when any error occurs (logged as a warning).
        """
        try:
            text_value = entity_data.get('valor', '').strip()
            if len(text_value) < 3:
                return None

            # Portuguese type tag from the prompt -> English node label
            label = {
                'MEDICAMENTO': 'Medication',
                'DOSAGEM': 'Dosage',
                'INDICACAO': 'Indication',
                'CONTRAINDICACAO': 'Contraindication',
                'EFEITO_ADVERSO': 'SideEffect',
                'POPULACAO': 'PatientPopulation',
                'FABRICANTE': 'Manufacturer'
            }.get(entity_data.get('tipo', ''), 'Unknown')

            # Portuguese confidence word -> ConfidenceLevel, MEDIUM when unrecognized
            level = {
                'alta': ConfidenceLevel.HIGH,
                'media': ConfidenceLevel.MEDIUM,
                'baixa': ConfidenceLevel.LOW
            }.get(entity_data.get('confianca', 'media'), ConfidenceLevel.MEDIUM)

            new_id = self.generate_entity_id(label, text_value)

            # Skip entities that were already registered
            if new_id in self.entities:
                return None

            return Entity(
                id=new_id,
                type=label,
                properties={'value': text_value, 'extraction_method': 'llm_otimizado'},
                source_text=source_text[:300],
                source_section=section,
                confidence=level,
                llm_reasoning=f"Extraído de: {section}",
                metadata={'llm_model': self.llm.model_name}
            )
        except Exception as err:
            logger.warning(f"Erro ao criar entidade: {err}")
            return None

    def create_relation_from_llm(self, relation_data: Dict, entities_map: Dict[str, str]) -> Optional[Relation]:
        """Turn one LLM relation record into a ``Relation``.

        Args:
            relation_data: Record with the keys ``origem``, ``destino``,
                ``tipo`` and ``confianca`` as requested in the prompt.
            entities_map: Lookup from entity value text to entity id.

        Returns:
            A new ``Relation``, or ``None`` when either endpoint is not found
            in *entities_map*, when both endpoints resolve to the same id, or
            when any error occurs (logged as a warning).
        """
        try:
            # Resolve both endpoints from their value text
            source_id = entities_map.get(relation_data.get('origem', '').strip())
            target_id = entities_map.get(relation_data.get('destino', '').strip())

            if not (source_id and target_id) or source_id == target_id:
                return None

            # Portuguese relation tag from the prompt -> English edge type
            edge_type = {
                'TRATA': 'INDICATED_FOR',
                'TEM_DOSE': 'HAS_DOSAGE',
                'CONTRAINDICADO_PARA': 'CONTRAINDICATED_FOR',
                'CAUSA': 'MAY_CAUSE',
                'ADEQUADO_PARA': 'SUITABLE_FOR'
            }.get(relation_data.get('tipo', ''), 'RELATED_TO')

            level = {
                'alta': ConfidenceLevel.HIGH,
                'media': ConfidenceLevel.MEDIUM,
                'baixa': ConfidenceLevel.LOW
            }.get(relation_data.get('confianca', 'media'), ConfidenceLevel.MEDIUM)

            return Relation(
                source_entity_id=source_id,
                target_entity_id=target_id,
                relation_type=edge_type,
                confidence=level,
                llm_reasoning="Relação identificada pelo LLM",
                metadata={'llm_model': self.llm.model_name}
            )
        except Exception as err:
            logger.warning(f"Erro ao criar relação: {err}")
            return None

    def process_bula_json(self, json_file_path: Union[str, Path]) -> Dict[str, Any]:
        """Run the full extraction pipeline over one pre-parsed bula JSON file.

        The file must contain ``representations.flat_blocks``. Blocks are
        grouped by section, each section's text is split into sentences, and
        every sentence is sent to the LLM. Resulting entities and relations
        accumulate on ``self.entities`` / ``self.relations`` and the counters
        in ``self.processing_stats`` are updated along the way.

        Args:
            json_file_path: Path of the input JSON file.

        Returns:
            The structured output built by ``_generate_output()``.

        Raises:
            ValueError: When the file has no ``flat_blocks``.
            Exception: Any error raised during processing is logged with its
                traceback and re-raised.
        """
        json_path = Path(json_file_path)
        logger.info(f"🚀 Iniciando processamento otimizado: {json_path}")

        start_time = datetime.now()

        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            self.document_metadata = data.get('document_metadata', {})
            self.document_metadata['source_file'] = str(json_path)

            flat_blocks = data.get('representations', {}).get('flat_blocks', [])
            if not flat_blocks:
                raise ValueError("Nenhum flat_blocks encontrado no JSON")

            # Group blocks by section title
            sections = self._group_blocks_by_section(flat_blocks)

            # Lookup from entity value text to entity id, used to resolve relation endpoints
            entities_value_map = {}

            print(f"\n📊 Processando {len(sections)} seções...")

            # Process each section
            for section_name, blocks in tqdm(sections.items(), desc="Seções", unit="seção"):
                section_content = self._combine_blocks_content(blocks)

                # Skip sections with fewer than 50 characters of text
                if len(section_content.strip()) < 50:
                    continue

                # Split into smaller sentences
                sentences = self.split_into_sentences(section_content)

                if not sentences:
                    continue

                print(f"  🔍 {section_name}: {len(sentences)} frases")

                # Process each sentence
                for sentence in tqdm(sentences, desc=f"Processando {section_name}", leave=False):
                    result = self.extract_entities_and_relations_from_sentence(sentence, section_name)

                    # Create entities; entities are registered before relations so that
                    # relations in the same sentence can resolve their endpoints
                    for entity_data in result.get('entidades', []):
                        entity = self.create_entity_from_llm(entity_data, sentence, section_name)
                        if entity and entity.id not in self.entities:
                            self.entities[entity.id] = entity
                            entities_value_map[entity.properties['value']] = entity.id
                            self.processing_stats['entidades_criadas'] += 1

                    # Create relations
                    for relation_data in result.get('relacoes', []):
                        relation = self.create_relation_from_llm(relation_data, entities_value_map)
                        if relation:
                            self.relations.append(relation)
                            self.processing_stats['relacoes_criadas'] += 1

                    self.processing_stats['frases_processadas'] += 1
                    time.sleep(0.05)  # Avoid overloading the API

            # Identify the main medication
            self._identify_main_medication()

            end_time = datetime.now()
            self.processing_stats['tempo_processamento'] = (end_time - start_time).total_seconds()

            print(f"\n✅ Processamento concluído!")
            print(f"   📊 {len(self.entities)} entidades | 🔗 {len(self.relations)} relações")
            print(f"   ⏱️ {self.processing_stats['tempo_processamento']:.2f}s | 📞 {self.processing_stats['chamadas_llm']} chamadas LLM")

            return self._generate_output()

        except Exception as e:
            logger.error(f"Erro no processamento: {e}", exc_info=True)
            raise

    def _group_blocks_by_section(self, flat_blocks: List[Dict]) -> Dict[str, List[Dict]]:
        """Agrupa blocos por seção"""
        sections = {}
        for block in flat_blocks:
            section_title = block.get('context', {}).get('section_title', 'Geral')
            sections.setdefault(section_title, []).append(block)
        return sections

    def _combine_blocks_content(self, blocks: List[Dict]) -> str:
        """Combina conteúdo dos blocos de uma seção"""
        contents = []
        for block in blocks:
            content = block.get('content', '').strip()
            if len(content) > 20:
                contents.append(content)
        return ' '.join(contents)

    def _identify_main_medication(self):
        """Pick the document's main medication and store its id.

        Only entities of type ``'Medication'`` are considered. The first one
        whose value contains ``'ezetimiba'`` (case-insensitive) wins;
        otherwise the first entity with the highest confidence is used.
        ``self.medication_entity_id`` is left untouched when there is no
        medication entity at all.
        """
        medications = [ent for ent in self.entities.values() if ent.type == 'Medication']

        if not medications:
            logger.warning("Nenhuma entidade de medicamento encontrada")
            return

        # Preferred match: a value mentioning 'ezetimiba'
        preferred = next(
            (ent for ent in medications if 'ezetimiba' in ent.properties.get('value', '').lower()),
            None,
        )
        if preferred is not None:
            self.medication_entity_id = preferred.id
            logger.info(f"Medicamento principal: {preferred.properties['value']}")
            return

        # Fallback: rank by confidence; unknown levels rank lowest
        rank = {
            ConfidenceLevel.LOW: 1,
            ConfidenceLevel.MEDIUM: 2,
            ConfidenceLevel.HIGH: 3,
            ConfidenceLevel.VERY_HIGH: 4
        }

        best = max(medications, key=lambda ent: rank.get(ent.confidence, 0))
        self.medication_entity_id = best.id
        logger.info(f"Medicamento principal (fallback): {best.properties['value']}")

    def _generate_output(self) -> Dict[str, Any]:
        """Gera saída estruturada para alimentar o banco de grafos"""

        # Organiza entidades por tipo para facilitar consultas
        entities_by_type = {}
        for entity in self.entities.values():
            entities_by_type.setdefault(entity.type, []).append(entity.to_dict())

        # Organiza relações por tipo
        relations_by_type = {}
        for relation in self.relations:
            relations_by_type.setdefault(relation.relation_type, []).append(relation.to_dict())

        return {
            'metadata': {
                'data_extracao': datetime.now().isoformat(),
                'arquivo_fonte': self.document_metadata.get('source_file', 'desconhecido'),
                'total_entidades': len(self.entities),
                'total_relacoes': len(self.relations),
                'medicamento_principal_id': self.medication_entity_id,
                'estatisticas_processamento': self.processing_stats,
                'modelo_llm': self.llm.model_name,
                'metodo_extracao': 'llm_otimizado_portugues'
            },

            # Dados estruturados para o grafo
            'entidades_por_tipo': entities_by_type,
            'relacoes_por_tipo': relations_by_type,

            # Dados completos (compatibilidade)
            'entidades': [entity.to_dict() for entity in self.entities.values()],
            'relacoes': [relation.to_dict() for relation in self.relations],

            # Estatísticas úteis
            'estatisticas': {
                'distribuicao_entidades': {tipo: len(lista) for tipo, lista in entities_by_type.items()},
                'distribuicao_relacoes': {tipo: len(lista) for tipo, lista in relations_by_type.items()},
                'estatisticas_llm': {
                    'total_chamadas_llm': self.processing_stats['chamadas_llm'],
                    'tempo_processamento': self.processing_stats['tempo_processamento'],
                    'frases_por_segundo': self.processing_stats['frases_processadas'] / max(self.processing_stats['tempo_processamento'], 1)
                }
            },

            # Queries Cypher otimizadas para Neo4j
            'queries_neo4j': self._generate_optimized_cypher()
        }

    def _generate_optimized_cypher(self) -> Dict[str, List[str]]:
        """Gera queries Cypher otimizadas e organizadas"""

        queries = {
            'constraints': [],
            'indexes': [],
            'entities': [],
            'relations': []
        }

        # Constraints únicos por tipo de entidade
        entity_types = {entity.type for entity in self.entities.values()}
        for entity_type in entity_types:
            queries['constraints'].append(
                f"CREATE CONSTRAINT {entity_type.lower()}_id_unique IF NOT EXISTS FOR (n:{entity_type}) REQUIRE n.id IS UNIQUE;"
            )
            queries['indexes'].append(
                f"CREATE INDEX {entity_type.lower()}_value_idx IF NOT EXISTS FOR (n:{entity_type}) ON (n.value);"
            )

        # Queries de entidades otimizadas
        for entity in self.entities.values():
            props = {
                'id': entity.id,
                'value': entity.properties.get('value', '').replace("'", "\\'"),
                'confidence': entity.confidence.value,
                'source_section': entity.source_section.replace("'", "\\'")
            }

            props_cypher = ', '.join([f"{k}: '{v}'" for k, v in props.items()])

            queries['entities'].append(
                f"MERGE (n:{entity.type} {{{props_cypher}}});"
            )

        # Queries de relações
        for relation in self.relations:
            queries['relations'].append(
                f"MATCH (a {{id: '{relation.source_entity_id}'}}), (b {{id: '{relation.target_entity_id}'}}) "
                f"MERGE (a)-[r:{relation.relation_type} {{confidence: '{relation.confidence.value}'}}]->(b);"
            )

        return queries

    def save_results(self, output_data: Dict[str, Any], output_path: Optional[str] = None) -> str:
        """Write the results as JSON plus a companion ``.cypher`` script.

        Args:
            output_data: Result document; must contain ``queries_neo4j`` with
                the lists ``constraints``, ``indexes``, ``entities`` and
                ``relations``.
            output_path: Target JSON path. When omitted, a name of the form
                ``bula_otimizada_<source stem>_<timestamp>.json`` is used in
                the current directory.

        Returns:
            The JSON file path as a string. The Cypher script is written next
            to it with the suffix replaced by ``.cypher``.
        """
        if not output_path:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            source_name = Path(self.document_metadata.get('source_file', 'unknown')).stem
            output_path = f"bula_otimizada_{source_name}_{timestamp}.json"

        output_file = Path(output_path)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        # Main JSON document
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=2, ensure_ascii=False)

        # Cypher script, one section per statement group
        cypher_queries = output_data['queries_neo4j']
        queries_path = output_file.with_suffix('.cypher')

        # Section headers use "//": Cypher has no "--" comment syntax, so the
        # previous "-- ..." headers made the script fail when executed.
        script_parts = [
            "// CONSTRAINTS E ÍNDICES\n",
            '\n'.join(cypher_queries['constraints']),
            '\n\n',
            '\n'.join(cypher_queries['indexes']),
            "\n\n// ENTIDADES\n",
            '\n'.join(cypher_queries['entities']),
            "\n\n// RELAÇÕES\n",
            '\n'.join(cypher_queries['relations']),
        ]

        with open(queries_path, 'w', encoding='utf-8') as f:
            f.write(''.join(script_parts))

        logger.info(f"✅ Resultados salvos em: {output_file.absolute()}")
        return str(output_file)


def main():
    """Run the extraction for the configured input file.

    Returns:
        ``0`` on success; ``1`` when the input file does not exist or when
        any exception is raised during processing.
    """

    # 🔧 Configuration
    source = Path("bula_1755192097944_llm_optimized.json")
    model, server = "llama3.1:8b", "http://localhost:11434"
    rule = "=" * 60

    if not source.exists():
        print(f"❌ ERRO: Arquivo não encontrado: {source.absolute()}")
        return 1

    try:
        print("🚀 Iniciando extração otimizada para LLM brasileiro...")
        print(f"📄 Arquivo: {source.name}")
        print(f"🤖 Modelo: {model}")
        print(rule)

        # Build the extractor, process the file, persist the results
        extractor = OptimizedBulaExtractor(model_name=model, base_url=server)
        output_data = extractor.process_bula_json(source)
        result_file = extractor.save_results(output_data)

        # Final summary
        summary = output_data['estatisticas']
        llm_summary = summary['estatisticas_llm']
        print("\n" + rule)
        print("🎉 EXTRAÇÃO CONCLUÍDA COM SUCESSO!")
        print(rule)
        print(f"📊 Entidades extraídas: {len(output_data['entidades'])}")
        print(f"🔗 Relações criadas: {len(output_data['relacoes'])}")
        print(f"📞 Chamadas LLM: {llm_summary['total_chamadas_llm']}")
        print(f"⏱️ Tempo total: {llm_summary['tempo_processamento']:.1f}s")
        print(f"🔄 Velocidade: {llm_summary['frases_por_segundo']:.1f} frases/s")

        print("\n📈 DISTRIBUIÇÃO DE ENTIDADES:")
        for label, total in summary['distribuicao_entidades'].items():
            print(f"  • {label}: {total}")

        relation_counts = summary['distribuicao_relacoes']
        if relation_counts:
            print("\n🔗 DISTRIBUIÇÃO DE RELAÇÕES:")
            for label, total in relation_counts.items():
                print(f"  • {label}: {total}")

        print("\n💾 Arquivos gerados:")
        print(f"  • JSON: {result_file}")
        print(f"  • Cypher: {result_file.replace('.json', '.cypher')}")
        print(rule)

        return 0

    except Exception as err:
        print(f"❌ ERRO CRÍTICO: {err}")
        logger.error(f"Erro na execução: {err}", exc_info=True)
        return 1


# Entry point: run main() and pass its return code (0 or 1) to exit().
if __name__ == "__main__":
    exit(main())

🚀 Iniciando extração otimizada para LLM brasileiro...
📄 Arquivo: bula_1755192097944_llm_optimized.json
🤖 Modelo: llama3.1:8b

📊 Processando 13 seções...


Seções:   0%|          | 0/13 [00:00<?, ?seção/s]

  🔍 INDICAÇÕES: 2 frases



Processando INDICAÇÕES:   0%|          | 0/2 [00:00<?, ?it/s][A
Processando INDICAÇÕES:  50%|█████     | 1/2 [00:10<00:10, 10.15s/it][A
Processando INDICAÇÕES: 100%|██████████| 2/2 [00:20<00:00, 10.34s/it][A
Seções:   8%|▊         | 1/13 [00:20<04:07, 20.64s/seção]

  🔍 RESULTADOS DE EFICÁCIA: 22 frases



Processando RESULTADOS DE EFICÁCIA:   0%|          | 0/22 [00:00<?, ?it/s][A
Processando RESULTADOS DE EFICÁCIA:   5%|▍         | 1/22 [00:06<02:23,  6.85s/it][A
Processando RESULTADOS DE EFICÁCIA:   9%|▉         | 2/22 [00:15<02:36,  7.83s/it][A
Processando RESULTADOS DE EFICÁCIA:  14%|█▎        | 3/22 [00:27<03:07,  9.87s/it][A
Processando RESULTADOS DE EFICÁCIA:  18%|█▊        | 4/22 [00:36<02:51,  9.53s/it][A
Processando RESULTADOS DE EFICÁCIA:  23%|██▎       | 5/22 [00:42<02:20,  8.28s/it][A
Processando RESULTADOS DE EFICÁCIA:  27%|██▋       | 6/22 [00:52<02:22,  8.88s/it][A
Processando RESULTADOS DE EFICÁCIA:  32%|███▏      | 7/22 [01:04<02:27,  9.82s/it][A
Processando RESULTADOS DE EFICÁCIA:  36%|███▋      | 8/22 [01:16<02:26, 10.47s/it][A
Processando RESULTADOS DE EFICÁCIA:  41%|████      | 9/22 [01:28<02:20, 10.85s/it][A
Processando RESULTADOS DE EFICÁCIA:  45%|████▌     | 10/22 [01:38<02:06, 10.58s/it][A
Processando RESULTADOS DE EFICÁCIA:  50%|█████     | 11/22 [

  🔍 CARACTERÍSTICAS FARMACOLÓGICAS: 19 frases



Processando CARACTERÍSTICAS FARMACOLÓGICAS:   0%|          | 0/19 [00:00<?, ?it/s][A
Processando CARACTERÍSTICAS FARMACOLÓGICAS:   5%|▌         | 1/19 [00:11<03:35, 11.98s/it][A
Processando CARACTERÍSTICAS FARMACOLÓGICAS:  11%|█         | 2/19 [00:25<03:35, 12.70s/it][A
Processando CARACTERÍSTICAS FARMACOLÓGICAS:  16%|█▌        | 3/19 [00:34<02:56, 11.00s/it][A
Processando CARACTERÍSTICAS FARMACOLÓGICAS:  21%|██        | 4/19 [00:40<02:18,  9.24s/it][A
Processando CARACTERÍSTICAS FARMACOLÓGICAS:  26%|██▋       | 5/19 [00:47<01:56,  8.34s/it][A
Processando CARACTERÍSTICAS FARMACOLÓGICAS:  32%|███▏      | 6/19 [00:53<01:39,  7.65s/it][A
Processando CARACTERÍSTICAS FARMACOLÓGICAS:  37%|███▋      | 7/19 [01:07<01:55,  9.62s/it][A
Processando CARACTERÍSTICAS FARMACOLÓGICAS:  42%|████▏     | 8/19 [01:17<01:48,  9.89s/it][A
Processando CARACTERÍSTICAS FARMACOLÓGICAS:  47%|████▋     | 9/19 [01:31<01:51, 11.16s/it][A
Processando CARACTERÍSTICAS FARMACOLÓGICAS:  53%|█████▎    | 10/19 

  🔍 CONTRAINDICAÇÕES: 1 frases



Processando CONTRAINDICAÇÕES:   0%|          | 0/1 [00:00<?, ?it/s][A
Processando CONTRAINDICAÇÕES: 100%|██████████| 1/1 [00:09<00:00,  9.50s/it][A
Seções:  31%|███       | 4/13 [07:41<16:03, 107.07s/seção]

  🔍 ADVERTÊNCIAS E PRECAUÇÕES: 35 frases



Processando ADVERTÊNCIAS E PRECAUÇÕES:   0%|          | 0/35 [00:00<?, ?it/s][A
Processando ADVERTÊNCIAS E PRECAUÇÕES:   3%|▎         | 1/35 [00:07<04:22,  7.72s/it][A
Processando ADVERTÊNCIAS E PRECAUÇÕES:   6%|▌         | 2/35 [00:14<04:05,  7.44s/it][A
Processando ADVERTÊNCIAS E PRECAUÇÕES:   9%|▊         | 3/35 [00:21<03:40,  6.89s/it][A
Processando ADVERTÊNCIAS E PRECAUÇÕES:  11%|█▏        | 4/35 [00:29<03:52,  7.51s/it][A
Processando ADVERTÊNCIAS E PRECAUÇÕES:  14%|█▍        | 5/35 [00:36<03:38,  7.29s/it][A
Processando ADVERTÊNCIAS E PRECAUÇÕES:  17%|█▋        | 6/35 [00:42<03:16,  6.77s/it][A
Processando ADVERTÊNCIAS E PRECAUÇÕES:  20%|██        | 7/35 [00:50<03:26,  7.37s/it][A
Processando ADVERTÊNCIAS E PRECAUÇÕES:  23%|██▎       | 8/35 [00:58<03:19,  7.37s/it][A
Processando ADVERTÊNCIAS E PRECAUÇÕES:  26%|██▌       | 9/35 [01:06<03:16,  7.55s/it][A
Processando ADVERTÊNCIAS E PRECAUÇÕES:  29%|██▊       | 10/35 [01:14<03:17,  7.91s/it][A
Processando ADVERTÊNCIAS E 

  🔍 INTERAÇÕES MEDICAMENTOSAS: 12 frases



Processando INTERAÇÕES MEDICAMENTOSAS:   0%|          | 0/12 [00:00<?, ?it/s][A
Processando INTERAÇÕES MEDICAMENTOSAS:   8%|▊         | 1/12 [00:09<01:49,  9.99s/it][A
Processando INTERAÇÕES MEDICAMENTOSAS:  17%|█▋        | 2/12 [00:17<01:28,  8.82s/it][A
Processando INTERAÇÕES MEDICAMENTOSAS:  25%|██▌       | 3/12 [00:26<01:18,  8.71s/it][A
Processando INTERAÇÕES MEDICAMENTOSAS:  33%|███▎      | 4/12 [00:32<01:01,  7.63s/it][A
Processando INTERAÇÕES MEDICAMENTOSAS:  42%|████▏     | 5/12 [00:45<01:07,  9.57s/it][A
Processando INTERAÇÕES MEDICAMENTOSAS:  50%|█████     | 6/12 [01:01<01:09, 11.64s/it][A
Processando INTERAÇÕES MEDICAMENTOSAS:  58%|█████▊    | 7/12 [01:06<00:47,  9.56s/it][A