# HealthBench-BR - Teste com 20 Primeiras Perguntas

Este notebook testa os modelos ativos configurados em `providers.json` usando as primeiras 20 perguntas do dataset.

In [1]:
import json
import os
import sys
import asyncio
from typing import List, Dict
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv

# Carrega variáveis do .env sobrescrevendo as já existentes
load_dotenv(override=True)

# 🔍 Debug: imprime variáveis sensíveis carregadas (só nomes, não valores)
print("\n🔍 Variáveis carregadas do .env:")
for key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION", 
            "AWS_BEARER_TOKEN_BEDROCK", "OPENAI_API_KEY", "MARITACA_API_KEY"]:
    value = os.getenv(key)
    if value:
        masked = value[:4] + "..." if len(value) > 8 else "***"
        print(f"   {key} = {masked}")
    else:
        print(f"   {key} = ❌ Não definida")

# Add src to path
sys.path.insert(0, os.path.dirname(os.path.abspath('.')))

from src.config.loader import ProviderConfigItem
from src.providers.factory import ProviderFactory
from src.dataset.loader import DatasetLoader
from src.evaluation.evaluator import Evaluator
from src.reports.generator import ReportGenerator, EvaluationResult

import logging
import importlib

# Forçar reload dos módulos
import sys
modules_to_reload = ['src.providers.maritaca', 'src.providers.base', 'src.evaluation.evaluator']
for module in modules_to_reload:
    if module in sys.modules:
        importlib.reload(sys.modules[module])

# Configurar logging DEBUG
logging.basicConfig(level=logging.ERROR, force=True)


🔍 Variáveis carregadas do .env:
   AWS_ACCESS_KEY_ID = ❌ Não definida
   AWS_SECRET_ACCESS_KEY = ❌ Não definida
   AWS_DEFAULT_REGION = ❌ Não definida
   AWS_BEARER_TOKEN_BEDROCK = ABSK...
   OPENAI_API_KEY = sk-p...
   MARITACA_API_KEY = 1000...


## Configuração dos Providers

Carregamos os providers ativos do arquivo `providers.json`.

In [2]:
def load_active_providers(providers_file: str = "providers.json") -> List[Dict]:
    """Load active providers from JSON configuration file with environment and connectivity checks"""
    with open(providers_file, 'r', encoding='utf-8') as f:
        config = json.load(f)
    
    # Filter only active providers
    candidate_providers = [p for p in config['providers'] if p.get('active', False)]
    
    print(f"Providers candidatos encontrados: {len(candidate_providers)}")
    
    validated_providers = []
    
    for provider in candidate_providers:
        print(f"\n🔍 Validando {provider['name']} ({provider['type']}: {provider['model']})...")
        
        # Check environment variables and connectivity
        is_valid = validate_provider_config(provider)
        
        if is_valid:
            validated_providers.append(provider)
            print(f"   ✅ {provider['name']} - Configuração válida")
        else:
            print(f"   ❌ {provider['name']} - Configuração inválida (será ignorado)")
    
    print(f"\n📊 Resumo da validação:")
    print(f"   Providers candidatos: {len(candidate_providers)}")
    print(f"   Providers válidos: {len(validated_providers)}")
    
    if validated_providers:
        print(f"\n✅ Providers prontos para teste:")
        for provider in validated_providers:
            print(f"   - {provider['name']} ({provider['type']}: {provider['model']})")
    
    return validated_providers, config['default_settings']


def validate_provider_config(provider: Dict) -> bool:
    """Validate provider configuration including environment variables and connectivity"""
    provider_type = provider['type']
    
    try:
        # Check environment variables based on provider type
        if provider_type == 'openai':
            return validate_openai_config(provider)
        elif provider_type == 'maritaca':
            return validate_maritaca_config(provider)
        elif provider_type == 'ollama':
            return validate_ollama_config(provider)
        elif provider_type == 'aws_bedrock':
            return validate_bedrock_config(provider)
        else:
            print(f"      ⚠️ Tipo de provider desconhecido: {provider_type}")
            return False
            
    except Exception as e:
        print(f"      ❌ Erro na validação: {e}")
        return False


import os
from typing import Dict

def _env_or_literal(value: str) -> (str, str):
    """
    Se value for do tipo ${ENV}, retorna (valor_resolvido, "ENV").
    Caso contrário, retorna (value, None).
    """
    if isinstance(value, str) and value.startswith("${") and value.endswith("}"):
        env = value[2:-1]
        return os.getenv(env, ""), env
    return value, None

def _normalize_models_urls(base_url: str):
    """
    Gera duas variações corretas do endpoint de modelos, para evitar 404 por /v1 duplicado/faltando.
    Retorna (primary_url, alt_url), priorizando a forma mais provavel.
    """
    b = (base_url or "https://api.openai.com").rstrip("/")
    has_v1 = b.endswith("/v1")
    if has_v1:
        primary = f"{b}/models"       # .../v1/models
        alt     = f"{b[:-3]}/v1/models"  # .../models -> (evita erro caso b terminasse com /v1 já)
    else:
        primary = f"{b}/v1/models"    # .../v1/models
        alt     = f"{b}/models"       # .../models (para proxies compatíveis)
    return primary, alt

def validate_openai_config(provider: Dict) -> bool:
    """Validate OpenAI provider configuration"""
    # --- chave ---
    raw_key = provider.get("api_key", "")
    token, env_used = _env_or_literal(raw_key)
    if not token:
        if env_used:
            print(f"      ❌ Variável de ambiente {env_used} não encontrada")
        else:
            print("      ❌ API key não configurada")
        return False
    if env_used:
        print(f"      ✅ Variável de ambiente {env_used} encontrada")

    # --- headers ---
    headers = {"Authorization": f"Bearer {token}"}
    org, _ = _env_or_literal(provider.get("organization", ""))  # opcional
    if org:
        headers["OpenAI-Organization"] = org
    proj, _ = _env_or_literal(provider.get("project", ""))      # opcional
    if proj:
        headers["OpenAI-Project"] = proj

    # --- URLs ---
    base_url = provider.get("base_url", "https://api.openai.com")
    primary, alt = _normalize_models_urls(base_url)

    # --- conectividade ---
    try:
        import requests
        for url in (primary, alt):
            try:
                resp = requests.get(url, headers=headers, timeout=10)
                if resp.status_code in (200, 401):
                    print("      ✅ Conectividade com OpenAI OK")
                    # (opcional) validação do modelo informado sem consumir token:
                    # apenas checa se o campo 'data' existe quando 200
                    mdl = provider.get("model")
                    if mdl and resp.status_code == 200:
                        data = resp.json().get("data", [])
                        names = {m.get("id") for m in data if isinstance(m, dict)}
                        if names and mdl not in names:
                            # Não falha a conectividade; só informa.
                            print(f"      ⚠️ Modelo '{mdl}' não listado pelo endpoint /models")
                    return True
                elif resp.status_code == 404:
                    # tenta a próxima variação de URL
                    continue
                else:
                    print(f"      ⚠️ Resposta inesperada da API: {resp.status_code}")
                    return False
            except requests.RequestException as e:
                # tenta a próxima variação
                last_err = e
                continue

        # Se chegou aqui, as duas variações falharam especificamente com 404/erro
        print("      ❌ Endpoint /models não encontrado (provável base_url incorreta)")
        print(f"      💡 Verifique seu 'base_url': use 'https://api.openai.com' ou 'https://api.openai.com/v1'")
        return False

    except Exception as e:
        print(f"      ⚠️ Não foi possível testar conectividade: {e}")
        # Mantém seu comportamento original de não bloquear o fluxo
        return True


def validate_ollama_config(provider: Dict) -> bool:
    """Validate Ollama provider configuration"""
    base_url = provider.get('base_url', 'http://localhost:11434')
    model = provider.get('model', '')
    
    try:
        import requests
        # Testa a conexão básica
        response = requests.get(f"{base_url}/api/tags", timeout=10)
        if response.status_code == 200:
            print(f"      ✅ Conectividade com Ollama OK")
        else:
            print(f"      ⚠️ Resposta inesperada da API Ollama: {response.status_code}")
            return False

        # Testa se o modelo especificado existe localmente
        tags = response.json().get('models', [])
        model_names = [m.get('name', '') for m in tags]
        if model and not any(model in name for name in model_names):
            print(f"      ⚠️ Modelo '{model}' não encontrado entre os disponíveis: {model_names}")
            return False
        
        print(f"      ✅ Modelo '{model}' disponível localmente")
        return True

    except requests.ConnectionError:
        print(f"      ❌ Não foi possível conectar ao Ollama em {base_url}")
        print("      💡 Verifique se o serviço está em execução (ex: `ollama serve` ou container ativo)")
        return False
    except Exception as e:
        print(f"      ⚠️ Erro durante validação do Ollama: {e}")
        return True  # Permite continuar mesmo se falhar no teste


def validate_maritaca_config(provider: Dict) -> bool:
    """Validate Maritaca provider configuration"""
    api_key = provider.get('api_key', '')

    # Resolve environment variable
    if api_key.startswith('${') and api_key.endswith('}'):
        env_var = api_key[2:-1]
        actual_key = os.getenv(env_var)
        if not actual_key:
            print(f"      ❌ Variável de ambiente {env_var} não encontrada")
            return False
        print(f"      ✅ Variável de ambiente {env_var} encontrada")
    elif not api_key:
        print(f"      ❌ API key não configurada")
        return False

    # Test connectivity with the models endpoint
    base_url = provider.get('base_url', 'https://chat.maritaca.ai/api')
    try:
        import requests
        response = requests.get(f"{base_url}/models", timeout=10, headers={
            'Authorization': f'Bearer {actual_key if "actual_key" in locals() else api_key}'
        })
        if response.status_code in [200, 401]:  # 401 means auth failed but service is reachable
            print(f"      ✅ Conectividade com Maritaca OK")
            return True
        else:
            print(f"      ⚠️ Resposta inesperada da API: {response.status_code}")
            return False
    except Exception as e:
        print(f"      ⚠️ Não foi possível testar conectividade: {e}")
        return True  # Allow to proceed anyway


def validate_bedrock_config(provider: Dict) -> bool:
    """Validate AWS Bedrock provider configuration"""
    bearer_token = provider.get('aws_bearer_token', '')
    region = provider.get('region', 'us-east-1')
    
    # Check bearer token environment variable
    if bearer_token.startswith('${') and bearer_token.endswith('}'):
        env_var = bearer_token[2:-1]
        actual_token = os.getenv(env_var)
        if not actual_token:
            print(f"      ❌ Variável de ambiente {env_var} não encontrada")
            return False
        print(f"      ✅ Variável de ambiente {env_var} encontrada")

        # Se o bearer token existe, já é suficiente
        os.environ['AWS_BEARER_TOKEN_BEDROCK'] = actual_token
        print(f"      ✅ Bearer token configurado para Bedrock")
        return True   # ← retorno antecipado se token está ok
    
    # Caso não exista bearer token, verifica credenciais tradicionais
    aws_access_key = os.getenv('AWS_ACCESS_KEY_ID')
    aws_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')
    
    if not aws_access_key and not aws_secret_key:
        print(f"      ⚠️ Credenciais AWS não encontradas no ambiente")
        print(f"      💡 Tentando usar perfil padrão AWS...")
        
        try:
            import boto3
            session = boto3.Session()
            credentials = session.get_credentials()
            if credentials:
                print(f"      ✅ Credenciais AWS encontradas no perfil")
                return True
            else:
                print(f"      ❌ Nenhuma credencial AWS disponível")
                return False
        except Exception as e:
            print(f"      ❌ Erro ao verificar credenciais AWS: {e}")
            return False
    else:
        print(f"      ✅ Credenciais AWS encontradas no ambiente")
        return True


# Load providers with validation
active_providers, default_settings = load_active_providers()

if not active_providers:
    raise ValueError("\n❌ Nenhum provider válido encontrado!\n\n" +
                    "💡 Possíveis soluções:\n" +
                    "   1. Verifique as variáveis de ambiente necessárias\n" +
                    "   2. Certifique-se de que o Ollama está executando (se aplicável)\n" +
                    "   3. Configure as credenciais AWS (se aplicável)\n" +
                    "   4. Verifique as configurações em providers.json")

Providers candidatos encontrados: 4

🔍 Validando GPT-4.1 (openai: gpt-4.1)...
      ✅ Variável de ambiente OPENAI_API_KEY encontrada
      ✅ Conectividade com OpenAI OK
   ✅ GPT-4.1 - Configuração válida

🔍 Validando GPT-5 (openai: gpt-5)...
      ✅ Variável de ambiente OPENAI_API_KEY encontrada
      ✅ Conectividade com OpenAI OK
   ✅ GPT-5 - Configuração válida

🔍 Validando Maritaca-Sabiazinho-3 (maritaca: sabiazinho-3)...
      ✅ Variável de ambiente MARITACA_API_KEY encontrada
      ✅ Conectividade com Maritaca OK
   ✅ Maritaca-Sabiazinho-3 - Configuração válida

🔍 Validando Maritaca-Sabia-3.1 (maritaca: sabia-3.1)...
      ✅ Variável de ambiente MARITACA_API_KEY encontrada
      ✅ Conectividade com Maritaca OK
   ✅ Maritaca-Sabia-3.1 - Configuração válida

📊 Resumo da validação:
   Providers candidatos: 4
   Providers válidos: 4

✅ Providers prontos para teste:
   - GPT-4.1 (openai: gpt-4.1)
   - GPT-5 (openai: gpt-5)
   - Maritaca-Sabiazinho-3 (maritaca: sabiazinho-3)
   - Marita

## Carregamento do Dataset

Carregamos apenas as primeiras 20 perguntas do dataset para teste.

In [3]:
# Load dataset (first 20 questions)
print("Carregando dataset...")
dataset = DatasetLoader.load_dataset("benchmark_perguntas_unificado.json")
test_dataset = dataset[:50]  # Only first 20 questions

print(f"Dataset carregado: {len(test_dataset)} perguntas (primeiras 20)")
print(f"Arquivos únicos: {len(set(q.arquivo for q in test_dataset))}")
print(f"Títulos únicos: {len(set(q.titulo for q in test_dataset))}")

# Show sample questions
print("\nExemplo de perguntas:")
for i, question in enumerate(test_dataset[:3]):
    print(f"\n{i+1}. [{question.arquivo}] {question.titulo}")
    print(f"   Pergunta: {question.pergunta[:100]}...")
    print(f"   Resposta esperada: {question.esperado}")

Carregando dataset...
Dataset carregado: 50 perguntas (primeiras 20)
Arquivos únicos: 5
Títulos únicos: 5

Exemplo de perguntas:

1. [V_PCDT_-_Vasculite__-_Atualizado.pdf] Protocolo clínico e diretrizes terapêuticas da vasculite associada aos anticorpos anti-citoplasma de neutrófilos
   Pergunta: No que diz respeito ao protocolo clínico "Protocolo Clínico e Diretrizes Terapêuticas da Vasculite A...
   Resposta esperada: Verdadeiro

2. [V_PCDT_-_Vasculite__-_Atualizado.pdf] Protocolo clínico e diretrizes terapêuticas da vasculite associada aos anticorpos anti-citoplasma de neutrófilos
   Pergunta: No que diz respeito ao protocolo clínico "Protocolo Clínico e Diretrizes Terapêuticas da Vasculite A...
   Resposta esperada: Falso

3. [V_PCDT_-_Vasculite__-_Atualizado.pdf] Protocolo clínico e diretrizes terapêuticas da vasculite associada aos anticorpos anti-citoplasma de neutrófilos
   Pergunta: No que diz respeito ao protocolo clínico "Protocolo Clínico e Diretrizes Terapêuticas da Vascul

## Função para Criar Providers

Função auxiliar para instanciar providers baseado na configuração.

In [4]:
def create_provider_from_config(provider_config: Dict):
    """Create a provider instance from configuration"""
    
    # ✅ Criar ProviderConfigItem corretamente
    config_item = ProviderConfigItem(
        name=provider_config['name'],
        type=provider_config['type'],
        model=provider_config['model'],
        api_key=provider_config.get('api_key'),
        base_url=provider_config.get('base_url'),
        temperature=provider_config.get('temperature', 0.0),
        max_tokens=provider_config.get('max_tokens', 12000),
        timeout=provider_config.get('timeout', 120),
        region=provider_config.get('region', 'us-east-1'),
        aws_bearer_token=provider_config.get('aws_bearer_token'),
        extra_params=provider_config.get('extra_params', {})
    )
    
    # ✅ Usar ProviderFactory.create_from_config corretamente
    factory = ProviderFactory()
    return factory.create_from_config(config_item)

print("Função create_provider_from_config definida.")

Função create_provider_from_config definida.


## Execução dos Testes

Executamos a avaliação para cada provider ativo.

In [5]:
async def run_evaluation_for_provider(provider_config: Dict, dataset: List, parallelism: int = 3):
    """Run evaluation for a single provider"""
    print(f"\n{'='*60}")
    print(f"Testando: {provider_config['name']} ({provider_config['model']})")
    print(f"{'='*60}")
    
    try:
        # Create provider
        provider = create_provider_from_config(provider_config)
        
        # Create evaluator with reduced parallelism for testing
        evaluator = Evaluator(provider, parallelism=parallelism)
        
        # Run evaluation
        start_time = datetime.now()
        results = await evaluator.evaluate(dataset, show_progress=True)
        end_time = datetime.now()
        
        # Calculate metrics
        total = len(results)
        correct = sum(1 for r in results if r.correta)
        accuracy = correct / total if total > 0 else 0
        duration = (end_time - start_time).total_seconds()
        
        print(f"\n📊 Resultados para {provider_config['name']}:")
        print(f"   Total de perguntas: {total}")
        print(f"   Acertos: {correct}")
        print(f"   Acurácia: {accuracy:.2%}")
        print(f"   Tempo: {duration:.1f}s")
        
        return {
            'provider_name': provider_config['name'],
            'provider_type': provider_config['type'],
            'model': provider_config['model'],
            'total': total,
            'correct': correct,
            'accuracy': accuracy,
            'duration_seconds': duration,
            'results': results
        }
        
    except Exception as e:
        print(f"❌ Erro ao testar {provider_config['name']}: {e}")
        import traceback
        traceback.print_exc()
        return None

print("Função run_evaluation_for_provider definida.")

Função run_evaluation_for_provider definida.


## Execução dos Testes para Todos os Providers Ativos

In [6]:
# Run tests for all active providers
all_results = []

for provider_config in active_providers:
    result = await run_evaluation_for_provider(provider_config, test_dataset, parallelism=3)
    if result:
        all_results.append(result)

print(f"\n🎉 Testes concluídos! {len(all_results)} providers testados com sucesso.")


Testando: GPT-4.1 (gpt-4.1)
Total de perguntas a avaliar: 50


Batch 1:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/3 = 0.000


Batch 2:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 2:  33%|▎| 1/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/6 = 0.000


Batch 3:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/9 = 0.000


Batch 4:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 4:  33%|▎| 1/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/12 = 0.000


Batch 5:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/15 = 0.000


Batch 6:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 6:  33%|▎| 1/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/18 = 0.000


Batch 7:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 7:  33%|▎| 1/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/21 = 0.000


Batch 8:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 8:  33%|▎| 1/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/24 = 0.000


Batch 9:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/27 = 0.000


Batch 10:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 10:  33%|▎| 1/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/30 = 0.000


Batch 11:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/33 = 0.000


Batch 12:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 12:  33%|▎| 1/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/36 = 0.000


Batch 13:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/39 = 0.000


Batch 14:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 14:  33%|▎| 1/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/42 = 0.000


Batch 15:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/45 = 0.000


Batch 16:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 16:  33%|▎| 1/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/48 = 0.000


Batch 17:   0%| | 0/2 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 17: 100%|█| 2/2 [00


Acurácia parcial: 0/50 = 0.000

📊 Resultados para GPT-4.1:
   Total de perguntas: 50
   Acertos: 0
   Acurácia: 0.00%
   Tempo: 4.7s

Testando: GPT-5 (gpt-5)
Total de perguntas a avaliar: 50


Batch 1:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 1:  33%|▎| 1/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/3 = 0.000


Batch 2:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/6 = 0.000


Batch 3:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 3:  33%|▎| 1/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/9 = 0.000


Batch 4:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 4:  33%|▎| 1/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/12 = 0.000


Batch 5:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/15 = 0.000


Batch 6:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 6:  33%|▎| 1/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/18 = 0.000


Batch 7:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 7:  33%|▎| 1/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/21 = 0.000


Batch 8:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/24 = 0.000


Batch 9:   0%| | 0/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 9:  33%|▎| 1/3 [00:ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/27 = 0.000


Batch 10:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/30 = 0.000


Batch 11:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 11:  33%|▎| 1/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/33 = 0.000


Batch 12:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/36 = 0.000


Batch 13:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 13:  33%|▎| 1/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/39 = 0.000


Batch 14:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/42 = 0.000


Batch 15:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 15:  33%|▎| 1/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 

Acurácia parcial: 0/45 = 0.000


Batch 16:   0%| | 0/3 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_

Acurácia parcial: 0/48 = 0.000


Batch 17:   0%| | 0/2 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 17:  50%|▌| 1/2 [00ERROR:src.providers.base:❌ BaseLLMProvider.ainvoke failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
ERROR:src.providers.base:❌ Exception type: AuthenticationError
Batch 17: 100%|█| 2/2 [00


Acurácia parcial: 0/50 = 0.000

📊 Resultados para GPT-5:
   Total de perguntas: 50
   Acertos: 0
   Acurácia: 0.00%
   Tempo: 5.4s

Testando: Maritaca-Sabiazinho-3 (sabiazinho-3)
Total de perguntas a avaliar: 50


Batch 1: 100%|█| 3/3 [00:


Acurácia parcial: 2/3 = 0.667


Batch 2: 100%|█| 3/3 [00:


Acurácia parcial: 5/6 = 0.833


Batch 3: 100%|█| 3/3 [00:


Acurácia parcial: 7/9 = 0.778


Batch 4: 100%|█| 3/3 [00:


Acurácia parcial: 8/12 = 0.667


Batch 5: 100%|█| 3/3 [00:


Acurácia parcial: 10/15 = 0.667


Batch 6: 100%|█| 3/3 [00:


Acurácia parcial: 12/18 = 0.667


Batch 7: 100%|█| 3/3 [00:


Acurácia parcial: 14/21 = 0.667


Batch 8: 100%|█| 3/3 [00:


Acurácia parcial: 16/24 = 0.667


Batch 9: 100%|█| 3/3 [00:


Acurácia parcial: 18/27 = 0.667


Batch 10: 100%|█| 3/3 [00


Acurácia parcial: 20/30 = 0.667


Batch 11: 100%|█| 3/3 [00


Acurácia parcial: 22/33 = 0.667


Batch 12: 100%|█| 3/3 [00


Acurácia parcial: 24/36 = 0.667


Batch 13: 100%|█| 3/3 [00


Acurácia parcial: 26/39 = 0.667


Batch 14: 100%|█| 3/3 [00


Acurácia parcial: 28/42 = 0.667


Batch 15: 100%|█| 3/3 [00


Acurácia parcial: 30/45 = 0.667


Batch 16: 100%|█| 3/3 [00


Acurácia parcial: 31/48 = 0.646


Batch 17: 100%|█| 2/2 [00


Acurácia parcial: 32/50 = 0.640

📊 Resultados para Maritaca-Sabiazinho-3:
   Total de perguntas: 50
   Acertos: 32
   Acurácia: 64.00%
   Tempo: 9.4s

Testando: Maritaca-Sabia-3.1 (sabia-3.1)
Total de perguntas a avaliar: 50


Batch 1: 100%|█| 3/3 [00:


Acurácia parcial: 2/3 = 0.667


Batch 2: 100%|█| 3/3 [00:


Acurácia parcial: 5/6 = 0.833


Batch 3: 100%|█| 3/3 [00:


Acurácia parcial: 7/9 = 0.778


Batch 4: 100%|█| 3/3 [00:


Acurácia parcial: 9/12 = 0.750


Batch 5: 100%|█| 3/3 [00:


Acurácia parcial: 11/15 = 0.733


Batch 6: 100%|█| 3/3 [00:


Acurácia parcial: 12/18 = 0.667


Batch 7: 100%|█| 3/3 [00:


Acurácia parcial: 15/21 = 0.714


Batch 8: 100%|█| 3/3 [00:


Acurácia parcial: 17/24 = 0.708


Batch 9: 100%|█| 3/3 [00:


Acurácia parcial: 18/27 = 0.667


Batch 10: 100%|█| 3/3 [00


Acurácia parcial: 20/30 = 0.667


Batch 11: 100%|█| 3/3 [00


Acurácia parcial: 23/33 = 0.697


Batch 12: 100%|█| 3/3 [00


Acurácia parcial: 24/36 = 0.667


Batch 13: 100%|█| 3/3 [00


Acurácia parcial: 26/39 = 0.667


Batch 14: 100%|█| 3/3 [00


Acurácia parcial: 29/42 = 0.690


Batch 15: 100%|█| 3/3 [00


Acurácia parcial: 31/45 = 0.689


Batch 16: 100%|█| 3/3 [00


Acurácia parcial: 33/48 = 0.688


Batch 17: 100%|█| 2/2 [00

Acurácia parcial: 35/50 = 0.700

📊 Resultados para Maritaca-Sabia-3.1:
   Total de perguntas: 50
   Acertos: 35
   Acurácia: 70.00%
   Tempo: 34.2s

🎉 Testes concluídos! 4 providers testados com sucesso.





## Comparação de Resultados

Visualizamos uma tabela comparativa dos resultados.

In [7]:
if all_results:
    # Create comparison DataFrame
    comparison_data = []
    for result in all_results:
        comparison_data.append({
            'Provider': result['provider_name'],
            'Tipo': result['provider_type'],
            'Modelo': result['model'],
            'Total': result['total'],
            'Acertos': result['correct'],
            'Acurácia': f"{result['accuracy']:.2%}",
            'Tempo (s)': f"{result['duration_seconds']:.1f}"
        })
    
    comparison_df = pd.DataFrame(comparison_data)
    
    print("📋 COMPARAÇÃO DE RESULTADOS")
    print("=" * 80)
    print(comparison_df.to_string(index=False))
    
    # Find best performer
    best_result = max(all_results, key=lambda x: x['accuracy'])
    print(f"\n🏆 Melhor desempenho: {best_result['provider_name']} com {best_result['accuracy']:.2%} de acurácia")
else:
    print("❌ Nenhum resultado válido para comparar.")

📋 COMPARAÇÃO DE RESULTADOS
             Provider     Tipo       Modelo  Total  Acertos Acurácia Tempo (s)
              GPT-4.1   openai      gpt-4.1     50        0    0.00%       4.7
                GPT-5   openai        gpt-5     50        0    0.00%       5.4
Maritaca-Sabiazinho-3 maritaca sabiazinho-3     50       32   64.00%       9.4
   Maritaca-Sabia-3.1 maritaca    sabia-3.1     50       35   70.00%      34.2

🏆 Melhor desempenho: Maritaca-Sabia-3.1 com 70.00% de acurácia


## Geração de Relatórios Detalhados

Geramos relatórios CSV e HTML para cada provider testado.

In [8]:
# Generate detailed reports for each provider
for result in all_results:
    provider_name = result['provider_name'].replace(' ', '_').replace('-', '_')
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    
    # CSV report
    csv_filename = f"./test_results/{provider_name}_{timestamp}.csv"
    report_gen = ReportGenerator(output_path=csv_filename)
    report_gen.add_results(result['results'])
    report_gen.save_csv()
    
    # HTML report
    html_filename = f"./test_results/{provider_name}_{timestamp}_report.html"
    report_gen.save_html_report(path=html_filename, model_name=result['model'])
    
    print(f"📄 Relatórios gerados para {result['provider_name']}:")
    print(f"   CSV: {csv_filename}")
    print(f"   HTML: {html_filename}")

print("\n✅ Todos os relatórios foram gerados com sucesso!")

CSV salvo em: /Users/filipelopes/Desktop/Development/notebooks/noharm-maritaca/healthbench-br/test_results/GPT_4.1_20251007_093839.csv
Relatório HTML salvo em: /Users/filipelopes/Desktop/Development/notebooks/noharm-maritaca/healthbench-br/test_results/GPT_4.1_20251007_093839_report.html
📄 Relatórios gerados para GPT-4.1:
   CSV: ./test_results/GPT_4.1_20251007_093839.csv
   HTML: ./test_results/GPT_4.1_20251007_093839_report.html
CSV salvo em: /Users/filipelopes/Desktop/Development/notebooks/noharm-maritaca/healthbench-br/test_results/GPT_5_20251007_093839.csv
Relatório HTML salvo em: /Users/filipelopes/Desktop/Development/notebooks/noharm-maritaca/healthbench-br/test_results/GPT_5_20251007_093839_report.html
📄 Relatórios gerados para GPT-5:
   CSV: ./test_results/GPT_5_20251007_093839.csv
   HTML: ./test_results/GPT_5_20251007_093839_report.html
CSV salvo em: /Users/filipelopes/Desktop/Development/notebooks/noharm-maritaca/healthbench-br/test_results/Maritaca_Sabiazinho_3_20251007_09

## Análise Detalhada por Categoria

Analisamos o desempenho por arquivo e título das questões.

In [9]:
# Detailed analysis by category
if all_results:
    print("📊 ANÁLISE POR CATEGORIA")
    print("=" * 50)
    
    for result in all_results:
        print(f"\n{result['provider_name']} ({result['model']})")
        print("-" * 40)
        
        # Group by arquivo
        by_file = {}
        for eval_result in result['results']:
            if eval_result.arquivo not in by_file:
                by_file[eval_result.arquivo] = {'total': 0, 'correct': 0}
            by_file[eval_result.arquivo]['total'] += 1
            if eval_result.correta:
                by_file[eval_result.arquivo]['correct'] += 1
        
        print("Por arquivo:")
        for arquivo, stats in by_file.items():
            accuracy = stats['correct'] / stats['total'] if stats['total'] > 0 else 0
            print(f"  {arquivo}: {accuracy:.2%} ({stats['correct']}/{stats['total']})")
        
        # Group by titulo
        by_title = {}
        for eval_result in result['results']:
            if eval_result.titulo not in by_title:
                by_title[eval_result.titulo] = {'total': 0, 'correct': 0}
            by_title[eval_result.titulo]['total'] += 1
            if eval_result.correta:
                by_title[eval_result.titulo]['correct'] += 1
        
        print("\nPor categoria:")
        for titulo, stats in sorted(by_title.items()):
            accuracy = stats['correct'] / stats['total'] if stats['total'] > 0 else 0
            print(f"  {titulo}: {accuracy:.2%} ({stats['correct']}/{stats['total']})")

print("\n✅ Análise completa finalizada!")

📊 ANÁLISE POR CATEGORIA

GPT-4.1 (gpt-4.1)
----------------------------------------
Por arquivo:
  V_PCDT_-_Vasculite__-_Atualizado.pdf: 0.00% (0/10)
  P_Porfirias.pdf: 0.00% (0/10)
  R_Rastreamento_-_Câncer_do_Colo_do_Útero.pdf: 0.00% (0/10)
  A_PCDT_Escorpionismo.pdf: 0.00% (0/10)
  D_Diabete_Insípido.pdf: 0.00% (0/10)

Por categoria:
  Acidentes escorpiônicos: 0.00% (0/10)
  Diretrizes brasileiras para o rastreamento do câncer de colo do útero: Parte I - Rastreamento organizado utilizando testes moleculares para detecção de DNA-HPV oncogênico: 0.00% (0/10)
  Protocolo clínico e diretrizes terapêuticas da vasculite associada aos anticorpos anti-citoplasma de neutrófilos: 0.00% (0/10)
  Protocolo clínico e diretrizes terapêuticas das porfirias: 0.00% (0/10)
  Protocolo clínico e diretrizes terapêuticas do diabete insípido: 0.00% (0/10)

GPT-5 (gpt-5)
----------------------------------------
Por arquivo:
  V_PCDT_-_Vasculite__-_Atualizado.pdf: 0.00% (0/10)
  P_Porfirias.pdf: 0.00% (

## Resumo Final

Sumário dos testes realizados com as primeiras 20 perguntas.

In [10]:
print("🎯 RESUMO FINAL DO TESTE")
print("=" * 50)
print(f"Dataset testado: Primeiras 50 perguntas do HealthBench-BR")
print(f"Providers testados: {len(all_results)} de {len(active_providers)} ativos")
print(f"Data/Hora: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

if all_results:
    print("\nResultados:")
    for result in sorted(all_results, key=lambda x: x['accuracy'], reverse=True):
        print(f"  {result['provider_name']}: {result['accuracy']:.2%} ({result['correct']}/50)")
    
    avg_accuracy = sum(r['accuracy'] for r in all_results) / len(all_results)
    print(f"\nAcurácia média: {avg_accuracy:.2%}")

print("\n✨ Teste concluído com sucesso!")
print("\nPara executar o teste completo com todo o dataset, use:")
print("python evaluate.py --provider <provider> --model <model> --html_report")

🎯 RESUMO FINAL DO TESTE
Dataset testado: Primeiras 50 perguntas do HealthBench-BR
Providers testados: 4 de 4 ativos
Data/Hora: 2025-10-07 09:38:39

Resultados:
  Maritaca-Sabia-3.1: 70.00% (35/50)
  Maritaca-Sabiazinho-3: 64.00% (32/50)
  GPT-4.1: 0.00% (0/50)
  GPT-5: 0.00% (0/50)

Acurácia média: 33.50%

✨ Teste concluído com sucesso!

Para executar o teste completo com todo o dataset, use:
python evaluate.py --provider <provider> --model <model> --html_report
