# RAG Pipeline A/B Evaluation (Model-only)

This notebook loads your project and evaluates the **LLM-only** behavior of `rag.pipeline.RAGPipeline.answer` for a fixed set of queries, comparing two Ollama models (A/B).

In [1]:
import os

# Limit the BLAS/OpenMP thread pools to one thread unless the caller already
# chose a value. This avoids the OpenBLAS warning
# "Detect OpenMP Loop and this application may hang...".
for _thread_var in (
    "OPENBLAS_NUM_THREADS",
    "OMP_NUM_THREADS",
    "GOTO_NUM_THREADS",
    "MKL_NUM_THREADS",
):
    os.environ.setdefault(_thread_var, "1")

# Silence Hugging Face advisory warnings and progress bars. These are forced
# (not setdefault) and must be set before `transformers` is imported below.
os.environ.update(
    {
        "TRANSFORMERS_NO_ADVISORY_WARNINGS": "1",
        "HF_HUB_DISABLE_PROGRESS_BARS": "1",
        "TOKENIZERS_PARALLELISM": "false",
    }
)

from transformers.utils import logging as hf_logging
hf_logging.set_verbosity_error()

# (optional) quiet the sentence-transformers / transformers stdlib loggers too
import logging
for _noisy_logger in ("sentence_transformers", "transformers"):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)


  from .autonotebook import tqdm as notebook_tqdm


# RAG environment variables

In [2]:
import os
def init_env_defaults() -> None:
    """Seed ``os.environ`` with this notebook's default RAG / runtime settings.

    Every value is applied with ``os.environ.setdefault``, so a variable that
    already exists in the environment is never overwritten. In this notebook
    the first cell already sets OMP/MKL/OPENBLAS_NUM_THREADS and
    TOKENIZERS_PARALLELISM, so once that cell has run the corresponding
    defaults below have no effect.

    After the environment is populated, torch (if importable) is configured
    for inference: gradients disabled, intra-op threads taken from
    OMP_NUM_THREADS, one inter-op thread. Any failure in that step is ignored.
    """
    cpu_total = str(os.cpu_count() or 8)

    static_defaults = (
        # ---- artifacts / context budget ----
        ("RAG_ART_INDEX_PATH", "src/st3405_data/index/st4305_text_bgem3.faiss"),
        ("RAG_ART_STORE_PATH", "src/st3405_data/index/st4305_store.pkl.gz"),
        ("RAG_CTX_BUDGET_CHARS", "10000"),
        # ---- retrieval / query expansion ----
        ("RAG_HYBRID_ALPHA", "0.5"),       # weight for dense in fusion (0..1)
        ("RAG_QUERY_EXPAND", "1"),         # 1 = enable LLM query reformulation via Ollama
        ("RAG_QUERY_EXPAND_K", "0.5"),     # kept as in the original script
        ("RAG_PRF_ENABLE", "1"),
        ("RAG_PRF_TERMS", "8"),
        # ---- LLM (alternative tried previously: "llama3:latest") ----
        ("RAG_OLLAMA_MODEL", "llama3.2:3b"),
        ("RAG_RERANK_TIMEOUT", "15.0"),
        ("RAG_SELECT_TIMEOUT", "10.0"),
        ("RAG_QUERY_VARIATIONS", "10"),
        ("RAG_OLLAMA_NUM_PREDICT", "512"),
        ("RAG_MIN_CTX", "2"),
        ("RAG_MAX_CTX", "3"),
        ("RAG_CANDIDATE_K_FOR_RERANK", "10"),
        ("RAG_FINAL_TOP_N", "3"),
        ("RAG_DISABLE_SELECT", "1"),
        # ---- embedder / batching ----
        ("RAG_EMBEDDER_MODEL", "BAAI/bge-m3"),
        ("RAG_EMBED_DEVICE", "cpu"),
        ("RAG_EMBED_BATCH", "1"),
        ("RAG_EMBED_MAX_LENGTH", "256"),
        # ---- reranker (CPU profile) ----
        # Other reranker models noted in the original:
        #   jinaai/jina-reranker-v2-base-multilingual,
        #   naver/xprovence-reranker-bgem3-v1, naver/provence-reranker-debertav3-v1
        # A CUDA profile (device=cuda, FP16=1, PAD_MAX=0, WINDOW_BATCH=1) was
        # left commented out in the original.
        ("RAG_RERANKER_MODEL", "BAAI/bge-reranker-base"),
        ("RAG_RERANKER_DEVICE", "cpu"),
        ("RAG_RERANKER_WINDOW", "384"),
        ("RAG_RERANKER_STRIDE", "256"),
        ("RAG_RERANKER_FP16", "0"),
        ("RAG_RERANKER_WINDOW_BATCH", "8"),
        ("RAG_RERANKER_QUANTIZE", "1"),
        ("TOKENIZERS_PARALLELISM", "true"),
        ("OMP_NUM_THREADS", cpu_total),
        ("RAG_RERANKER_MAX_LEN", "384"),
        ("RAG_RERANKER_AGG", "max"),
        ("RAG_DISABLE_RERANK", "1"),
        ("RAG_MAX_VARIANTS", "4"),
        # ---- timeouts / performance ----
        ("RAG_EXPAND_TIMEOUT", "10.0"),
        ("RAG_BM25_PARALLEL", "1"),
        ("RAG_BM25_WORKERS", "16"),
        ("RAG_DENSE_K_PER_QUERY", "10"),
        ("RAG_SPARSE_K_PER_QUERY", "10"),
        ("RAG_ENV_DUMPED", "0"),
        ("RAG_FAISS_GPU", "0"),
        # ---- CUDA allocator / device selection / thread affinity ----
        ("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True"),
        ("CUDA_VISIBLE_DEVICES", "1"),
        ("KMP_AFFINITY", "granularity=fine,compact,1,0"),
    )
    for env_name, env_value in static_defaults:
        os.environ.setdefault(env_name, env_value)

    # The remaining math-library thread counts follow whatever OMP_NUM_THREADS
    # ended up being (it is guaranteed to exist after the loop above).
    omp_threads = os.environ["OMP_NUM_THREADS"]
    for env_name in ("MKL_NUM_THREADS", "OPENBLAS_NUM_THREADS", "NUMEXPR_NUM_THREADS"):
        os.environ.setdefault(env_name, omp_threads)

    # Best effort: torch may be missing, or its thread settings may already be
    # frozen; either way the notebook should keep going.
    try:
        import torch
        torch.set_grad_enabled(False)
        torch.set_num_threads(int(os.environ["OMP_NUM_THREADS"]))
        torch.set_num_interop_threads(1)
    except Exception:
        pass



# Call this once at application (notebook) startup, before the RAG objects are built:
init_env_defaults()


In [3]:
import os
from pathlib import Path
import sys
from pathlib import Path

def get_root_path():
    """Return the parent directory of the current working directory.

    The notebook treats that directory as the project root, so paths are
    built from it rather than from wherever the notebook file happens to live.
    """
    return Path.cwd().parent

# Resolve the project root and fail early if it does not exist.
PROJECT_DIR = Path(get_root_path())
assert PROJECT_DIR.exists(), PROJECT_DIR

# Put the project root first on sys.path (once) so `rag.*` imports resolve.
_project_dir_str = str(PROJECT_DIR)
if _project_dir_str not in sys.path:
    sys.path.insert(0, _project_dir_str)

print("Using project at:", PROJECT_DIR)

Using project at: /mnt/dados/projetos/gps-tracking-rag-assistent-v1/src


# Avaliação do RagPipeline com Conjunto de Perguntas (Homologação)

In [4]:

import json
from pathlib import Path
import pandas as pd

# Evaluation questions (one JSON object per line) and the output directory.
DATASET = Path("eval_queries.jsonl")
RESULTS_DIR = Path("eval_results")
RESULTS_DIR.mkdir(exist_ok=True, parents=True)

# Iterate the file line by line instead of `read_text().splitlines()`:
# `str.splitlines` also breaks on characters such as U+2028 or \x0c that may
# legally appear inside a JSON string, which would cut a record in half.
# Blank / whitespace-only lines are skipped instead of crashing `json.loads`.
with DATASET.open(encoding="utf-8") as dataset_file:
    rows = [json.loads(line) for line in dataset_file if line.strip()]
df = pd.DataFrame(rows)

display(df.head(10))
print("Total de exemplos:", len(df))

Unnamed: 0,id,category,query,answer,gold_doc_ids,gold_refs_hint,section_key
0,Q01,Comandos/Par√¢metros,Qual o comando para reiniciar o dispositivo vi...,Use o comando 'Reboot' (reinicia o dispositivo).,[20.envio de comandos],[20. ENVIO DE COMANDOS ‚Äî Comando Reboot.],20.envio de comandos
1,Q02,Comandos/Par√¢metros,Qual o comando para habilitar a Sa√≠da 1?,Use o comando 'Enable1' (ativa a Sa√≠da 1).,[20.envio de comandos],[20. ENVIO DE COMANDOS ‚Äî Enable1 / Disable1.],20.envio de comandos
2,Q03,Comandos/Par√¢metros,Como consultar a vers√£o do firmware do rastrea...,Envie 'ReqVer' com Option=1 para solicitar a v...,[20.envio de comandos],[20. ENVIO DE COMANDOS ‚Äî ReqVer (Option=1).],20.envio de comandos
3,Q04,Comandos/Par√¢metros,Como iniciar a calibra√ß√£o DPA por comando?,Use 'Start DPA Calibration' para iniciar e 'St...,[20.envio de comandos],[20. ENVIO DE COMANDOS ‚Äî Comandos de calibra√ß√£...,20.envio de comandos
4,Q05,Comandos/Par√¢metros,Existe comando para consultar o status do anti...,Sim. Utilize 'Get anti theft status' para cons...,[20.envio de comandos],[20. ENVIO DE COMANDOS ‚Äî Consulta antifurto.],20.envio de comandos
5,Q06,Configura√ß√µes de interface,Como habilitar o Fine Tracking no ST4305?,Par√¢metro (3010) ‚Äî Habilitar Fine Tracking: 01...,[27.configurando fine tracking],[27. CONFIGURANDO FINE TRACKING ‚Äî (3010) Habil...,27.configurando fine tracking
6,Q07,Configura√ß√µes de interface,Qual par√¢metro define o intervalo entre posi√ß√µ...,Par√¢metro (3011) ‚Äî Intervalo de posi√ß√µes GPS e...,[27.configurando fine tracking],[27. CONFIGURANDO FINE TRACKING ‚Äî (3011) Inter...,27.configurando fine tracking
7,Q08,Configura√ß√µes de interface,Como definir a quantidade de posi√ß√µes enviadas...,Par√¢metro (3012) ‚Äî Quantidade de posi√ß√µes por ...,[27.configurando fine tracking],[27. CONFIGURANDO FINE TRACKING ‚Äî (3012) Quant...,27.configurando fine tracking
8,Q09,Configura√ß√µes de interface,Quais campos podem ser habilitados no cabe√ßalh...,"No modo Small Table, √© poss√≠vel habilitar camp...",[24.configura√ß√£o de cabe√ßalhos (stt e alt)],[24. CONFIGURA√á√ÉO DE CABE√áALHOS (STT E ALT) ‚Äî ...,24.configura√ß√£o de cabe√ßalhos (stt e alt)
9,Q10,Configura√ß√µes de interface,Como configurar a APN de dados (par√¢metros de ...,"Na se√ß√£o de Par√¢metro de Rede, configure APN, ...",[7.par√¢metro de rede],[7. PAR√ÇMETRO DE REDE ‚Äî APN/dados m√≥veis.],7.par√¢metro de rede


Total de exemplos: 20


# RagPipeline

In [5]:
# src/api/api_manager.py
from __future__ import annotations

import gzip
import os
import pickle
from pathlib import Path
from typing import List, Dict

import faiss
from rank_bm25 import BM25Okapi

# RAG plumbing (optimized service uses sentence selection + compact prompts)
from rag.config import RetrievalConfig
from rag.pipeline import RAGPipeline
from rag.config import RetrievalConfig, Timeouts, BuildLimits

# Rerankers / LLM / embedder
from rag.rerank.hf_cross_encoder import HFCrossEncoderLongReranker
from rag.ollama import OllamaClient
from rag.init import get_embedder

# Tokenizer used to build/search BM25
from rag.utils import tokenize

# Retrievers
from rag.retrievers.dense_faiss import FaissRetriever
from rag.retrievers.sparse_bm25 import BM25Retriever
from rag.retrievers.multiquery_hybrid import MultiQueryHybridRetriever
from rag.query_expander import QueryExpander
from rag.types import DocStore

@staticmethod
def _env_int(name: str, default: int) -> int:
    try:
        return int(os.getenv(name, default))
    except Exception:
        return default

In [6]:
# ---- Load the FAISS index (moved to GPU 0 only when RAG_FAISS_GPU=1 and a GPU is visible) ----
faiss_path = PROJECT_DIR / "st3405_data/index/st4305_text_bgem3.faiss"
if not Path(faiss_path).exists():
    raise FileNotFoundError(f"FAISS index not found at {faiss_path}")

index = faiss.read_index(str(faiss_path))

# Default to CPU up front. Previously `index_device` was only assigned on the
# GPU success path or inside the except handler, so it stayed undefined in the
# normal CPU case.
index_device = "cpu"
try:
    if os.getenv("RAG_FAISS_GPU", "0") == "1" and faiss.get_num_gpus() > 0:
        res = faiss.StandardGpuResources()
        index = faiss.index_cpu_to_gpu(res, 0, index)
        index_device = "gpu"
except Exception:
    # GPU not available or FAISS GPU build not installed -> stay on CPU
    index_device = "cpu"

# Search breadth for IVF (nprobe) / HNSW (efSearch) indexes; index types that
# expose neither attribute are left untouched. Best effort: failures are ignored.
try:
    if hasattr(index, "nprobe"):
        index.nprobe = max(32, int(0.1 * getattr(index, "nlist", 100)))
    if hasattr(index, "hnsw"):
        index.hnsw.efSearch = int(os.getenv("RAG_FAISS_EFSEARCH", "256"))
except Exception:
    pass

# ---- Load the store: document texts, metadata, ids and the stored embedder model name ----
store_path = PROJECT_DIR / "st3405_data/index/st4305_store.pkl.gz"
if not Path(store_path).exists():
    raise FileNotFoundError(f"Store file not found at {store_path}")

# SECURITY NOTE: unpickling executes arbitrary code from the file; only load
# store files that were produced by this project's own indexing step.
with gzip.open(store_path, "rb") as f:
    store = pickle.load(f)

docs: List[str] = store["docs"]
meta: List[dict] = store["meta"]
ids:  List[str] = store["ids"]

# Prefer the model name recorded in the store; otherwise use the environment value.
embedder_model_name = store.get("model", os.getenv("RAG_EMBEDDER_MODEL"))

# The FAISS index and the three parallel lists must describe the same documents.
assert index.ntotal == len(docs) == len(meta) == len(ids), (
    f"Index/store size mismatch: faiss={index.ntotal}, docs={len(docs)}, meta={len(meta)}, ids={len(ids)}"
)

# Lookup tables keyed by document id: metadata (with the id echoed in) and raw text.
id_to_meta = {
    doc_id: {**doc_meta, "doc_id": doc_id}
    for doc_id, doc_meta in zip(ids, meta)
}
doc_store: DocStore = dict(zip(ids, docs))

# ---- Embedder (normalize=True for cosine/IP) ----
# NOTE(review): use_fp16=True is passed while RAG_EMBED_DEVICE defaults to "cpu"
# in init_env_defaults — confirm get_embedder handles fp16 on CPU as intended.
emb_model = get_embedder(embedder_model_name, use_fp16=True, normalize=True)

# ---- BM25 over the docs, built with the SAME tokenizer later handed to BM25Retriever ----
tokenized = [tokenize(t) for t in docs]
bm25 = BM25Okapi(
    tokenized,
    k1=float(os.getenv("RAG_BM25_K1", "1.4")),
    b=float(os.getenv("RAG_BM25_B", "0.4")),
)

# ---- LLM client (Ollama) ----
# The "llama3:latest" fallback only applies if RAG_OLLAMA_MODEL is unset;
# init_env_defaults() sets it to "llama3.2:3b" unless overridden.
ollama = OllamaClient(
    base_url=os.getenv("RAG_OLLAMA_BASE_URL", "http://localhost:11434"),
    model=os.getenv("RAG_OLLAMA_MODEL", "llama3:latest"),
    timeout=int(os.getenv("RAG_OLLAMA_TIMEOUT", "120")),
)

# ---- Retrieval configuration ----
# The numeric fallbacks below are used only when the variable is missing or
# invalid; init_env_defaults() already provides smaller values for most of
# them (e.g. RAG_FINAL_TOP_N=3, RAG_CANDIDATE_K_FOR_RERANK=10).
cfg = RetrievalConfig(
    query_variations=_env_int("RAG_QUERY_VARIATIONS", 5),
    dense_k_per_query=_env_int("RAG_DENSE_K_PER_QUERY", 80),
    sparse_k_per_query=_env_int("RAG_SPARSE_K_PER_QUERY", 80),
    rrf_k_const=_env_int("RAG_RRF_K_CONST", 50),
    candidate_k_for_rerank=_env_int("RAG_CANDIDATE_K_FOR_RERANK", 160),
    final_top_n=_env_int("RAG_FINAL_TOP_N", 12),
)

# ---- Build retriever stack (dense + sparse -> per-query hybrid -> multi-query hybrid) ----
dense = FaissRetriever(faiss_index=index, id_map=ids, embedder=emb_model)
sparse = BM25Retriever(bm25=bm25, id_map=ids, tokenize_fn=tokenize)

# Query expansion is on unless RAG_QUERY_EXPAND is set to something other than "1".
use_expand = os.getenv("RAG_QUERY_EXPAND", "1") == "1"
expander = QueryExpander(llm=ollama) if use_expand else None

retriever = MultiQueryHybridRetriever(
    dense=dense,
    sparse=sparse,
    expander=expander,
    rrf_k=cfg.rrf_k_const,
    per_query_k=max(cfg.dense_k_per_query, cfg.sparse_k_per_query),
    final_limit=cfg.candidate_k_for_rerank,
)

# ---- Build the RAG pipeline object that the evaluation loop calls ----

tmo = Timeouts()          # per the original note: uses env defaults (TODO confirm in rag.config)
limits = BuildLimits()    # per the original note: uses env defaults (TODO confirm in rag.config)

rag_pipeline = RAGPipeline(
    retriever=retriever,
    embedder=emb_model,
    llm=ollama,
    doc_store=doc_store,
    cfg=cfg,
    tmo=tmo,
    limits=limits,
    meta_store=id_to_meta
)

# Métricas (Retrieval & Ranking)

In [7]:
import math

def precision_at_k(relevant: set, ranked_ids: list, k: int = 5):
    """Fraction of the top-k ranked ids that are relevant.

    When fewer than ``k`` ids were retrieved, the cutoff shrinks to the number
    of retrieved ids (so the denominator is the shorter length, not ``k``).
    Returns 0.0 when the cutoff is zero.
    """
    cutoff = min(k, len(ranked_ids))
    if cutoff == 0:
        return 0.0
    hit_count = 0
    for position in range(cutoff):
        if ranked_ids[position] in relevant:
            hit_count += 1
    return hit_count / cutoff

def recall_at_k(relevant: set, ranked_ids: list, k: int = 5):
    """Fraction of the relevant ids that appear in the top-k of the ranking.

    Each relevant id is counted at most once. The previous implementation
    counted every matching position, so a ranking that repeated a relevant id
    (e.g. several chunks normalized to the same section key) could yield a
    recall above 1.0.

    Args:
        relevant: Ids considered correct for the query.
        ranked_ids: Retrieved ids, best first.
        k: Cutoff; clamped to ``[0, len(ranked_ids)]``.

    Returns:
        A value in [0.0, 1.0]; 0.0 when ``relevant`` is empty.
    """
    if not relevant:
        return 0.0
    relevant_set = set(relevant)
    cutoff = max(0, min(k, len(ranked_ids)))
    found = relevant_set.intersection(ranked_ids[:cutoff])
    return len(found) / len(relevant_set)

def mrr(relevant: set, ranked_ids: list):
    """Reciprocal rank (1-based) of the first relevant id, or 0.0 if none is found."""
    reciprocal_ranks = (
        1.0 / rank
        for rank, doc_id in enumerate(ranked_ids, start=1)
        if doc_id in relevant
    )
    return next(reciprocal_ranks, 0.0)

def hit_rate_at_k(relevant: set, ranked_ids: list, k: int = 5):
    """Return 1.0 if any of the top-k ranked ids is relevant, else 0.0."""
    cutoff = min(k, len(ranked_ids))
    for position in range(cutoff):
        if ranked_ids[position] in relevant:
            return 1.0
    return 0.0

def dcg_at_k(gains: list, k: int):
    """Discounted cumulative gain of the first ``k`` gains (log2 position discount)."""
    return sum((gains[i] / math.log2(i + 2)) for i in range(min(k, len(gains))))


def ndcg_at_k(relevant_ordered: list, ranked_ids: list, k: int = 5):
    """Binary-relevance nDCG@k.

    Each relevant id contributes gain 1.0 only the first time it appears in
    ``ranked_ids``; repeated occurrences get gain 0.0. Previously repeats were
    counted again, so the score could exceed 1.0. The ideal ranking uses the
    number of distinct relevant ids. The relevant-id set is built once
    instead of once per ranked id.

    Args:
        relevant_ordered: Relevant ids (order is irrelevant for binary gains).
        ranked_ids: Retrieved ids, best first.
        k: Cutoff.

    Returns:
        nDCG in [0.0, 1.0]; 0.0 when there are no relevant ids or k <= 0.
    """
    relevant_set = set(relevant_ordered)
    ideal = [1.0] * len(relevant_set)  # binary gains
    credited = set()
    gains = []
    for rid in ranked_ids:
        if rid in relevant_set and rid not in credited:
            credited.add(rid)
            gains.append(1.0)
        else:
            gains.append(0.0)
    idcg = dcg_at_k(ideal, k)
    if idcg == 0:
        return 0.0
    return dcg_at_k(gains, k) / idcg

# Métricas (Geração de Texto) — BLEU e ROUGE-L

In [8]:
def ngrams(tokens, n):
    """Return the contiguous n-grams of ``tokens`` as a list of tuples."""
    return list(zip(*[tokens[i:] for i in range(n)]))


def bleu(candidate, reference, max_n=4, epsilon=0.1):
    """Sentence-level BLEU of ``candidate`` against a single ``reference``.

    Tokens are whitespace-separated. The score is the brevity penalty times
    the geometric mean of the clipped n-gram precisions for n = 1..max_n.

    Fix over the previous version: n-gram orders with zero matches were
    dropped from the log-sum but still counted in the divisor, which is the
    same as treating them as precision 1.0. A candidate sharing only one
    unigram with the reference could therefore outscore one sharing a bigram.
    Now:
      * no unigram overlap -> 0.0;
      * an order with zero matches gets ``epsilon / total`` (additive
        smoothing of the numerator, in the spirit of NLTK's "method1");
      * orders for which the candidate is too short to have any n-gram are
        left out of the mean.
    Scores from earlier runs are not comparable with scores from this version.

    Args:
        candidate: Generated text.
        reference: Gold text.
        max_n: Highest n-gram order.
        epsilon: Pseudo-count for orders with zero matches; a value <= 0
            makes any zero-match order yield 0.0.

    Returns:
        BLEU in [0.0, 1.0].
    """
    from collections import Counter  # local import: keeps the cell self-contained

    cand, ref = candidate.split(), reference.split()
    if not cand:
        return 0.0

    log_precisions = []
    for n in range(1, max_n + 1):
        cand_counts = Counter(ngrams(cand, n))
        total = sum(cand_counts.values())
        if total == 0:
            break  # candidate shorter than n: no higher order exists either
        ref_counts = Counter(ngrams(ref, n))
        # Clipped matches: an n-gram is credited at most as often as it occurs in the reference.
        match = sum(min(count, ref_counts[ng]) for ng, count in cand_counts.items())
        if match == 0:
            if n == 1 or epsilon <= 0:
                return 0.0
            match = epsilon
        log_precisions.append(math.log(match / total))

    if not log_precisions:
        return 0.0
    # Brevity penalty: only candidates shorter than the reference are penalized.
    bp = 1.0 if len(cand) >= len(ref) else math.exp(1 - len(ref) / len(cand))
    return bp * math.exp(sum(log_precisions) / len(log_precisions))

def _lcs(a, b):
    """Length of the longest common subsequence of sequences ``a`` and ``b``."""
    # Row-by-row dynamic programming; only the previous row is kept.
    previous = [0] * (len(b) + 1)
    for item_a in a:
        current = [0]
        for col, item_b in enumerate(b, start=1):
            if item_a == item_b:
                current.append(previous[col - 1] + 1)
            else:
                current.append(max(previous[col], current[col - 1]))
        previous = current
    return previous[len(b)]


def rouge_l(candidate, reference):
    """ROUGE-L F-score (beta = 1.2) over whitespace tokens.

    Returns 0.0 when either text has no tokens or when there is no overlap.
    """
    cand_tokens = candidate.split()
    ref_tokens = reference.split()
    if not (cand_tokens and ref_tokens):
        return 0.0
    overlap = _lcs(cand_tokens, ref_tokens)
    prec = overlap / len(cand_tokens)
    rec = overlap / len(ref_tokens)
    beta_sq = 1.2 ** 2  # this is beta squared, as used in the F-measure
    denominator = rec + beta_sq * prec
    if denominator > 0:
        return ((1 + beta_sq) * prec * rec) / denominator
    return 0.0

# Execução da Avaliação

# Normalização e construção do GOLD

In [9]:
import unicodedata
from typing import Dict, Iterable, List, Set
import re

def _norm_id(x: str) -> str:
    """Normalize a document identifier: str() coercion, NFKC, strip, lowercase."""
    text = x if isinstance(x, str) else str(x)
    return unicodedata.normalize("NFKC", text).strip().lower()


def normalize_ids(ids: Iterable[str]) -> List[str]:
    """Normalize every id in ``ids`` with ``_norm_id``, skipping ``None`` entries."""
    return [_norm_id(raw_id) for raw_id in ids if raw_id is not None]


def build_gold_map(row) -> Dict[str, float]:
    """Build the GOLD map ``normalized doc_id -> gain`` for one dataset row.

    Only ``section_key`` (one main correct document) and ``gold_doc_ids``
    (further accepted documents) are used. An optional ``gold_gains`` mapping
    supplies graded gains; ids without an entry get 1.0 (binary relevance).

    Fixes over the previous version:
      * ``gold_gains`` keys are normalized the same way as the gold ids.
        Before, the lookup used normalized ids against raw keys, so any key
        that was not already lowercase/stripped was silently ignored.
      * a ``gold_gains`` value that is not a dict (e.g. NaN in a pandas row)
        is treated as absent instead of raising ``AttributeError``.
      * ``gold_doc_ids`` may also be a tuple or set.

    Args:
        row: Mapping-like object with ``.get`` (dict or pandas Series).

    Returns:
        Dict of normalized doc id -> float gain.

    Raises:
        ValueError: if the row has neither ``section_key`` nor ``gold_doc_ids``.
    """
    gold_ids: Set[str] = set()

    # 1) section_key (string): the single main "correct" document
    sk = row.get("section_key")
    if isinstance(sk, str) and sk.strip():
        gold_ids.add(_norm_id(sk))

    # 2) gold_doc_ids (collection): other documents accepted as correct
    g = row.get("gold_doc_ids")
    if isinstance(g, (list, tuple, set)):
        for x in g:
            if isinstance(x, str) and x.strip():
                gold_ids.add(_norm_id(x))

    # 3) Nothing specified: fail loudly so the dataset gets fixed
    if not gold_ids:
        raise ValueError(
            f"[Dataset inv√°lido] A linha id={row.get('id')} n√£o tem 'section_key' nem 'gold_doc_ids'. "
            "Preencha um dos dois para calcular m√©tricas corretamente."
        )

    # 4) Optional graded gains; default 1.0 (binary)
    raw_gains = row.get("gold_gains")
    gains_cfg = {}
    if isinstance(raw_gains, dict):
        gains_cfg = {_norm_id(key): value for key, value in raw_gains.items()}

    gold_map: Dict[str, float] = {}
    for doc_id in gold_ids:
        try:
            gain = float(gains_cfg.get(doc_id, 1.0))
        except Exception:
            gain = 1.0  # unparseable gain -> fall back to binary relevance
        gold_map[doc_id] = gain

    return gold_map

def command_match_ratio(gold_answer: str, pred_answer: str) -> float:
    """Share of the 'CMD;..' commands found in the gold answer that also
    appear verbatim in the predicted answer.

    Returns NaN when the gold answer contains no command.
    Note: a later cell of this notebook defines a function with the same name
    and the same behavior; once that cell runs, it replaces this one.
    """
    expected_cmds = re.findall(r"[A-Z]{2,4};[0-9;]+", gold_answer or "")
    if len(expected_cmds) == 0:
        return float("nan")
    haystack = pred_answer or ""
    found = [cmd for cmd in expected_cmds if cmd in haystack]
    return len(found) / len(expected_cmds)

# Utilitários para métricas de geração



In [10]:
# ==== Detectores + M√©tricas adaptativas para gera√ß√£o ====
import re
import math
from typing import Dict, List, Tuple
import unicodedata

# Device command such as "ABC;01;02": 2-4 capital letters, ';', then digits/semicolons.
_CMD_RE = re.compile(r"[A-Z]{2,4};[0-9;]+")
# Parameter id in parentheses, e.g. (3010), (1055); captures the digits.
_PARAM_ID_RE = re.compile(r"\((\d{3,4})\)")
# A number followed by one of the listed units (case-insensitive).
_NUM_UNIT_RE = re.compile(
    r"(?P<num>(?:\d+[.,]?\d*))\s*(?P<unit>mhz|dbm|v|vc{1,2}|s|ms|km/h|ma|¬µa|mah|ghz|kbps|mbps)\b",
    flags=re.IGNORECASE
)
# Mentions of "band"/"banda"/"bandas" (case-insensitive).
_BAND_RE = re.compile(r"\bband(as)?\s*:?|\b\b(?:banda|band)\b", re.IGNORECASE)

def _norm_txt(x: str) -> str:
    """Coerce ``x`` to text (falsy non-strings become ""), apply NFKC and strip."""
    if not isinstance(x, str):
        x = str(x or "")
    return unicodedata.normalize("NFKC", x).strip()

def detect_qtype(row) -> str:
    """Classify a dataset row as "command", "config", "specs" or "general".

    Heuristic over the GOLD text only (answer + gold_refs_hint + query); the
    first matching category wins, in that order.

    Fixes over the previous version:
      * the unit keyword check used ``\\bmhz|dbm|vcc|v\\b``, where the word
        boundaries bind only to the first and last alternative, so any word
        ending in "v" (or containing "dbm"/"vcc") was classified as "specs".
        The alternatives are now grouped: ``\\b(?:mhz|dbm|vcc|v)\\b``.
      * list-valued ``gold_refs_hint`` entries are converted with
        ``_norm_txt`` before joining, so non-string items no longer raise.

    Args:
        row: Mapping-like object with ``.get`` (dict or pandas Series).

    Returns:
        One of "command", "config", "specs", "general".
    """
    hint = row.get("gold_refs_hint")
    if isinstance(hint, list):
        hint_text = " ".join(_norm_txt(item) for item in hint)
    else:
        hint_text = _norm_txt(row.get("gold_refs_hint", ""))
    gold = " ".join([
        _norm_txt(row.get("answer", "")),
        hint_text,
        _norm_txt(row.get("query", "")),
    ])
    if _CMD_RE.search(gold) or re.search(r"\b(reboot|reqimsi|reqver|enable\d|disable\d|preset|initmsgno)\b", gold, re.I):
        return "command"
    if _PARAM_ID_RE.search(gold) or re.search(r"\b(apn|porta|cabe√ßalho|stt|table|perfil)\b", gold, re.I):
        return "config"
    if _NUM_UNIT_RE.search(gold) or _BAND_RE.search(gold) or re.search(r"\b(?:mhz|dbm|vcc|v)\b", gold, re.I):
        return "specs"
    # Fallback category; nothing is penalized for landing here.
    return "general"

def extract_cmds(text: str) -> List[str]:
    """Return every substring of ``text`` that matches ``_CMD_RE`` (empty list for falsy input)."""
    source = text if text else ""
    return _CMD_RE.findall(source)

def command_match_ratio(gold_answer: str, pred_answer: str) -> float:
    """Share of the gold answer's commands that appear verbatim in the prediction.

    Returns NaN when the gold answer contains no command. This redefines the
    function of the same name from an earlier cell, with the same behavior.
    """
    gold_cmds = extract_cmds(gold_answer)
    if not gold_cmds:
        return float("nan")
    haystack = pred_answer or ""
    matched = [cmd for cmd in gold_cmds if cmd in haystack]
    return len(matched) / len(gold_cmds)

def command_exact_match(gold_answer: str, pred_answer: str) -> float:
    """1.0 if every command in the gold answer is also extracted from the prediction, else 0.0.

    Extra commands in the prediction do not count against it (superset check).
    Returns NaN when the gold answer contains no command.
    """
    gold_cmds = set(extract_cmds(gold_answer))
    pred_cmds = set(extract_cmds(pred_answer))
    if not gold_cmds:
        return float("nan")
    if gold_cmds <= pred_cmds:
        return 1.0
    return 0.0

def command_jaccard(gold_answer: str, pred_answer: str) -> float:
    """Jaccard similarity between the command sets of the gold answer and the prediction.

    Returns NaN when neither text contains a command.
    """
    gold_cmds = set(extract_cmds(gold_answer))
    pred_cmds = set(extract_cmds(pred_answer))
    if not (gold_cmds or pred_cmds):
        return float("nan")
    shared = len(gold_cmds & pred_cmds)
    union_size = len(gold_cmds | pred_cmds)
    if union_size:
        return shared / union_size
    return float("nan")

def extract_numbers_units(text: str) -> List[Tuple[str,str]]:
    """Return the (number, unit) pairs matched by ``_NUM_UNIT_RE`` in ``text``.

    The number keeps its textual form with ',' replaced by '.'; the unit is
    lowercased.
    """
    return [
        (found.group("num").replace(",", "."), found.group("unit").lower())
        for found in _NUM_UNIT_RE.finditer(text or "")
    ]

def numeric_unit_match(gold_answer: str, pred_answer: str) -> float:
    """Share of the gold answer's (number, unit) pairs found in the prediction.

    A pair counts as found when both its number and its unit occur somewhere
    in the lowercased prediction as substrings (they need not be adjacent).
    Returns NaN when the gold answer has no (number, unit) pair.
    """
    expected_pairs = extract_numbers_units(gold_answer)
    if not expected_pairs:
        return float("nan")
    haystack = (pred_answer or "").lower()
    matched = sum(
        1
        for number, unit in expected_pairs
        if number.lower() in haystack and unit.lower() in haystack
    )
    return matched / len(expected_pairs)

def list_overlap_ratio(gold_items: List[str], pred_text: str) -> float:
    """Share of ``gold_items`` (e.g. LTE bands) that occur in ``pred_text``, case-insensitively.

    Returns NaN when ``gold_items`` is empty.
    """
    if not gold_items:
        return float("nan")
    haystack = (pred_text or "").lower()
    present = 0
    for item in gold_items:
        if _norm_txt(item).lower() in haystack:
            present += 1
    return present / len(gold_items)

def bands_from_text(text: str) -> List[str]:
    """Extract band numbers from text such as '1 [2100MHz], 3 [1800MHz], 28 [700MHz]' -> ['1', '3', '28'].

    If no 'N [xxxMHz]' pattern is present but the text matches ``_BAND_RE``,
    every standalone 1-2 digit number in the text is returned instead.
    Non-string input yields an empty list.
    """
    if not isinstance(text, str):
        return []
    bracketed = re.findall(r"\b(\d{1,2})\s*\[\s*\d+\s*mhz\s*\]", text, flags=re.I)
    if bracketed:
        return bracketed
    # Fallback: isolated numbers, only when the text mentions 'banda'/'band'
    if _BAND_RE.search(text or ""):
        return re.findall(r"\b(\d{1,2})\b", text)
    return bracketed

def compute_generation_metrics(row, pred_answer: str) -> Dict[str, float]:
    """Compute generation metrics for one prediction against the row's gold answer.

    Returns a dict with:
      * "gen_type": the category from ``detect_qtype`` (a string label);
      * "bleu" and "rouge_l": scores against the gold answer, or NaN when the
        gold answer is empty.

    Type-specific metrics (cmd_match / cmd_exact / cmd_jaccard for "command",
    num_unit_match / param_id_cover for "config") were commented out in the
    original and remain disabled: every type currently gets BLEU and ROUGE-L only.
    """
    reference = _norm_txt(row.get("answer", ""))
    metrics = {"gen_type": detect_qtype(row)}

    has_reference = bool(reference.strip())
    if has_reference:
        metrics["bleu"] = bleu(pred_answer, reference)
    else:
        metrics["bleu"] = float("nan")
    if has_reference:
        metrics["rouge_l"] = rouge_l(pred_answer, reference)
    else:
        metrics["rouge_l"] = float("nan")

    return metrics

def safe_bertscore(hyp: str, ref: str, lang: str = "pt"):
    """Compute BERTScore for one hypothesis/reference pair on CPU.

    Fixes over the previous version:
      * the ``bert_score`` import now happens inside the ``try``, so a missing
        or broken installation yields ``(None, None, None)`` as documented
        instead of raising ``ImportError``;
      * this text is now the real docstring (it used to sit after the import,
        where Python treats it as a no-op string expression).

    NOTE(review): this is called once per question; if ``bert_score.score``
    reloads its model on each call, reusing a scorer object would be much
    faster — confirm against the library documentation.

    Args:
        hyp: Generated text (``None``/empty is treated as "").
        ref: Gold text (``None``/empty is treated as "").
        lang: Language code passed to ``bert_score.score``.

    Returns:
        ``(P, R, F1)`` as floats, or ``(None, None, None)`` if BERTScore is
        unavailable or scoring fails (the error is printed).
    """
    hyp = hyp or ""
    ref = ref or ""
    try:
        # Lazy import: the dependency is optional and slow to load.
        from bert_score import score as bertscore_score

        P, R, F1 = bertscore_score(
            [hyp], [ref],
            lang=lang,
            rescale_with_baseline=True,
            batch_size=8,
            device="cpu",
        )
        # tensors -> float
        return float(P.mean()), float(R.mean()), float(F1.mean())
    except Exception as e:
        print(f"‚ö†Ô∏è Erro ao calcular BERTScore: {e}")
        return None, None, None



In [11]:
# Default RAG_LOG_ENABLED to "0" unless it is already set in the environment.
# NOTE(review): this runs after the pipeline objects were built in an earlier
# cell — confirm the rag package reads the flag at call time rather than at
# import/construction time.
os.environ.setdefault("RAG_LOG_ENABLED","0")

'0'

# Avaliação das métricas

In [12]:
# ==== Run all questions + metrics (incl. BERTScore) + save results as JSONL ====
import time, json
from pathlib import Path
import pandas as pd


K_LIST = [1, 3, 5]                 # cutoffs for the @k retrieval metrics
records, latencies = [], []

res_path = RESULTS_DIR / "rag_pipeline_results.jsonl"


# Remove the previous results file so a failed run cannot leave stale output behind
if res_path.exists():
    res_path.unlink()

rows_iter = df.iterrows()  # all rows; slice `df` here to evaluate a subset

for _row_idx, row in rows_iter:
    qid         = row["id"]
    query       = row["query"]
    gold_answer = row.get("answer", "") or ""

    print(f"\nüîç Pergunta {qid}: {query}")

    # End-to-end pipeline call. perf_counter() is monotonic and meant for
    # measuring durations; time.time() can jump when the wall clock is adjusted.
    t0 = time.perf_counter()
    result = rag_pipeline.answer(query)
    t1 = time.perf_counter()
    lat = t1 - t0
    latencies.append(lat)

    # Answer and documents returned by the pipeline
    answer          = (result.get("answer") or "").strip()
    ranked_ids_raw  = result.get("docs", []) or []
    ranked_ids      = normalize_ids(ranked_ids_raw)   # same normalization as the gold ids
    top1_score      = float(result.get("score_top1", 0.0) or 0.0)

    # === Deterministic GOLD (taken from the dataset row only) ===
    gold_map = build_gold_map(row)     # dict: normalized doc_id -> gain
    relevant = set(gold_map.keys())    # set used by the binary metrics

    # === Retrieval metrics ===
    rec = {
        "id": qid,
        "category": row.get("category", ""),
        **{f"precision@{k}": precision_at_k(relevant, ranked_ids, k) for k in K_LIST},
        **{f"recall@{k}":    recall_at_k(relevant,  ranked_ids, k) for k in K_LIST},
        "mrr": mrr(relevant, ranked_ids),
        **{f"hit@{k}":  hit_rate_at_k(relevant, ranked_ids, k) for k in K_LIST},
        **{f"ndcg@{k}": ndcg_at_k(list(relevant), ranked_ids, k) for k in K_LIST},
    }

    # === Generation metrics (BLEU / ROUGE-L plus the detected question type) ===
    genm = compute_generation_metrics(row, answer)
    rec.update(genm)

    # === BERTScore (P/R/F1) per question; values are None when unavailable ===
    bsP, bsR, bsF1 = safe_bertscore(answer, gold_answer, lang="pt")
    rec["bertscore_P"]  = bsP
    rec["bertscore_R"]  = bsR
    rec["bertscore_F1"] = bsF1

    # Extra fields kept for auditing
    rec["gen_answer"]     = answer
    rec["gold_answer"]    = gold_answer
    rec["score_top1"]     = top1_score
    rec["docs"]           = ranked_ids
    rec["latency_sec"]    = lat
    rec["gold_refs_hint"] = row["gold_refs_hint"] if "gold_refs_hint" in row else ""

    records.append(rec)

# Consolidated table
records_df = pd.DataFrame(records)

# Save as JSONL (one record per line)
with res_path.open("w", encoding="utf-8") as f:
    for r in records:
        json.dump(r, f, ensure_ascii=False)
        f.write("\n")

# Quick summary
avg_latency = (sum(latencies) / len(latencies)) if latencies else float("nan")
print(f"\n‚úÖ Resultados salvos em: {res_path}")
print(f"‚è±Ô∏è Lat√™ncia m√©dia (s/query): {avg_latency:.3f}")
print(f"üß™ Linhas avaliadas: {len(records_df)}")



üîç Pergunta Q01: Qual o comando para reiniciar o dispositivo via envio de comandos?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4144.57it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.16it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7476.48it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.37it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7839.82it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.48it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7612.17it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.86it/s]



üîç Pergunta Q02: Qual o comando para habilitar a Sa√≠da 1?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4124.19it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  8.50it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7449.92it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  8.12it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6909.89it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.20it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5714.31it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 10.72it/s]



üîç Pergunta Q03: Como consultar a vers√£o do firmware do rastreador por comando?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4152.78it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.70it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7182.03it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.67it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6786.90it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 10.73it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5090.17it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.51it/s]



üîç Pergunta Q04: Como iniciar a calibra√ß√£o DPA por comando?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 3711.77it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.83it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7145.32it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.53it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7332.70it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  9.69it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7543.71it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  9.47it/s]



üîç Pergunta Q05: Existe comando para consultar o status do antifurto?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4815.50it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.71it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4505.16it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  8.02it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6732.43it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.12it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6955.73it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 10.55it/s]



üîç Pergunta Q06: Como habilitar o Fine Tracking no ST4305?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5282.50it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.83it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7463.17it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.57it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 8719.97it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.35it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6909.89it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.47it/s]



üîç Pergunta Q07: Qual par√¢metro define o intervalo entre posi√ß√µes GPS no Fine Tracking?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 3347.41it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  6.71it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6543.38it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  6.78it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 8305.55it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  9.26it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6864.65it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  9.84it/s]



üîç Pergunta Q08: Como definir a quantidade de posi√ß√µes enviadas por relat√≥rio no Fine Tracking?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4064.25it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.21it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6462.72it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.28it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7294.44it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 10.66it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5518.82it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 10.99it/s]



üîç Pergunta Q09: Quais campos podem ser habilitados no cabe√ßalho STT (Small Table)?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4136.39it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.09it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6700.17it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  6.99it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6921.29it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.54it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 8305.55it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.41it/s]



üîç Pergunta Q10: Como configurar a APN de dados (par√¢metros de rede)?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 3063.77it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.33it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6452.78it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.14it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 8112.77it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 12.79it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7145.32it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.88it/s]



üîç Pergunta Q11: Como configurar ou desabilitar a senha de acesso no Synctrak?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 3960.63it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.24it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5652.70it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.36it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5203.85it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 10.13it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6864.65it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  9.42it/s]



üîç Pergunta Q12: Como adicionar o ID do motorista usando 1-Wire?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 3792.32it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.82it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5629.94it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.79it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6061.13it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 10.65it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6472.69it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  9.52it/s]



üîç Pergunta Q13: Como remover um ID do motorista previamente cadastrado?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4401.16it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.88it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4798.97it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.76it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6241.52it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.11it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6492.73it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.02it/s]



üîç Pergunta Q14: Como ler (identificar) o ID do motorista presente no ve√≠culo?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4350.94it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.45it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5275.85it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  6.91it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7037.42it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.22it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6132.02it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.08it/s]



üîç Pergunta Q15: Onde verificar o status do GPS/WWAN e do cart√£o SIM para diagn√≥stico?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4190.11it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  6.85it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5210.32it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  6.86it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7584.64it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 10.94it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5216.80it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 10.75it/s]



üîç Pergunta Q16: Quais bandas de 2G e LTE s√£o suportadas pelo ST4305/ST8300?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 3423.92it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  6.50it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4987.28it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  6.53it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7332.70it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.72it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5223.29it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.49it/s]



üîç Pergunta Q17: Qual √© a faixa de temperatura de opera√ß√£o do equipamento?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 3269.14it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  8.07it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4609.13it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  8.02it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 8272.79it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 12.54it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6605.20it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.59it/s]



üîç Pergunta Q18: Qual √© a tens√£o de alimenta√ß√£o principal suportada?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 3344.74it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  8.04it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6034.97it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.84it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 8097.11it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.23it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 8793.09it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 12.31it/s]



üîç Pergunta Q19: Qual a capacidade de mem√≥ria FIFO de posi√ß√µes?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 4284.27it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.86it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6355.01it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  8.05it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6932.73it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.05it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7530.17it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 10.73it/s]



üîç Pergunta Q20: O produto √© homologado por algum √≥rg√£o regulat√≥rio?


pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 3685.68it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.82it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6853.44it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  7.57it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 5793.24it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.58it/s]
pre tokenize: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 7958.83it/s]
Inference Embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 12.66it/s]



‚úÖ Resultados salvos em: eval_results/rag_pipeline_results.jsonl
‚è±Ô∏è Lat√™ncia m√©dia (s/query): 17.175
üß™ Linhas avaliadas: 20


# Sumários (Global e por Categoria) em .jsonl

In [13]:
# ==== Global and per-category metric summaries, written as JSONL ====
# Reads the per-query results file, averages every known metric column that is
# present, and writes two JSONL files: one row per metric (global) and one row
# per category (only when a "category" column exists).
import json
import math

import pandas as pd

DEC = 3  # number of decimal places kept in the reported means

res_path = RESULTS_DIR / "rag_pipeline_results.jsonl"
sum_cat_path = RESULTS_DIR / "summary_by_category.jsonl"
sum_glb_path = RESULTS_DIR / "summary_global.jsonl"


def _json_safe(value):
    """Return ``None`` for non-finite floats (NaN/inf), otherwise ``value`` unchanged.

    ``json`` would otherwise emit the bare tokens ``NaN``/``Infinity``, which
    are not valid JSON and are rejected by strict parsers.
    """
    if isinstance(value, float) and not math.isfinite(value):
        return None
    return value


def _write_jsonl(frame, path):
    """Write ``frame`` to ``path`` as UTF-8 JSONL, one JSON object per row.

    An empty frame produces an empty file.
    """
    with path.open("w", encoding="utf-8") as f:
        for record in frame.to_dict(orient="records"):
            clean = {key: _json_safe(val) for key, val in record.items()}
            # allow_nan=False is a guard: anything non-finite that slipped past
            # _json_safe fails loudly instead of corrupting the file.
            json.dump(clean, f, ensure_ascii=False, allow_nan=False)
            f.write("\n")


# Load the freshly saved JSONL. Iterating the file handle splits on real
# newlines only; str.splitlines() would also split on U+2028/U+2029/U+0085,
# which may appear unescaped inside JSON strings written with ensure_ascii=False.
with res_path.open("r", encoding="utf-8") as f:
    pred = [json.loads(line) for line in f if line.strip()]
df_pred = pd.DataFrame(pred)

# Metric columns to aggregate, restricted to those present in the results.
agg_cols = [c for c in [
    "precision@1","precision@3","precision@5",
    "recall@1","recall@3","recall@5",
    "mrr","hit@1","hit@3","hit@5",
    "ndcg@1","ndcg@3","ndcg@5",
    "bleu","rouge_l",
    "bertscore_P","bertscore_R","bertscore_F1",
] if c in df_pred.columns]

# Coerce every metric column to numeric up front (unparseable values -> NaN) so
# that no metric is silently dropped from the means and the per-category frame
# always contains exactly the columns in agg_cols.
metrics = pd.DataFrame(
    {c: pd.to_numeric(df_pred[c], errors="coerce") for c in agg_cols},
    index=df_pred.index,
)

# Global: one row per metric, columns "metric" and "mean".
summary_global = (
    metrics.mean()
    .round(DEC)
    .rename_axis("metric")
    .reset_index(name="mean")
)

# Per category (only if the results carry a "category" column).
cat_col = "category" if "category" in df_pred.columns else None
if cat_col:
    summary_by_cat = (
        metrics.groupby(df_pred[cat_col])
        .mean()
        .round(DEC)
        .reset_index()
    )
else:
    summary_by_cat = pd.DataFrame()

# Save both summaries as JSONL.
_write_jsonl(summary_global, sum_glb_path)
_write_jsonl(summary_by_cat, sum_cat_path)

print(f"‚úÖ summary_global.jsonl ‚Üí {sum_glb_path}")
print(f"‚úÖ summary_by_category.jsonl ‚Üí {sum_cat_path}")


‚úÖ summary_global.jsonl ‚Üí eval_results/summary_global.jsonl
‚úÖ summary_by_category.jsonl ‚Üí eval_results/summary_by_category.jsonl
