In [1]:
# 0) Instalar dependencias
!pip install -q gradio pymupdf sentence-transformers faiss-cpu transformers torch accelerate pandas bitsandbytes

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m37.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m45.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# ================================
# 1) Imports y configuración
# ================================
import os
import io
import hashlib
import time
import pandas as pd
import numpy as np
import fitz  # PyMuPDF
import faiss
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig

In [19]:
# ================================
# 2) Parámetros de modelos y runtime
# ================================
# Configuración de modelos

EMBEDDING_MODEL = "paraphrase-multilingual-mpnet-base-v2" #@param ["paraphrase-multilingual-mpnet-base-v2", "all-mpnet-base-v2", "all-MiniLM-L6-v2"]
LLM_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" #@param ["TinyLlama/TinyLlama-1.1B-Chat-v1.0", "facebook/opt-1.3b", "gpt2-medium"]

# Hyperparámetros del pipeline
CHUNK_MAX_WORDS = 200        # tamaño máximo del chunk en palabras
CHUNK_OVERLAP = 40           # overlap entre chunks para contexto
TOP_K = 2                    # reducido de 4 a 3 para mayor precisión
SIMILARITY_THRESHOLD = 0.68  # aumentado de 0.65 a 0.72 para mayor rigor
MAX_RETRIES = 1              # reintentos para GPT
EARLY_EXIT_THRESHOLD = 0.4   # Umbral para corte temprano
LOG_CSV = "/content/qa_logs.csv"  # archivo de logging

# Device config
USE_CUDA = torch.cuda.is_available()
DEVICE_STR = "cuda" if USE_CUDA else "cpu"
print(f"Device: {DEVICE_STR}")

# Configuración de cuantización para ahorrar memoria
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

Device: cpu


In [4]:
# ================================
# 3) Cargar modelos
# ================================
print("Cargando modelo de embeddings...")
embedder = SentenceTransformer(EMBEDDING_MODEL)
embedder.max_seq_length = 512  # por si acaso, podria ser menos quizas

print("Cargando LLM (puede tardar)...")
try:
    # Configuración del tokenizador
    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, use_fast=True)
    tokenizer.pad_token = tokenizer.eos_token  # Para evitar warnings

    # Cargar modelo con configuración de cuantización
    llm_model = AutoModelForCausalLM.from_pretrained(
        LLM_MODEL,
        quantization_config=bnb_config if USE_CUDA else None,
        device_map="auto",
        torch_dtype=torch.float16 if USE_CUDA else torch.float32
    )

    # Configurar pipeline de generación
    generator = pipeline(
        "text-generation",
        model=llm_model,
        tokenizer=tokenizer,
        do_sample=True,
        temperature=0.3,
        top_p=0.9,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id
    )
    print("Modelo LLM cargado exitosamente!")
except Exception as e:
    print(f"Error al cargar el modelo LLM: {str(e)}")
    raise

Cargando modelo de embeddings...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/723 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/402 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Cargando LLM (puede tardar)...


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Device set to use cpu


Modelo LLM cargado exitosamente!


In [5]:
# ================================
# 4) Estado global e indexación incremental
# ================================
# Guardamos estructura: lista de documentos (chunks), embeddings en numpy, FAISS index
DOCS = []             # lista de strings (chunks)
EMBS = None           # numpy array NxD
FAISS_INDEX = None
CURRENT_DOC_HASH = None

def file_hash_bytes(b: bytes):
    h = hashlib.sha256()
    h.update(b)
    return h.hexdigest()

In [6]:
# ================================
# 5) Utilidades: limpieza y chunking
# ================================
def normalize_text(text: str) -> str:
    """Limpia y normaliza texto bruto."""
    # quita espacios múltiples, tabs y líneas vacías redundantes
    text = text.replace("\r", " ").replace("\t", " ")
    lines = [ln.strip() for ln in text.splitlines()]
    lines = [ln for ln in lines if ln]
    text = " ".join(lines)
    # opcional: más normalización (unicode, lower), pero mantenemos mayúsculas para QA exacta
    return " ".join(text.split())

def chunk_text(text: str, max_words=CHUNK_MAX_WORDS, overlap=CHUNK_OVERLAP):
    """Divide texto en chunks con overlap (por palabras)."""
    words = text.split()
    if len(words) <= max_words:
        return [text]
    chunks = []
    i = 0
    while i < len(words):
        chunk_words = words[i:i+max_words]
        chunks.append(" ".join(chunk_words))
        i += max_words - overlap
    return chunks

In [7]:
# ================================
# 6) Procesamiento e indexado del PDF
# ================================
def process_and_index_pdf_bytes(pdf_input):
    """Procesa PDF (desde diferentes fuentes), crea chunks, embeddings y construye FAISS index.
       Utiliza indexación incremental: si el hash del archivo no cambió, no reindexa.
    """
    global DOCS, EMBS, FAISS_INDEX, CURRENT_DOC_HASH

    try:
        # Método que funciona en Google Colab + Gradio
        # pdf_input tiene un atributo .name con la ruta del archivo temporal
        with open(pdf_input.name, "rb") as f:
            pdf_bytes = f.read()

    except AttributeError:
        # Fallback para otros tipos de input
        try:
            if hasattr(pdf_input, "read") and callable(getattr(pdf_input, "read", None)):
                pdf_bytes = pdf_input.read()
            elif isinstance(pdf_input, bytes):
                pdf_bytes = pdf_input
            else:
                return "Error: Tipo de archivo no soportado"
        except Exception as e:
            return f"Error al leer archivo: {str(e)}"
    except Exception as e:
        return f"Error al acceder al archivo: {str(e)}"

    # Verificar que tenemos bytes válidos
    if not isinstance(pdf_bytes, bytes) or len(pdf_bytes) == 0:
        return "Error: No se pudieron obtener datos válidos del PDF"

    # Calcular hash para indexación incremental
    try:
        new_hash = file_hash_bytes(pdf_bytes)
        if CURRENT_DOC_HASH == new_hash and FAISS_INDEX is not None:
            return f"No hubo cambios en el PDF (hash igual). Index existente reutilizado. Chunks: {len(DOCS)}"
    except Exception as e:
        return f"Error al calcular hash: {str(e)}"

    # ---  Abrir PDF y extraer texto ---
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        raw_text = []

        for page in doc:
            txt = page.get_text("text")
            if txt:
                raw_text.append(txt)

        doc.close()

    except Exception as e:
        return f"Error al abrir PDF: {str(e)}. Verifica que el archivo no esté corrupto."

    if not raw_text:
        return "Error: No se pudo extraer texto del PDF. Puede estar vacío o ser un PDF de imágenes."

    full_text = "\n\n".join(raw_text)

    # Normalizar texto
    try:
        full_text = normalize_text(full_text)
    except Exception as e:
        return f"Error al normalizar texto: {str(e)}"

    # ---  Chunking con solapamiento ---
    chunks = []
    try:
        for paragraph in full_text.split("\n\n"):
            paragraph = paragraph.strip()
            if not paragraph:
                continue
            generated = chunk_text(paragraph, max_words=CHUNK_MAX_WORDS, overlap=CHUNK_OVERLAP)
            chunks.extend([chunk for chunk in generated if len(chunk.split()) >= 8])

    except Exception as e:
        return f"Error en chunking: {str(e)}"

    if not chunks:
        return "Error: No se pudieron crear chunks útiles del texto extraído."

    DOCS = chunks

    # ---  Embeddings ---
    try:
        batch_size = 32
        embs_list = []
        for i in range(0, len(DOCS), batch_size):
            batch = DOCS[i:i+batch_size]
            emb = embedder.encode(batch, convert_to_tensor=False, show_progress_bar=False, normalize_embeddings=True)
            embs_list.append(emb)
        EMBS = np.vstack(embs_list).astype("float32")

    except Exception as e:
        return f"Error al crear embeddings: {str(e)}"

    # Normalizar vectores
    try:
        faiss.normalize_L2(EMBS)
    except Exception as e:
        return f"Error al normalizar embeddings: {str(e)}"

    # --- Construir FAISS index ---
    try:
        dim = EMBS.shape[1]
        FAISS_INDEX = faiss.IndexFlatIP(dim)
        FAISS_INDEX.add(EMBS)

    except Exception as e:
        return f"Error al crear índice FAISS: {str(e)}"

    CURRENT_DOC_HASH = new_hash
    return f"✅ PDF procesado exitosamente: {len(DOCS)} chunks, dimensión embeddings {dim}"

In [8]:
# ================================
# 7) Búsqueda de contexto
# ================================
def search_top_k(query: str, top_k: int = TOP_K):
    """Recupera top_k fragments y sus scores (cosine) para la pregunta"""
    if FAISS_INDEX is None or EMBS is None:
        return [], []

    q_emb = embedder.encode([query], convert_to_tensor=False, normalize_embeddings=True)
    q_arr = np.array(q_emb, dtype="float32")
    faiss.normalize_L2(q_arr)
    distances, indices = FAISS_INDEX.search(q_arr, top_k)
    # distances are inner products (cosine since normalized)
    idxs = indices[0].tolist()
    scores = distances[0].tolist()
    fragments = [DOCS[i] for i in idxs]
    return fragments, scores

def search_top_k_with_indices(query: str, top_k: int = TOP_K):
    """Recupera top_k fragments, sus scores y los índices (para obtener embeddings ya calculados)."""
    if FAISS_INDEX is None or EMBS is None:
        return [], [], []

    q_emb = embedder.encode([query], convert_to_tensor=False, normalize_embeddings=True)
    q_arr = np.array(q_emb, dtype="float32")
    faiss.normalize_L2(q_arr)
    distances, indices = FAISS_INDEX.search(q_arr, top_k)
    idxs = indices[0].tolist()
    scores = distances[0].tolist()
    fragments = [DOCS[i] for i in idxs]
    return fragments, scores, idxs

def mean_embedding_for_indices(idxs):
    """Devuelve el embedding promedio (L2-normalizado) para los indices de fragments seleccionados."""
    if EMBS is None or len(idxs) == 0:
        return None
    vecs = EMBS[idxs]  # ya es numpy float32 y normalizado si hicimos faiss.normalize_L2(EMBS)
    mean = np.mean(vecs, axis=0)
    # normalizar L2
    norm = np.linalg.norm(mean)
    if norm == 0:
        return mean
    return (mean / norm).astype("float32")

In [9]:
# ================================
# 8) Evaluador de similitud (BERT/SBERT)
# ================================
def clean_answer(answer: str, question: str) -> str:
    # Eliminar repeticiones del contexto
    if "Contexto:" in answer:
        answer = answer.split("Contexto:")[0].strip()

    # Eliminar texto redundante
    stop_phrases = ["basado en el contexto", "según el documento"]
    for phrase in stop_phrases:
        answer = answer.replace(phrase, "")

    # Capitalización correcta
    if answer and answer[-1] not in {".", "!", "?"}:
        answer += "."

    return answer.strip()

def semantic_similarity(a: str, b: str):
    """Devuelve cosine similarity entre dos textos usando embedder"""
    return util.cos_sim(embedder.encode(a), embedder.encode(b)).item()

In [10]:
# ================================
# 9) Generador GPT con refinamiento y logging
# ================================
from time import perf_counter

def generate_gpt_answer(prompt: str, max_length=200, sample=True):
    """Generación con control (respuestas más cortas por default)."""
    try:
        # ajustar sampling para debug/performance
        gen_kwargs = {
            "max_new_tokens": max_length,
            "pad_token_id": tokenizer.eos_token_id,
            "do_sample": sample,
            "temperature": 0.3 if sample else 0.0,
            "top_p": 0.9 if sample else 1.0,
            "num_return_sequences": 1
        }
        t0 = perf_counter()
        output = generator(prompt, **gen_kwargs)
        gen_time = perf_counter() - t0

        full_text = output[0]['generated_text']
        # intentar sacar la porción después del prompt
        answer = full_text.replace(prompt, "").strip()
        # fallback: si quedó vacío, tomar todo y limpiar
        if not answer:
            answer = full_text.strip()
        return answer, gen_time
    except Exception as e:
        print("Error en generación:", e)
        return "ERROR_GENERACION", 0.0


def generate_answer_with_refinement(question: str, top_k=TOP_K, threshold=SIMILARITY_THRESHOLD, max_retries=MAX_RETRIES, log=True):
    ts = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    if not question or len(question.split()) < 2:
        return "Por favor formula una pregunta más específica."

    if FAISS_INDEX is None or not DOCS:
        return "Primero debes subir y procesar un documento PDF."

    # Medir búsqueda
    t0 = perf_counter()
    fragments, frag_scores, idxs = search_top_k_with_indices(question, top_k=top_k)
    t_search = perf_counter() - t0

    if not fragments or max(frag_scores) < EARLY_EXIT_THRESHOLD:
        if log:
            _append_log(question, "No tengo información sobre este tema en los documentos.", "", [], [], ts, note="early_exit")
        return "No tengo información sobre este tema en los documentos."

    # Precomputar embedding del contexto (media de embeddings de fragments seleccionados)
    t0 = perf_counter()
    emb_context = mean_embedding_for_indices(idxs)  # numpy float32 normalizado
    # convertir a tensor en GPU si posible (para usar util.cos_sim más rápido)
    use_torch = False
    if emb_context is None:
        return "No pude calcular embedding del contexto."
    try:
        import torch as _torch
        if _torch.cuda.is_available():
            emb_context_t = _torch.from_numpy(emb_context).to(_torch.float16).unsqueeze(0).to('cuda')
            use_torch = True
        else:
            emb_context_t = _torch.from_numpy(emb_context).unsqueeze(0)
    except Exception:
        emb_context_t = None
    t_emb_context = perf_counter() - t0

    best_answer = None
    best_score = -1.0
    attempts_info = []

    # loop de intentos
    for attempt in range(1, max_retries + 1):
        try:
            # timeout check simple (reduce si querés)
            # Construir prompt pero **mantener context corto** para el LLM: limitar a por ejemplo 8000 chars
            short_context = " ".join([f[:1000] for f in fragments])  # recorta fragments largos
            prompt = build_optimized_prompt(question, short_context)

            # Generar
            ans, gen_time = generate_gpt_answer(prompt, max_length=80, sample=False)  # sample=False para velocidad
            ans = clean_answer(ans, question)

            # Si modelo dice directamente que no tiene info, salir
            if "No tengo información suficiente" in ans:
                attempts_info.append({"attempt": attempt, "answer": ans, "score": 0.0, "gen_time": gen_time})
                best_answer = ans
                best_score = 0.0
                break

            # Calcular embedding de la respuesta (rapido)
            t0 = perf_counter()
            # usar embedder.encode en modo vector (convert_to_tensor True) es rápido y usa GPU si está disponible
            ans_emb = embedder.encode(ans, convert_to_tensor=True, normalize_embeddings=True)
            t_ans_emb = perf_counter() - t0

            # calcular similitud (usa tensores para speed si tenemos emb_context_t)
            if use_torch and emb_context_t is not None:
                # convertir ans_emb a cuda
                ans_emb_t = ans_emb.to('cuda')
                sim = util.cos_sim(ans_emb_t, emb_context_t).item()
            else:
                # convertir ambos a numpy y dot product
                ans_emb_np = ans_emb.cpu().numpy() if hasattr(ans_emb, "cpu") else np.array(ans_emb)
                sim = float(np.dot(ans_emb_np, emb_context))

            attempts_info.append({"attempt": attempt, "answer": ans, "score": float(sim), "gen_time": gen_time, "ans_emb_time": t_ans_emb})

            # lógica para elegir mejor respuesta
            if sim > best_score:
                best_score = sim
                best_answer = ans

            if sim >= threshold:
                break

        except Exception as e:
            print(f"Intento {attempt} fallo:", e)
            attempts_info.append({"attempt": attempt, "answer": f"Error: {e}", "score": 0.0})

    final = best_answer if best_answer else "No encontré información suficiente para responder."

    if log:
        _append_log(question, final, " ".join(fragments), fragments, attempts_info, ts,
                   note="ok" if best_score >= threshold else "low_score", best_score=float(best_score) if best_score!=-1.0 else None)

    # prints de performance (útiles durante debug)
    print(f"[TIMINGS] search={t_search:.3f}s emb_ctx={t_emb_context:.3f}s attempts={len(attempts_info)}")

    return final


def build_optimized_prompt(question, context):
    """Prompt optimizado para respuestas rápidas"""
    return (
        "Responde concisamente (máximo 2 oraciones) basado EXCLUSIVAMENTE en este contexto:\n"
        f"Contexto: {context}\n\n"
        f"Pregunta: {question}\n\n"
        "Si no puedes responder con certeza, di exactamente: 'No tengo información suficiente en los documentos para responder a eso.'\n"
        "Respuesta:"
    )

In [11]:
# ================================
# 10) Logging de preguntas/respuestas
# ================================
def _append_log(question, final_answer, context, fragments, attempts_info, timestamp, note="", best_score=None):
    row = {
        "timestamp": timestamp,
        "question": question,
        "final_answer": final_answer,
        "best_score": best_score,
        "note": note,
        "context_excerpt": context[:500],
        "fragments_count": len(fragments),
        "attempts_info": str(attempts_info)
    }
    df = pd.DataFrame([row])
    if not os.path.exists(LOG_CSV):
        df.to_csv(LOG_CSV, index=False)
    else:
        df.to_csv(LOG_CSV, mode="a", header=False, index=False)

In [12]:
# ================================
# 11) Interfaz Gradio
# ================================
import gradio as gr

def gr_upload_and_index(pdf_file):
    """Handler para subir PDF desde Gradio (archivo)."""
    if pdf_file is None:
        return "No se subió ningún archivo."

    # Pasar el objeto file directamente - tiene atributo .name con la ruta
    return process_and_index_pdf_bytes(pdf_file)

def gr_answer_question(question, top_k=TOP_K, threshold=SIMILARITY_THRESHOLD, max_retries=MAX_RETRIES):
    if not question or not question.strip():
        return "Ingresá una pregunta válida."
    # llamar pipeline
    return generate_answer_with_refinement(question, top_k=int(top_k), threshold=float(threshold), max_retries=int(max_retries))

with gr.Blocks() as demo:
    gr.Markdown("# NotebookLM-lite (Híbrido BERT + GPT) ✅\nCarga PDFs y pregunta sobre su contenido.")
    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(label="Subí tu PDF", file_types=[".pdf"])
            status = gr.Textbox(label="Estado", interactive=False, value="Subí un PDF para comenzar...")
            gr.Markdown("### Ajustes:")
            topk = gr.Slider(minimum=1, maximum=8, value=TOP_K, step=1, label="Top-K fragments")
            umbral = gr.Slider(minimum=0.1, maximum=0.95, value=SIMILARITY_THRESHOLD, step=0.01, label="Umbral aceptación (similitud)")
            retries = gr.Slider(minimum=0, maximum=4, value=MAX_RETRIES, step=1, label="Máx reintentos GPT")
        with gr.Column(scale=2):
            question = gr.Textbox(label="Tu pregunta", placeholder="Ej: ¿Cuál es la metodología usada? ...")
            ask = gr.Button("Preguntar", variant="primary")
            answer = gr.Textbox(label="Respuesta", interactive=False, lines=8)
            show_logs = gr.Button("Mostrar últimos logs (CSV)")

    # Solo procesamiento automático al subir
    pdf_input.upload(gr_upload_and_index, inputs=pdf_input, outputs=status)
    ask.click(gr_answer_question, inputs=[question, topk, umbral, retries], outputs=answer)

    # mostrar logs
    def read_logs(n=10):
        if not os.path.exists(LOG_CSV):
            return "Aún no hay logs."
        df = pd.read_csv(LOG_CSV)
        return df.tail(n).to_string(index=False)

    show_logs.click(read_logs, outputs=status)

In [13]:
# Iniciar demo
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://882b1d2a53fa2136ac.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [17]:
import pandas as pd
from datetime import datetime
import time

# === Evaluador de respuestas ===
def evaluar_respuesta(respuesta_obtenida, respuesta_esperada, tipo_pregunta):
    if tipo_pregunta == "fuera_dominio":
        if "no tengo información" in respuesta_obtenida.lower():
            return 5
        else:
            return 1
    else:
        respuesta_obtenida_clean = respuesta_obtenida.lower().strip()
        respuesta_esperada_clean = respuesta_esperada.lower().strip()

        if (respuesta_obtenida_clean == respuesta_esperada_clean or
            respuesta_esperada_clean in respuesta_obtenida_clean):
            return 5

        elementos_clave = [word for word in respuesta_esperada_clean.split() if len(word) > 3]

        if not elementos_clave:
            palabras_comunes = set(respuesta_obtenida_clean.split()) & set(respuesta_esperada_clean.split())
            return 3 if palabras_comunes else 1

        coincidencias = sum(1 for word in elementos_clave if word in respuesta_obtenida_clean)
        proporcion_coincidencias = coincidencias / len(elementos_clave)

        if proporcion_coincidencias >= 0.75:
            return 4
        elif proporcion_coincidencias >= 0.5:
            return 3
        elif proporcion_coincidencias >= 0.25:
            return 2
        else:
            return 1

# === Script de pruebas masivas ===
def pruebas_masivas(csv_entrada, csv_salida):
    df = pd.read_csv(csv_entrada)

    resultados = []
    for _, row in df.iterrows():
        tipo = row["tipo_pregunta"]
        pregunta = row["pregunta"]
        esperado = row["respuesta_esperada"]

        # Medir tiempo de respuesta
        inicio = time.time()
        obtenido = gr_answer_question(pregunta)  # <- tu función principal
        fin = time.time()
        tiempo_respuesta = fin - inicio

        puntaje = evaluar_respuesta(obtenido, esperado, tipo)

        resultados.append({
            "tipo_pregunta": tipo,
            "pregunta": pregunta,
            "respuesta_esperada": esperado,
            "respuesta_obtenida": obtenido,
            "puntaje": puntaje,
            "tiempo_seg": round(tiempo_respuesta, 3)
        })

    df_result = pd.DataFrame(resultados)

    # Métricas generales
    promedio_puntaje = df_result["puntaje"].mean()
    aciertos = (df_result["puntaje"] >= 4).sum()
    porcentaje_aciertos = (aciertos / len(df_result)) * 100
    promedio_tiempo = df_result["tiempo_seg"].mean()

    # Guardar resultados
    df_result.to_csv(csv_salida, index=False)
    print(f"Resultados guardados en {csv_salida}")
    print(f"Promedio de puntaje: {promedio_puntaje:.2f}")
    print(f"Aciertos (puntaje >= 4): {aciertos} / {len(df_result)} ({porcentaje_aciertos:.2f}%)")
    print(f"Tiempo promedio por respuesta: {promedio_tiempo:.3f} segundos")

In [18]:
# === Ejemplo de uso ===
pruebas_masivas(
    csv_entrada="/content/drive/MyDrive/Proyectos/Versiones ChatBot/BERT/preguntas_prueba.csv",
    csv_salida=f"/content/drive/MyDrive/Proyectos/Versiones ChatBot/BERT/resultados_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
)

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.157s emb_ctx=0.006s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.127s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.118s emb_ctx=0.000s attempts=1
[TIMINGS] search=0.092s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.213s emb_ctx=0.001s attempts=1
[TIMINGS] search=0.128s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.151s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.132s emb_ctx=0.000s attempts=1
[TIMINGS] search=0.164s emb_ctx=0.003s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.195s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.100s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.116s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.140s emb_ctx=0.000s attempts=1
[TIMINGS] search=0.173s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.119s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.138s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.128s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.114s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.131s emb_ctx=0.000s attempts=1
[TIMINGS] search=0.119s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.136s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.119s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.096s emb_ctx=0.000s attempts=1
[TIMINGS] search=0.161s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.146s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.128s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.095s emb_ctx=0.000s attempts=1
[TIMINGS] search=0.149s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.100s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.144s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.156s emb_ctx=0.000s attempts=1
[TIMINGS] search=0.136s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.218s emb_ctx=0.001s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.144s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.099s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.160s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.146s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.125s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.122s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.131s emb_ctx=0.000s attempts=1
[TIMINGS] search=0.145s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.100s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.161s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.171s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.161s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.141s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.187s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.152s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.120s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.162s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.145s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.153s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.142s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.159s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.127s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.154s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.138s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.163s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.122s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.136s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.118s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.130s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.168s emb_ctx=0.001s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.135s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.120s emb_ctx=0.000s attempts=1
[TIMINGS] search=0.131s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.234s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.095s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.147s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.126s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.127s emb_ctx=0.000s attempts=1
[TIMINGS] search=0.190s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.210s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.136s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.167s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.141s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.094s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.094s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.172s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.142s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.117s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.110s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.125s emb_ctx=0.000s attempts=1
[TIMINGS] search=0.171s emb_ctx=0.000s attempts=1


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[TIMINGS] search=0.242s emb_ctx=0.000s attempts=1
Resultados guardados en /content/drive/MyDrive/Proyectos/Versiones ChatBot/BERT/resultados_20250811_214126.csv
Promedio de puntaje: 2.48
Aciertos (puntaje >= 4): 33 / 102 (32.35%)
Tiempo promedio por respuesta: 55.483 segundos
