In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from src.embeddings import create_and_persist_embeddings, load_embeddings
from src.model import load_model
from src.retriever import create_qa_chain
import src.config as config

# 1. Load the PDF and split its text into overlapping fragments.
loader = PyPDFLoader(config.PDF_PATH)
docs = loader.load()

# Split on newlines; chunk sizes are measured with `length_function`
# (plain `len`, so they are counted in characters).
splitter = CharacterTextSplitter(
    chunk_size=650,
    chunk_overlap=85,
    separator="\n",
    length_function=len,
)

# `fragmentos` is the input for the embedding step in the next cell.
fragmentos = splitter.split_documents(docs)

In [None]:
# 2. Create or load embeddings
# Builds the vector store from the PDF fragments, passing config.PERSIST_DIRECTORY
# as the persistence location.
# NOTE(review): this cell always calls create_and_persist_embeddings; the
# imported load_embeddings is never used in the visible cells, so the "load"
# half of the heading is not implemented here. Presumably a re-run recomputes
# the embeddings — confirm, and consider loading from the persist directory
# when it already exists.
vectordb = create_and_persist_embeddings(fragmentos, config.PERSIST_DIRECTORY)

In [None]:
# 3. Load the language model
# The model is selected by name through config.LLM_MODEL_NAME.
llm = load_model(config.LLM_MODEL_NAME)

In [None]:
# 4. Create the QA chain
# Combines the language model with a retriever over the vector store;
# as_retriever() is called without arguments, so its defaults apply.
qa_chain = create_qa_chain(llm, vectordb.as_retriever())

In [None]:
# 5. Run a query
# The question is passed to the chain under the "query" key; the returned
# mapping is read in the next cell via "result" and "source_documents".
# NOTE(review): if qa_chain is a LangChain Chain, newer releases deprecate
# calling it directly in favour of qa_chain.invoke(...) — confirm against the
# installed version before changing.
question = "¿Qué es un ábaco?"
result = qa_chain({"query": question})

In [None]:
# 6. Show the results
# Print the answer first, then the text of each source document, numbered from 1.
print("Respuesta:", result["result"])

sources = result["source_documents"]
for position, source in enumerate(sources, start=1):
    print(f"Documento {position}: {source.page_content}")