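### RAG (PDF) — LangChain 0.3+ + Ollama (Llama 3.2)

This notebook implements a very simple PDF-only RAG pipeline. Note: the active cells use OpenAI models (`ChatOpenAI`, `OpenAIEmbeddings`); the Ollama imports are currently commented out.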

In [None]:
# Imports
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
#from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv

In [None]:
# Load settings via python-dotenv before any client is constructed.
# NOTE(review): presumably this supplies the OpenAI API key used by
# ChatOpenAI / OpenAIEmbeddings below - confirm against the project's .env.
load_dotenv()

In [None]:
# CONFIG
# Path of the single PDF that is loaded and indexed.
PDF_PATH = "./docs/myfile.pdf"
# Directory passed to Chroma as persist_directory.
CHROMA_DIR = "./my_chroma_db"

# Model name passed to OpenAIEmbeddings.
EMBED_MODEL = "text-embedding-3-large"
# Model name passed to ChatOpenAI.
LLM_MODEL = "gpt-5-nano"

# Passed to RecursiveCharacterTextSplitter as chunk_size / chunk_overlap.
# NOTE(review): presumably measured in characters (the splitter's default
# length function) - confirm if a custom length function is introduced.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

In [None]:
# LOAD PDF
# PyPDFLoader.load() returns a list of documents for the PDF at PDF_PATH.
loader = PyPDFLoader(PDF_PATH)

docs = loader.load()
print('Documents loaded:', len(docs))

# Preview one document as a sanity check. The original always indexed
# docs[2], which raises IndexError for PDFs with fewer than three pages;
# fall back to the last available document instead.
PREVIEW_INDEX = 2
if docs:
    preview_doc = docs[min(PREVIEW_INDEX, len(docs) - 1)]
    print(preview_doc.page_content)
    print(preview_doc.metadata)
else:
    print('No documents were loaded from', PDF_PATH)


In [None]:
# CHUNK
# Split the loaded documents into overlapping chunks for embedding.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = splitter.split_documents(docs)

print('Document chunks created:', len(chunks))

# Show the first chunk as a sanity check; guard against an empty result
# (e.g. a PDF with no extractable text), where chunks[0] would raise
# IndexError.
if chunks:
    print('Document chunks :', chunks[0])
else:
    print('Document chunks : none (no text could be split)')


In [None]:
# VECTOR DB with OpenAI embeddings (model name comes from EMBED_MODEL)
embeddings = OpenAIEmbeddings(model=EMBED_MODEL)

vector_db = Chroma(
    collection_name="pdf_collection",
    embedding_function=embeddings,
    persist_directory=CHROMA_DIR,
)

# The collection is persisted in CHROMA_DIR, so re-running this cell without
# explicit ids would insert every chunk again and retrieval would return
# duplicates. Deterministic ids (PDF path + chunk position) make repeated
# runs overwrite the same entries instead.
# NOTE(review): if the PDF or chunking config changes so that fewer chunks
# are produced, stale higher-numbered entries remain - delete CHROMA_DIR
# to rebuild from scratch.
if chunks:
    chunk_ids = [f"{PDF_PATH}::chunk-{i}" for i in range(len(chunks))]
    vector_db.add_documents(chunks, ids=chunk_ids)
else:
    # Nothing to index; skip the call rather than sending an empty batch.
    print('No chunks to add to the vector DB.')

In [None]:
# LLM: OpenAI chat model selected by LLM_MODEL (the Ollama/Llama 3.2
# import is commented out at the top of the notebook).
llm = ChatOpenAI(model=LLM_MODEL)

In [None]:
# PROMPT
# Template with two placeholders: {context} receives the joined retrieved
# chunks and {question} receives the user's question (both filled in rag()).
RAG_TEMPLATE = """
Use ONLY the context below to answer the question.

Context:
{context}

Question:
{question}
"""

prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

In [None]:
# FINAL RAG FUNCTION (manual RAG)
def rag(question, k=3):
    """Answer *question* using chunks retrieved from the vector store.

    Args:
        question: The user's question. It is used both as the retrieval
            query and as the ``{question}`` value in the prompt.
        k: Number of chunks to retrieve. Defaults to 3, the value that
            was previously hard-coded.

    Returns:
        The result of ``llm.invoke`` on the filled-in prompt.
    """
    # 1. retrieve the top-k chunks for the question
    retriever = vector_db.as_retriever(search_kwargs={"k": k})
    retrieved = retriever.invoke(question)

    # 2. combine retrieved chunks into one context string,
    #    separated by blank lines
    context = "\n\n".join(doc.page_content for doc in retrieved)

    # 3. fill the prompt template
    final_prompt = prompt.invoke({"context": context, "question": question})

    # 4. send to the configured chat model (llm)
    return llm.invoke(final_prompt)


In [None]:
# CHAT LOOP
# Read questions until the user types "exit" (case-insensitive) or closes
# the input stream.
while True:
    try:
        # Strip so that "exit " or accidental surrounding whitespace behaves
        # as expected.
        q = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C (or a kernel interrupt) ends the session cleanly
        # instead of surfacing a traceback.
        print()
        break

    if not q:
        # Skip blank input rather than spending an embedding + LLM call
        # on an empty question.
        continue
    if q.lower() == "exit":
        break

    answer = rag(q)
    print("\nAnswer:\n", answer.content)