# Lab 2.1: Local RAG with Ollama - Solution

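A complete, working implementation of a local RAG system: sample documents are chunked, embedded with a HuggingFace model, stored in a Chroma vector store, and queried through an LLM served by Ollama.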

In [None]:
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import Ollama
from langchain.chains import RetrievalQA
from langchain.schema import Document

# Tunable settings for the notebook, gathered in one place so later cells
# read them by key instead of repeating literals.
CONFIG = dict(
    corpus_path="data/corpus",             # NOTE(review): not read by any cell visible here
    chunk_size=1000,                       # passed to the text splitter
    chunk_overlap=200,                     # passed to the text splitter
    embedding_model="all-MiniLM-L6-v2",    # model name given to HuggingFaceEmbeddings
    llm_model="llama2",                    # model name given to Ollama
    vector_db_path="./chroma_db",          # Chroma persist directory
    top_k=5,                               # "k" used by the QA chain's retriever
)

In [None]:
# Sample documents
# Each text is assembled from adjacent string literals so that the source
# file's line breaks and indentation do not end up inside page_content (a
# triple-quoted literal would embed "\n    " in the text that gets chunked,
# embedded, and printed in retrieval previews).
documents = [
    Document(
        page_content=(
            "Retrieval Augmented Generation (RAG) is a technique that combines "
            "information retrieval with text generation. It allows language models "
            "to access external knowledge bases, reducing hallucinations and "
            "improving factual accuracy."
        ),
        metadata={"source": "rag_intro.txt"},
    ),
    Document(
        page_content=(
            "Vector databases store embeddings and enable semantic search. "
            "Popular options include Chroma, FAISS, and Elasticsearch."
        ),
        metadata={"source": "vector_db.txt"},
    ),
    Document(
        page_content=(
            "Chunking strategies affect RAG performance. Common approaches include "
            "fixed-size chunking, semantic chunking, and recursive splitting."
        ),
        metadata={"source": "chunking.txt"},
    ),
]

print(f"✅ Loaded {len(documents)} documents")

In [None]:
# Chunking: split every document using the size and overlap from CONFIG.
# The separator list is handed to the splitter in priority order:
# blank line, newline, sentence end, then single space.
splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ". ", " "],
    chunk_overlap=CONFIG["chunk_overlap"],
    chunk_size=CONFIG["chunk_size"],
)
chunks = splitter.split_documents(documents)
print(f"✅ Created {len(chunks)} chunks")

In [None]:
# Embeddings: build the embedding model named in CONFIG, requesting the CPU
# device and normalized output vectors.
embedding_options = {
    "model_name": CONFIG["embedding_model"],
    "model_kwargs": {"device": "cpu"},
    "encode_kwargs": {"normalize_embeddings": True},
}
embeddings = HuggingFaceEmbeddings(**embedding_options)
print("✅ Embeddings initialized")

In [None]:
# Vector store
# Give every chunk a stable id built from its source name and its position in
# `chunks`. Without explicit ids each run stores the chunks under fresh random
# ids, so re-running this cell against the persisted directory accumulates
# duplicates and retrieval starts returning the same chunk several times.
chunk_ids = [
    f"{chunk.metadata.get('source', 'doc')}-{position}"
    for position, chunk in enumerate(chunks)
]
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory=CONFIG["vector_db_path"],
)
# NOTE(review): presumably redundant on Chroma versions that persist
# automatically — confirm against the installed version before removing.
vectorstore.persist()
print("✅ Vector store created")

In [None]:
# Test retrieval: query the vector store directly (no LLM involved) and show
# the first 100 characters of each hit.
results = vectorstore.similarity_search("What is RAG?", k=3)
print(f"Retrieved {len(results)} documents")
for rank, hit in enumerate(results, start=1):
    preview = hit.page_content[:100]
    print(f"\n[{rank}] {preview}...")

In [None]:
# LLM: create the Ollama wrapper for the model named in CONFIG.
llm_model_name = CONFIG["llm_model"]
llm = Ollama(model=llm_model_name)
print("✅ LLM initialized")

In [None]:
# RAG chain: wire the vector-store retriever (k taken from CONFIG) and the LLM
# into a "stuff" RetrievalQA chain that also returns the retrieved documents.
retriever = vectorstore.as_retriever(search_kwargs={"k": CONFIG["top_k"]})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
print("✅ RAG chain created")

In [None]:
# Query function
def ask_question(question: str) -> dict:
    """Run one question through the RAG chain and print the answer with its sources.

    Args:
        question: Natural-language question, passed to ``qa_chain`` under the
            ``"query"`` key.

    Returns:
        The mapping returned by ``qa_chain``; this function reads its
        ``"result"`` and ``"source_documents"`` entries.
    """
    result = qa_chain({"query": question})
    # A retrieved document is not guaranteed to carry a "source" key; fall back
    # to a placeholder rather than raising KeyError after the answer has
    # already been generated.
    sources = [d.metadata.get("source", "unknown") for d in result["source_documents"]]
    print(f"Q: {question}")
    print(f"A: {result['result']}")
    print(f"Sources: {sources}")
    return result

# Test: run sample questions through the chain. Looping, rather than ending
# the cell on a bare call, keeps the notebook from echoing the whole result
# dict after ask_question has already printed the answer and sources.
for sample_question in ("What is RAG?", "What are vector databases?"):
    ask_question(sample_question)

## Complete! ✅

You now have a working local RAG system with:
- Sample document creation and chunking
- Embeddings and vector storage
- Retrieval and generation
- Source attribution