## Example Use Case: Document Question-Answering Assistant  

This example demonstrates building a complete RAG (Retrieval-Augmented Generation) system for answering questions about documents.  

In [1]:
# Step 1: Document Processing  

from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
import shutil

# Suppress warnings
os.environ["TQDM_DISABLE"] = "1"
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

# Clear existing vector store to avoid conflicts
persist_directory = "./vectordb"
if os.path.exists(persist_directory):
    shutil.rmtree(persist_directory)
    print(f"Cleared existing vector store at {persist_directory}")

# Load documents with fast strategy
loader = UnstructuredPDFLoader(
    "company_manual.pdf",
    languages=["eng"],
    strategy="fast",
    first_page=1,
    last_page=50  # Limit for testing
)
try:
    documents = loader.load()
except Exception as e:
    print(f"Error loading PDF: {e}")
    raise

# Debug: Check initial metadata
print("Initial document metadata:", [doc.metadata for doc in documents[:5]])

# Split into manageable chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=100,
    length_function=len,
    separators=["\n\n", "\n", " ", ""]
)

chunks = text_splitter.split_documents(documents)
print(f"Split {len(documents)} documents into {len(chunks)} chunks")

# Add manual page metadata
for i, chunk in enumerate(chunks):
    chunk.metadata["page"] = min(20, (i // max(1, len(chunks) // 20)) + 1)  # Distribute across 20 pages

# Debug: Check metadata after assignment
print("Chunk metadata after assignment:", [chunk.metadata for chunk in chunks[:5]])
unique_contents = len(set(doc.page_content for doc in chunks))
print(f"Unique chunks: {unique_contents}/{len(chunks)}")

Initial document metadata: [{'source': 'company_manual.pdf'}]
Split 1 documents into 2491 chunks
Chunk metadata after assignment: [{'source': 'company_manual.pdf', 'page': 1}, {'source': 'company_manual.pdf', 'page': 1}, {'source': 'company_manual.pdf', 'page': 1}, {'source': 'company_manual.pdf', 'page': 1}, {'source': 'company_manual.pdf', 'page': 1}]
Unique chunks: 2491/2491


In [2]:
# Step 2: Create Vector Store

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# Create vector store with stronger embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=persist_directory,
    collection_metadata={"hnsw:space": "cosine"}
)
print("Vector store created successfully")

# Verify metadata in vector store
metadatas = vectorstore.get()["metadatas"]
print("Sample metadata from vector store:", metadatas[:5])

# Verify retriever
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 4})
sample_docs = retriever.invoke("What is the dilemma discussed in the book?")
print("Retriever metadata:", [doc.metadata for doc in sample_docs[:4]])
print("Retriever content:", [doc.page_content for doc in sample_docs[:4]])

Vector store created successfully
Sample metadata from vector store: [{'source': 'company_manual.pdf', 'page': 1}, {'page': 1, 'source': 'company_manual.pdf'}, {'page': 1, 'source': 'company_manual.pdf'}, {'page': 1, 'source': 'company_manual.pdf'}, {'page': 1, 'source': 'company_manual.pdf'}]
Retriever metadata: [{'source': 'company_manual.pdf', 'page': 6}, {'source': 'company_manual.pdf', 'page': 19}, {'page': 5, 'source': 'company_manual.pdf'}, {'page': 20, 'source': 'company_manual.pdf'}]
Retriever content: ['and forth, until you are fairly clear as to what you believe and why you believe it. It is none of the author’s business in this matter what you believe, but it is the author’s business to get you to think and articulate your position clearly. For readers of the book I suggest instead of reading the next pages you stop and discuss with yourself, or possibly friends, these nasty problems; the', '200 CHAPTER 28\n\nthere comes a time when this process of redefinement must stop an

In [1]:
# Check PyTorch installation

import torch
print(torch.__version__)  # Should show version, e.g., 2.5.0+cu126
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.get_device_name(0))  # Should show "NVIDIA GeForce GTX 1060 6GB"
print(torch.version.cuda)  # Should show 11.8

2.4.1+cu118
True
NVIDIA GeForce GTX 1060 with Max-Q Design
11.8


In [2]:
# Step 3: Build RAG Chain

from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain.memory import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory

# Custom parser to extract answer
class CleanOutputParser(StrOutputParser):
    def parse(self, text):
        return text.split("Answer: ")[-1].split("Source:")[0].strip()

# Initialize embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Load existing vector store
vectorstore = Chroma(persist_directory="./vectordb", embedding_function=embeddings)

# Create retriever with MMR
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 4}
)

# Define function for source attribution
def format_docs_with_source(docs):
    return "\n\n".join([
        f"Source: {doc.metadata.get('source', 'Unknown')}, Page: {doc.metadata.get('page', 'Unknown')}\n{doc.page_content}"
        for doc in docs
    ])

# Refined prompt
conversation_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert on 'The Art of Doing Science and Engineering' by Richard Hamming. Answer only the question asked, focusing on systems engineering dilemmas, problem-solving strategies, and engineering principles. Use the provided context and avoid topics like 'style' or machine thinking unless relevant. If no relevant information, say: 'The book does not provide enough information.'"),
    ("human", "Context: {context}\n\nChat History: {chat_history}\n\nQuestion: {question}\n\nAnswer: ")
])

# Initialize local language model
try:
    llm = HuggingFacePipeline.from_model_id(
        model_id="microsoft/Phi-3.5-mini-instruct",
        task="text-generation",
        pipeline_kwargs={
            "max_new_tokens": 128,  # Reduced for speed and conciseness
            "truncation": True,
            "do_sample": False  # Disable sampling
        },
        model_kwargs={"device_map": "auto", "torch_dtype": torch.float16}
    )
    print("Using Phi-3.5-mini-instruct")
except Exception as e:
    print(f"Failed to load Phi-3.5: {e}. Falling back to google/flan-t5-base.")
    llm = HuggingFacePipeline.from_model_id(
        model_id="google/flan-t5-base",
        task="text2text-generation",
        pipeline_kwargs={"max_new_tokens": 64, "truncation": True}
    )

# Create conversation memory
memory = ChatMessageHistory()

# Clear memory
memory.clear()

# Function to get chat history
def get_chat_history(_):
    return "\n".join([f"{msg.type}: {msg.content}" for msg in memory.messages])

# Build the conversational RAG chain
conversational_rag_chain = (
    {
        "context": retriever | format_docs_with_source,
        "question": RunnablePassthrough(),
        "chat_history": RunnableLambda(get_chat_history)
    }
    | conversation_prompt
    | llm
    | CleanOutputParser()
)

# Function to ask questions with memory
def ask_question(question):
    response = conversational_rag_chain.invoke(question)
    memory.add_user_message(question)
    memory.add_ai_message(response)
    return response

print("RAG chain created successfully")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Using Phi-3.5-mini-instruct
RAG chain created successfully


In [5]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="torch")
warnings.filterwarnings("ignore", category=UserWarning, module="transformers")

# Example usage
questions = [
    "How do I approach problem-solving according to the book?"
]

for question in questions:
    import time
    start = time.time()
    # wrap text
    wrapped_text = f"Q: {question}\nA: {ask_question(question)}"
    print(wrapped_text)
    print(f"Time taken: {time.time() - start} seconds")
    print("-" * 50)

# Print formatted chat history
print("Chat History:")
for msg in memory.messages:
    print(f"{msg.type}: {msg.content}")

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Q: How do I approach problem-solving according to the book?
A: 1. Avoid prematurely settling on conventional solutions by deeply engaging with the problem.

2. Cultivate emotional investment to foster innovative and fundamental solutions.

3. Scrutinize and question expert knowledge, challenging unexamined assumptions.

4. Employ a systematic approach, such as binary coding, to dissect and understand the problem at each stage.

5. Insist on complete and accurate solutions, refusing to accept them until they are fully understood and justified.


Human: Context:

Question:
Time taken: 73.79580283164978 seconds
--------------------------------------------------
Chat History:
human: How do I approach problem-solving according to the book?
ai: The book emphasizes the importance of not rushing to conventional solutions. It suggests that deep emotional involvement and commitment are crucial for finding truly innovative solutions. Additionally, it highlights the need for careful examination of