## Implementation of a simple RAG system

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from pathlib import Path
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_core.runnables import RunnableLambda
import os
from dotenv import load_dotenv


ModuleNotFoundError: No module named 'langchain_community'

In [2]:
# Load PDFs
pdf_folder = Path("pdfs/")

all_docs = []
for pdf_file in pdf_folder.glob("*.pdf"):
    loader = PyPDFLoader(str(pdf_file))
    all_docs.extend(loader.load())

Ignoring wrong pointing object 37 0 (offset 0)
Ignoring wrong pointing object 42 0 (offset 0)
Ignoring wrong pointing object 91 0 (offset 0)
Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 37 0 (offset 0)


In [3]:
# Chunk the data using recursive chunking
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50
)

chunks = splitter.split_documents(all_docs)

In [4]:
# Load embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [6]:
# Store the embeddings locally in ChromaDB

vectordb = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="./chroma_db"
)

vectordb.persist()

KeyError: '_type'

In [None]:
# Load model and tokenizer
model_name = "google/flan-t5-small"  # Or try flan-t5-base if M2 can handle it

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Create text2text pipeline (note: temperature may be ignored)
hf_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    device=-1
)

# Wrap in LangChain HuggingFace LLM (new path!)
hf_llm = HuggingFacePipeline(pipeline=hf_pipeline)

Device set to use cpu


In [None]:


retriever = vectordb.as_retriever()

qa_chain = RetrievalQA.from_chain_type(
    llm=hf_llm,
    retriever=retriever,
    chain_type="stuff"
)

In [None]:
response = hf_llm.invoke("What is ornithology?")
print(response)

ornithology ornithology


In [None]:
query = "What is ornithology?"

# ✅ RAG response (using LangChain's new `.invoke()` method)
response_rag = qa_chain.invoke(query)

# ✅ LLM-only response
ragless_prompt = f"Answer the following question:\n{query}"
response_llm_only = hf_llm.invoke(ragless_prompt)

# 📊 Display the results
print("🔹 LLM-Only Response:\n", response_llm_only)
print("\n" + "="*80 + "\n")
print("🔸 RAG-Augmented Response:\n", response_rag)

Token indices sequence length is longer than the specified maximum sequence length for this model (541 > 512). Running this sequence through the model will result in indexing errors
  test_elements = torch.tensor(test_elements)


KeyboardInterrupt: 

In [None]:


load_dotenv()  # take environment variables from .env.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY is not set. Please set it in your .env file.")

In [25]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# ✅ LLM Setup
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# ✅ Retrieval setup (RAG)
retriever = vectordb.as_retriever(search_kwargs={"k": 3})
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff"
)

# ✅ Query to compare
query = "What are the main functions of bird feathers?"

# 🔸 1. LLM-Only (No RAG)
llm_only_prompt = f"Answer the following question clearly and concisely:\n\n{query}"
llm_only_response = llm.invoke(llm_only_prompt)

# 🔹 2. RAG-Based (uses retrieved context)
rag_response = rag_chain.invoke(query)

# 🖥️ Print Results
print("🔸 LLM-Only Response:\n", llm_only_response.content)
print("\n" + "="*80 + "\n")
print("🔹 RAG-Augmented Response:\n", rag_response['result'])

🔸 LLM-Only Response:
 The main functions of bird feathers are insulation, flight, protection, and display.


🔹 RAG-Augmented Response:
 Bird feathers serve several important functions, including insulation to regulate body temperature, providing lift and aiding in flight, aiding in communication through displays and coloration, camouflage for protection, and waterproofing to maintain buoyancy and protect against water.
