In [29]:
import os
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma 
from langchain_community.llms import HuggingFaceHub
from langchain.llms import Ollama
from langchain.vectorstores import FAISS
from langchain import hub
from langchain.chains import create_retrieval_chain
from langchain.chains import RetrievalQA

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter
from langchain_community.chat_models import ChatOllama

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "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" 
os.environ["LANGCHAIN_PROJECT"] = "My RAG Project"
os.environ["USER_AGENT"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
print("Libraries imported successfully!")

Libraries imported successfully!


In [30]:
loader = WebBaseLoader("https://www.promptingguide.ai/research/rag")
documents = loader.load()
print(f"Loaded {len(documents)} document(s) from the web page.")

Loaded 1 document(s) from the web page.


In [31]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(documents)
print(f"Original document split into {len(splits)} chunks.")

Original document split into 63 chunks.


In [33]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
print("HuggingFace Embeddings model initialized.")
vectorstore = FAISS.from_documents(splits, embeddings)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever()
)
vectorstore.save_local("faiss_index")
print(f"Vector store saved successfully to faiss_index folder.")

HuggingFace Embeddings model initialized.
Vector store saved successfully to faiss_index folder.


In [34]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
print("Retriever created from the vector store.")
llm =Ollama(model= "tinyllama")
repo_id="mistralai/Mistral-7B-Instruct-v0.2",
model_kwargs={"temperature": 0.5, "max_length": 512} 
print(f"LLM initialized: Ollama with model {llm.model}")

Retriever created from the vector store.
LLM initialized: Ollama with model tinyllama


In [35]:
rag_prompt = hub.pull("rlm/rag-prompt")
print("RAG prompt pulled from LangChain Hub.")
combine_docs_chain = create_stuff_documents_chain(llm, rag_prompt)
print("Combine documents chain created.")
rag_chain = ({
    "context": itemgetter("input") | retriever,
     "question": itemgetter("input"),
    }
              | combine_docs_chain
)
print("Full RAG chain created.")

RAG prompt pulled from LangChain Hub.
Combine documents chain created.
Full RAG chain created.


In [36]:
query ="define RAG?"
response= qa_chain.run(query)
print("ANSWER:\n",response)

  response= qa_chain.run(query)


ANSWER:
 The research papers below highlight key insight and latest development trends in RAG systems, which include the development of advanced paradigms for customization and further performance and utility across a wide range of domains. There is a huge demand for RAG applications that have accelerated the development of methods to improve different components of RAG systems, including hybrid methodologies, self-retrieval techniques, evaluation tools, and metrics. The figure below provides a recap of the RAG ecosystem, techniques to enhance RAG, challenges, and other related aspects covered in these overviews:

As introduced here (opens in a new tab), RAG can be defined as:

RAG takes input and retrieves a set of relevant/supporting documents given a source (e.g., Wikipedia). The documents are concatenated as context with the original input prompt, and RAG produces the final output. This makes RAG adaptive for situations where facts could evolve over time. RAG allows language models