In [2]:
pip install -U langchain-community

Note: you may need to restart the kernel to use updated packages.


In [None]:

!pip install langchain langchain-openai pypdf faiss-cpu tiktoken

import getpass
import os

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS



In [None]:
# Set up OpenAI API Key
# Reuse the key from the environment when it is already set; otherwise prompt
# for it without echoing the input. Assigning os.getenv(...) straight back
# into os.environ raises TypeError when the variable is missing, because
# os.environ only accepts str values.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

# Write a small sample *text* document to disk - replace with your own documents
SAMPLE_DOC_PATH = "sample_document.txt"

# Explicit encoding so the file round-trips identically on every platform.
with open(SAMPLE_DOC_PATH, "w", encoding="utf-8") as f:
    f.write("""
    Retrieval-Augmented Generation (RAG) is an AI framework that enhances large language model (LLM) 
    outputs by retrieving relevant information from external sources. RAG combines the strengths of 
    retrieval-based and generation-based approaches in natural language processing.
    
    RAG was introduced by researchers at Facebook AI in 2020. The approach improves factuality and 
    specificity in generated responses by grounding them in retrieved passages. 
    
    The key components of RAG are:
    1. A retriever module that finds relevant documents from a knowledge base
    2. An augmenter that incorporates the retrieved information
    3. A generator that produces the final response using both the query and retrieved context
    """)

# Load documents: a list holding the raw text of each source file.
# The context manager closes the file handle instead of leaking it.
with open(SAMPLE_DOC_PATH, encoding="utf-8") as f:
    documents = [f.read()]

# Chunk the raw text: pieces of at most 500 characters with a 50-character
# overlap, trying the separators in the order given.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_overlap=50,
    chunk_size=500,
)
chunks = text_splitter.create_documents(documents)

chunk_count = len(chunks)
print(f"Split documents into {chunk_count} chunks")

Split documents into 2 chunks


In [None]:
# Embedding model used to vectorise each chunk
embeddings = OpenAIEmbeddings()

# Build the FAISS index from the chunks and persist it under ./faiss_index
vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)
vectorstore.save_local(folder_path="faiss_index")

print("Vector store created successfully")

  embeddings = OpenAIEmbeddings()


Vector store created successfully


In [None]:
# Similarity-search retriever over the vector store; "k" caps how many
# chunks are returned per query.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)

# Prompt text: the {context} and {question} placeholders are filled in by the chain
template = """
You are an AI assistant providing helpful answers based on the retrieved context.
Use the following context to answer the question at the end. If you don't know 
the answer, say you don't know - don't try to make up an answer.

Context:
{context}

Question: {question}

Your answer:
"""

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# temperature=0 keeps sampling randomness to a minimum
llm = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")

# Assemble the question-answering chain from the retriever, the LLM and the custom prompt
rag_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type_kwargs={"prompt": prompt},
    chain_type="stuff",
)

print("RAG chain setup complete")

RAG chain setup complete


In [None]:
# Smoke-test the RAG chain with a few questions about the sample document
questions = [
    "What is RAG?",
    "When was RAG introduced?",
    "What are the key components of a RAG system?",
]

for question in questions:
    answer = rag_chain.invoke({"query": question})
    # One print call, newline-separated: question, answer, then a divider line
    print(
        f"Question: {question}",
        f"Answer: {answer['result']}",
        "-" * 50,
        sep="\n",
    )

Question: What is RAG?
Answer: RAG, or Retrieval-Augmented Generation, is an AI framework that enhances the outputs of large language models (LLMs) by retrieving relevant information from external sources. It combines retrieval-based and generation-based approaches in natural language processing to improve the factuality and specificity of generated responses by grounding them in retrieved passages. RAG was introduced by researchers at Facebook AI in 2020 and consists of three key components: a retriever module that finds relevant documents from a knowledge base, an augmenter that incorporates the retrieved information, and a generator that produces the final response using both the query and the retrieved context.
--------------------------------------------------
Question: When was RAG introduced?
Answer: RAG was introduced by researchers at Facebook AI in 2020.
--------------------------------------------------
Question: What are the key components of a RAG system?
Answer: The key c

In [27]:
from langchain.chains import ConversationalRetrievalChain

# Buffer memory holding the running chat history under the "chat_history" key
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Conversational RAG chain sharing the LLM and retriever built earlier
conversational_rag = ConversationalRetrievalChain.from_llm(
    memory=memory,
    retriever=retriever,
    llm=llm,
)

# Example conversation: the second question ("its") relies on the first turn
conversation = ["What is RAG?", "What are its key components?"]
for turn_index, user_question in enumerate(conversation):
    response = conversational_rag.invoke({"question": user_question})
    # Blank line between turns, but not before the first one
    leading_gap = "\n" if turn_index > 0 else ""
    print(f"{leading_gap}Question: {user_question}")
    print(f"Answer: {response['answer']}")



Question: What is RAG?
Answer: Retrieval-Augmented Generation (RAG) is an AI framework that enhances large language model (LLM) outputs by retrieving relevant information from external sources. It combines the strengths of retrieval-based and generation-based approaches in natural language processing. RAG was introduced by researchers at Facebook AI in 2020 and aims to improve the factuality and specificity of generated responses by grounding them in retrieved passages. The key components of RAG include a retriever module that finds relevant documents from a knowledge base, an augmenter that incorporates the retrieved information, and a generator that produces the final response using both the query and the retrieved context.

Question: What are its key components?
Answer: The key components of Retrieval-Augmented Generation (RAG) are:

1. A retriever module that finds relevant documents from a knowledge base.
2. An augmenter that incorporates the retrieved information.
3. A generator 