##### Retrieval-Augmented Generation (RAG) Full Pipeline Example

In [None]:
from langchain_community.document_loaders import TextLoader

In [None]:
# Load the text file into a list of LangChain Document objects.
loader = TextLoader("speech.txt")
documents = loader.load()
# Spaces around the count keep the status line readable (was "Loaded1document.").
print(f"Loaded {len(documents)} document(s).")
for doc in documents:
    # Print only the first 100 characters as a quick sanity check of the content.
    print(f"Preview: {doc.page_content[:100]}...")

In [None]:
# Break the loaded documents into overlapping chunks ready for embedding.
from langchain_text_splitters import RecursiveCharacterTextSplitter

# chunk_size caps each chunk's length; chunk_overlap is how much adjacent
# chunks share so that context is not lost at chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=500,
)
split_docs = text_splitter.split_documents(documents)
chunk_total = len(split_docs)
print(f"Split into {chunk_total} chunks.")

In [None]:
# Embed every chunk and index the resulting vectors in a FAISS vector store.
from langchain_openai import OpenAIEmbeddings
# FAISS is provided by langchain_community (already used by this notebook for
# TextLoader); the old `langchain.vectorstores` path is deprecated.
from langchain_community.vectorstores import FAISS

# NOTE(review): presumably picks up the OpenAI API key from the environment
# (OPENAI_API_KEY) — confirm it is set before running this cell.
embedding_model = OpenAIEmbeddings()
vector_store = FAISS.from_documents(split_docs, embedding_model)
print("Vector store created.")

In [None]:
# Create a retriever that performs similarity search over the vector store
# and returns the top 3 matching chunks (k=3) for each query.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [None]:
# Retrieve the chunks most relevant to the query from the vector store.
query = "What are the main points of the speech?"
# Retrievers are Runnables: `invoke` replaces the deprecated
# `get_relevant_documents` and returns the same list of Documents.
retrieved_docs = retriever.invoke(query)
print(f"Retrieved {len(retrieved_docs)} relevant documents.")
for i, doc in enumerate(retrieved_docs):
    print(f"\nChunks {i+1}: {doc.page_content[:200]}...")  # Show first 200 characters of each retrieved chunk

In [None]:
# Set up the prompt template, the LLM, and the output parser used for
# answer generation.
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# PromptTemplate does not accept the template as a positional constructor
# argument; `from_template` builds it and infers the input variables
# ({context} and {question}) from the placeholders.
prompt = PromptTemplate.from_template(
    "Given the context below, answer the user's question.\n\n"
    "{context}\n\n"
    "Question: {question}\n\n"
    "Answer: "
)
# gpt-3.5-turbo is a chat model, so it must be driven through the chat
# client rather than the legacy completions client.
llm = ChatOpenAI(model="gpt-3.5-turbo")
# Converts the model's message output into a plain string.
output_parser = StrOutputParser()

In [None]:
# Prepare the context from the retrieved chunks and run the RAG chain.
# Chunks are separated by blank lines so the model can tell them apart.
context = "\n\n".join(doc.page_content for doc in retrieved_docs)
# prompt -> LLM -> parser; the parser is the last step of the chain.
chain_with_parser = prompt | llm | output_parser

response = chain_with_parser.invoke({
    "context": context,
    "question": query,
})
# The chain ends with output_parser, so `response` is whatever the parser
# returns (a plain string for StrOutputParser), not a dict — print it directly
# instead of indexing it with ['output'].
print(f"Response: {response}")