In [11]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [4]:
# Relative path to the source PDF that the loader below reads.
FILE_PATH = "Data/llama2.pdf"

In [6]:
# Build a PDF loader bound to the configured file path.
loader = PyPDFLoader(file_path=FILE_PATH)

In [13]:
# Read the PDF through the loader in one synchronous call and keep the result in `pages`.
pages = loader.load()

To load this faster, use lazy loading (asynchronous load), which yields the pages one at a time.

In [15]:
pages = []
async for page in loader.alazy_load():
    pages.append(page)

In [16]:
# Check how many page documents were loaded
len(pages)

77

In [12]:
# Chunking setup: chunks of size 500 with an overlap of 50 between neighbouring chunks.
# NOTE(review): sizes are presumably measured in characters (the splitter's default) — confirm.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

In [18]:
# Apply the splitter to the loaded pages to obtain the chunked documents.
split_docs = splitter.split_documents(documents=pages)

In [19]:
# Number of chunks produced by the splitter
len(split_docs)

615

Now let's store this data in a vector store.

In [22]:
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Only export HF_TOKEN when it is actually defined: assigning None into
# os.environ raises TypeError, which would break this cell on machines
# that have no token configured.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    os.environ["HF_TOKEN"] = hf_token

# Sentence-embedding model used for both indexing and querying.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


In [23]:
from langchain_community.vectorstores.utils import DistanceStrategy

# Derive the index dimensionality from the embedding model itself instead of
# hard-coding 384, so swapping the model cannot silently mismatch the index.
embedding_dim = len(embeddings.embed_query("dimension probe"))

# Inner-product index. The FAISS wrapper is told about it explicitly so that
# score thresholds and relevance scores are interpreted as inner products
# rather than the wrapper's default Euclidean distances.
index = faiss.IndexFlatIP(embedding_dim)
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [24]:
# Embed every chunk and insert it into the vector store; the call returns the generated document IDs.
# NOTE(review): re-running this cell presumably inserts the same chunks again — confirm before re-executing.
vector_store.add_documents(split_docs)

['7518e075-3574-47a2-a058-da93e25955b9',
 '1c775a4f-2c15-4acc-94e6-7660ddea5871',
 '50f97c8b-c8f0-4c6d-a1eb-6f3ea3abe7ef',
 '67142b61-c227-44de-86c9-07879423bbdd',
 'f4f3a48a-d019-4442-a7a7-ecb43b1c8bf3',
 'bea30368-8e05-4347-a825-2db84fea9734',
 '8c2eb823-b855-4c17-ae41-3a35283ad881',
 '3f2fddd1-adbe-47a6-9218-c7c3485c1adb',
 '32c0f670-ebc5-4c27-bcfb-dee6c76bb801',
 'cab3a667-a349-412f-b9fa-c9dae9a929c4',
 'cb9024e3-8a43-43ec-8525-cff663475f78',
 '795559f9-46f6-43c5-bb83-df8f9d89ad31',
 '0e7f8ba4-6551-477c-9360-d3e466052ee8',
 '9109b60b-b837-46ed-8241-f2e74f316d9c',
 '279b4bdf-df75-4be4-97d2-53014c7f4e84',
 'fca5b0f3-231c-4b9d-8014-78110681711a',
 '333c3325-8ab1-4453-b915-36dd1c0c8c3b',
 'e545d582-0c26-47cd-bec3-b9953a2fef67',
 'e2d03d33-acb7-4303-8a81-008d7f7eb0a0',
 'a90879a1-8c1b-4bd2-b8f9-d1271b2baa01',
 'cac2883b-5bef-47c0-9938-ef85ec3df2d1',
 '57e23d0b-d816-4166-ae84-c44addff691f',
 '3ff83a8d-718e-40a6-80b2-d3443b95a0ce',
 'f549ece8-d87d-41d7-aeed-8226f6b44896',
 '6eae8ffe-df8a-

In [None]:
# Retrieve 10 documents for each query
retriever = vector_store.as_retriever(search_kwargs={"k": 10})

In [26]:
# Sanity-check the retriever on a sample question; it returns the matching Document chunks.
retriever.invoke("What is the llama model?")

[Document(id='6eae8ffe-df8a-4c2c-b24b-5de4554e1394', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'Data/llama2.pdf', 'total_pages': 77, 'page': 3, 'page_label': '4'}, page_content='work (Section 6), and conclusions (Section 7).\n‡https://ai.meta.com/resources/models-and-libraries/llama/\n§We are delaying the release of the 34B model due to a lack of time to sufficiently red team.\n¶https://ai.meta.com/llama\n‖https://github.com/facebookresearch/llama\n4'),
 Document(id='6a3fbefd-6503-4494-b8cc-33da0a3bdca1', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00

So far we have implemented:
1. The data ingestion pipeline, and
2. The data retrieval pipeline.

The next part is to combine them with an LLM (the augmentation stage).
- This step completes the RAG pipeline.

In [46]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model used for the generation step of the pipeline.
# NOTE(review): presumably authenticates via a Google API key in the environment (e.g. loaded from .env) — confirm.
model = ChatGoogleGenerativeAI(model="gemini-3-flash-preview")

# Let's now use the chaining concept.

#### So what do we have to chain here?
- context, prompt, model and parser

#### Do we have prompt as of now?
- No, we haven't written a prompt yet.

#### Can we write a prompt?
- Yes we can write a prompt.

#### But why would we write a prompt when a template is already available?
- For basic RAG, we can find a ready-made template.

We can find such templates on the LangChain Hub.

In [39]:
from langchain_classic import hub
# Pull the ready-made RAG prompt template published under the "rlm/rag-prompt" handle.
prompt = hub.pull("rlm/rag-prompt")

In [40]:
import pprint

# Inspect the message templates contained in the pulled prompt.
pprint.pprint(prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]

#### Context is taken from retriever
#### Prompt is taken from hub
#### Model is taken from google
#### Parser is taken from Langchain

In [41]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [42]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by
        a blank line (``"\\n\\n"``). An empty iterable gives ``""``.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

In [43]:
# Now finally we are going to create a chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [48]:
# Run the full pipeline end to end: the question drives retrieval and is also placed in the prompt.
rag_chain.invoke("What is llama model?")

'Llama 2 is a new technology that carries risks with its use. Llama 2 was trained between January 2023 and July 2023. It is intended for commercial and research use in English.'