In [2]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader(file_path='./input.pdf')
docs = loader.load()

In [3]:
docs

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-10-24T00:50:47+00:00', 'author': '', 'keywords': '', 'moddate': '2023-10-24T00:50:47+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': './input.pdf', 'total_pages': 24, 'page': 0, 'page_label': '1'}, page_content='Active Retrieval Augmented Generation\nZhengbao Jiang1∗ Frank F. Xu1∗ Luyu Gao1∗ Zhiqing Sun1∗ Qian Liu2\nJane Dwivedi-Yu3 Yiming Yang1 Jamie Callan1 Graham Neubig1\n1Language Technologies Institute, Carnegie Mellon University\n2Sea AI Lab 3FAIR, Meta\n{zhengbaj,fangzhex,luyug,zhiqings,gneubig}@cs.cmu.edu\nAbstract\nDespite the remarkable ability of large lan-\nguage models (LMs) to comprehend and gen-\nerate language, they have a tendency to hal-\nlucinate and create factually inaccurate out-\nput. Augmenting LMs by retrieving informa-\ntion from 

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(chunk_size = 200, chunk_overlap = 20)
chunks = splitter.split_documents(docs)
chunks

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-10-24T00:50:47+00:00', 'author': '', 'keywords': '', 'moddate': '2023-10-24T00:50:47+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': './input.pdf', 'total_pages': 24, 'page': 0, 'page_label': '1'}, page_content='Active Retrieval Augmented Generation\nZhengbao Jiang1∗ Frank F. Xu1∗ Luyu Gao1∗ Zhiqing Sun1∗ Qian Liu2\nJane Dwivedi-Yu3 Yiming Yang1 Jamie Callan1 Graham Neubig1'),
 Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-10-24T00:50:47+00:00', 'author': '', 'keywords': '', 'moddate': '2023-10-24T00:50:47+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': './input.pdf', 'tot

In [6]:
from dotenv import load_dotenv
load_dotenv()

from langchain.embeddings import HuggingFaceEmbeddings
embedding_model = HuggingFaceEmbeddings(model_name = 'BAAI/bge-base-en-v1.5')

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [7]:
from langchain.vectorstores import Chroma
vectorstore = Chroma.from_documents(documents=chunks, embedding=embedding_model)

In [8]:
vectorstore_retriever = vectorstore.as_retriever(search_kwargs={'k':3})

In [9]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
keyword_retriever = BM25Retriever.from_documents(documents=chunks)
keyword_retriever.k = 3

In [10]:

retriver = EnsembleRetriever(retrievers=[vectorstore_retriever, keyword_retriever], weights=[0.3,0.7])

In [32]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model='gemini-2.5-flash-preview-04-17')

In [33]:
from langchain.prompts import ChatPromptTemplate

template = 'You are an assistant for question-answering task.Use the following pieces of retrieved contect to answer the question.' \
            'If you dont know the answer just say that you dont know .Use ten sentences maximum and keep the answer concise' \
            'Question : {question}'\
            'Context : {context}'\
            'Answer ::'

prompt = ChatPromptTemplate.from_template(template=template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='You are an assistant for question-answering task.Use the following pieces of retrieved contect to answer the question.If you dont know the answer just say that you dont know .Use ten sentences maximum and keep the answer conciseQuestion : {question}Context : {context}Answer ::'), additional_kwargs={})])

In [34]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = ({'context': retriver, 'question': RunnablePassthrough()} | prompt | llm | StrOutputParser())

In [38]:
rag_chain.invoke('What is FLARE?')

'Based on the provided context, FLARE is described as a generic method. It is effective at retrieving information. The document states that FLARE outperforms baseline methods across all tasks and datasets mentioned in the context. Its performance is evaluated using metrics such as EM, F1, Precision, and Recall. Hyperparameters for FLARE are selected based on a development set. The context also notes that FLARE refers to FLAREdirect if not specifically stated.'