In [2]:
import os
import sys
from dotenv import load_dotenv
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers import ContextualCompressionRetriever
from langchain.chains import RetrievalQA

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Project-local helpers, star-imported. The earlier sys.path append was dropped
# for Colab compatibility.
# NOTE(review): names used further down (e.g. encode_pdf, ChatOpenAI) are not
# imported explicitly in this notebook and presumably come from these star
# imports — confirm before replacing them with explicit imports.
from helper_functions import *
from evaluation.rag_evaluation import *

# Fail fast with an actionable message when the OpenAI key is missing or empty.
# The previous `os.environ[...] = os.getenv(...)` was a no-op when the variable
# was set (os.getenv reads os.environ) and raised an opaque
# "TypeError: str expected, not NoneType" when it was not.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [3]:
# Location of the PDF to index. Set CLIMATE_PDF_PATH (in the shell or in .env)
# to point at your own copy; when it is unset or empty, the original
# machine-specific location is used as the fallback.
path = os.environ.get("CLIMATE_PDF_PATH") or r"C:\Users\Revathi\Documents\GenAIProjects\All_RAG_Techniques\data\Understanding_Climate_Change.pdf"

In [4]:
# Build the vector store for the PDF at `path` with encode_pdf. That helper is
# not defined in this notebook (presumably it comes from the helper_functions
# star import — confirm). The returned object is turned into a retriever via
# .as_retriever() in a later cell.
vector_store = encode_pdf(path)

##### Build the retriever and the compressor, then combine them

In [5]:
# Chat model shared by the compressor and the QA chain.
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0, max_tokens=4000)

# Base retriever over the vector store built earlier.
retriever = vector_store.as_retriever()

# Contextual compressor driven by the same LLM.
compressor = LLMChainExtractor.from_llm(llm)

# Wrap the base retriever so retrieved documents pass through the compressor.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

# QA chain on top of the compression retriever; source documents are returned
# alongside the answer so they can be inspected.
qa_chain = RetrievalQA.from_chain_type(
    retriever=compression_retriever,
    llm=llm,
    return_source_documents=True,
)

In [6]:
# Ask a single question through the compression-backed QA chain.
query = "What is the main topic of the document?"
result = qa_chain.invoke({"query": query})

# Show the generated answer first ...
qa_answer = result["result"]
print(qa_answer)

# ... then the documents the chain returned as its sources.
qa_sources = result["source_documents"]
print("Source documents:", qa_sources)

The main topic of the document is climate change, focusing on its impacts on human health, the importance of global and local climate action, and the ethical dimensions of climate justice. It discusses various health impacts related to climate change, international collaboration efforts, national strategies for emission reductions, and the need for social equity in addressing climate issues.
Source documents: [Document(metadata={'producer': 'Microsoft® Word 2021', 'creator': 'Microsoft® Word 2021', 'creationdate': '2024-07-13T20:17:34+03:00', 'author': 'Nir', 'moddate': '2024-07-13T20:17:34+03:00', 'source': 'C:\\Users\\Revathi\\Documents\\GenAIProjects\\All_RAG_Techniques\\data\\Understanding_Climate_Change.pdf', 'total_pages': 33, 'page': 14, 'page_label': '15'}, page_content='Chapter 10: Climate Change and Human Health \nHealth Impacts \nHeat-Related Illnesses \nRising temperatures and more frequent heatwaves increase the risk of heat-related illnesses, such as heat exhaustion and h