In [14]:
import os
import sys
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS

In [15]:
# Read the Groq credential from the environment (load_dotenv() ran in the import cell).
groq_api_key = os.environ.get("GROQ_API_KEY")

# Groq-hosted chat model used by the notebook.
llm = ChatGroq(model_name="Llama3-8b-8192", groq_api_key=groq_api_key)

# PDF that serves as the knowledge source for the vector store.
path = "data/Understanding_Climate_Change.pdf"

In [16]:
from utility import replace_t_with_space,retrieve_context_per_question,show_context

In [17]:
def encode_pdf(path, chunk_size=1000, chunk_overlap=200):
    """
    Build a FAISS vector store from the text of a PDF file.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping chunks,
    passed through ``replace_t_with_space`` and embedded with the
    "all-MiniLM-L6-v2" HuggingFace model.

    Args:
        path: Location of the PDF file to index.
        chunk_size: Maximum length of each chunk, measured with ``len``.
        chunk_overlap: Amount of text shared by neighbouring chunks.

    Returns:
        A FAISS vector store built from the cleaned chunks.
    """
    # Read the PDF into LangChain documents.
    pages = PyPDFLoader(path).load()

    # Cut the documents into overlapping chunks.
    chunker = RecursiveCharacterTextSplitter(
        length_function=len,
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    # replace_t_with_space comes from the local utility module; presumably it
    # swaps tab characters for spaces in each chunk -- confirm in utility.py.
    chunks = replace_t_with_space(chunker.split_documents(pages))

    # Embed the cleaned chunks and index them in FAISS.
    return FAISS.from_documents(
        chunks,
        HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
    )

In [19]:
# Index the PDF with the same chunking values as encode_pdf's defaults.
chunk_vector_store = encode_pdf(path=path, chunk_size=1000, chunk_overlap=200)

In [20]:
# Convert the vector store into a retriever that returns the top 3 chunks.
# The search option must be the lowercase key "k": an uppercase "K" does not
# match the vector store's search parameter, so the requested limit would be
# ignored and the store's default number of results returned instead.
retriever = chunk_vector_store.as_retriever(search_kwargs={"k": 3})

In [21]:
# Smoke-test the retriever with a single question and print what it returns.
test_query = "What is the main cause of climate change?"

# Both helpers come from the local utility module.
context = retrieve_context_per_question(test_query, retriever)
show_context(context)

Context 1:
Chapter 2: Causes of Climate Change 
Greenhouse Gases 
The primary cause of recent climate change is the increase in greenhouse gases in the 
atmosphere. Greenhouse gases, such as carbon dioxide (CO2), methane (CH4), and nitrous 
oxide (N2O), trap heat from the sun, creating a "greenhouse effect." This effect is essential 
for life on Earth, as it keeps the planet warm enough to support life. However, human 
activities have intensified this natural process, leading to a warmer climate. 
Fossil Fuels 
Burning fossil fuels for energy releases large amounts of CO2. This includes coal, oil, and 
natural gas used for electricity, heating, and transportation. The industrial revolution marked 
the beginning of a significant increase in fossil fuel consumption, which continues to rise 
today. 
Coal


Context 2:
Most of these climate changes are attributed to very small variations in Earth's orbit that 
change the amount of solar energy our planet receives. During the Holocene epoch,

In [43]:
# Evaluate the retriever with the project's evaluate_rag helper (the original
# note describes this as a Deepeval-based evaluation).
from evaluation_rag import evaluate_rag

# NOTE(review): the second argument is presumably the number of generated
# test questions -- confirm against evaluation_rag.
results = evaluate_rag(retriever, 2)

In [44]:
results

{'questions': ['1. **Science-focused Question:**',
  "   - Explain the role of greenhouse gases in the Earth's climate system and discuss how human activities have altered the natural greenhouse effect, leading to global climate change. Include examples of specific gases and their sources.",
  '',
  '2. **Policy and Social Impact Question:**',
  '   - Evaluate the effectiveness of international agreements, such as the Paris Agreement, in mitigating climate change. Discuss the challenges and successes these agreements face in achieving global cooperation and reducing greenhouse gas emissions.'],
 'results': ['```json\n{\n  "Relevance": 1,\n  "Completeness": 1,\n  "Conciseness": 1\n}\n```',
  '```json\n{\n  "Relevance": 5,\n  "Completeness": 4,\n  "Conciseness": 3\n}\n```',
  '```json\n{\n  "Relevance": 1,\n  "Completeness": 1,\n  "Conciseness": 1\n}\n```',
  '```json\n{\n  "Relevance": 5,\n  "Completeness": 4,\n  "Conciseness": 3\n}\n```',
  '```json\n{\n  "Relevance": 4,\n  "Completene

In [27]:
import json
from typing import List, Tuple, Dict, Any

from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [29]:
# Chat model used to generate the test questions.
# NOTE: this rebinds `llm`, replacing the ChatGroq instance created earlier.
llm = ChatOpenAI(model_name="gpt-4-turbo-preview", temperature=0)

# Prompt asking for a caller-chosen number of climate-change questions.
question_gen_prompt = PromptTemplate.from_template(
        """Generate {no_of_questions} diverse test questions about climate change and 
        return list of only questions. Do not include any header or subject in the question
        """
)

# Pipeline: prompt -> chat model -> plain string.
question_chain = question_gen_prompt | llm | StrOutputParser()

# Request two questions and show the raw text returned by the model.
questions = question_chain.invoke({"no_of_questions": 2})
print(questions)

1. How does the melting of polar ice caps contribute to global sea level rise and what are the potential impacts on coastal communities?
2. Discuss the role of deforestation in the carbon cycle and its effects on global climate change.


In [31]:
# Turn the model's reply into one question per list entry. Blank lines and
# surrounding whitespace are dropped so that an empty string (e.g. from a
# trailing newline or a spacer line in the reply) is never treated as a question.
list_of_questions = [
    line.strip() for line in questions.splitlines() if line.strip()
]

In [32]:
list_of_questions

['1. How does the melting of polar ice caps contribute to global sea level rise and what are the potential impacts on coastal communities?',
 '2. Discuss the role of deforestation in the carbon cycle and its effects on global climate change.']