In [33]:
!pip install "giskard[llm]" --upgrade
!pip install llama-index PyMuPDF




In [34]:
!wget "https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_LongerReport.pdf" -O "ipcc_report.pdf"

--2024-08-21 14:25:38--  https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_LongerReport.pdf
Resolving www.ipcc.ch (www.ipcc.ch)... 2606:4700:8de4:eefd:38c6:71:6814:fe03, 104.20.254.3, 104.20.255.3, ...
Connecting to www.ipcc.ch (www.ipcc.ch)|2606:4700:8de4:eefd:38c6:71:6814:fe03|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5218936 (5.0M) [application/pdf]
Saving to: ‘ipcc_report.pdf’


2024-08-21 14:25:39 (8.83 MB/s) - ‘ipcc_report.pdf’ saved [5218936/5218936]



In [12]:
import os
import giskard

import google.generativeai as genai

from giskard.llm.client.gemini import GeminiClient

genai.configure(api_key="api key")

giskard.llm.set_default_client(GeminiClient())

In [13]:
os.environ["GOOGLE_API_KEY"]="api key"

In [14]:
import pandas as pd
import warnings
pd.set_option("display.max_colwidth", 400)
warnings.filterwarnings('ignore')

In [15]:
from langchain_community.document_loaders import PyPDFLoader

In [27]:
!pip install pypdf





In [8]:
!pip install llama-index-llms-langchain



In [21]:
%pip install -qU langchain-text-splitters

Note: you may need to restart the kernel to use updated packages.


In [10]:
from langchain_community.embeddings import VertexAIEmbeddings
from langchain_google_vertexai import VertexAI


In [11]:
from llama_index.llms.langchain import LangChainLLM

contents: {
    "role": "user",
  }

llm = LangChainLLM(llm=VertexAI(model_name="gemini-pro"))


In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)



In [16]:
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.readers.file import PyMuPDFReader
from llama_index.core.base.llms.types import ChatMessage, MessageRole

loader = PyMuPDFReader()
ipcc_documents = loader.load(file_path="./ipcc_report.pdf")


In [17]:
splitter = SentenceSplitter(chunk_size=512)
index = VectorStoreIndex.from_documents(ipcc_documents, transformations=[splitter],embed_model=VertexAIEmbeddings())
chat_engine = index.as_chat_engine(llm=llm)



In [18]:
str(chat_engine.chat("How much will the global temperature rise by 2100?"))


'The global temperature will rise by 1.4 to 5.7 degrees Celsius by 2100. The amount of warming will depend on the amount of greenhouse gases that are emitted. If we emit a lot of greenhouse gases, the temperature will rise by 4.4 degrees Celsius. If we emit very few greenhouse gases, the temperature will rise by 1.4 degrees Celsius.'

In [23]:
from giskard.rag import KnowledgeBase, generate_testset, QATestset

text_nodes = splitter(ipcc_documents)
knowledge_base_df = pd.DataFrame([node.text for node in text_nodes], columns=["text"])
knowledge_base = KnowledgeBase(knowledge_base_df,embedding_model=VertexAIEmbeddings())




In [19]:
testset = QATestset.load("ipcc_testset.jsonl")

In [20]:
testset.to_pandas().head(5)


Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
450623f7-e644-4bfa-88d5-90f31dd15d99,What are the consequences of global warming exceeding 2°C for climate resilient development in some regions and sub-regions?,Climate resilient development will not be possible in some regions and sub-regions if global warming exceeds 2°C.,Document 196: Accelerated and equitable mitigation and adaptation bring beneﬁts from avoiding damages from climate \nchange and are critical to achieving sustainable development (high conﬁdence). Climate resilient development138 \npathways are progressively constrained by every increment of further warming (very high conﬁdence). There is a \nrapidly closing window of opportunity to secure a li...,[],"{'question_type': 'simple', 'seed_document_id': 196, 'topic': 'Climate Change Action'}"
79f98d3d-766b-4cbf-800f-03e87966e3e5,What is the projected decline in coral reefs with a global warming of 1.5°C?,Coral reefs are projected to decline by a further 70–90% at 1.5°C of global warming.,"Document 123: 71\nLong-Term Climate and Development Futures\nSection 3\n3.1.2 Impacts and Related Risks\nFor a given level of warming, many climate-related risks are \nassessed to be higher than in AR5 (high conﬁdence). Levels of \nrisk120 for all Reasons for Concern121 (RFCs) are assessed to become high \nto very high at lower global warming levels compared to what was \nassessed in AR5 (high...",[],"{'question_type': 'simple', 'seed_document_id': 123, 'topic': 'Climate Change Risks'}"
1ee224a2-62af-4877-b172-baec006512e6,What is the expected uncertainty range in the total potential for mitigation options according to the IPCC report?,The uncertainty in the total potential is typically 25–50%.,"Document 251: Where a gradual colour transition is shown, the breakdown of the potential into cost categories is not well known or depends heavily on factors such \nas geographical location, resource availability, and regional circumstances, and the colours indicate the range of estimates. The uncertainty in the total potential is typically 25–50%. \nWhen interpreting this ﬁgure, the following...",[],"{'question_type': 'simple', 'seed_document_id': 251, 'topic': 'Climate Change Action'}"
16264bd2-510a-4368-a9d6-0a5fef7feb65,What is the effect of increasing cumulative net CO2 emissions on the effectiveness of natural land and ocean carbon sinks?,The proportion of emissions taken up by land and ocean decreases with increasing cumulative net CO2 emissions.,"Document 166: While \nnatural land and ocean carbon sinks are projected to take up, in absolute \nterms, a progressively larger amount of CO2 under higher compared to \nlower CO2 emissions scenarios, they become less effective, that is, the \nproportion of emissions taken up by land and ocean decreases with \nincreasing cumulative net CO2 emissions (high conﬁdence). Additional \necosystem resp...",[],"{'question_type': 'simple', 'seed_document_id': 166, 'topic': 'Climate Change Projections'}"
c31c6857-c505-45ef-98e5-aa524c4b05e7,What does hatching represent on the maps depicting changes in maize yield and fisheries catch potential?,"Hatching indicates areas where less than 70% of the climate-crop model combinations agree on the sign of impact for maize yield, and where the two climate-fisheries models disagree in the direction of change for fisheries catch potential.","Document 135: Interquartile ranges of WGLs by 2081–2100 \nunder RCP2.6, RCP4.5 and RCP8.5. The presented index is consistent with common features found in many indices included within WGI and WGII assessments. (c) Impacts \non food production: (c1) Changes in maize yield at projected GWLs of 1.6°C to 2.4°C (2.0°C), 3.3°C to 4.8°C (4.1°C) and 3.9°C to 6.0°C (4.9°C). Median yield changes \nfrom ...",[],"{'question_type': 'simple', 'seed_document_id': 135, 'topic': 'Climate Change Assessment'}"


In [21]:
from giskard.rag import evaluate, RAGReport
from giskard.rag.metrics.ragas_metrics import ragas_context_recall, ragas_context_precision

In [24]:
def answer_fn(question, history=None):
    if history:
        answer = chat_engine.chat(question)
    else:
        answer = chat_engine.chat(question)
    return str(answer)

report = evaluate(answer_fn,
                testset=testset,
                knowledge_base=knowledge_base,
                metrics=[ragas_context_recall, ragas_context_precision])


Asking questions to the agent:   0%|          | 0/120 [00:00<?, ?it/s]

ValueError: Reached max iterations.

In [47]:
report = RAGReport.load("./ipcc_report.html")

FileNotFoundError: [Errno 2] No such file or directory: 'ipcc_report.html/knowledge_base_meta.json'

In [48]:
!ls

artifacts	     llm_evaluation_harness.ipynb  perplixityOfLLM.py
Bert.ipynb	     lmEvalHarness.py		   report.html
BLEU.py		     lmevalHarnessResult	   ROUGE.py
db		     Memory_Consumption.ipynb	   Untitled1.ipynb
Giskard.ipynb	     METEOR.py			   Untitled2.ipynb
Hallucintions.ipynb  multimodalModel.ipynb	   Untitled.ipynb
ipcc_report.pdf      nvdia.py			   WEAT.ipynb
ipcc_testset.jsonl   perplexity.py		   WeightsAndBias.ipynb
