In [5]:
from datasets import load_dataset

# Load only the "test" split of the "climate_fever" dataset.
ds = load_dataset(
    "climate_fever",
    split="test",
)


In [6]:
# Bare expression: the notebook renders the dataset's repr as the cell output.
ds

Dataset({
    features: ['claim_id', 'claim', 'claim_label', 'evidences'],
    num_rows: 1535
})

In [7]:
from langchain.schema import Document

# Pull the "claim" column out of the dataset.
texts = ds["claim"]

# Wrap each claim string in a Document so it can be fed to the text splitter
# and the vector store.
documents = [
    Document(page_content=claim_text)
    for claim_text in texts
]


In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured for chunks of up to 2000 characters, with 200 characters
# of overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,
)

# Apply the splitter to every claim document.
splits = text_splitter.split_documents(documents)

In [9]:
# Sanity check: show the first two chunks, one per line.
print(splits[0], splits[1], sep="\n")

page_content='Global warming is driving polar bears toward extinction'
page_content='The sun has gone into ‘lockdown’ which could cause freezing weather, earthquakes and famine, say scientists'


In [10]:
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings 

import os

# Read the API key from the environment instead of embedding it in the
# notebook: a key written into a cell is exposed to everyone who can read
# the file or its version history.
# NOTE(review): the key that was previously hardcoded here has already been
# exposed and should be revoked and replaced.
# os.environ[...] raises KeyError when GOOGLE_API_KEY is unset, so a missing
# key fails here rather than at the first embedding request.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
    google_api_key=os.environ["GOOGLE_API_KEY"],
)

# Embed every split and index the results in a Chroma vector store.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

In [11]:
# Inspect a small sample of the stored records rather than printing the whole
# collection, which floods the cell output with every indexed document.
# Goes through the public Chroma.get() wrapper instead of the private
# `_collection` attribute.
print(vectorstore.get(limit=3))



In [12]:
# Expose the vector store through the retriever interface; as_retriever() is
# called without arguments, so the library's default search settings apply.
retriever = vectorstore.as_retriever()

In [13]:
from langchain import hub
# Pull the prompt published under "rlm/rag-prompt" from the LangChain hub.
# NOTE(review): presumably needs network access at run time — confirm.
prompt = hub.pull("rlm/rag-prompt")



In [14]:
from langchain_google_genai import ChatGoogleGenerativeAI

import os

# Chat model used to generate the final answer; temperature=0 is passed to
# request the least random sampling.
# The API key is read from the environment instead of being written into the
# notebook, where it would be exposed to anyone who can read the file.
# NOTE(review): the key that was previously hardcoded here has already been
# exposed and should be revoked and replaced.
# os.environ[...] raises KeyError when GOOGLE_API_KEY is unset.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    google_api_key=os.environ["GOOGLE_API_KEY"],
)

In [15]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [16]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` of each document, in order, separated by a
        single newline. An empty iterable yields an empty string.
    """
    contents = [document.page_content for document in docs]
    return "\n".join(contents)

In [17]:
# Input mapping for the prompt: "context" is the retriever output piped
# through format_docs, and "question" is the chain input passed through
# RunnablePassthrough().
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build the prompt inputs, fill the prompt, call the model,
# then pass the model output through StrOutputParser.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [19]:
# Run the chain on one sample fact-checking question; the result is shown as
# the cell output.
question = "Can you verify the statement 'Climate change is not caused by human activity'?"
rag_chain.invoke(question)

"The statement 'Climate change is not caused by human activity' is false. The scientific consensus, confirmed by observations, indicates that human-caused CO2 emissions are a primary driver of global warming. While some sources claim natural causes, the prevailing evidence supports human activity as the main contributor."