In [1]:
# Opt out of RAGAS usage tracking by setting its do-not-track environment variable
# more info: https://github.com/explodinggradients/ragas/issues/49
import os
os.environ["RAGAS_DO_NOT_TRACK"] = "True"

In [2]:
import logging
import sys
import google.generativeai as genai
import pathlib
import textwrap
import ragas
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from llama_index.core import Document, VectorStoreIndex, Settings, StorageContext, load_index_from_storage
from llama_index.vector_stores.faiss import FaissVectorStore
import pandas as pd
import faiss

from IPython.display import display
from IPython.display import Markdown

def to_markdown(text):
  """Render ``text`` as a Markdown blockquote for notebook display.

  Each '•' character is rewritten as an indented '*' list marker, and every
  line (blank lines included) is prefixed with '> '.
  """
  starred = text.replace('•', '  *')
  # keepends=True preserves the original line terminators while prefixing.
  quoted_lines = ['> ' + line for line in starred.splitlines(True)]
  return Markdown(''.join(quoted_lines))

In [3]:
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [4]:
ragas._analytics.do_not_track()

True

In [5]:
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
#result = llm.invoke("Write me a party invitation to a one year's old's dinosaur birthday party.")

I0000 00:00:1724244819.988143     589 check_gcp_environment.cc:61] BIOS data file does not exist or cannot be opened.


In [6]:
# Create the document database: one entry per non-empty line of each speech.
files = [
    "./Speeches/state_of_the_union_042921.txt",
    "./Speeches/state_of_the_union_030122.txt",
    "./Speeches/state_of_the_union_020723.txt",
    "./Speeches/state_of_the_union_030724.txt",
]
sotu = []
for path in files:
    # The transcripts contain non-ASCII punctuation (curly apostrophes, em
    # dashes), so decode explicitly instead of relying on the platform default.
    # Assumes the files are saved as UTF-8.
    with open(path, encoding="utf-8") as speech:
        stripped_lines = (raw_line.rstrip() for raw_line in speech)
        # Skip lines that are empty once trailing whitespace is removed.
        sotu.extend(text for text in stripped_lines if text != '')

In [7]:
len(sotu)

1460

In [8]:
documents = [Document(text=line) for line in sotu]

In [9]:
documents[-1]

Document(id_='1dcaa153-8591-4d95-9f03-7b8cae864558', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='May God protect our troops.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')

In [10]:
# following these tutorials
# https://learnbybuilding.ai/tutorials/rag-chatbot-on-podcast-llamaindex-faiss-openai
# https://medium.com/@saurabhgssingh/understanding-rag-building-a-rag-system-from-scratch-with-gemini-api-b11ad9fc1bf7

# Vector width of the flat L2 FAISS index. It has to equal the output size of
# the embedding model used for the documents ("models/text-embedding-004" in
# this notebook).
d = 768 # dimensions of the embedding model that we're going to use — presumably 768 for text-embedding-004; confirm if the model changes
faiss_index = faiss.IndexFlatL2(d)
# Printed as a quick sanity check on the freshly created index.
print(faiss_index.is_trained)

True


In [11]:
doc_embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004") # optional: task_type="RETRIEVAL_DOCUMENT"
Settings.embed_model = doc_embeddings
#vector = embeddings.embed_query("hello, world!")
#vector[:5]

In [12]:
# Send INFO-level log records to stdout so they appear in the cell output.
# basicConfig already installs a stdout StreamHandler on the root logger;
# attaching a second stdout handler made every record print twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [13]:
#from llama_index import ServiceContext, set_global_service_context
#service_context = ServiceContext.from_defaults(llm=llm, embed_model=embededdings)
#set_global_service_context(service_context)

Settings.llm = llm

In [14]:
## uncomment for when you need to re-embed and vectorize documents
## otherwise, doing local loading below
#vector_store = FaissVectorStore(faiss_index=faiss_index)
#storage_context = StorageContext.from_defaults(vector_store=vector_store)
#index = VectorStoreIndex.from_documents(
#    documents, storage_context=storage_context, show_progress=True
#)
## save index to disk
#index.storage_context.persist()
#index

In [15]:
# Restore the previously persisted index from disk instead of re-embedding.
persist_dir = "./storage"
vector_store = FaissVectorStore.from_persist_dir(persist_dir)
# The storage context reads from the same directory and reuses the FAISS
# vector store loaded above.
storage_context = StorageContext.from_defaults(
    persist_dir=persist_dir,
    vector_store=vector_store,
)
index = load_index_from_storage(storage_context=storage_context)

INFO:root:Loading llama_index.vector_stores.faiss.base from ./storage/default__vector_store.json.
Loading llama_index.vector_stores.faiss.base from ./storage/default__vector_store.json.
INFO:llama_index.core.indices.loading:Loading all indices.
Loading all indices.


In [16]:
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x7f3f50490d10>

In [17]:
query_engine = index.as_query_engine(similarity_top_k=10)

In [None]:
query = "What has the President done related to healthcare?"
response = query_engine.query(query)

In [18]:
response.response

'The President has made significant efforts to improve healthcare access and affordability for Americans. They have expanded access to healthcare through the Affordable Care Act, lowered healthcare premiums for working families, and taken steps to reduce prescription drug costs. They have also re-ignited the Cancer Moonshot initiative, aimed at finding a cure for cancer. \n'

In [19]:
for node in response.source_nodes:
    print(f"{node.get_score()} -> {node.text}")

0.7506474256515503 -> I’m pleased to say that more Americans have health insurance now than ever in history.
0.7599508166313171 -> During these 100 days, an additional 800,000 Americans enrolled in the Affordable Care Act when I established the special sign-up period to do that — 800,000 in that period.
0.7667317390441895 -> The Affordable Care Act has been a lifeline for millions of Americans, protecting people with preexisting conditions, protecting women’s health.  And the pandemic has demonstrated how badly — how badly it’s needed.  Let’s lower deductibles for working families on the Affordable Care — in the Affordable Care Act.  (Applause.)  And let’s lower prescription drug costs.  (Applause.)
0.796398401260376 -> A president, my predecessor, who failed the most basic duty. Any President owes the American people the duty to care.
0.801885724067688 -> Over one hundred million of you can no longer be denied health insurance because of pre-existing conditions.
0.8065693378448486 -> 

In [20]:
#result = genai.embed_content(
#    model="models/text-embedding-004",
#    content="What is the meaning of life?",
#    task_type="retrieval_document",
#    title="Embedding of single string")

In [20]:
chat_engine = index.as_chat_engine(similarity_top_k=10, chat_mode='context')

In [21]:
query = "What does the President say about Congress during these 4 years?"
response = chat_engine.chat(query)

In [22]:
response.response

'The provided text doesn\'t offer any specific insights into the President\'s views on Congress over a 4-year period. \n\nHere\'s what we can glean from the excerpt:\n\n* **He acknowledges the potential for bipartisan cooperation:**  He states, "To my Republican friends, if we could work together in the last Congress, there is no reason we can’t work together in this new Congress." This suggests he believes collaboration is possible.\n* **He\'s optimistic about the future:**  He declares "the State of the Union is strong" and attributes this to the strength of the American people. This implies a positive outlook on the nation\'s future, which could be influenced by Congress\'s actions.\n\nHowever, the excerpt doesn\'t provide any specific criticisms or praise of Congress\'s actions during the past four years. To understand the President\'s full perspective, you\'d need to analyze more of his speeches, interviews, and official statements. \n'

In [23]:
print(response.response)

The provided text doesn't offer any specific insights into the President's views on Congress over a 4-year period. 

Here's what we can glean from the excerpt:

* **He acknowledges the potential for bipartisan cooperation:**  He states, "To my Republican friends, if we could work together in the last Congress, there is no reason we can’t work together in this new Congress." This suggests he believes collaboration is possible.
* **He's optimistic about the future:**  He declares "the State of the Union is strong" and attributes this to the strength of the American people. This implies a positive outlook on the nation's future, which could be influenced by Congress's actions.

However, the excerpt doesn't provide any specific criticisms or praise of Congress's actions during the past four years. To understand the President's full perspective, you'd need to analyze more of his speeches, interviews, and official statements. 



In [25]:
for node in response.source_nodes:
    print(f"{node.get_score()} -> {node.text}")

0.7652876377105713 -> Mr. Speaker. Madam Vice President. Members of Congress. My Fellow Americans.
0.7749111652374268 -> Tonight I come to the same chamber to address the nation.
0.8328826427459717 -> A president, my predecessor, who failed the most basic duty. Any President owes the American people the duty to care.
0.8343376517295837 -> And I will always be a president for all Americans!
0.842502236366272 -> My Republican friends you owe it to the American people to get this bill done.
0.8441706895828247 -> And if my predecessor is watching instead of playing politics and pressuring members of Congress to block this bill, join me in telling Congress to pass it!
0.8499408960342407 -> And yes, my purpose tonight is to both wake up this Congress, and alert the American people that this is no ordinary moment either.
0.852456271648407 -> Meanwhile, my predecessor told the NRA he’s proud he did nothing on guns when he was President.
0.8647689819335938 -> I signed a bipartisan budget deal t

In [24]:
chat_engine.chat_history

[ChatMessage(role=<MessageRole.USER: 'user'>, content='What does the President say about Congress during these 4 years?', additional_kwargs={}),
 ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='The provided text doesn\'t offer any specific insights into the President\'s views on Congress over a 4-year period. \n\nHere\'s what we can glean from the excerpt:\n\n* **He acknowledges the potential for bipartisan cooperation:**  He states, "To my Republican friends, if we could work together in the last Congress, there is no reason we can’t work together in this new Congress." This suggests he believes collaboration is possible.\n* **He\'s optimistic about the future:**  He declares "the State of the Union is strong" and attributes this to the strength of the American people. This implies a positive outlook on the nation\'s future, which could be influenced by Congress\'s actions.\n\nHowever, the excerpt doesn\'t provide any specific criticisms or praise of Congress\'s action

In [25]:
query2 = "The provided text are 4 years of speeches by the President. What does the President say about Congress during those 4 years?"
response2 = chat_engine.chat(query2)

In [26]:
print(response2.response)

You're right! I apologize for my previous response.  I was focusing too narrowly on the excerpt provided, not the entirety of the 4 years' worth of speeches. 

To accurately assess the President's views on Congress over four years, we would need to analyze a significant amount of text. Here's a framework for what we could look for:

* **Tone and Language:**  Does the President use positive or negative language when referring to Congress? Are there specific terms like "bipartisan," "gridlock," or "divided" that recur?
* **Specific Actions:** Does he praise or criticize specific actions taken by Congress, such as passing legislation, confirming appointments, or conducting investigations?
* **Calls to Action:**  Does he call on Congress to take specific actions or address particular issues?  
* **Legislative Successes:**  Does he highlight any legislation passed in collaboration with Congress? 
* **Themes and Priorities:**  Do his speeches reveal any overarching themes about how he views 

In [27]:
for node in response2.source_nodes:
    print(f"{node.get_score()} -> {node.text}")

0.6518715620040894 -> Throughout our history, Presidents have come to this chamber to speak to Congress, to the nation, and to the world to declare war, to celebrate peace, to announce new plans and possibilities.
0.7356963157653809 -> Mr. Speaker. Madam Vice President. Members of Congress. My Fellow Americans.
0.748851478099823 -> So on this night, in our 245th year as a nation, I have come to report on the State of the Union.
0.7538517713546753 -> Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.
0.7632147073745728 -> Because the soul of this nation is strong, because the backbone of this nation is strong, because the people of this nation are strong, the State of the Union is strong.
0.7771207690238953 -> So I have come here to fulfil my constitutional duty to report on the state of the union. And here is my report.
0.7832998037338257 -> And my report is this: the State o

In [17]:
# Start of RAGAS implementation

In [18]:
# Generate synthetic test data

In [19]:
from langchain_community.document_loaders import DirectoryLoader

In [20]:
# Load the files under ./Speeches through DirectoryLoader for RAGAS test-set generation.
# NOTE(review): this rebinds `documents`, which earlier in the notebook held the
# per-line llama_index Document objects; re-running the (currently commented-out)
# index-building cell after this point would pick up these objects instead.
loader = DirectoryLoader("./Speeches")
documents = loader.load()

In [21]:
documents[3].metadata

{'source': 'Speeches/state_of_the_union_042921.txt'}

In [22]:
# Mirror each document's 'source' path under a 'filename' metadata key
# (presumably what the RAGAS test-set generator looks for — TODO confirm).
for doc in documents:
    doc.metadata.update(filename=doc.metadata['source'])

In [23]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
#from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [24]:
from ratelimit import limits, RateLimitException, sleep_and_retry
from backoff import on_exception, expo
import google.api_core.exceptions as google_exceptions



In [62]:
# generator with Gemini models
# Note: had to edit underlying RAGAS library (cloned locally, edited files, then pip -e installed locally) for this issue re: temperature:
# https://github.com/explodinggradients/ragas/pull/657/files

# ToDo: Can you use the same model for generator and critic? These should be different than my main model with RAG, right?
# I've seen third party examples with the same... I did it to start with testing
# ToDo Future: Redo testset generation with different models than model for RAG
generator_llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", timeout=10) #, temperature=0.7, timeout=180, transport="rest"
critic_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", timeout=60)
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004") #transport="rest" #, request_options={"timeout": 10} #, request_options={"maxConcurrency": 5}

#LangchainLLMWrapper()
generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

In [27]:
# Possible future re-do to have fewer errors on quota being hit: work on retry / query limits for gemini to have this run
# https://github.com/google-gemini/cookbook/blob/dc168d968b61b0f1603b743bc2deb73723a159f7/quickstarts/Error_handling.ipynb
# https://medium.com/google-cloud/how-to-add-rate-limit-and-progress-bar-to-google-cloud-generative-ai-api-calls-502b89d35de8

In [29]:
# RAGAS Gemini github issues
#https://github.com/explodinggradients/ragas/issues/678
#https://github.com/explodinggradients/ragas/pull/657/files

In [74]:
from functools import wraps

def sleep_and_retry_forever(func):
    '''
    Return a wrapped function that retries on rate limit exceptions, sleeping the
    current thread until the rate limit resets. Keeps retrying until the call
    makes it through; any other exception propagates immediately.

    :param function func: The function to decorate.
    :return: Decorated function.
    :rtype: function
    '''
    # Imported locally so the decorator does not depend on a later notebook
    # cell having imported `time` into the global namespace.
    import time

    @wraps(func)
    def wrapper(*args, **kargs):
        '''
        Call the rate limited function. If the function raises a rate limit
        exception, sleep for the remaining time period and retry the function
        until it succeeds.

        :param args: non-keyword variable length argument list to the decorated function.
        :param kargs: keyworded variable length argument list to the decorated function.
        :return: Whatever the decorated function returns on its first successful call.
        '''
        while True:
            try:
                return func(*args, **kargs)
            except RateLimitException as exception:
                # Clamp at zero: time.sleep raises ValueError on negative values.
                time.sleep(max(0, exception.period_remaining))
    return wrapper

In [108]:
from ragas.run_config import RunConfig
#from ragas.llms.base import llm_factory

# Timeout / retry settings passed to RAGAS test-set generation
run_config = RunConfig(timeout=60, max_wait=180, max_workers=1, max_retries=10)  # timeout is set to 60 seconds; a single worker to limit concurrent requests

# Create the LLM with the increased timeout settings
#llm = llm_factory(run_config=run_config)

In [111]:
# generate testset
#testset = generator.generate_with_langchain_docs(documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})
#@on_exception(expo, google_exceptions.ResourceExhausted, max_tries=10) # if we receive exceptions from Google API, retry

CALLS = 15
RATE_LIMIT = 60


@on_exception(expo, google_exceptions.ResourceExhausted, max_tries=10)
@sleep_and_retry # If this function is called more often than the rate allows, sleep briefly
@limits(calls=CALLS, period=RATE_LIMIT)
def generate_testset_rate(docs):
    """
    Generate a 10-question RAGAS test set from ``docs``.

    The question mix is 50% simple, 25% reasoning and 25% multi-context, and
    generation runs synchronously with the notebook-level ``run_config``.

    The decorators wrap this function as a whole: at most CALLS invocations
    per RATE_LIMIT seconds (sleeping when exceeded), plus exponential-backoff
    retries (up to 10 tries) when ResourceExhausted escapes.
    NOTE(review): they do not appear to throttle the individual API requests
    made inside the generator — confirm.

    :param docs: documents handed to ``generator.generate_with_langchain_docs``.
    :return: the generated test set.
    """
    return generator.generate_with_langchain_docs(
        docs,
        test_size=10,
        distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
        run_config=run_config,
        raise_exceptions=True,
        is_async=False,
    )

In [94]:
import time
import random
import logging

In [102]:
# tried this 8/21, doesn't work well because of exceptions still
def make_request_with_backoff(max_retries=10, max_wait_time=300):
    """
    Generate the RAGAS test set, retrying with exponential backoff on 429 errors.

    Reads the notebook-level ``generator`` and ``documents``. An exception whose
    message contains "429" is treated as a rate-limit hit and retried after
    ``min(2 ** attempt + jitter, max_wait_time)`` seconds; any other exception
    is logged and re-raised unchanged.

    :param int max_retries: maximum number of generation attempts.
    :param max_wait_time: upper bound, in seconds, on a single backoff sleep.
    :return: the generated test set from the first successful attempt.
    :raises Exception: "Max retries reached" once every attempt hit a rate
        limit (chained to the last rate-limit error), or the original
        exception for any non-429 failure.
    """
    last_rate_limit_error = None
    for attempt in range(max_retries):
        try:
            logging.info(f"Attempt {attempt + 1} of {max_retries}")
            # NOTE(review): with raise_exceptions=False the generator may swallow
            # API errors internally, in which case this retry loop never sees
            # them — confirm against the RAGAS version in use.
            return generator.generate_with_langchain_docs(
                documents,
                test_size=10,
                distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
                raise_exceptions=False,
            )
        except Exception as e:
            if "429" not in str(e):
                logging.error(f"Unexpected error: {str(e)}")
                # Bare raise keeps the original traceback intact.
                raise
            last_rate_limit_error = e
            if attempt + 1 >= max_retries:
                # No attempts left: fail now instead of sleeping first.
                break
            wait_time = min((2 ** attempt) + random.uniform(0, 1), max_wait_time)
            logging.warning(f"Rate limit hit. Waiting {wait_time:.2f} seconds.")
            time.sleep(wait_time)
    raise Exception("Max retries reached") from last_rate_limit_error

In [103]:
logging.basicConfig(level=logging.INFO)

In [112]:
# Stopped here 8/21 (see https://github.com/explodinggradients/ragas/issues/624): changed to these settings, will try again tomorrow 8/22
# ToDo: Still generated lots of 429 resource exhausted requests... but it did help and everything finished. Need to reduce # requests more still
testset = generate_testset_rate(documents) #make_request_with_backoff() 

embedding nodes:   0%|          | 0/80 [00:00<?, ?it/s]

Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with

KeyboardInterrupt: 

Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with

In [105]:
testset

TestDataset(test_data=[])

In [106]:
testset_pd = testset.to_pandas()

In [107]:
testset_pd

In [37]:
testset_pd.to_csv('testset_flash_pro15.csv', index=False)

In [41]:
for i in testset_pd:
    print(testset_pd[i])

0    What specific measures did the American Rescue...
1    The speaker proposes a Jobs and Family Plan th...
2    How does the American Jobs Plan address climat...
3    What specific infrastructure projects are incl...
4    Why does President Biden believe that investin...
5    Why does the speaker believe the US should inv...
6    Can boosting US car & semiconductor production...
7    Who does the American Jobs Plan aim to connect...
8    How do the speaker's historical examples of in...
9    How are veterans being supported with housing ...
Name: question, dtype: object
0    [THE PRESIDENT:  Thank you. (Applause.) Thank ...
1    [ put forward their own proposal.\n\nSo, let’s...
2    [. This is the largest jobs plan since World W...
3    [. This is the largest jobs plan since World W...
4    [ put forward their own proposal.\n\nSo, let’s...
5    [ put forward their own proposal.\n\nSo, let’s...
6    [ that comes from stamping products “Made In A...
7    [. This is the largest jobs pl

In [27]:
# Reload a previously generated test set from disk so generation does not have to be re-run.
# NOTE(review): an earlier cell writes the generated set to 'testset_flash_pro15.csv',
# while this reads "testset.csv" — confirm this is the intended file.
testset_pd = pd.read_csv("testset.csv", index_col = None)

In [28]:
import ast
# This is a fix for the 'contexts' column being saved as a string; it needs to
# be a list. Only string values are parsed, so re-running this cell on an
# already-converted frame leaves the lists untouched instead of raising.
testset_pd['contexts'] = testset_pd['contexts'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [41]:
testset_pd

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What specific measures did the American Rescue...,[THE PRESIDENT: Thank you. (Applause.) Thank ...,The provided context mentions that the America...,simple,[{'source': 'Speeches/state_of_the_union_04292...,True
1,The speaker proposes a Jobs and Family Plan th...,"[ put forward their own proposal.\n\nSo, let’s...",The answer to the given question is not presen...,simple,[{'source': 'Speeches/state_of_the_union_04292...,True
2,How does the American Jobs Plan address climat...,[. This is the largest jobs plan since World W...,The American Jobs Plan addresses climate chang...,simple,[{'source': 'Speeches/state_of_the_union_04292...,True
3,What specific infrastructure projects are incl...,[. This is the largest jobs plan since World W...,The American Jobs Plan includes infrastructure...,simple,[{'source': 'Speeches/state_of_the_union_04292...,True
4,Why does President Biden believe that investin...,"[ put forward their own proposal.\n\nSo, let’s...",President Biden believes that investing in edu...,simple,[{'source': 'Speeches/state_of_the_union_04292...,True
5,Why does the speaker believe the US should inv...,"[ put forward their own proposal.\n\nSo, let’s...",The speaker believes the US should invest in e...,reasoning,[{'source': 'Speeches/state_of_the_union_04292...,True
6,Can boosting US car & semiconductor production...,[ that comes from stamping products “Made In A...,The text discusses the revitalization of Ameri...,reasoning,[{'source': 'Speeches/state_of_the_union_03012...,True
7,Who does the American Jobs Plan aim to connect...,[. This is the largest jobs plan since World W...,The American Jobs Plan aims to connect every A...,multi_context,[{'source': 'Speeches/state_of_the_union_04292...,True
8,How do the speaker's historical examples of in...,"[ vaccinated, and just cried — cried out of jo...",The speaker argues that public investment in i...,multi_context,[{'source': 'Speeches/state_of_the_union_04292...,True
9,How are veterans being supported with housing ...,[ I never believed it.\n\nThat’s why a year ag...,The speech mentions that veterans are being he...,reasoning,[{'source': 'Speeches/state_of_the_union_02072...,True


In [42]:
# RAGAS metrics guide: https://docs.ragas.io/en/latest/concepts/metrics/index.html#ragas-metrics

# Faithfulness - Measures the factual consistency of the answer to the context based on the question.
# Context_precision - Measures how relevant the retrieved context is to the question, conveying the quality of the retrieval pipeline.
# Answer_relevancy - Measures how relevant the answer is to the question.
# Context_recall - Measures the retriever’s ability to retrieve all necessary information required to answer the question.

from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)

In [44]:
from ragas import evaluate
from datasets import Dataset

In [None]:

# To dict
# NOTE(review): `answers` is only assigned in a later cell, where it holds
# query-engine response objects (their text is collected into `answers_r`),
# so this cell cannot run at this position — confirm whether it should be
# moved after answer generation or removed.
data = {
    "question": testset_pd["question"].tolist(),
    "answer": answers,
    "contexts": testset_pd["contexts"].tolist(),
    "ground_truth": testset_pd["ground_truth"].tolist()
}

# Convert dict to dataset
dataset = Dataset.from_dict(data)
print(dataset)


In [45]:
# per this ragas thread, need to convert pandas testset to Dataset format for evaluate to work
# https://github.com/explodinggradients/ragas/issues/803
testset_ds = Dataset.from_pandas(testset_pd)

In [46]:
testset_ds

Dataset({
    features: ['question', 'contexts', 'ground_truth', 'evolution_type', 'metadata', 'episode_done'],
    num_rows: 10
})

In [47]:
# generate answer column, per these two issues
# https://github.com/explodinggradients/ragas/issues/1145
# https://github.com/explodinggradients/ragas/issues/1084#issuecomment-2248219601
query_engine = index.as_query_engine(similarity_top_k=10)
#query = "What has the President done related to healthcare?"
#response = query_engine.query(query)

In [49]:
# ToDo: use [llm.invoke(question).content for question in generator["question"]]
answers = [query_engine.query(q) for q in testset_pd['question']]

  warn_deprecated(


In [51]:
# Pull the plain answer text out of each query-engine response object.
answers_r = [reply.response for reply in answers]

In [52]:
answers_r

['The American Rescue Plan provided immediate economic relief to millions of Americans, funded vaccination efforts, and helped keep people in their homes by providing rental assistance. It also provided loans to small businesses to help them reopen and retain employees, delivered food and nutrition assistance to those facing hunger, and allocated funds for cities, states, and counties to hire more police and invest in community violence interruption programs. Additionally, it made significant investments in improving healthcare for veterans, addressing the opioid crisis, and reducing child poverty. \n',
 'The speaker proposes several ways to reform corporate taxes, including raising the corporate minimum tax to at least 21%, implementing a 15% minimum tax rate for corporations, and quadrupling the tax on corporate stock buybacks. \n',
 'The American Jobs Plan will create jobs that will lay thousands of miles of transmission lines needed to build a resilient and fully clean grid. It wil

In [53]:
testset_pd['answer'] = answers_r

In [54]:
testset_pd

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done,answer
0,What specific measures did the American Rescue...,[THE PRESIDENT: Thank you. (Applause.) Thank ...,The provided context mentions that the America...,simple,[{'source': 'Speeches/state_of_the_union_04292...,True,The American Rescue Plan provided immediate ec...
1,The speaker proposes a Jobs and Family Plan th...,"[ put forward their own proposal.\n\nSo, let’s...",The answer to the given question is not presen...,simple,[{'source': 'Speeches/state_of_the_union_04292...,True,The speaker proposes several ways to reform co...
2,How does the American Jobs Plan address climat...,[. This is the largest jobs plan since World W...,The American Jobs Plan addresses climate chang...,simple,[{'source': 'Speeches/state_of_the_union_04292...,True,The American Jobs Plan will create jobs that w...
3,What specific infrastructure projects are incl...,[. This is the largest jobs plan since World W...,The American Jobs Plan includes infrastructure...,simple,[{'source': 'Speeches/state_of_the_union_04292...,True,The American Jobs Plan includes projects to re...
4,Why does President Biden believe that investin...,"[ put forward their own proposal.\n\nSo, let’s...",President Biden believes that investing in edu...,simple,[{'source': 'Speeches/state_of_the_union_04292...,True,President Biden believes that investing in edu...
5,Why does the speaker believe the US should inv...,"[ put forward their own proposal.\n\nSo, let’s...",The speaker believes the US should invest in e...,reasoning,[{'source': 'Speeches/state_of_the_union_04292...,True,The speaker believes the US should invest in e...
6,Can boosting US car & semiconductor production...,[ that comes from stamping products “Made In A...,The text discusses the revitalization of Ameri...,reasoning,[{'source': 'Speeches/state_of_the_union_03012...,True,Boosting US car and semiconductor production c...
7,Who does the American Jobs Plan aim to connect...,[. This is the largest jobs plan since World W...,The American Jobs Plan aims to connect every A...,multi_context,[{'source': 'Speeches/state_of_the_union_04292...,True,The American Jobs Plan aims to connect every A...
8,How do the speaker's historical examples of in...,"[ vaccinated, and just cried — cried out of jo...",The speaker argues that public investment in i...,multi_context,[{'source': 'Speeches/state_of_the_union_04292...,True,The speaker argues that public investment in i...
9,How are veterans being supported with housing ...,[ I never believed it.\n\nThat’s why a year ag...,The speech mentions that veterans are being he...,reasoning,[{'source': 'Speeches/state_of_the_union_02072...,True,The government is helping veterans afford rent...


In [81]:
# ToDo: See whether need to provide RAG llm with just the context in the dataset vs full context...... look into example #2 ([1]) as a test case

In [55]:
testset_ds = Dataset.from_pandas(testset_pd)

In [56]:
testset_ds

Dataset({
    features: ['question', 'contexts', 'ground_truth', 'evolution_type', 'metadata', 'episode_done', 'answer'],
    num_rows: 10
})

In [58]:
ragas_llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", timeout=10)
#embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

In [59]:
# ToDo: Need to add rate limiting
# Score the generated answers with the four RAGAS metrics imported earlier.
# NOTE(review): the 'contexts' column in testset_ds comes from the generated
# test set, not from query_engine's retrieved source nodes, so
# context_precision / context_recall presumably score the test-set contexts
# rather than this notebook's retriever — confirm.
# `embeddings` is the GoogleGenerativeAIEmbeddings instance created in the
# test-set generator cell (the local re-definition above is commented out).
result = evaluate(
    testset_ds,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
    llm = ragas_llm,
    embeddings=embeddings
)

Evaluating:   0%|          | 0/40 [00:00<?, ?it/s]

Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._achat_with

In [60]:
result

{'context_precision': 0.8571, 'faithfulness': 0.8833, 'answer_relevancy': 0.7234, 'context_recall': 1.0000}

In [None]:
# RAGAS metrics guide: https://docs.ragas.io/en/latest/concepts/metrics/index.html#ragas-metrics

# Faithfulness - Measures the factual consistency of the answer to the context based on the question.
# Context_precision - Measures how relevant the retrieved context is to the question, conveying the quality of the retrieval pipeline.
# Answer_relevancy - Measures how relevant the answer is to the question.
# Context_recall - Measures the retriever’s ability to retrieve all necessary information required to answer the question.