In [1]:
# RAG with Gemini Flash 1.5 LLM and ARES evaluation
# Google Gemini: https://ai.google.dev/gemini-api/docs/models/gemini
# ARES: https://github.com/stanford-futuredata/ARES

In [2]:
# Establish RAG pipeline with Gemini

In [1]:
import os
import textwrap

from IPython.display import display, Markdown
import pandas as pd

import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from llama_index.core import Document, VectorStoreIndex, Settings, StorageContext, load_index_from_storage
from llama_index.vector_stores.faiss import FaissVectorStore
#from llama_index.llms.gemini import Gemini
#from llama_index.embeddings.gemini import GeminiEmbedding

import faiss

In [2]:
def to_markdown(text: str):
  """Render ``text`` as a Markdown block quote for notebook display.

  Every bullet character ('•') is rewritten as an indented Markdown list
  marker ('  *'), then every line is prefixed with '> '. The always-true
  predicate makes ``textwrap.indent`` prefix blank lines as well, so the
  whole text stays inside a single block quote.

  Relies on the ``textwrap`` module being imported in the notebook's import
  cell; without that import the call fails with NameError.

  Args:
    text: The plain-text string to format.

  Returns:
    An ``IPython.display.Markdown`` object wrapping the quoted text.
  """
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [3]:
# Configure the google.generativeai client with the locally stored API key.
# The key is read from the GOOGLE_API_KEY environment variable; the lookup
# raises KeyError if that variable is not set.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [4]:
# Gemini 1.5 Flash chat model via the LangChain wrapper; assigned to Settings.llm in a later cell.
# NOTE(review): "old function" presumably means this wrapper predates the commented-out
# llama_index Gemini(...) alternative further down — confirm.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash") # old function

In [5]:
# Build the document database: one entry per non-empty line of each speech file.
# Sources are 4 State of the Union speeches, all text taken from the whitehouse.gov
# briefing room speeches posted online, each including a title with the date of the speech.
# Example from 2024:
# https://www.whitehouse.gov/briefing-room/speeches-remarks/2024/03/07/remarks-of-president-joe-biden-state-of-the-union-address-as-prepared-for-delivery-2/
newfiles = [
    "./Speeches/titleedits/state_of_the_union_042921.txt",
    "./Speeches/titleedits/state_of_the_union_030122.txt",
    "./Speeches/titleedits/state_of_the_union_020723.txt",
    "./Speeches/titleedits/state_of_the_union_030724.txt",
]
sotu = []
for speech_path in newfiles:
    with open(speech_path) as speech_file:
        # Strip trailing whitespace (including the newline), then keep only
        # the lines that still contain text.
        trimmed_lines = (raw_line.rstrip() for raw_line in speech_file)
        sotu.extend(text for text in trimmed_lines if text != '')

In [6]:
# Wrap each non-empty speech line in its own llama_index Document (one Document per line).
documents = [Document(text=line) for line in sotu]

In [7]:
# Example of a loaded Document: display the last entry in `documents`
# (the last non-empty line of the last speech file read).
documents[-1]

Document(id_='52982d5f-5ae1-4b66-9963-20fef542c978', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='May God protect our troops.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')

In [8]:
# Set up the faiss index (flat index using L2 distance).
# NOTE(review): 768 is assumed to match the output size of the text-embedding-004
# model configured in a later cell — confirm before changing the embedding model.
d = 768 # dimensionality of the vectors stored in the index, i.e. of the embeddings produced by the Google embedding model used here
faiss_index = faiss.IndexFlatL2(d)
# Sanity check; printed True in the recorded run.
print(faiss_index.is_trained)

True


In [9]:
#llm = Gemini(model="models/gemini-1.5-flash", api_key=os.environ["GOOGLE_API_KEY"])

In [10]:
# Set up the embeddings: Google text-embedding-004 through the LangChain wrapper.
doc_embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004") # optional: task_type="RETRIEVAL_DOCUMENT"
#doc_embeddings = GeminiEmbedding(model="models/text-embedding-004")
# Register the embedding model and the chat model on llama_index's global Settings object.
# `llm` is the ChatGoogleGenerativeAI instance created in an earlier cell.
Settings.embed_model = doc_embeddings
Settings.llm = llm

In [11]:
# test llm
#response = llm.complete("Write the text for an invitation for a two year old's penguin themed birthday party.")
#print(response)

In [12]:
## Uncomment this cell when the documents need to be re-embedded and re-indexed;
## otherwise the previously persisted index is loaded from disk below.
#vector_store = FaissVectorStore(faiss_index=faiss_index)
#storage_context = StorageContext.from_defaults(vector_store=vector_store)
#index = VectorStoreIndex.from_documents(
#    documents, storage_context=storage_context, show_progress=True
#)

In [13]:
# save index to disk
#index.storage_context.persist()
#index

In [11]:
# Load the previously persisted FAISS vector store and index from ./storage
# (avoids re-embedding the documents on every run).
vector_store = FaissVectorStore.from_persist_dir("./storage")
storage_context = StorageContext.from_defaults(
    vector_store=vector_store, persist_dir="./storage"
)
# index id 'cef7ae30-ff1e-404a-bce6-85d59ca4b376' uses the speeches with a title that includes the date it was given
index = load_index_from_storage(storage_context=storage_context, index_id='cef7ae30-ff1e-404a-bce6-85d59ca4b376')

In [12]:
# Set up query and chat engines on the loaded index.
# Both are built with similarity_top_k=10
# (NOTE(review): presumably the number of most-similar nodes retrieved per query — confirm).
query_engine = index.as_query_engine(similarity_top_k=10)
# chat_mode='context' is passed through to llama_index unchanged.
chat_engine = index.as_chat_engine(similarity_top_k=10, chat_mode='context')

In [13]:
# Example query and response.
# These two statements must run: the next cell prints `response.response`, which
# raises NameError on a fresh kernel (Restart & Run All) if they are commented out.
query = "What does the President say about his administration's first 100 days and covid-19?"
response = query_engine.query(query)

In [15]:
# Print the answer text of the example response.
# `response` comes from the example-query cell above, which must have been executed first.
print(response.response)

The President is proud of his administration's progress in fighting the pandemic, citing the successful rollout of COVID-19 vaccines. He highlights that they have surpassed their goal of administering 100 million vaccine shots in 100 days, reaching over 220 million shots. 



In [21]:
# Let's try to get ARES to work with Gemini and our local RAG setup

In [13]:
from ares import ARES

vLLM not imported.


In [14]:
# Configuration for ARES's ues_idp run: an unlabeled evaluation set scored with
# in-domain few-shot prompts, using Gemini 1.5 Flash as the model.
# NOTE(review): "request_delay" looks like a wait in seconds and "documents" the number
# of rows evaluated (the recorded progress bar counts 0/3) — confirm against the ARES docs.
ues_idp_config = {
    "in_domain_prompts_dataset": "ARES_files/nq_few_shot_prompt_for_judge_scoring.tsv",
    "unlabeled_evaluation_set": "ARES_files/nq_unlabeled_output.tsv", 
    "model_choice" : "models/gemini-1.5-flash",
    "request_delay" : 60,
    "documents" : 3
} 

ares = ARES(ues_idp=ues_idp_config)

In [15]:
# Run the ues_idp evaluation and print what it returns
# (a dict of context relevance / answer faithfulness / answer relevance scores in the recorded run).
results = ares.ues_idp()
print(results)

Evaluating large subset with models/gemini-1.5-flash:   0%|          | 0/3 [00:00<?, ?it/s]

configured gemini for context relevance
Testing candidates
test_response
[[Yes]]

configured gemini for answer relevance
Testing response.text
[[Yes]]

configured gemini for answer faithfulness
Testing response.text
[[Yes]]

configured gemini for context relevance
Testing candidates
test_response
[[Yes]]

configured gemini for answer relevance
Testing response.text
[[Yes]]

configured gemini for answer faithfulness
Testing response.text
[[Yes]]

configured gemini for context relevance
Testing candidates
Attempt 1 failed with error: list index out of range
Testing candidates
Attempt 2 failed with error: list index out of range
Testing candidates
Attempt 3 failed with error: list index out of range
Testing candidates
Attempt 4 failed with error: list index out of range
Testing candidates
All attempts failed. Last error was: list index out of range
Number of times did not extract Yes or No: 0
{'Context Relevance Scores': 0.667, 'Answer Faithfulness Scores': 0.667, 'Answer Relevance Scores

In [18]:
# Display the scores dict from the ues_idp evaluation.
results # the exact same scores came back on a second run

{'Context Relevance Scores': 0.667,
 'Answer Faithfulness Scores': 0.667,
 'Answer Relevance Scores': 0.667}

In [14]:
# Configuration for ARES synthetic query generation with Gemini 1.5 Flash.
# Here a "document" is one line of a document_filepaths file; it is unclear whether
# this setup (1 large document per line) will work.
synth_config = { 
    "document_filepaths": ["Speeches/titleedits/Speeches_Docs_ARES.tsv"], # requires a tsv file
    "few_shot_prompt_filename": "datasets/manual_dataset_complete_ares_synthetic.tsv",
    "synthetic_queries_filenames": ["results/synthetic_results.tsv"],
    "model_choice": "models/gemini-1.5-flash", # ex: "google/flan-t5-xxl"
    "documents_sampled": 4, # was 10000
    "api_model": True
}

ares_synth = ARES(synthetic_query_generator=synth_config)

In [15]:
# Generate the synthetic queries and answers configured above.
# Status: stopped here — still needs debugging/editing to work with Gemini; next is the embeddings error below.
# In the recorded run this call returned None (the final "None" in the output),
# so `results` no longer holds the earlier evaluation scores after this cell.
results = ares_synth.generate_synthetic_data()
print(results)

Saving synthetic queries to:  ['results/synthetic_results.tsv']

| Starting Synthetic Query Generation |

Generating positive queries for the first 2 documents...


Generating positive synthetic queries for documents 0 to 2...:   0%|                              | 0/4 [00:00<?, ?it/s]

configured gemini for synthetic query generation


Generating positive synthetic queries for documents 0 to 2...:  25%|█████▌                | 1/4 [00:06<00:18,  6.09s/it]

configured gemini for synthetic query generation
Error generating synthetic queries: 429 Resource has been exhausted (e.g. check quota).


Generating positive synthetic queries for documents 0 to 2...:  50%|███████████           | 2/4 [01:12<01:22, 41.30s/it]

configured gemini for synthetic query generation
Error generating synthetic queries: 429 Resource has been exhausted (e.g. check quota).


Generating positive synthetic queries for documents 0 to 2...:  75%|████████████████▌     | 3/4 [02:17<00:52, 52.42s/it]

configured gemini for synthetic query generation
Error generating synthetic queries: 429 Resource has been exhausted (e.g. check quota).


Generating positive synthetic queries for documents 0 to 2...: 100%|██████████████████████| 4/4 [03:23<00:00, 50.93s/it]


Total queries generated before filtering: 48
Total queries after length filtering: 48
Total queries after deduplication: 30


Generating embeddings...:   0%|                                                                   | 0/2 [00:00<?, ?it/s]

using Gemini text embeddings


Generating embeddings...: 100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  7.45it/s]

using Gemini text embeddings


Generating embeddings...: 100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.97it/s]


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|                                                                                            | 0/30 [00:00<?, ?it/s]

using Gemini text embeddings


  3%|██▊                                                                                 | 1/30 [00:00<00:07,  4.07it/s]

using Gemini text embeddings


  7%|█████▌                                                                              | 2/30 [00:00<00:06,  4.24it/s]

using Gemini text embeddings


 10%|████████▍                                                                           | 3/30 [00:00<00:07,  3.58it/s]

using Gemini text embeddings


 13%|███████████▏                                                                        | 4/30 [00:01<00:06,  3.75it/s]

using Gemini text embeddings


 17%|██████████████                                                                      | 5/30 [00:01<00:06,  3.87it/s]

using Gemini text embeddings


 20%|████████████████▊                                                                   | 6/30 [00:01<00:06,  3.91it/s]

using Gemini text embeddings


 23%|███████████████████▌                                                                | 7/30 [00:01<00:05,  4.06it/s]

using Gemini text embeddings


 27%|██████████████████████▍                                                             | 8/30 [00:02<00:05,  4.14it/s]

using Gemini text embeddings


 30%|█████████████████████████▏                                                          | 9/30 [00:02<00:04,  4.24it/s]

using Gemini text embeddings


 33%|███████████████████████████▋                                                       | 10/30 [00:02<00:04,  4.17it/s]

using Gemini text embeddings


 37%|██████████████████████████████▍                                                    | 11/30 [00:02<00:04,  4.16it/s]

using Gemini text embeddings


 40%|█████████████████████████████████▏                                                 | 12/30 [00:02<00:04,  4.18it/s]

using Gemini text embeddings


 43%|███████████████████████████████████▉                                               | 13/30 [00:03<00:04,  4.12it/s]

using Gemini text embeddings


 47%|██████████████████████████████████████▋                                            | 14/30 [00:03<00:03,  4.11it/s]

using Gemini text embeddings


 50%|█████████████████████████████████████████▌                                         | 15/30 [00:03<00:03,  3.99it/s]

using Gemini text embeddings


 53%|████████████████████████████████████████████▎                                      | 16/30 [00:03<00:03,  4.01it/s]

using Gemini text embeddings


 57%|███████████████████████████████████████████████                                    | 17/30 [00:04<00:03,  4.11it/s]

using Gemini text embeddings


 60%|█████████████████████████████████████████████████▊                                 | 18/30 [00:04<00:02,  4.17it/s]

using Gemini text embeddings


 63%|████████████████████████████████████████████████████▌                              | 19/30 [00:04<00:02,  4.15it/s]

using Gemini text embeddings


 67%|███████████████████████████████████████████████████████▎                           | 20/30 [00:04<00:02,  4.22it/s]

using Gemini text embeddings


 70%|██████████████████████████████████████████████████████████                         | 21/30 [00:05<00:02,  3.77it/s]

using Gemini text embeddings


 73%|████████████████████████████████████████████████████████████▊                      | 22/30 [00:05<00:02,  3.91it/s]

using Gemini text embeddings


 77%|███████████████████████████████████████████████████████████████▋                   | 23/30 [00:05<00:01,  3.66it/s]

using Gemini text embeddings


 80%|██████████████████████████████████████████████████████████████████▍                | 24/30 [00:06<00:01,  3.85it/s]

using Gemini text embeddings


 83%|█████████████████████████████████████████████████████████████████████▏             | 25/30 [00:06<00:01,  3.99it/s]

using Gemini text embeddings


 87%|███████████████████████████████████████████████████████████████████████▉           | 26/30 [00:06<00:01,  3.95it/s]

using Gemini text embeddings


 90%|██████████████████████████████████████████████████████████████████████████▋        | 27/30 [00:06<00:00,  3.71it/s]

using Gemini text embeddings


 93%|█████████████████████████████████████████████████████████████████████████████▍     | 28/30 [00:07<00:00,  3.72it/s]

using Gemini text embeddings


 97%|████████████████████████████████████████████████████████████████████████████████▏  | 29/30 [00:07<00:00,  3.42it/s]

using Gemini text embeddings


100%|███████████████████████████████████████████████████████████████████████████████████| 30/30 [00:07<00:00,  3.88it/s]


Total queries after filtering: 30
Generating negative queries for the remaining 2 documents...
Saved synthetic queries to: results/synthetic_results.tsv

| Synthetic query generation completed. |

Total queries saved: 6 (Positive: 2, Duplicate: 2, Negative: 2)

| Beginning answer generation! |

Generating answers for 2 queries...


Generating answers... (models/gemini-1.5-flash):   0%|                                            | 0/2 [00:00<?, ?it/s]

configured gemini for synthetic answer generation


Generating answers... (models/gemini-1.5-flash): 100%|████████████████████████████████████| 2/2 [00:01<00:00,  1.69it/s]

configured gemini for synthetic answer generation


Generating answers... (models/gemini-1.5-flash): 100%|████████████████████████████████████| 2/2 [00:02<00:00,  1.27s/it]

Generated answers for 2 queries.
Saved positive answers to: results/synthetic_results.tsv
Generating negative answers for the second chunk of queries...
Saved answers to: results/synthetic_results.tsv
Completed synthetic generation!
Saved synthetic queries file to: results/synthetic_results.tsv

| Answer generation and processing completed. |

None





In [1]:
# TODO: try the ARES ues_idp config with my own dataset / RAG pipeline
# TODO: try ARES with the test sets that ship with ARES

In [17]:
# Same ues_idp configuration as the earlier ARES cell, re-created before re-running the evaluation.
# NOTE(review): duplicate of the earlier config cell — consider reusing that one instead.
ues_idp_config = {
    "in_domain_prompts_dataset": "ARES_files/nq_few_shot_prompt_for_judge_scoring.tsv",
    "unlabeled_evaluation_set": "ARES_files/nq_unlabeled_output.tsv", 
    "model_choice" : "models/gemini-1.5-flash",
    "request_delay" : 60,
    "documents" : 3
} 

ares = ARES(ues_idp=ues_idp_config)

In [None]:
# Re-run the ues_idp evaluation and print the result (this cell has no recorded output).
results = ares.ues_idp()
print(results)

In [15]:
# Configuration for ARES's ppi evaluation (run through evaluate_RAG), with Gemini 1.5 Flash
# as the LLM judge and only the Context_Relevance_Label evaluated.
# The "checkpoints" entry is left commented out; the recorded run reports "Checkpoints: False".
ppi_config = { 
    "evaluation_datasets": ['ARES_files/nq_unlabeled_output.tsv'], 
    "few_shot_examples_filepath": "ARES_files/nq_few_shot_prompt_for_judge_scoring.tsv",
    "llm_judge": "models/gemini-1.5-flash",
    "labels": ["Context_Relevance_Label"], 
    "gold_label_paths": ["ARES_files/nq_labeled_output.tsv"]
    #"checkpoints": ["None"] #["/home/amber/ARES/checkpoints/ares_context_relevance_general_checkpoint_V1.1.pt"]
}

ares_ppi = ARES(ppi=ppi_config)

In [16]:
# Run the ppi evaluation and print the result.
# In the recorded run the progress bar covers 4421 rows and the judge calls repeatedly hit
# "429 Resource has been exhausted" errors, each followed by a pause of roughly a minute.
results = ares_ppi.evaluate_RAG()
print(results)

--------------------------------------------------------
Evaluation Sets: ['ARES_files/nq_unlabeled_output.tsv']
Checkpoints: False
Labels: ['Context_Relevance_Label']
--------------------------------------------------------
configured gemini api key for LLMJudge
Performing Model scoring!


  0%|                                                                                          | 0/4421 [00:00<?, ?it/s]

configured gemini for context relevance


  0%|                                                                                  | 2/4421 [00:00<20:22,  3.62it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  0%|                                                                                  | 3/4421 [00:00<24:23,  3.02it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance
Attempt 1 failed with error: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.
Attempt 2 failed with error: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.
Attempt 3 failed with error: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.
Attempt 4 failed with error: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check t

  0%|                                                                              | 4/4421 [04:03<111:13:31, 90.65s/it]

All attempts failed. Last error was: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.
configured gemini for context relevance


  0%|                                                                               | 5/4421 [04:03<72:47:48, 59.35s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  0%|                                                                               | 6/4421 [04:04<48:52:38, 39.85s/it]

Testing response.text
[[No]] 

Testing candidates
['[[No]] \n']
configured gemini for context relevance


  0%|▏                                                                              | 7/4421 [04:04<33:18:37, 27.17s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  0%|▏                                                                              | 8/4421 [04:05<23:00:58, 18.78s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  0%|▏                                                                              | 9/4421 [04:05<16:01:42, 13.08s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  0%|▏                                                                             | 10/4421 [04:06<11:15:28,  9.19s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  0%|▏                                                                              | 11/4421 [04:06<7:57:19,  6.49s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  0%|▏                                                                              | 12/4421 [04:07<5:42:49,  4.67s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  0%|▏                                                                              | 13/4421 [04:07<4:10:27,  3.41s/it]

Testing response.text
[[Yes]] 

Testing candidates
['[[Yes]] \n']
configured gemini for context relevance


  0%|▎                                                                              | 14/4421 [04:08<3:06:12,  2.54s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  0%|▎                                                                              | 15/4421 [04:08<2:19:10,  1.90s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  0%|▎                                                                              | 16/4421 [04:08<1:46:19,  1.45s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  0%|▎                                                                              | 17/4421 [04:09<1:23:22,  1.14s/it]

Testing response.text
[[Yes]] 

Testing candidates
['[[Yes]] \n']
configured gemini for context relevance


  0%|▎                                                                              | 18/4421 [04:09<1:07:19,  1.09it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  0%|▎                                                                              | 19/4421 [04:10<1:00:40,  1.21it/s]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  0%|▎                                                                                | 20/4421 [04:10<51:26,  1.43it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  0%|▍                                                                                | 21/4421 [04:11<45:01,  1.63it/s]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance
Attempt 1 failed with error: 429 Resource has been exhausted (e.g. check quota).


  0%|▍                                                                             | 22/4421 [05:11<22:43:04, 18.59s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|▍                                                                             | 23/4421 [05:12<16:05:05, 13.17s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|▍                                                                             | 24/4421 [05:12<11:26:38,  9.37s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▍                                                                              | 25/4421 [05:13<8:09:32,  6.68s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▍                                                                              | 26/4421 [05:13<5:51:34,  4.80s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|▍                                                                              | 27/4421 [05:14<4:17:17,  3.51s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▌                                                                              | 28/4421 [05:14<3:11:29,  2.62s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▌                                                                              | 29/4421 [05:15<2:22:49,  1.95s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▌                                                                              | 30/4421 [05:15<1:48:55,  1.49s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|▌                                                                              | 31/4421 [05:15<1:25:15,  1.17s/it]

Testing response.text
[[No]] 

Testing candidates
['[[No]] \n']
configured gemini for context relevance


  1%|▌                                                                              | 32/4421 [05:16<1:10:52,  1.03it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▌                                                                              | 33/4421 [05:16<1:00:50,  1.20it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▌                                                                                | 34/4421 [05:17<51:35,  1.42it/s]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|▋                                                                                | 35/4421 [05:17<45:03,  1.62it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▋                                                                                | 36/4421 [05:18<40:30,  1.80it/s]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance
Attempt 1 failed with error: 429 Resource has been exhausted (e.g. check quota).


  1%|▋                                                                             | 37/4421 [06:18<22:37:12, 18.57s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▋                                                                             | 38/4421 [06:19<16:01:02, 13.16s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▋                                                                             | 39/4421 [06:19<11:21:33,  9.33s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▋                                                                              | 40/4421 [06:20<8:08:12,  6.69s/it]

Testing response.text
[[No]] 

Testing candidates
['[[No]] \n']
configured gemini for context relevance


  1%|▋                                                                              | 41/4421 [06:20<5:50:36,  4.80s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▊                                                                              | 42/4421 [06:20<4:14:21,  3.49s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▊                                                                              | 43/4421 [06:21<3:06:47,  2.56s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|▊                                                                              | 44/4421 [06:21<2:19:52,  1.92s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|▊                                                                              | 45/4421 [06:22<1:46:51,  1.47s/it]

Testing response.text
[[Yes]] 

Testing candidates
['[[Yes]] \n']
configured gemini for context relevance


  1%|▊                                                                              | 46/4421 [06:22<1:26:00,  1.18s/it]

Testing response.text
[[No]] 

Testing candidates
['[[No]] \n']
configured gemini for context relevance


  1%|▊                                                                              | 47/4421 [06:23<1:09:06,  1.05it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▉                                                                                | 48/4421 [06:23<57:19,  1.27it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▉                                                                                | 49/4421 [06:23<49:05,  1.48it/s]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|▉                                                                                | 50/4421 [06:24<43:17,  1.68it/s]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|▉                                                                                | 51/4421 [06:24<37:11,  1.96it/s]

Testing response.text
[[No]] 

Testing candidates
['[[No]] \n']
configured gemini for context relevance


  1%|▉                                                                                | 52/4421 [06:25<37:01,  1.97it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance
Attempt 1 failed with error: 429 Resource has been exhausted (e.g. check quota).


  1%|▉                                                                             | 53/4421 [07:25<22:27:41, 18.51s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|▉                                                                             | 54/4421 [07:26<15:52:08, 13.08s/it]

Testing response.text
[[Yes]] 

Testing candidates
['[[Yes]] \n']
configured gemini for context relevance


  1%|▉                                                                             | 55/4421 [07:26<11:15:15,  9.28s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|█                                                                              | 56/4421 [07:26<8:01:32,  6.62s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|█                                                                              | 57/4421 [07:27<5:48:08,  4.79s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|█                                                                              | 58/4421 [07:27<4:12:32,  3.47s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|█                                                                              | 59/4421 [07:28<3:05:45,  2.56s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|█                                                                              | 60/4421 [07:28<2:18:53,  1.91s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|█                                                                              | 61/4421 [07:29<1:46:08,  1.46s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|█                                                                              | 62/4421 [07:29<1:23:12,  1.15s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|█▏                                                                             | 63/4421 [07:29<1:07:08,  1.08it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  1%|█▏                                                                               | 64/4421 [07:30<55:54,  1.30it/s]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|█▏                                                                               | 65/4421 [07:30<48:03,  1.51it/s]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  1%|█▏                                                                               | 66/4421 [07:31<42:34,  1.70it/s]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  2%|█▏                                                                               | 67/4421 [07:31<38:40,  1.88it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  2%|█▏                                                                               | 68/4421 [07:31<35:59,  2.02it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance
Attempt 1 failed with error: 429 Resource has been exhausted (e.g. check quota).


  2%|█▏                                                                            | 69/4421 [08:32<22:24:20, 18.53s/it]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  2%|█▏                                                                            | 70/4421 [08:32<15:49:35, 13.09s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  2%|█▎                                                                            | 71/4421 [08:33<11:13:37,  9.29s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  2%|█▎                                                                             | 72/4421 [08:33<8:00:19,  6.63s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  2%|█▎                                                                             | 73/4421 [08:34<5:45:08,  4.76s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  2%|█▎                                                                             | 74/4421 [08:34<4:10:21,  3.46s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  2%|█▎                                                                             | 75/4421 [08:35<3:04:10,  2.54s/it]

Testing response.text
[[No]] 

Testing candidates
['[[No]] \n']
configured gemini for context relevance


  2%|█▎                                                                             | 76/4421 [08:35<2:17:43,  1.90s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  2%|█▍                                                                             | 77/4421 [08:35<1:45:17,  1.45s/it]

Testing response.text
[[No]] 

Testing candidates
['[[No]] \n']
configured gemini for context relevance


  2%|█▍                                                                             | 78/4421 [08:36<1:22:35,  1.14s/it]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  2%|█▍                                                                             | 79/4421 [08:36<1:06:41,  1.09it/s]

Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for context relevance


  2%|█▍                                                                             | 80/4421 [08:37<1:00:00,  1.21it/s]

Testing response.text
[[Yes]] 

Testing candidates
['[[Yes]] \n']
configured gemini for context relevance


  2%|█▍                                                                               | 81/4421 [08:37<50:53,  1.42it/s]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  2%|█▌                                                                               | 82/4421 [08:38<48:57,  1.48it/s]

Testing response.text
[[No]]

Testing candidates
['[[No]]\n']
configured gemini for context relevance


  2%|█▌                                                                               | 83/4421 [08:38<47:32,  1.52it/s]

Testing response.text
[[Yes]] 

Testing candidates
['[[Yes]] \n']
configured gemini for context relevance
Attempt 1 failed with error: 429 Resource has been exhausted (e.g. check quota).


  2%|█▍                                                                             | 83/4421 [09:15<8:03:28,  6.69s/it]


KeyboardInterrupt: 

In [16]:
%tb

SystemExit: TOGETHER_API_KEY environment variable not set. Please set the variable.