In [1]:
# RAG with Gemini Flash 1.5 LLM and ARES evaluation
# Google Gemini: https://ai.google.dev/gemini-api/docs/models/gemini
# ARES: https://github.com/stanford-futuredata/ARES

In [2]:
# Establish RAG pipeline with Gemini

In [1]:
import os
from IPython.display import display, Markdown
import pandas as pd

import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from llama_index.core import Document, VectorStoreIndex, Settings, StorageContext, load_index_from_storage
from llama_index.vector_stores.faiss import FaissVectorStore
#from llama_index.llms.gemini import Gemini
#from llama_index.embeddings.gemini import GeminiEmbedding

import faiss

In [2]:
def to_markdown(text):
  """Render ``text`` as a Markdown block quote for notebook display.

  Bullet characters ('•') are rewritten as indented Markdown list markers,
  and every line -- including blank ones -- is prefixed with '> '.

  Args:
    text: Plain-text string to convert.

  Returns:
    The result of calling ``Markdown`` (imported from ``IPython.display``)
    on the quoted text.
  """
  # Imported here because the notebook's import cell never brings in
  # `textwrap`; without it the first call fails with NameError.
  import textwrap

  text = text.replace('•', '  *')
  # The always-true predicate makes `indent` prefix empty lines as well.
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [3]:
# Configure the google.generativeai client with the API key taken from the
# GOOGLE_API_KEY environment variable (the key is never hard-coded here).
# The `os.environ[...]` lookup raises KeyError if the variable is not set.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [4]:
# Chat model used for generation; it is later assigned to `Settings.llm`.
# NOTE(review): the original note read "old function" -- presumably this
# wrapper is meant to be replaced by the commented-out llama_index `Gemini`
# class; confirm.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

In [5]:
# Create the document database.
# Uses 4 State of the Union speeches; all text comes from the whitehouse.gov
# briefing-room speeches posted online, each including a title with the date
# of the speech.
# Example from 2024:
# https://www.whitehouse.gov/briefing-room/speeches-remarks/2024/03/07/remarks-of-president-joe-biden-state-of-the-union-address-as-prepared-for-delivery-2/
newfiles = [
    "./Speeches/titleedits/state_of_the_union_042921.txt",
    "./Speeches/titleedits/state_of_the_union_030122.txt",
    "./Speeches/titleedits/state_of_the_union_020723.txt",
    "./Speeches/titleedits/state_of_the_union_030724.txt",
]

# `sotu` collects every non-empty line of every speech, in file order.
sotu = []
for speech_path in newfiles:
    # Explicit encoding: without it `open` uses the platform default, so the
    # same files could decode differently (or fail) on another machine.
    # NOTE(review): assumes the speech files are saved as UTF-8 -- confirm.
    with open(speech_path, encoding="utf-8") as speech_file:
        stripped_lines = (raw_line.rstrip() for raw_line in speech_file)
        # Lines that are empty after stripping trailing whitespace are dropped.
        sotu.extend(text for text in stripped_lines if text)

In [6]:
# Wrap each retained speech line in its own Document.
documents = [Document(text=speech_line) for speech_line in sotu]

In [7]:
# Show the last loaded Document as an example; each Document holds a single
# line of speech text.
documents[-1]

Document(id_='e982b1b7-8145-4dd8-8be5-240f36964602', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='May God protect our troops.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')

In [8]:
# Set up the faiss index.
# `d` is the dimensionality of the vectors the index will store.
# NOTE(review): 768 presumably matches the output size of the Google
# embedding model configured later in this notebook -- confirm.
d = 768
faiss_index = faiss.IndexFlatL2(d)
# Print the index's `is_trained` flag as a quick sanity check.
print(faiss_index.is_trained)

True


In [9]:
#llm = Gemini(model="models/gemini-1.5-flash", api_key=os.environ["GOOGLE_API_KEY"])

In [10]:
# Create the embedding model, then register it and the LLM as the llama_index
# global defaults via `Settings`.
EMBEDDING_MODEL_NAME = "models/text-embedding-004"

doc_embeddings = GoogleGenerativeAIEmbeddings(
    model=EMBEDDING_MODEL_NAME,  # optional: task_type="RETRIEVAL_DOCUMENT"
)
#doc_embeddings = GeminiEmbedding(model="models/text-embedding-004")
Settings.embed_model = doc_embeddings
Settings.llm = llm

In [11]:
# test llm
#response = llm.complete("Write the text for an invitation for a two year old's penguin themed birthday party.")
#print(response)

In [12]:
## Uncomment this cell when the documents need to be re-embedded and re-indexed.
## Otherwise, the persisted index is loaded from disk further below.
#vector_store = FaissVectorStore(faiss_index=faiss_index)
#storage_context = StorageContext.from_defaults(vector_store=vector_store)
#index = VectorStoreIndex.from_documents(
#    documents, storage_context=storage_context, show_progress=True
#)

In [13]:
# save index to disk
#index.storage_context.persist()
#index

In [11]:
# Reload the previously persisted index from disk instead of re-embedding.
PERSIST_DIR = "./storage"
# Index id 'cef7ae30-ff1e-404a-bce6-85d59ca4b376' uses the speeches with a
# title that includes the date each one was given.
SOTU_INDEX_ID = 'cef7ae30-ff1e-404a-bce6-85d59ca4b376'

vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    persist_dir=PERSIST_DIR,
)
index = load_index_from_storage(
    storage_context=storage_context,
    index_id=SOTU_INDEX_ID,
)

In [12]:
# Build the query and chat engines; both are given the same top-k setting.
RETRIEVAL_TOP_K = 10

query_engine = index.as_query_engine(similarity_top_k=RETRIEVAL_TOP_K)
chat_engine = index.as_chat_engine(
    chat_mode='context',
    similarity_top_k=RETRIEVAL_TOP_K,
)

In [13]:
# Example query and response.
# These lines must execute: the following cell prints `response`, so leaving
# them commented out makes a fresh "Restart & Run All" fail with NameError.
query = "What does the President say about his administration's first 100 days and covid-19?"
response = query_engine.query(query)

In [15]:
# Print the `response` attribute of the example query's result object.
# `response` must already have been assigned by the example query.
print(response.response)

The President is proud of his administration's progress in fighting the pandemic, citing the successful rollout of COVID-19 vaccines. He highlights that they have surpassed their goal of administering 100 million vaccine shots in 100 days, reaching over 220 million shots. 



In [21]:
# Let's try to get ARES to work with Gemini and our local RAG setup

In [13]:
from ares import ARES

vLLM not imported.


In [13]:
# Settings passed to ARES through its `ues_idp` argument.
# NOTE(review): `request_delay` is presumably in seconds and `documents` the
# number of rows evaluated (the run's progress bar counts to 3) -- confirm
# against the ARES documentation.
ues_idp_config = dict(
    in_domain_prompts_dataset="ARES_files/nq_few_shot_prompt_for_judge_scoring.tsv",
    unlabeled_evaluation_set="ARES_files/nq_unlabeled_output.tsv",
    model_choice="models/gemini-1.5-flash",
    request_delay=60,
    documents=3,
)

ares = ARES(ues_idp=ues_idp_config)

In [18]:
# Run the ARES `ues_idp` evaluation with the configuration supplied when
# `ares` was constructed, keep the returned scores in `results`, and print them.
results = ares.ues_idp()
print(results)

Evaluating large subset with models/gemini-1.5-flash:   0%|          | 0/3 [00:00<?, ?it/s]

configured gemini for context relevance
Testing response.text
[[Yes]] 

Testing candidates
['[[Yes]] \n']
configured gemini for answer relevance
Testing response.text
[[Yes]]

configured gemini for answer faithfulness
Testing response.text
[[Yes]]

configured gemini for context relevance
Testing response.text
[[Yes]]

Testing candidates
['[[Yes]]\n']
configured gemini for answer relevance
Testing response.text
[[Yes]]

configured gemini for answer faithfulness
Testing response.text
[[Yes]]

configured gemini for context relevance
Attempt 1 failed with error: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.
Attempt 2 failed with error: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the re

In [18]:
# Display the stored scores; a second run reproduced exactly the same values.
results

{'Context Relevance Scores': 0.667,
 'Answer Faithfulness Scores': 0.667,
 'Answer Relevance Scores': 0.667}

In [14]:
# Work in progress (stopped here): need to generate the synthetic
# question/answer/document file.
#
# A "document" refers to a line in a document_filepaths file; it is not yet
# clear whether this setup of 1 large document per line will work.
SYNTHETIC_RESULTS_PATH = "results/synthetic_results.tsv"

synth_config = {
    "document_filepaths": ["Speeches/titleedits/Speeches_Docs6.tsv"],  # requires tsv file...
    "few_shot_prompt_filename": "datasets/manual_dataset_complete_ares_synthetic.tsv",
    "synthetic_queries_filenames": [SYNTHETIC_RESULTS_PATH],
    "model_choice": "models/gemini-1.5-flash",  # ex: "google/flan-t5-xxl"
    "documents_sampled": 4,  # was 10000
    "api_model": True,
}

ares_synth = ARES(synthetic_query_generator=synth_config)

In [15]:
# Generate the synthetic queries and answers; the run logs that its output is
# saved to results/synthetic_results.tsv.
# The call returns None (a previous run printed "None"), so its value is
# neither printed nor assigned: binding it to `results` would overwrite the
# ARES evaluation scores already stored under that name.
# TODO: stopped here -- still needs debugging/editing to work with Gemini
# (429 "Resource has been exhausted" quota errors were observed during
# generation).
ares_synth.generate_synthetic_data()

Saving synthetic queries to:  ['results/synthetic_results.tsv']

| Starting Synthetic Query Generation |

Generating positive queries for the first 2 documents...


Generating positive synthetic queries for documents 0 to 2...:   0%|                              | 0/4 [00:00<?, ?it/s]

configured gemini for synthetic query generation


Generating positive synthetic queries for documents 0 to 2...:  25%|█████▌                | 1/4 [00:06<00:18,  6.09s/it]

configured gemini for synthetic query generation
Error generating synthetic queries: 429 Resource has been exhausted (e.g. check quota).


Generating positive synthetic queries for documents 0 to 2...:  50%|███████████           | 2/4 [01:12<01:22, 41.30s/it]

configured gemini for synthetic query generation
Error generating synthetic queries: 429 Resource has been exhausted (e.g. check quota).


Generating positive synthetic queries for documents 0 to 2...:  75%|████████████████▌     | 3/4 [02:17<00:52, 52.42s/it]

configured gemini for synthetic query generation
Error generating synthetic queries: 429 Resource has been exhausted (e.g. check quota).


Generating positive synthetic queries for documents 0 to 2...: 100%|██████████████████████| 4/4 [03:23<00:00, 50.93s/it]


Total queries generated before filtering: 48
Total queries after length filtering: 48
Total queries after deduplication: 30


Generating embeddings...:   0%|                                                                   | 0/2 [00:00<?, ?it/s]

using Gemini text embeddings


Generating embeddings...: 100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  7.45it/s]

using Gemini text embeddings


Generating embeddings...: 100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.97it/s]


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|                                                                                            | 0/30 [00:00<?, ?it/s]

using Gemini text embeddings


  3%|██▊                                                                                 | 1/30 [00:00<00:07,  4.07it/s]

using Gemini text embeddings


  7%|█████▌                                                                              | 2/30 [00:00<00:06,  4.24it/s]

using Gemini text embeddings


 10%|████████▍                                                                           | 3/30 [00:00<00:07,  3.58it/s]

using Gemini text embeddings


 13%|███████████▏                                                                        | 4/30 [00:01<00:06,  3.75it/s]

using Gemini text embeddings


 17%|██████████████                                                                      | 5/30 [00:01<00:06,  3.87it/s]

using Gemini text embeddings


 20%|████████████████▊                                                                   | 6/30 [00:01<00:06,  3.91it/s]

using Gemini text embeddings


 23%|███████████████████▌                                                                | 7/30 [00:01<00:05,  4.06it/s]

using Gemini text embeddings


 27%|██████████████████████▍                                                             | 8/30 [00:02<00:05,  4.14it/s]

using Gemini text embeddings


 30%|█████████████████████████▏                                                          | 9/30 [00:02<00:04,  4.24it/s]

using Gemini text embeddings


 33%|███████████████████████████▋                                                       | 10/30 [00:02<00:04,  4.17it/s]

using Gemini text embeddings


 37%|██████████████████████████████▍                                                    | 11/30 [00:02<00:04,  4.16it/s]

using Gemini text embeddings


 40%|█████████████████████████████████▏                                                 | 12/30 [00:02<00:04,  4.18it/s]

using Gemini text embeddings


 43%|███████████████████████████████████▉                                               | 13/30 [00:03<00:04,  4.12it/s]

using Gemini text embeddings


 47%|██████████████████████████████████████▋                                            | 14/30 [00:03<00:03,  4.11it/s]

using Gemini text embeddings


 50%|█████████████████████████████████████████▌                                         | 15/30 [00:03<00:03,  3.99it/s]

using Gemini text embeddings


 53%|████████████████████████████████████████████▎                                      | 16/30 [00:03<00:03,  4.01it/s]

using Gemini text embeddings


 57%|███████████████████████████████████████████████                                    | 17/30 [00:04<00:03,  4.11it/s]

using Gemini text embeddings


 60%|█████████████████████████████████████████████████▊                                 | 18/30 [00:04<00:02,  4.17it/s]

using Gemini text embeddings


 63%|████████████████████████████████████████████████████▌                              | 19/30 [00:04<00:02,  4.15it/s]

using Gemini text embeddings


 67%|███████████████████████████████████████████████████████▎                           | 20/30 [00:04<00:02,  4.22it/s]

using Gemini text embeddings


 70%|██████████████████████████████████████████████████████████                         | 21/30 [00:05<00:02,  3.77it/s]

using Gemini text embeddings


 73%|████████████████████████████████████████████████████████████▊                      | 22/30 [00:05<00:02,  3.91it/s]

using Gemini text embeddings


 77%|███████████████████████████████████████████████████████████████▋                   | 23/30 [00:05<00:01,  3.66it/s]

using Gemini text embeddings


 80%|██████████████████████████████████████████████████████████████████▍                | 24/30 [00:06<00:01,  3.85it/s]

using Gemini text embeddings


 83%|█████████████████████████████████████████████████████████████████████▏             | 25/30 [00:06<00:01,  3.99it/s]

using Gemini text embeddings


 87%|███████████████████████████████████████████████████████████████████████▉           | 26/30 [00:06<00:01,  3.95it/s]

using Gemini text embeddings


 90%|██████████████████████████████████████████████████████████████████████████▋        | 27/30 [00:06<00:00,  3.71it/s]

using Gemini text embeddings


 93%|█████████████████████████████████████████████████████████████████████████████▍     | 28/30 [00:07<00:00,  3.72it/s]

using Gemini text embeddings


 97%|████████████████████████████████████████████████████████████████████████████████▏  | 29/30 [00:07<00:00,  3.42it/s]

using Gemini text embeddings


100%|███████████████████████████████████████████████████████████████████████████████████| 30/30 [00:07<00:00,  3.88it/s]


Total queries after filtering: 30
Generating negative queries for the remaining 2 documents...
Saved synthetic queries to: results/synthetic_results.tsv

| Synthetic query generation completed. |

Total queries saved: 6 (Positive: 2, Duplicate: 2, Negative: 2)

| Beginning answer generation! |

Generating answers for 2 queries...


Generating answers... (models/gemini-1.5-flash):   0%|                                            | 0/2 [00:00<?, ?it/s]

configured gemini for synthetic answer generation


Generating answers... (models/gemini-1.5-flash): 100%|████████████████████████████████████| 2/2 [00:01<00:00,  1.69it/s]

configured gemini for synthetic answer generation


Generating answers... (models/gemini-1.5-flash): 100%|████████████████████████████████████| 2/2 [00:02<00:00,  1.27s/it]

Generated answers for 2 queries.
Saved positive answers to: results/synthetic_results.tsv
Generating negative answers for the second chunk of queries...
Saved answers to: results/synthetic_results.tsv
Completed synthetic generation!
Saved synthetic queries file to: results/synthetic_results.tsv

| Answer generation and processing completed. |

None



