In [1]:
# RAG with Gemini Flash 1.5 LLM and ARES evaluation
# Google Gemini: https://ai.google.dev/gemini-api/docs/models/gemini
# ARES: https://github.com/stanford-futuredata/ARES

In [2]:
# Establish RAG pipeline with Gemini

In [3]:
import os
import textwrap
from IPython.display import display, Markdown
import pandas as pd

import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from llama_index.core import Document, VectorStoreIndex, Settings, StorageContext, load_index_from_storage
from llama_index.vector_stores.faiss import FaissVectorStore
#from llama_index.llms.gemini import Gemini
#from llama_index.embeddings.gemini import GeminiEmbedding

import faiss

In [4]:
def to_markdown(text):
    """Render ``text`` as a Markdown blockquote for display in the notebook.

    Bullet characters ('•') are rewritten as indented Markdown list markers
    ('  *'), then every line is prefixed with '> '. The always-true predicate
    makes ``textwrap.indent`` prefix blank lines as well, so the quote is not
    broken up by empty lines.

    Relies on ``textwrap`` being imported in the notebook's import cell.

    Args:
        text: The plain-text string to format.

    Returns:
        An ``IPython.display.Markdown`` object wrapping the quoted text.
    """
    bulleted = text.replace('•', '  *')
    quoted = textwrap.indent(bulleted, '> ', predicate=lambda _: True)
    return Markdown(quoted)

In [5]:
# Configure the google.generativeai client with the API key read from the
# GOOGLE_API_KEY environment variable, so the key is never hard-coded in the
# notebook. os.environ[...] raises KeyError if the variable is not set.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [6]:
# Gemini 1.5 Flash chat model via the LangChain wrapper; assigned to Settings.llm further down.
# NOTE(review): the original trailing note here only said "old" — presumably marking this
# LangChain-based setup as the older approach compared with the commented-out
# llama_index Gemini class; confirm.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

In [7]:
# Create the document database.
# Source: 4 State of the Union speeches, all text taken from the whitehouse.gov
# briefing room speeches posted online; each file includes a title with the
# date of the speech.
# Example from 2024:
# https://www.whitehouse.gov/briefing-room/speeches-remarks/2024/03/07/remarks-of-president-joe-biden-state-of-the-union-address-as-prepared-for-delivery-2/
newfiles = [
    "./Speeches/titleedits/state_of_the_union_042921.txt",
    "./Speeches/titleedits/state_of_the_union_030122.txt",
    "./Speeches/titleedits/state_of_the_union_020723.txt",
    "./Speeches/titleedits/state_of_the_union_030724.txt",
]

# sotu collects every non-empty line of every speech, in file order, with
# trailing whitespace (including the newline) removed.
sotu = []
for speech_path in newfiles:
    with open(speech_path) as speech_file:
        stripped_lines = (raw_line.rstrip() for raw_line in speech_file)
        sotu.extend(text for text in stripped_lines if text != '')

In [8]:
# One llama_index Document per non-empty speech line collected in sotu;
# only the text is set, no metadata is attached.
documents = [Document(text=line) for line in sotu]

In [9]:
# Example of a loaded Document: show the last entry, i.e. the final non-empty
# line of the last speech file that was read.
documents[-1]

Document(id_='6b97bcac-79d6-4bf9-a0ed-1b9ec8a23f5f', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='May God protect our troops.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')

In [10]:
# Set up the FAISS index.
# d is the dimensionality of the vectors stored in the index. It has to match the
# size of the embedding vectors produced by the embedding model used in this
# notebook (the Google embedding model) — i.e. the model's output size, not its input.
# Note: faiss_index is only consumed by the (currently commented-out) re-embedding
# cell; the active path loads a persisted index from ./storage instead.
d = 768
faiss_index = faiss.IndexFlatL2(d)  # flat index compared with L2 distance
# Prints True in the saved output: the index reports itself as trained right after construction.
print(faiss_index.is_trained)

True


In [11]:
#llm = Gemini(model="models/gemini-1.5-flash", api_key=os.environ["GOOGLE_API_KEY"])

In [12]:
# Set up the embeddings and register both models on llama_index's Settings object.
# doc_embeddings is the LangChain Google embedding wrapper using text-embedding-004.
# Optional constructor argument: task_type="RETRIEVAL_DOCUMENT"
doc_embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
#doc_embeddings = GeminiEmbedding(model="models/text-embedding-004")
# Presumably these become the defaults used by the index and the query/chat
# engines created later in the notebook — confirm against the LlamaIndex Settings docs.
# `llm` must already be defined by the earlier ChatGoogleGenerativeAI cell.
Settings.embed_model = doc_embeddings
Settings.llm = llm

In [12]:
# test llm
#response = llm.complete("Write the text for an invitation for a two year old's penguin themed birthday party.")
#print(response)

## Waddle On Over! 🐧

You're invited to celebrate [Child's Name]'s 2nd Birthday!

Join us for a penguin-tastic party filled with fun, games, and treats!

**Date:** [Date of party]
**Time:** [Time of party]
**Location:** [Location of party]

**Dress Code:** Come dressed in your best penguin attire! 🐧

**RSVP:** Please let us know if you can join the fun by [RSVP date]

We can't wait to celebrate with you!

**[Contact information]** 

**P.S.**  Please note that this is a child-friendly event. 



In [13]:
## Uncomment this cell when the documents need to be re-embedded and re-indexed;
## otherwise the persisted index is loaded from disk further below.
#vector_store = FaissVectorStore(faiss_index=faiss_index)
#storage_context = StorageContext.from_defaults(vector_store=vector_store)
#index = VectorStoreIndex.from_documents(
#    documents, storage_context=storage_context, show_progress=True
#)

In [14]:
# save index to disk
#index.storage_context.persist()
#index

In [13]:
# Load the previously persisted index from disk instead of re-embedding the speeches.
PERSIST_DIR = "./storage"
# Index id 'cef7ae30-ff1e-404a-bce6-85d59ca4b376' is the one built from the speeches
# whose title includes the date the speech was given.
INDEX_ID = 'cef7ae30-ff1e-404a-bce6-85d59ca4b376'

# The FAISS vector store and the rest of the storage context are both read
# from the same persist directory.
vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR)
storage_context = StorageContext.from_defaults(
    persist_dir=PERSIST_DIR,
    vector_store=vector_store,
)
index = load_index_from_storage(
    storage_context=storage_context,
    index_id=INDEX_ID,
)

In [14]:
# Set up the two interfaces over the loaded index:
#   query_engine - used below for single questions via query_engine.query(...)
#   chat_engine  - chat interface created with chat_mode='context'
# Both are created with similarity_top_k=10 (presumably the number of most similar
# chunks retrieved per question — confirm against the LlamaIndex docs).
query_engine = index.as_query_engine(similarity_top_k=10)
chat_engine = index.as_chat_engine(similarity_top_k=10, chat_mode='context')

In [15]:
# Example query: send one question through the query engine and keep the
# returned response object; its answer text is printed in the next cell.
query = "What does the President say about his administration's first 100 days and covid-19?"
response = query_engine.query(query)

  warn_deprecated(


In [16]:
# Print only the generated answer text held in the response object's .response attribute.
print(response.response)

The President states that America has made great progress in fighting the pandemic, exceeding his goal of administering 100 million COVID-19 vaccine shots in 100 days by providing over 220 million shots.  He views this as one of the greatest logistical achievements in the country's history. 



In [21]:
# Let's try to get ARES to work with Gemini and our local RAG setup

In [15]:
from ares import ARES

vLLM not imported.


In [16]:
# Inputs for the ARES "ues_idp" configuration: a few-shot prompt file, the
# unlabeled evaluation set, and the Gemini model to evaluate with.
few_shot_prompts_path = "ARES_files/nq_few_shot_prompt_for_judge_scoring.tsv"
unlabeled_eval_path = "ARES_files/nq_unlabeled_output.tsv"
model_name = "models/gemini-1.5-flash"

ues_idp_config = {
    "in_domain_prompts_dataset": few_shot_prompts_path,
    "unlabeled_evaluation_set": unlabeled_eval_path,
    "model_choice": model_name,
    # NOTE(review): presumably a delay in seconds between API requests — confirm in the ARES docs.
    "request_delay": 60,
}

ares = ARES(ues_idp=ues_idp_config)

In [17]:
# Run the ARES "ues_idp" evaluation configured on the `ares` object and print whatever it returns.
# NOTE: in the saved output of this cell the run did not finish — the attempts shown
# failed with "Unable to determine the intended type of the `dict`" (the dictionary
# passed to Gemini had the single key 'Content') and the cell was stopped with a
# KeyboardInterrupt, so `results` was never printed.
results = ares.ues_idp()
print(results)

Evaluating large subset with models/gemini-1.5-flash:   0%|          | 0/6189 [00:00<?, ?it/s]

configured gemini for context relevance
Attempt 1 failed with error: "Unable to determine the intended type of the `dict`. For `Content`, a 'parts' key is expected. For `Part`, either an 'inline_data' or a 'text' key is expected. For `Blob`, both 'mime_type' and 'data' keys are expected. However, the provided dictionary has the following keys: ['Content']"
Attempt 2 failed with error: "Unable to determine the intended type of the `dict`. For `Content`, a 'parts' key is expected. For `Part`, either an 'inline_data' or a 'text' key is expected. For `Blob`, both 'mime_type' and 'data' keys are expected. However, the provided dictionary has the following keys: ['Content']"


KeyboardInterrupt: 