In [1]:
# Opt out of Ragas usage analytics by setting its do-not-track variable
# here, before `ragas` is imported in the next cell.
# Background: https://github.com/explodinggradients/ragas/issues/49
import os

os.environ.update({"RAGAS_DO_NOT_TRACK": "True"})

In [2]:
import logging
import sys
import google.generativeai as genai
import pathlib
import textwrap
import ragas
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from llama_index.core import Document, VectorStoreIndex, Settings, StorageContext, load_index_from_storage
#from llama_index import VectorStoreIndex, StorageContext
from llama_index.vector_stores.faiss import FaissVectorStore
import pandas as pd
import faiss

from IPython.display import display
from IPython.display import Markdown

def to_markdown(text):
  """Render `text` as a Markdown block quote.

  Bullet characters ('•') are rewritten as indented '*' list markers and
  every line -- blank ones included -- is prefixed with '> '.

  Returns an IPython `Markdown` display object wrapping the quoted text.
  """
  def _every_line(_line):
    # textwrap.indent skips whitespace-only lines by default; always
    # answering True keeps the quote marker on blank lines as well.
    return True

  bulleted = text.replace('•', '  *')
  quoted = textwrap.indent(bulleted, '> ', predicate=_every_line)
  return Markdown(quoted)

In [3]:
# Read the Gemini API key from the environment instead of hard-coding it;
# os.environ[...] raises KeyError if GOOGLE_API_KEY is not set.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [4]:
# Check that the opt-out set in the first cell is in effect (recorded output: True).
# NOTE(review): `_analytics` is a private ragas module and may change between releases.
ragas._analytics.do_not_track()

True

In [5]:
# Gemini chat model; it is registered as the default LLM via Settings.llm in a later cell.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
# Optional smoke test of the model:
#result = llm.invoke("Write me a party invitation to a one year's old's dinosaur birthday party.")

I0000 00:00:1723490604.703276   30961 check_gcp_environment.cc:61] BIOS data file does not exist or cannot be opened.


In [6]:
# Create the document database: one entry per non-empty line of the speech
# transcript, with trailing whitespace (including the newline) removed.
# The transcript contains typographic quotes and apostrophes, so the encoding
# is stated explicitly instead of being left to the platform default.
with open("state_of_the_union_030724.txt", encoding="utf-8") as transcript:
    stripped_lines = (raw_line.rstrip() for raw_line in transcript)
    sotu = [text for text in stripped_lines if text]

In [7]:
sotu[:10]

['Good evening.',
 'Mr. Speaker. Madam Vice President. Members of Congress. My Fellow Americans.',
 'In January 1941, President Franklin Roosevelt came to this chamber to speak to the nation.',
 'He said, “I address you at a moment unprecedented in the history of the Union.”',
 'Hitler was on the march. War was raging in Europe.',
 'President Roosevelt’s purpose was to wake up the Congress and alert the American people that this was no ordinary moment.',
 'Freedom and democracy were under assault in the world.',
 'Tonight I come to the same chamber to address the nation.',
 'Now it is we who face an unprecedented moment in the history of the Union.',
 'And yes, my purpose tonight is to both wake up this Congress, and alert the American people that this is no ordinary moment either.']

In [8]:
# Wrap each transcript line in its own LlamaIndex Document (one Document per line).
documents = [Document(text=line) for line in sotu]

In [9]:
documents[:2]

[Document(id_='034192b4-6167-41ae-8393-b782dc955ed7', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Good evening.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 Document(id_='52254334-50f9-4b29-b6b1-444c0cca069b', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Mr. Speaker. Madam Vice President. Members of Congress. My Fellow Americans.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')]

In [10]:
# following these tutorials
# https://learnbybuilding.ai/tutorials/rag-chatbot-on-podcast-llamaindex-faiss-openai
# https://medium.com/@saurabhgssingh/understanding-rag-building-a-rag-system-from-scratch-with-gemini-api-b11ad9fc1bf7

# The index dimension must equal the length of the vectors produced by the
# embedding model configured in the next cell (models/text-embedding-004).
# NOTE(review): 768 is assumed to be that model's output size -- confirm.
d = 768 # vector dimension of the embedding model that we're going to use
faiss_index = faiss.IndexFlatL2(d)
# Recorded output is True, i.e. the index reports it needs no training before vectors are added.
print(faiss_index.is_trained)

True


In [11]:
# Make the Gemini text-embedding model the default embed model in LlamaIndex's Settings.
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
Settings.embed_model = embeddings
# Optional smoke test: embed one string and look at the first few components.
#vector = embeddings.embed_query("hello, world!")
#vector[:5]

In [12]:
# Send INFO-level log records to the notebook's stdout. basicConfig() already
# attaches a stdout StreamHandler to the root logger; the second handler that
# used to be added by hand made every record print twice (once with the
# "LEVEL:logger:" prefix, once bare).
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [13]:
# Older ServiceContext-based setup, kept for reference only; the global
# Settings object is used instead.
#from llama_index import ServiceContext, set_global_service_context
#service_context = ServiceContext.from_defaults(llm=llm, embed_model=embeddings)
#set_global_service_context(service_context)

# Register the Gemini chat model as the default LLM.
Settings.llm = llm

In [14]:
# uncomment for when you need to re-embed and vectorize documents
# otherwise, doing local loading below
#vector_store = FaissVectorStore(faiss_index=faiss_index)
#storage_context = StorageContext.from_defaults(vector_store=vector_store)
#index = VectorStoreIndex.from_documents(
#    documents, storage_context=storage_context, show_progress=True
#)
# save index to disk
#index.storage_context.persist()
#index

In [15]:
# Load the persisted index when ./storage exists; otherwise embed the
# documents, build the index and persist it. This lets the notebook run
# top to bottom on a fresh checkout without uncommenting the build cell.
PERSIST_DIR = "./storage"
if os.path.isdir(PERSIST_DIR):
    # load index from disk
    vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR)
    storage_context = StorageContext.from_defaults(
        vector_store=vector_store, persist_dir=PERSIST_DIR
    )
    index = load_index_from_storage(storage_context=storage_context)
else:
    # first run: embed every document (this calls the embedding API) ...
    vector_store = FaissVectorStore(faiss_index=faiss_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(
        documents, storage_context=storage_context, show_progress=True
    )
    # ... and save the result so later runs take the load branch
    index.storage_context.persist(persist_dir=PERSIST_DIR)

INFO:root:Loading llama_index.vector_stores.faiss.base from ./storage/default__vector_store.json.
Loading llama_index.vector_stores.faiss.base from ./storage/default__vector_store.json.
INFO:llama_index.core.indices.loading:Loading all indices.
Loading all indices.


In [26]:
# Query engine over the loaded index, configured with similarity_top_k=10
# (ten retrieved nodes per question).
query_engine = index.as_query_engine(similarity_top_k=10)

In [27]:
# Ask the query engine a single question; the answer text and the retrieved
# source nodes are inspected in the next cells.
query = "What is the President planning related to healthcare?"
response = query_engine.query(query)

In [28]:
response.response

'The President is advocating for several healthcare reforms, including capping the cost of insulin at $35 a month, capping prescription drug costs at $2,000 a year, and giving Medicare the power to negotiate lower prices for drugs. They also mention saving taxpayers money by cutting the federal deficit and reducing health care premiums for working families. \n'

In [30]:
# Show every retrieved passage together with its score.
# NOTE(review): the index is a faiss.IndexFlatL2 and the recorded scores rise
# down the list, so they are presumably L2 distances (lower = closer match)
# rather than similarities -- confirm.
for hit in response.source_nodes:
    score, passage = hit.get_score(), hit.text
    print(f"{score} -> {passage}")

0.9039260149002075 -> Over one hundred million of you can no longer be denied health insurance because of pre-existing conditions.
0.924292266368866 -> And now I want to cap the cost of insulin at $35 a month for every American who needs it!
0.9246326684951782 -> A president, my predecessor, who failed the most basic duty. Any President owes the American people the duty to care.
0.9358830451965332 -> That’s why we’re launching the first-ever White House Initiative on Women’s Health Research, led by Jill who is doing an incredible job as First Lady.
0.9369264245033264 -> Folks Obamacare, known as the Affordable Care Act is still a very big deal.
0.9414553046226501 -> Now I want to cap prescription drug costs at $2,000 a year for everyone!
0.9473403692245483 -> It’s saving taxpayers money cutting the federal deficit by $160 Billion because Medicare will no longer have to pay exorbitant prices to Big Pharma.
0.9535688161849976 -> I enacted tax credits that save $800 per person per year re

In [None]:
#result = genai.embed_content(
#    model="models/text-embedding-004",
#    content="What is the meaning of life?",
#    task_type="retrieval_document",
#    title="Embedding of single string")

In [31]:
# Chat engine in 'context' mode, using the same top-k as the query engine above.
# NOTE(review): presumably each user message triggers a fresh retrieval whose
# results are passed to the LLM together with the chat history -- confirm.
chat_engine = index.as_chat_engine(similarity_top_k=10, chat_mode='context')

In [35]:
# First chat turn. Note that `query` and `response` from the query-engine
# section are overwritten here.
query = "What does the President say about Congress?"
response = chat_engine.chat(query)

In [36]:
response.response

'The President says several things about Congress:\n\n* **He urges them to "get this bill done"**, referencing a bill he believes is important for the American people. \n* **He criticizes them for playing politics** and potentially blocking the bill.\n* **He calls on them to "wake up"** and recognize the importance of the current moment.\n* **He tells Congress they must "stand up to Putin"** and pass the Bipartisan National Security Bill. \n'

In [37]:
print(response.response)

The President says several things about Congress:

* **He urges them to "get this bill done"**, referencing a bill he believes is important for the American people. 
* **He criticizes them for playing politics** and potentially blocking the bill.
* **He calls on them to "wake up"** and recognize the importance of the current moment.
* **He tells Congress they must "stand up to Putin"** and pass the Bipartisan National Security Bill. 



In [38]:
# Passages retrieved for the chat answer above, one "score -> text" line each.
for source in response.source_nodes:
    score = source.get_score()
    print(f"{score} -> {source.text}")

0.7652876377105713 -> Mr. Speaker. Madam Vice President. Members of Congress. My Fellow Americans.
0.7749111652374268 -> Tonight I come to the same chamber to address the nation.
0.8328826427459717 -> A president, my predecessor, who failed the most basic duty. Any President owes the American people the duty to care.
0.8343376517295837 -> And I will always be a president for all Americans!
0.842502236366272 -> My Republican friends you owe it to the American people to get this bill done.
0.8441706895828247 -> And if my predecessor is watching instead of playing politics and pressuring members of Congress to block this bill, join me in telling Congress to pass it!
0.8499408960342407 -> And yes, my purpose tonight is to both wake up this Congress, and alert the American people that this is no ordinary moment either.
0.852456271648407 -> Meanwhile, my predecessor told the NRA he’s proud he did nothing on guns when he was President.
0.8647689819335938 -> I signed a bipartisan budget deal t

In [39]:
# Inspect the conversation memory kept by the chat engine.
# NOTE(review): the recorded output starts with a question about the President's
# predecessor (with an empty assistant reply) that no cell in this notebook asks --
# it is left over from an earlier run. Restart the kernel and run all cells to refresh it.
chat_engine.chat_history

[ChatMessage(role=<MessageRole.USER: 'user'>, content='What does the President say about his predecessor?', additional_kwargs={}),
 ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='', additional_kwargs={}),
 ChatMessage(role=<MessageRole.USER: 'user'>, content='What does the President say about Congress?', additional_kwargs={}),
 ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='The President says several things about Congress:\n\n* **He urges them to "get this bill done"**, referencing a bill he believes is important for the American people. \n* **He criticizes them for playing politics** and potentially blocking the bill.\n* **He calls on them to "wake up"** and recognize the importance of the current moment.\n* **He tells Congress they must "stand up to Putin"** and pass the Bipartisan National Security Bill. \n', additional_kwargs={})]

In [40]:
# Follow-up turn: "that bill" can only be resolved through the chat history,
# so this cell has to run after the Congress question above.
query2 = "What else does he say about that bill?"
response2 = chat_engine.chat(query2)

In [41]:
print(response2.response)

Here's what the President says about the bill he wants Congress to pass:

* **It would save lives and bring order to the border.** This suggests it's related to immigration or border security.
* **The Border Patrol Union endorsed the bill.** This adds credibility to the bill's potential effectiveness.
* **He believes a majority of the House and Senate would endorse it.** This indicates he believes it has bipartisan support.

It's important to note that the specific details of the bill are not provided in the context.  We only know it's a "Bipartisan National Security Bill" and the President strongly advocates for its passage. 



In [42]:
# Passages retrieved for the follow-up answer, printed as "score -> text".
for retrieved in response2.source_nodes:
    score, passage = retrieved.get_score(), retrieved.text
    print(f"{score} -> {passage}")

0.7411553263664246 -> My Republican friends you owe it to the American people to get this bill done.
0.7534066438674927 -> And if my predecessor is watching instead of playing politics and pressuring members of Congress to block this bill, join me in telling Congress to pass it!
0.7781415581703186 -> I’m told my predecessor called Republicans in Congress and demanded they block the bill. He feels it would be a political win for me and a political loser for him.
0.782292902469635 -> I believe that given the opportunity a majority of the House and Senate would endorse it as well.
0.8261321783065796 -> Here at home I’ve signed over 400 bipartisan bills.
0.8330406546592712 -> This bill would save lives and bring order to the border.
0.83488529920578 -> I say this to Congress: we must stand up to Putin. Send me the Bipartisan National Security Bill.
0.8600276708602905 -> But my predecessor and many in this chamber want to take that protection away by repealing the Affordable Care Act I won’