In [None]:
import os
import logging
import sys
from IPython.display import display, Markdown
import pandas as pd
from typing import List
from pydantic import BaseModel, ConfigDict
import instructor
from datasets import Dataset

import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from llama_index.core import Document, VectorStoreIndex, Settings, StorageContext, load_index_from_storage
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss

import deepeval
from deepeval.models import DeepEvalBaseLLM, DeepEvalBaseEmbeddingModel
from deepeval.test_case import LLMTestCase
from deepeval.dataset import EvaluationDataset
from deepeval.synthesizer import Synthesizer
from deepeval import evaluate
from deepeval.evaluate import TestResult, print_test_result
from deepeval.metrics import (
    AnswerRelevancyMetric,
    ContextualPrecisionMetric,
    ContextualRecallMetric,
    ContextualRelevancyMetric,
    FaithfulnessMetric
)
from deepeval.metrics.ragas import (
    RagasMetric,
    RAGASAnswerRelevancyMetric,
    RAGASFaithfulnessMetric, 
    RAGASContextualRecallMetric,
    RAGASContextualPrecisionMetric,
    RAGASContextualRelevancyMetric
)

In [3]:
# Environment variable to opt out of DeepEval telemetry collection.
# NOTE(review): this is set after `import deepeval` has already run in the imports cell;
# if DeepEval reads the variable at import time it would need to be set before that import — confirm.
os.environ["DEEPEVAL_TELEMETRY_OPT_OUT"] = "YES"

In [4]:
# Display DeepEval's opt-out status; the recorded output for this cell is True.
deepeval.telemetry_opt_out()

True

In [5]:
def to_markdown(text):
  """Render `text` as a Markdown block quote for rich notebook display.

  Bullet characters ('•') are rewritten as Markdown list markers and every
  line, including blank ones, is prefixed with '> '.

  Args:
    text: The string to format.

  Returns:
    An IPython `Markdown` display object wrapping the quoted text.
  """
  # `textwrap` is not imported in the notebook's import cell, so the original
  # function raised NameError on first call; import it where it is used.
  import textwrap

  text = text.replace('•', '  *')
  # predicate=lambda _: True forces the prefix onto whitespace-only lines too.
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [6]:
# Configure the google.generativeai SDK with the API key read from the
# GOOGLE_API_KEY environment variable (raises KeyError if the variable is unset).
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [7]:
# LangChain chat wrapper for Gemini 1.5 Flash; assigned to LlamaIndex's Settings.llm further down.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

In [7]:
# create document database
# using 4 State of the Union speeches, all text from whitehouse.gov briefing room speeches posted online, including a title with the date of the speech
# Example from 2024:
# https://www.whitehouse.gov/briefing-room/speeches-remarks/2024/03/07/remarks-of-president-joe-biden-state-of-the-union-address-as-prepared-for-delivery-2/
sotu = []  # every non-empty line of every speech, in file order
newfiles = [
    "./Speeches/titleedits/state_of_the_union_042921.txt",
    "./Speeches/titleedits/state_of_the_union_030122.txt",
    "./Speeches/titleedits/state_of_the_union_020723.txt",
    "./Speeches/titleedits/state_of_the_union_030724.txt",
]
for speech_path in newfiles:
    # Read as UTF-8 explicitly: the speeches contain curly quotes and em-dashes, and
    # open() would otherwise fall back to a platform-dependent default encoding.
    with open(speech_path, encoding="utf-8") as speech_file:
        stripped_lines = (raw_line.rstrip() for raw_line in speech_file)
        # Drop blank lines; each remaining line later becomes its own Document.
        sotu.extend(text for text in stripped_lines if text != '')

In [8]:
# Wrap every non-empty speech line in its own LlamaIndex Document (one Document per line, no metadata).
documents = [Document(text=line) for line in sotu]

In [12]:
# Spot-check the first Document; in the recorded output it is the title line of the 2021 speech.
documents[0]

Document(id_='830598f6-ba9e-4767-80ab-b946afb17118', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='State of the Union Address given by President Biden on April 29, 2021', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')

In [7]:
# Set up the faiss index
d = 768 # dimensionality of the embedding vectors stored in the index; per the original note this is the size used by the Google embedding model in this notebook
faiss_index = faiss.IndexFlatL2(d)
# Sanity check on the freshly created index; the recorded output is True.
print(faiss_index.is_trained)

True


In [8]:
# set up the embeddings
# Same embedding model name as the one used by CustomGeminiEmbeddingModel further down.
doc_embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004") # optional: task_type="RETRIEVAL_DOCUMENT"
# Register the embedding model and the LLM on LlamaIndex's Settings object.
# NOTE(review): doc_embeddings is a LangChain embeddings object — confirm LlamaIndex accepts it directly in this environment.
Settings.embed_model = doc_embeddings
Settings.llm = llm

In [9]:
# Send INFO-level (and above) log records to stdout so library messages appear in cell output.
# basicConfig already attaches a stdout handler to the root logger; the extra
# StreamHandler that used to be added here made every record print twice
# (once formatted, once bare) and stacked another handler on each re-run of the cell.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [16]:
## Build the FAISS-backed vector index, or reuse the copy already persisted in ./storage.
## Re-embedding every document is slow and calls the embedding API, so it only happens
## when no persisted index exists; delete the ./storage directory to force a rebuild.
## This cell used to be entirely commented out, which left `index` undefined for the
## cells directly below on a fresh kernel (Restart & Run All raised NameError).
if os.path.isdir("./storage"):
    # Cached path: reload the persisted vector store and index.
    # NOTE(review): no index_id is passed, which assumes ./storage holds a single index — confirm.
    vector_store = FaissVectorStore.from_persist_dir("./storage")
    storage_context = StorageContext.from_defaults(
        vector_store=vector_store, persist_dir="./storage"
    )
    index = load_index_from_storage(storage_context=storage_context)
else:
    # Cold path: embed all documents into the FAISS index created above.
    vector_store = FaissVectorStore(faiss_index=faiss_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(
        documents, storage_context=storage_context, show_progress=True
    )


Parsing nodes:   0%|          | 0/1464 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/1464 [00:00<?, ?it/s]

In [17]:
# Show the index id; it is needed to reload this specific index from disk by id.
index.index_id

'3d3c99c5-aa1c-42d7-a9ce-c4bb12fbc6d5'

In [18]:
## save index to disk
# Writes the index's storage context under ./storage, the same directory the load cell reads from.
index.storage_context.persist(persist_dir="./storage")
# Display the index object as the cell output.
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x7f128c3bdb10>

In [19]:
# Re-display the index id after persisting; the recorded output matches the id shown before the save.
index.index_id

'3d3c99c5-aa1c-42d7-a9ce-c4bb12fbc6d5'

In [None]:
# index id '3d3c99c5-aa1c-42d7-a9ce-c4bb12fbc6d5' in persist_dir "./storage" uses all 4 of the speeches with a title that includes the date it was given
# index id 'c9cfc851-156f-41a3-96a9-ff1516a65a8e' in persist_dir "./storage2021" just uses the 2021 speech with a title that includes the date it was given
# index id '354f0f60-3eb3-46c1-a31a-b854a7f4536c' in persist_dir "./storage2022" just uses the 2022 speech with a title that includes the date it was given
# index id '48050557-05c2-4e05-a610-75aa414348a7' in persist_dir "./storage2023" just uses the 2023 speech with a title that includes the date it was given
# index id 'cab605dd-ff55-4514-9571-c682fc1fd4b2' in persist_dir "./storage2024" just uses the 2024 speech with a title that includes the date it was given

In [10]:
# Reload a previously persisted index from disk.
# The id / directory pair below selects the index built from all four speeches.
storagedir = "./storage"
indexid = '3d3c99c5-aa1c-42d7-a9ce-c4bb12fbc6d5'

# The FAISS vector store is restored first and then handed to the storage context,
# which is itself loaded from the same directory.
vector_store = FaissVectorStore.from_persist_dir(storagedir)
storage_context = StorageContext.from_defaults(persist_dir=storagedir, vector_store=vector_store)

index = load_index_from_storage(index_id=indexid, storage_context=storage_context)

INFO:root:Loading llama_index.vector_stores.faiss.base from ./storage/default__vector_store.json.
Loading llama_index.vector_stores.faiss.base from ./storage/default__vector_store.json.
INFO:llama_index.core.indices.loading:Loading indices with ids: ['3d3c99c5-aa1c-42d7-a9ce-c4bb12fbc6d5']
Loading indices with ids: ['3d3c99c5-aa1c-42d7-a9ce-c4bb12fbc6d5']


In [11]:
# set up query and chat engines
# Both engines are created with similarity_top_k=10; the chat engine uses chat_mode='context'.
query_engine = index.as_query_engine(similarity_top_k=10)
chat_engine = index.as_chat_engine(similarity_top_k=10, chat_mode='context')

In [12]:
# DeepEval requires a JSON response. In practice, the LLM has still returned malformed JSON, even with a schema as simple as this one.
# Minimal pydantic schema with a single string field.
# (Documented with comments rather than a docstring so the model's generated schema is unchanged.)
class Response(BaseModel):
    response: str

In [13]:
# Non-OpenAI models require a custom LLM class for use with DeepEval
class CustomGeminiFlash(DeepEvalBaseLLM):
    """DeepEval LLM wrapper around Gemini 1.5 Flash with schema-constrained output.

    Each prompt is sent through an `instructor` client in Gemini JSON mode, with
    the pydantic schema supplied by the caller passed as the response model.
    """

    def __init__(self):
        # Only state: the google.generativeai model handle. DeepEvalBaseLLM.__init__ is
        # not invoked here. (An unused local `model_config = ConfigDict(...)` that had
        # no effect was removed.)
        self.model = genai.GenerativeModel(model_name="models/gemini-1.5-flash")

    def load_model(self):
        """Return the underlying `genai.GenerativeModel` instance."""
        return self.model

    def generate(self, prompt: str, schema: BaseModel) -> BaseModel:
        """Send `prompt` to Gemini and return the reply produced for `schema`.

        Args:
            prompt: The full prompt text, sent as a single user message.
            schema: The pydantic model handed to instructor as `response_model`.

        Returns:
            Whatever the instructor client returns for the given response model.
        """
        client = self.load_model()
        # A fresh instructor wrapper is built around the model on every call.
        instructor_client = instructor.from_gemini(
            client=client,
            mode=instructor.Mode.GEMINI_JSON,
        )
        resp = instructor_client.messages.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            response_model=schema
        )
        return resp

    async def a_generate(self, prompt: str, schema: BaseModel) -> BaseModel:
        """Async entry point; delegates to the synchronous `generate`.

        The underlying call is not awaited, so it runs to completion inside the
        coroutine before control is returned.
        """
        return self.generate(prompt, schema)

    def get_model_name(self):
        """Return the display name of this model."""
        return "Gemini 1.5 Flash"

In [14]:
# similarly, a custom embedding model class is required for non-OpenAI embeddings
class CustomGeminiEmbeddingModel(DeepEvalBaseEmbeddingModel):
    """DeepEval embedding wrapper around Google's `models/text-embedding-004`.

    Every method obtains a `GoogleGenerativeAIEmbeddings` client from
    `load_model()` and forwards the call to it.
    """

    def __init__(self):
        # No state to set up: the embedding client is created on demand in load_model().
        # DeepEvalBaseEmbeddingModel.__init__ is not invoked here. (An unused local
        # `model_config = ConfigDict(...)` that had no effect was removed.)
        pass

    def load_model(self):
        """Create and return a new `GoogleGenerativeAIEmbeddings` client."""
        return GoogleGenerativeAIEmbeddings(
            model="models/text-embedding-004"
        )

    def embed_text(self, text: str) -> List[float]:
        """Embed a single string via the client's `embed_query`."""
        embedding_model = self.load_model()
        return embedding_model.embed_query(text)

    def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of strings via the client's `embed_documents`."""
        embedding_model = self.load_model()
        return embedding_model.embed_documents(texts)

    async def a_embed_text(self, text: str) -> List[float]:
        """Async variant of `embed_text`, awaiting the client's `aembed_query`."""
        embedding_model = self.load_model()
        return await embedding_model.aembed_query(text)

    async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Async variant of `embed_texts`, awaiting the client's `aembed_documents`."""
        embedding_model = self.load_model()
        return await embedding_model.aembed_documents(texts)

    def get_model_name(self) -> str:
        """Return the display name of this embedding model."""
        # The original had the bare string with no `return`, so the method returned None.
        return "Custom Gemini Embeddings"

In [15]:
# Instantiate the custom DeepEval wrappers; both are passed to the Synthesizer below.
custom_geminiflash = CustomGeminiFlash()
custom_geminiembeddings = CustomGeminiEmbeddingModel()

In [None]:
# Build a synthetic evaluation set of DeepEval "Goldens" from the four speech files.
# The resulting dataset has 'input', 'context' and 'source_file' columns (not 'Retrieval_Context').
golden_source_paths = [
    'Speeches/titleedits/state_of_the_union_042921.txt',
    'Speeches/titleedits/state_of_the_union_030122.txt',
    'Speeches/titleedits/state_of_the_union_020723.txt',
    'Speeches/titleedits/state_of_the_union_030724.txt',
]

# The synthesizer uses the custom Gemini LLM and embedding wrappers defined above.
synthesizer = Synthesizer(embedder=custom_geminiembeddings, model=custom_geminiflash)

dataset = EvaluationDataset()
dataset.generate_goldens_from_docs(
    document_paths=golden_source_paths,
    synthesizer=synthesizer,
    include_expected_output=True,
    max_goldens_per_document=2,
)

# Write the generated goldens as a CSV file into the current directory.
dataset.save_as(directory=".", file_type="csv")

In [22]:
# after dataset is generated, need to generate the answer column

In [16]:
# Below code uses the resulting testset without generated answers to generate new answers
# NOTE(review): the recorded output shows an "Unnamed: 0" column, which suggests the CSV was saved with its index — confirm.

testset_pd = pd.read_csv("datasets/labeled_dataset/synth_combined_24.csv", index_col = None)

In [17]:
# Display the loaded test set; the recorded output has Query, Expected_Output, Contexts_QueryGen and Source_File columns.
testset_pd

Unnamed: 0,Query,Expected_Output,Contexts_QueryGen,Source_File
0,Compare the tax breaks for the wealthy and cor...,The text highlights two contrasting policies: ...,or raise the retirement age I will stop them!...,Speeches/titleedits/state_of_the_union_030724.txt
1,Advocate for closing corporate tax loopholes t...,I agree! It's not fair that big corporations m...,or raise the retirement age I will stop them!...,Speeches/titleedits/state_of_the_union_030724.txt
2,Identify and elaborate on the key policies ena...,The speaker has enacted or proposed policies i...,and biggest corporations no longer get all th...,Speeches/titleedits/state_of_the_union_030724.txt
3,Analyze the political motivations behind the o...,The President claims that his predecessor pres...,and biggest corporations no longer get all th...,Speeches/titleedits/state_of_the_union_030724.txt
4,Imagine a year where America's economy is thri...,The President's speech paints a picture of a s...,no place in America! \n\nHistory is watching....,Speeches/titleedits/state_of_the_union_030724.txt
5,How does the president's statement about overt...,The president states that his predecessor brag...,no place in America! \n\nHistory is watching....,Speeches/titleedits/state_of_the_union_030724.txt


In [39]:
# Map lower-case golden column names (input, actual_output, expected_output, context, source_file)
# onto the capitalized names used in the rest of this notebook.
# NOTE(review): the CSV loaded above already shows Query / Expected_Output / Source_File columns,
# so with pandas' default handling of missing labels this rename appears to be a no-op for that file — confirm.
testset_pd = testset_pd.rename(columns={"input": "Query", "actual_output": "Answer", "expected_output": "Expected_Output", "context": "Contexts", "source_file": "Source_File"})

In [40]:
# Display the test set after the rename.
# NOTE(review): the recorded output (8 rows, with Answer and Contexts columns) does not match the
# 6-row CSV loaded above, so it appears to come from an earlier run on a different dataset.
testset_pd

Unnamed: 0,Query,Answer,Expected_Output,Contexts,Source_File
0,How has the American Rescue Plan impacted citi...,,The American Rescue Plan has delivered food an...,suffering from an autoimmune disease — wrote ...,Speeches/titleedits/state_of_the_union_042921.txt
1,If the American Rescue Plan had been enacted e...,,The speech does not directly address whether t...,suffering from an autoimmune disease — wrote ...,Speeches/titleedits/state_of_the_union_042921.txt
2,Compare the economic policies advocated by the...,,The president advocates for economic policies ...,keep the economy going strong by giving worke...,Speeches/titleedits/state_of_the_union_030122.txt
3,Imagine if increasing the minimum wage to $15 ...,,The impact of raising the minimum wage to $15 ...,keep the economy going strong by giving worke...,Speeches/titleedits/state_of_the_union_030122.txt
4,Compare President Biden's statements about wor...,,President Biden emphasizes his desire to work ...,State of the Union Address given by President ...,Speeches/titleedits/state_of_the_union_020723.txt
5,"Imagine the US facing economic instability, gl...",,President Biden's address emphasizes unity and...,State of the Union Address given by President ...,Speeches/titleedits/state_of_the_union_020723.txt
6,Imagine a year where America's economy is thri...,,The President's speech paints a picture of a s...,no place in America! \n\nHistory is watching....,Speeches/titleedits/state_of_the_union_030724.txt
7,How does the president's statement about overt...,,The president states that his predecessor brag...,no place in America! \n\nHistory is watching....,Speeches/titleedits/state_of_the_union_030724.txt


In [None]:
# generate answer column, per these two issues
# https://github.com/explodinggradients/ragas/issues/1145
# https://github.com/explodinggradients/ragas/issues/1084#issuecomment-2248219601

# Re-creates the query engine with the same similarity_top_k=10 used in the earlier engine-setup cell,
# then runs every Query through it, one call per row, keeping the full response objects.
query_engine = index.as_query_engine(similarity_top_k=10)
answers = [query_engine.query(q) for q in testset_pd['Query']]

In [19]:
# Split each query result into its answer text and the text of its retrieved source nodes,
# then attach both to the test set as new columns.
answers_r = [result.response for result in answers]
context_n = [
    [scored_node.node.get_content() for scored_node in result.source_nodes]
    for result in answers
]

#testset_pd = testset_pd.rename(columns={"Contexts":"Contexts_QueryGen"})
# Column order matters for the saved CSV: contexts first, then answers.
testset_pd['Contexts_2024'] = context_n
testset_pd['Answer'] = answers_r

In [20]:
# Display the test set with the new Contexts_2024 and Answer columns.
# In the recorded output the last row's Answer is "Empty Response".
testset_pd

Unnamed: 0,Query,Expected_Output,Contexts_QueryGen,Source_File,Contexts_2024,Answer
0,Compare the tax breaks for the wealthy and cor...,The text highlights two contrasting policies: ...,or raise the retirement age I will stop them!...,Speeches/titleedits/state_of_the_union_030724.txt,[The last administration enacted a $2 Trillion...,The text argues that a previous administration...
1,Advocate for closing corporate tax loopholes t...,I agree! It's not fair that big corporations m...,or raise the retirement age I will stop them!...,Speeches/titleedits/state_of_the_union_030724.txt,[The way to make the tax code fair is to make ...,"The current tax code is unfair, favoring large..."
2,Identify and elaborate on the key policies ena...,The speaker has enacted or proposed policies i...,and biggest corporations no longer get all th...,Speeches/titleedits/state_of_the_union_030724.txt,[I enacted tax credits that save $800 per pers...,The speaker highlights several key policies en...
3,Analyze the political motivations behind the o...,The President claims that his predecessor pres...,and biggest corporations no longer get all th...,Speeches/titleedits/state_of_the_union_030724.txt,[The result was a bipartisan bill with the tou...,The opposition to the bipartisan border securi...
4,Imagine a year where America's economy is thri...,The President's speech paints a picture of a s...,no place in America! \n\nHistory is watching....,Speeches/titleedits/state_of_the_union_030724.txt,"[Unemployment at 50-year lows., 800,000 new ma...",Despite a robust economy with low unemployment...
5,How does the president's statement about overt...,The president states that his predecessor brag...,no place in America! \n\nHistory is watching....,Speeches/titleedits/state_of_the_union_030724.txt,"[Meanwhile, my predecessor told the NRA he’s p...",Empty Response


In [21]:
# Save the test set, now including generated answers and retrieved contexts, without the DataFrame index.
# NOTE(review): Contexts_2024 holds Python lists, which CSV stores as their string form — they will
# need to be parsed back into lists when this file is reloaded; confirm downstream handling.
testset_pd.to_csv('datasets/labeled_dataset/synth_24_answers.csv', index=False)