In [1]:
# Import packages
import openai
import logging
from dotenv import load_dotenv
import os
import numpy as np

from llama_index.core import Document

from trulens.core import Feedback, Select
from trulens.apps.llamaindex import TruLlama
from trulens.providers.openai import OpenAI as TrulensOpenAI

In [2]:
# Load the .env file
load_dotenv()

openai.api_key = os.getenv("OPENAI_API_KEY")

In [3]:
# Setting logging level for pypdf
logger = logging.getLogger("pypdf")
logger.setLevel(logging.ERROR)

In [4]:
from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader(
    input_files=["./data/tax_appeal/a1compsales.pdf",
                 "./data/tax_appeal/A-Guide-to-Tax-Appeal-Hearings.pdf",
                 "./data/tax_appeal/FAQs.Summary-Hearing-update.pdf",
                 "./data/tax_appeal/Petition-of-Appeal.pdf",
                 "./data/tax_appeal/What-to-expect-at-Hearing.3.29.2022.pdf",
                "./data/tax_appeal/Instructions.pdf",
                 ]
).load_data()

In [5]:
print(f"Type of whole document: {type(documents)}, \n")
print(f"Length of document: {len(documents)} \n")
print(f"Type of first document: {type(documents[0])} \n")
print(f"Contents of last document: {documents[25]}")

Type of whole document: <class 'list'>, 

Length of document: 26 

Type of first document: <class 'llama_index.core.schema.Document'> 

Contents of last document: Doc ID: c154df07-914f-4b62-9d65-5ac760525411
Text: the date of the service of the judgment (date of mailing). If
the assessed value of the property subject to the appeal exceeds
$1,000,000, a taxpayer or taxing district may file a petition of
appeal with the county board of taxation or a complaint with the Tax
Court directly in accordance with amendatory legislation and Tax Court
rules. The Tax ...


## Basic RAG pipeline

#### Data Ingestion Phase

In [6]:
# Create single document
document = Document(text="\n\n".join([doc.text for doc in documents]))

In [7]:
# Create chucks, embeddings and index
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

Settings.llm  = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_documents([document],
                                        llm=Settings.llm,
                                        embed_model=Settings.embed_model)

In [8]:
# what are the fields in the does the vector store
index.vector_store.model_fields

{'stores_text': FieldInfo(annotation=bool, required=False, default=False),
 'is_embedding_query': FieldInfo(annotation=bool, required=False, default=True),
 'data': FieldInfo(annotation=SimpleVectorStoreData, required=False, default_factory=SimpleVectorStoreData)}

#### Retrieval and Synthesis

In [9]:
# Create query engine
query_engine = index.as_query_engine()

In [10]:
response = query_engine.query(
    "What are steps to take to appeal property taxes?"
)
print(str(response))

Steps to take to appeal property taxes include preparing evidence and papers, providing comparable sales with explanations, submitting photographs of the property and comparable sales, attending the hearing or requesting a reschedule with valid reasons, and ensuring all evidence is submitted at least 7 days before the hearing date. It is important to be concise, focused, and prepared to discuss major points during the hearing. Additionally, following the guidelines for filing the appeal, including submitting required forms and fees, is crucial in the process.


#### For a simple example this result could suffice but to put this model in production we need to validate the response from the LLM based on the context provided.
#### There several ways to evaluation the accuracy of a RAG application. TruLens package is one way which offers a scalable automated way to assessing  accuracy using the RAG Triad: Context Relevance, Response Relevance and Groundedness

## Evaluation RAG Application using TruLens

In [11]:
# Load evaluation question
evaluation_questions = []
with open('./data/eval_questions.txt', 'r') as file:
    for line in file:
        # Remove newline character and convert to integer
        item = line.strip()
        print(item)
        evaluation_questions.append(item)

What are steps to take to appeal property taxes?
How long does the tax appeal process take?
Generally is the tax appeal process challenging?
Where can one find comparable sales to build a string appeals case?
Can a tax appeal be denied for no reason?
Can a tax appeal lead to an increase in property texes?
What is the likelihood that a tax appeal is accepted?


In [12]:
from trulens.core import TruSession

session = TruSession()
session.reset_database()

🦑 Initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `TruSession` to prevent this.


Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]


In [13]:
# Initialize provider class
provider = TrulensOpenAI()

# select context to be used in feedback. the location of context is app specific.

context = TruLlama.select_context(query_engine)

# Define a groundedness feedback function
f_groundedness = (
    Feedback(
        provider.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    .on(context.collect())  # collect context chunks into a list
    .on_output()
)

# Question/answer relevance between overall question and answer.
f_answer_relevance = Feedback(
    provider.relevance_with_cot_reasons, name="Answer Relevance"
).on_input_output()
# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(
        provider.context_relevance_with_cot_reasons, name="Context Relevance"
    )
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

✅ In Groundedness, input source will be set to __record__.calls[-1].rets.source_nodes[:].node.text.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input context will be set to __record__.calls[-1].rets.source_nodes[:].node.text .


In [14]:
tru_query_engine_recorder = TruLlama(
    query_engine,
    app_name = "TaxAppeal",
    app_version="base",
    feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance],
)

instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.base.embeddings.base.BaseEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.TransformComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.BaseComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'pydantic.main.BaseModel'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.instrumentation.DispatcherSpanMixin'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'abc

In [15]:
# Evaluation each question and record results
with tru_query_engine_recorder as recording:
    for question in evaluation_questions:
        response = query_engine.query(question)

calling <function BaseQueryEngine.query at 0x141a55c60> with (<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine object at 0x141a1e210>, 'What are steps to take to appeal property taxes?')
calling <function RetrieverQueryEngine.retrieve at 0x142c88540> with (<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine object at 0x141a1e210>, QueryBundle(query_str='What are steps to take to appeal property taxes?', image_path=None, custom_embedding_strs=None, embedding=None))
calling <function BaseRetriever.retrieve at 0x141cf5580> with (<llama_index.core.indices.vector_store.retrievers.retriever.VectorIndexRetriever object at 0x153566b10>, QueryBundle(query_str='What are steps to take to appeal property taxes?', image_path=None, custom_embedding_strs=None, embedding=None))
calling <function VectorIndexRetriever._retrieve at 0x142407e20> with (<llama_index.core.indices.vector_store.retrievers.retriever.VectorIndexRetriever object at 0x153566b10>, Qu



calling <function BaseQueryEngine.query at 0x141a55c60> with (<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine object at 0x141a1e210>, 'Where can one find comparable sales to build a string appeals case?')
calling <function RetrieverQueryEngine.retrieve at 0x142c88540> with (<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine object at 0x141a1e210>, QueryBundle(query_str='Where can one find comparable sales to build a string appeals case?', image_path=None, custom_embedding_strs=None, embedding=None))
calling <function BaseRetriever.retrieve at 0x141cf5580> with (<llama_index.core.indices.vector_store.retrievers.retriever.VectorIndexRetriever object at 0x153566b10>, QueryBundle(query_str='Where can one find comparable sales to build a string appeals case?', image_path=None, custom_embedding_strs=None, embedding=None))
calling <function VectorIndexRetriever._retrieve at 0x142407e20> with (<llama_index.core.indices.vector_store.retrievers.

In [16]:
from trulens.dashboard import run_dashboard

run_dashboard(session)

Starting dashboard ...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://10.201.232.41:55258 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

![Local Image](images/trulens_output.png)