In [1]:
from llama_index.core import (
    Document,
    Settings,
    SimpleDirectoryReader,
    VectorStoreIndex
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openai import OpenAI
import pandas as pd
from trulens_eval import Tru, TruLlama
from utils import (
    build_automerging_index,
    build_sentence_window_index,
    evaluate_engine,
    get_automerging_query_engine,
    get_feedback_func,
    get_sentence_window_query_engine,
    setup 
)
import warnings

In [2]:
warnings.filterwarnings('ignore')
pd.set_option("display.max_colwidth", None)
setup() 

In [3]:
!mkdir -p '../data'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O '../data/paul_graham_essay.txt'

--2024-05-11 14:43:29--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8003::154, 2606:50c0:8000::154, 2606:50c0:8001::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8003::154|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 75042 (73K) [text/plain]
Saving to: ‘../data/paul_graham_essay.txt’


2024-05-11 14:43:29 (4.62 MB/s) - ‘../data/paul_graham_essay.txt’ saved [75042/75042]



In [4]:
documents = SimpleDirectoryReader(
    input_files=["../data/paul_graham_essay.txt"]
).load_data()

In [5]:
len(documents), type(documents)

(1, list)

In [6]:
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

In [7]:
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

In [8]:
response = query_engine.query("What is Paul Graham known for?")
str(response)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


'Paul Graham is known for his work in programming, particularly for his involvement in creating a new dialect of Lisp called Arc. He is also known for his essays, which he started publishing online and eventually compiled into a book titled "Hackers & Painters."'

In [9]:
tru = Tru()
tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of Tru` to prevent this.


In [10]:
with open('eval_questions.txt', 'r') as file:
    eval_questions = file.read().split("\n")

In [11]:
feedbacks = get_feedback_func()

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/charlescamp/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [12]:
evaluate_engine(
    query_engine=query_engine,
    feedbacks=feedbacks,
    app_id="Direct Query Engine",
    eval_questions=eval_questions,
    verbose=False
)

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

TruLlama(tru_class_info=trulens_eval.tru_llama.TruLlama, app_id='Direct Query Engine', tags='-', metadata={}, feedback_definitions=[], feedback_mode=<FeedbackMode.WITH_APP_THREAD: 'with_app_thread'>, root_class=llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine, app=<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine object at 0x319a9e930>, initial_app_loader_dump=None, app_extra_json={}, feedbacks=[FeedbackDefinition(Answer Relevance,
	selectors={'prompt': Lens().__record__.main_input, 'response': Lens().__record__.main_output},
	if_exists=None
), FeedbackDefinition(Context Relevance,
	selectors={'prompt': Lens().__record__.main_input, 'response': Lens().__record__.app.query.rets.source_nodes[:].node.text},
	if_exists=None
), FeedbackDefinition(Groundedness,
	selectors={'source': Lens().__record__.app.query.rets.source_nodes[:].node.text, 'statement': Lens().__record__.main_output},
	if_exists=None

In [13]:
records, feedback = tru.get_records_and_feedback(app_ids=[])

Groundedness per statement in source:   0%|          | 0/4 [00:00<?, ?it/s]

In [14]:
records[["input", "output"] + feedback]

Unnamed: 0,input,output,Groundedness,Context Relevance,Answer Relevance
0,"""What were Paul Graham's two main areas of focus before college?""","""Writing and programming""",1.0,0.6,1.0
1,"""What programming language did Paul Graham use in 9th grade on the IBM 1401?""","""Paul Graham used an early version of Fortran as the programming language in 9th grade on the IBM 1401.""",1.0,0.9,1.0
2,"""How did Paul Graham's experience with microcomputers change his approach to programming?""","""Paul Graham's experience with microcomputers changed his approach to programming by allowing him to have a computer right in front of him that could respond to his keystrokes in real-time, as opposed to the previous method of using punch cards. This direct interaction with the computer enabled him to type programs directly into the machine, which was a significant departure from the punch card system he had previously used.""",0.0,0.9,1.0
3,"""What was Paul Graham's initial plan for his college major?""","""Paul Graham's initial plan for his college major was to study philosophy.""",1.0,0.8,1.0
4,"""What inspired Paul Graham to pursue AI?""","""The novel \""The Moon is a Harsh Mistress\"" by Heinlein, featuring an intelligent computer called Mike, and a PBS documentary showing Terry Winograd using SHRDLU were the inspirations that led Paul Graham to pursue AI.""",1.0,0.6,1.0
5,"""Why did Paul Graham start teaching himself Lisp?""","""Paul Graham started teaching himself Lisp because he found it interesting for its own sake, not just for its association with AI, even though that was the main reason people cared about it at the time.""",1.0,0.1,0.9
6,"""What led Paul Graham to realize that AI, as practiced at the time, was flawed?""","""Paul Graham realized that AI, as practiced at the time, was flawed when he observed that the programs in AI were only able to handle a very proper subset of natural language as a formal language. He understood that there was a significant gap between what these programs could do and actually understanding natural language. This realization led him to conclude that the approach of using explicit data structures to represent concepts in AI was not effective and was not going to lead to true intelligence.""",,0.6,0.9
7,"""How did Paul Graham's experience at the Accademia di Belli Arti differ from his expectations?""","""Paul Graham's experience at the Accademia di Belli Arti differed from his expectations in that he was able to see Florence at street level in various conditions, from empty dark winter evenings to sweltering summer days with crowded streets, which provided him with a more diverse and vivid perspective of the city than he had anticipated.""",,0.2,0.8
8,"""What prompted Paul Graham to leave RISD in 1993?""","""Paul Graham left RISD in 1993 due to feeling dissatisfied with the teaching and learning dynamics at the art school.""",0.0,,0.8
9,"""What was the catalyst for Paul Graham's involvement in the World Wide Web and the founding of Viaweb?""","""The catalyst for Paul Graham's involvement in the World Wide Web and the founding of Viaweb was his realization of the potential of the web as a platform for business opportunities. He recognized the impact that graphical user interfaces had on microcomputers and believed that the web could have a similar effect on the internet. This led him to start a company to put art galleries online, which eventually evolved into the idea of building online stores. The concept of web apps and the ability to run software on servers without the need for client software further fueled his interest and led to the founding of Viaweb.""",,,


In [15]:
window_index = build_sentence_window_index(documents, Settings.llm)
window_engine = get_sentence_window_query_engine(window_index)

In [16]:
evaluate_engine(
    query_engine=window_engine,
    feedbacks=feedbacks,
    app_id="Window Engine",
    eval_questions=eval_questions,
    verbose=False
)

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/4 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

TruLlama(tru_class_info=trulens_eval.tru_llama.TruLlama, app_id='Window Engine', tags='-', metadata={}, feedback_definitions=[], feedback_mode=<FeedbackMode.WITH_APP_THREAD: 'with_app_thread'>, root_class=llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine, app=<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine object at 0x38efacce0>, initial_app_loader_dump=None, app_extra_json={}, feedbacks=[FeedbackDefinition(Answer Relevance,
	selectors={'prompt': Lens().__record__.main_input, 'response': Lens().__record__.main_output},
	if_exists=None
), FeedbackDefinition(Context Relevance,
	selectors={'prompt': Lens().__record__.main_input, 'response': Lens().__record__.app.query.rets.source_nodes[:].node.text},
	if_exists=None
), FeedbackDefinition(Groundedness,
	selectors={'source': Lens().__record__.app.query.rets.source_nodes[:].node.text, 'statement': Lens().__record__.main_output},
	if_exists=None

In [17]:
merging_index = build_automerging_index(documents, Settings.llm, chunk_sizes=[1024, 256, 64])
merging_engine = get_automerging_query_engine(merging_index)

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

In [18]:
evaluate_engine(
    query_engine=merging_engine,
    feedbacks=feedbacks,
    app_id="Merging Engine",
    eval_questions=eval_questions,
    verbose=False
)

> Merging 1 nodes into parent node.
> Parent node id: 91152afa-a396-4cda-b246-122829b9f388.
> Parent node text: Always seem patient, as if you know that everything wil l come to you eventually.  Become a 
dete...



Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

> Merging 3 nodes into parent node.
> Parent node id: f3c07834-955f-4e9f-83f1-fb397df9916e.
> Parent node text: You can die from someone else’s misery – emotional sta tes are as infectious as disease.  You 
ma...

> Merging 1 nodes into parent node.
> Parent node id: 86cae63a-5ca0-4279-b238-aa2c78341bc0.
> Parent node text: They are wolves in lambs’ clothing.  Choose your 
victims and opponents carefully, then – never o...

> Merging 1 nodes into parent node.
> Parent node id: 2bbaefa9-ed84-4bef-bfa8-04f41831ee16.
> Parent node text: You can die from someone else’s misery – emotional sta tes are as infectious as disease.  You 
ma...



Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

> Merging 1 nodes into parent node.
> Parent node id: 91152afa-a396-4cda-b246-122829b9f388.
> Parent node text: Always seem patient, as if you know that everything wil l come to you eventually.  Become a 
dete...



Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

> Merging 4 nodes into parent node.
> Parent node id: 0b1be9dd-e5fa-43f0-9f3c-1dd6bacae030.
> Parent node text: rather than letting others define if for you.  Incorpora te dramatic devices into your public 
ge...

> Merging 1 nodes into parent node.
> Parent node id: 91152afa-a396-4cda-b246-122829b9f388.
> Parent node text: Always seem patient, as if you know that everything wil l come to you eventually.  Become a 
dete...

> Merging 1 nodes into parent node.
> Parent node id: 90c41ad9-37a1-4c23-9640-14768c414c00.
> Parent node text: rather than letting others define if for you.  Incorpora te dramatic devices into your public 
ge...



Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

> Merging 1 nodes into parent node.
> Parent node id: 91152afa-a396-4cda-b246-122829b9f388.
> Parent node text: Always seem patient, as if you know that everything wil l come to you eventually.  Become a 
dete...



Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

> Merging 3 nodes into parent node.
> Parent node id: bac6e040-320c-47b8-80a3-77ca28fae7d1.
> Parent node text: Law 45  
Preach the Need for Change, but Never Reform too much at O nce  
Everyone understands th...

> Merging 1 nodes into parent node.
> Parent node id: 91152afa-a396-4cda-b246-122829b9f388.
> Parent node text: Always seem patient, as if you know that everything wil l come to you eventually.  Become a 
dete...

> Merging 1 nodes into parent node.
> Parent node id: fa14efa5-f52e-4651-80cb-c692b6fc6fe5.
> Parent node text: Law 45  
Preach the Need for Change, but Never Reform too much at O nce  
Everyone understands th...



Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

> Merging 1 nodes into parent node.
> Parent node id: 91152afa-a396-4cda-b246-122829b9f388.
> Parent node text: Always seem patient, as if you know that everything wil l come to you eventually.  Become a 
dete...



Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

> Merging 1 nodes into parent node.
> Parent node id: 91152afa-a396-4cda-b246-122829b9f388.
> Parent node text: Always seem patient, as if you know that everything wil l come to you eventually.  Become a 
dete...



Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

> Merging 3 nodes into parent node.
> Parent node id: 680f84de-2976-43e8-8668-0e52451a4e49.
> Parent node text: What is offered for free is dangerous – it usually involv es either a trick or a hidden obligatio...

> Merging 1 nodes into parent node.
> Parent node id: 86cae63a-5ca0-4279-b238-aa2c78341bc0.
> Parent node text: They are wolves in lambs’ clothing.  Choose your 
victims and opponents carefully, then – never o...



Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

TruLlama(tru_class_info=trulens_eval.tru_llama.TruLlama, app_id='Merging Engine', tags='-', metadata={}, feedback_definitions=[], feedback_mode=<FeedbackMode.WITH_APP_THREAD: 'with_app_thread'>, root_class=llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine, app=<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine object at 0x38e34afc0>, initial_app_loader_dump=None, app_extra_json={}, feedbacks=[FeedbackDefinition(Answer Relevance,
	selectors={'prompt': Lens().__record__.main_input, 'response': Lens().__record__.main_output},
	if_exists=None
), FeedbackDefinition(Context Relevance,
	selectors={'prompt': Lens().__record__.main_input, 'response': Lens().__record__.app.query.rets.source_nodes[:].node.text},
	if_exists=None
), FeedbackDefinition(Groundedness,
	selectors={'source': Lens().__record__.app.query.rets.source_nodes[:].node.text, 'statement': Lens().__record__.main_output},
	if_exists=None

In [19]:
import nest_asyncio
nest_asyncio.apply()

tru.get_leaderboard(app_ids=[])
tru.run_dashboard()

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.1.29:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>