In [1]:

import os
import openai
from dotenv import load_dotenv

load_dotenv()  # Load from .env file
openai.api_key = os.getenv("OPENAI_API_KEY")

In [2]:
from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader(
    input_files=[
        "./testdocument/introduction-to-natural-language-processing.pdf"
    ]
).load_data()


In [3]:
print(type(documents), "\n")
print(len(documents), "\n")
print(type(documents[0]))
print(documents[0])

<class 'list'> 

52 

<class 'llama_index.core.schema.Document'>
Doc ID: 61f78f2b-3fdb-4e85-9fd4-cc6fcd7a6cfd
Text: Introduction to natural language processing R. Kibble CO3354
2013 Undergraduate study in  Computing and related programmes This is
an extract from a subject guide for an undergraduate course offered as
part of the  University of London International Programmes in
Computing. Materials for these programmes  are developed by academics
at Goldsmiths...


# Starting basic RAG Pipelines

In [4]:
import re
from llama_index.core import SimpleDirectoryReader, Document

document = Document(text="\n\n".join([doc.text for doc in documents]))


In [5]:
from llama_index.core import VectorStoreIndex, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Define models
llm = OpenAI(model="gpt-4o", temperature=0.4)
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")  # Change model if needed

# Update global settings
Settings.llm = llm
Settings.embed_model = embed_model

# Create index
index = VectorStoreIndex.from_documents([document])


In [6]:
query_engine = index.as_query_engine()

In [7]:
response = query_engine.query(
    "What are the main aspects of atomic habits?"
)
print(str(response))

The context does not provide information about atomic habits. Therefore, I cannot provide an answer based on the given context.


## Evaluation setup using TruLens

In [8]:
eval_questions = [
    "Who introduced the notions of finite-state machines and context-free grammar (CFG) to linguistics?",
    "How did linguists test formal rules of grammar according to Chomsky’s approach?",
    "What has contributed to making the vision of computers understanding ordinary language and holding conversations with humans more plausible in the 21st century?",
    "Why is center-embedding significant in grammars, and what does it allow?",
    "What is a corpus in the context of natural language processing, and what are the three broad categories of corpora?",
    "How are corpora used in modern lexicography, and how do they influence dictionary entries?",
    "How are corpora utilized in grammatical research, and what advancements have been made with the use of computational tools?"
]

In [9]:
from trulens_eval import Tru
tru = Tru()

tru.reset_database()

  from trulens_eval import Tru


🦑 Initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `TruSession` to prevent this.


Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]


In [10]:
import numpy as np
from trulens_eval import (
    Feedback,
    TruLlama,
    Select,
    OpenAI
)

provider = OpenAI(model_engine="gpt-4o")
import nest_asyncio

nest_asyncio.apply()

# Define a groundedness feedback function
groundedness = (
    Feedback(
        provider.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    .on(TruLlama.select_source_nodes().node.text)
    .on_output()
)
# Question/answer relevance between overall question and answer.
qa_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input()
    .on_output()
)

# Context relevance between question and each context chunk.
qs_relevance = (
    Feedback(
        provider.context_relevance_with_cot_reasons, name="Context Relevance"
    )
    .on_input()
    .on(TruLlama.select_source_nodes().node.text)
    .aggregate(np.mean)  # choose a different aggregation method if you wish
)

feedbacks = [qa_relevance, qs_relevance, groundedness]

def get_trulens_recorder(query_engine, feedbacks, app_id):
    tru_recorder = TruLlama(
        query_engine,
        app_id=app_id,
        feedbacks=feedbacks
    )
    return tru_recorder

def get_prebuilt_trulens_recorder(query_engine, app_id):
    tru_recorder = TruLlama(
        query_engine,
        app_id=app_id,
        feedbacks=feedbacks
        )
    return tru_recorder

✅ In Groundedness, input source will be set to __record__.calls[-1].rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input context will be set to __record__.calls[-1].rets.source_nodes[:].node.text .


In [11]:
tru_recorder = get_prebuilt_trulens_recorder(query_engine, app_id="Direct Query Engine")

instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.embeddings.multi_modal_base.MultiModalEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.base.embeddings.base.BaseEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.TransformComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.BaseComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'pydantic.main.BaseModel'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base

In [None]:
with tru_recorder as recording:
    for question in eval_questions:
        response = query_engine.query(question)





In [13]:
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [14]:
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,...,Context Relevance_calls,Groundedness_calls,Context Relevance feedback cost in USD,Groundedness feedback cost in USD,app_name,app_version,latency,total_tokens,total_cost,cost_currency
0,app_hash_6e8221fde876d15698298cea8c0d1bd6,"{'tru_class_info': {'name': 'TruLlama', 'modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_a0b70912cc75b7a4eb3c1be00517ff6d,How are corpora utilized in grammatical resear...,Corpora are utilized in grammatical research a...,-,{'record_id': 'record_hash_a0b70912cc75b7a4eb3...,"{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-03-18T18:26:47.413616"", ""...",...,,,,,Direct Query Engine,base,4.97491,2261,0.00719,USD
1,app_hash_6e8221fde876d15698298cea8c0d1bd6,"{'tru_class_info': {'name': 'TruLlama', 'modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_b94d8431cc0bedace80a9699e495b2ef,"How are corpora used in modern lexicography, a...",Corpora are used in modern lexicography to ana...,-,{'record_id': 'record_hash_b94d8431cc0bedace80...,"{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-03-18T18:26:26.986804"", ""...",...,,,,,Direct Query Engine,base,20.258526,2204,0.006432,USD
2,app_hash_6e8221fde876d15698298cea8c0d1bd6,"{'tru_class_info': {'name': 'TruLlama', 'modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_74c32e23dc30c6f4850f578fb33c8fdc,What is a corpus in the context of natural lan...,"In the context of natural language processing,...",-,{'record_id': 'record_hash_74c32e23dc30c6f4850...,"{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-03-18T18:26:25.213527"", ""...",...,,,,,Direct Query Engine,base,1.547485,1763,0.00485,USD
3,app_hash_6e8221fde876d15698298cea8c0d1bd6,"{'tru_class_info': {'name': 'TruLlama', 'modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_196a9faf76879afb4fc64798f9dbd196,Why is center-embedding significant in grammar...,Center-embedding is significant in grammars be...,-,{'record_id': 'record_hash_196a9faf76879afb4fc...,"{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-03-18T18:26:23.213436"", ""...",...,,,,,Direct Query Engine,base,1.737089,1789,0.005043,USD
4,app_hash_6e8221fde876d15698298cea8c0d1bd6,"{'tru_class_info': {'name': 'TruLlama', 'modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_810ea530457516d8656d72554dd38809,What has contributed to making the vision of c...,The progress in computer processing speed and ...,-,{'record_id': 'record_hash_810ea530457516d8656...,"{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-03-18T18:26:21.283726"", ""...",...,,,,,Direct Query Engine,base,1.680696,2182,0.006115,USD


In [15]:
tru.run_dashboard()

Starting dashboard ...



  tru.run_dashboard()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://localhost:57180 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>