In [1]:

import os
import openai
from dotenv import load_dotenv

load_dotenv()  # Load from .env file
openai.api_key = os.getenv("OPENAI_API_KEY")

In [None]:
from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader(
    input_files=[
        "./testdocument/introduction-to-natural-language-processing.pdf"
    ]
).load_data()


In [3]:
print(type(documents), "\n")
print(len(documents), "\n")
print(type(documents[0]))
print(documents[0])

<class 'list'> 

52 

<class 'llama_index.core.schema.Document'>
Doc ID: e40ec79c-be4e-4c47-8794-5fadfa5dbccb
Text: Introduction to natural language processing R. Kibble CO3354
2013 Undergraduate study in  Computing and related programmes This is
an extract from a subject guide for an undergraduate course offered as
part of the  University of London International Programmes in
Computing. Materials for these programmes  are developed by academics
at Goldsmiths...


# Starting basic RAG Pipelines

In [4]:
import re
from llama_index.core import SimpleDirectoryReader, Document

document = Document(text="\n\n".join([doc.text for doc in documents]))


In [5]:
from llama_index.core import VectorStoreIndex, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Define models
llm = OpenAI(model="gpt-4o", temperature=0.4)
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")  # Change model if needed

# Update global settings
Settings.llm = llm
Settings.embed_model = embed_model

# Create index
index = VectorStoreIndex.from_documents([document])


In [6]:
query_engine = index.as_query_engine()

In [7]:
response = query_engine.query(
    "What are the main aspects of atomic habits?"
)
print(str(response))

The main aspects of atomic habits focus on making small, incremental changes that compound over time to produce significant results. The concept emphasizes the importance of building habits through gradual improvements and consistency. It involves understanding the cue-routine-reward loop to effectively create and maintain positive habits while breaking negative ones. The approach also highlights the significance of identity change, suggesting that to change a habit, one must first change their self-perception and beliefs about themselves.


## Evaluation setup using TruLens

In [8]:
eval_questions = [
    "Who introduced the notions of finite-state machines and context-free grammar (CFG) to linguistics?",
    "How did linguists test formal rules of grammar according to Chomsky’s approach?",
    "What has contributed to making the vision of computers understanding ordinary language and holding conversations with humans more plausible in the 21st century?"]


In [9]:
from trulens_eval import Tru
tru = Tru()

tru.reset_database()

  from trulens_eval import Tru


🦑 Initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `TruSession` to prevent this.


Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]


In [10]:
import numpy as np
from trulens_eval import (
    Feedback,
    TruLlama,
    Select,
    OpenAI
)

provider = OpenAI(model_engine="gpt-4o")
import nest_asyncio

nest_asyncio.apply()

# Define a groundedness feedback function
groundedness = (
    Feedback(
        provider.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    .on(TruLlama.select_source_nodes().node.text)
    .on_output()
)
# Question/answer relevance between overall question and answer.
qa_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input()
    .on_output()
)

# Context relevance between question and each context chunk.
qs_relevance = (
    Feedback(
        provider.context_relevance_with_cot_reasons, name="Context Relevance"
    )
    .on_input()
    .on(TruLlama.select_source_nodes().node.text)
    .aggregate(np.mean)  # choose a different aggregation method if you wish
)

feedbacks = [qa_relevance, qs_relevance, groundedness]

def get_trulens_recorder(query_engine, feedbacks, app_id):
    tru_recorder = TruLlama(
        query_engine,
        app_id=app_id,
        feedbacks=feedbacks
    )
    return tru_recorder

def get_prebuilt_trulens_recorder(query_engine, app_id):
    tru_recorder = TruLlama(
        query_engine,
        app_id=app_id,
        feedbacks=feedbacks
        )
    return tru_recorder

✅ In Groundedness, input source will be set to __record__.calls[-1].rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input context will be set to __record__.calls[-1].rets.source_nodes[:].node.text .


In [11]:
tru_recorder = get_prebuilt_trulens_recorder(query_engine, app_id="Direct Query Engine")

instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.embeddings.multi_modal_base.MultiModalEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.base.embeddings.base.BaseEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.TransformComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.BaseComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'pydantic.main.BaseModel'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base

In [12]:
with tru_recorder as recording:
    for question in eval_questions:
        response = query_engine.query(question)



In [13]:
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [14]:
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Answer Relevance_calls,Answer Relevance feedback cost in USD,app_name,app_version,latency,total_tokens,total_cost,cost_currency
0,app_hash_6e8221fde876d15698298cea8c0d1bd6,"{'tru_class_info': {'name': 'TruLlama', 'modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_cbe57e915a1483fbb72c3c4df5911335,What has contributed to making the vision of c...,The vision of computers understanding ordinary...,-,{'record_id': 'record_hash_cbe57e915a1483fbb72...,"{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-03-17T21:07:10.327483"", ""...",2025-03-17T21:07:14.691596,,,,Direct Query Engine,base,4.361008,2195,0.006245,USD
1,app_hash_6e8221fde876d15698298cea8c0d1bd6,"{'tru_class_info': {'name': 'TruLlama', 'modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_ab929ad56982fed8dc960c0174d2c738,How did linguists test formal rules of grammar...,Linguists tested formal rules of grammar accor...,-,{'record_id': 'record_hash_ab929ad56982fed8dc9...,"{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-03-17T21:07:07.295187"", ""...",2025-03-17T21:07:09.767801,1.0,[{'args': {'prompt': 'How did linguists test f...,0.001995,Direct Query Engine,base,2.470307,2126,0.005848,USD
2,app_hash_6e8221fde876d15698298cea8c0d1bd6,"{'tru_class_info': {'name': 'TruLlama', 'modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_06a56c9c995049629dcd33210481de45,Who introduced the notions of finite-state mac...,The notions of finite-state machines and conte...,-,{'record_id': 'record_hash_06a56c9c995049629dc...,"{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-03-17T21:07:04.694343"", ""...",2025-03-17T21:07:06.816321,1.0,[{'args': {'prompt': 'Who introduced the notio...,0.003183,Direct Query Engine,base,2.119847,2100,0.005573,USD


In [15]:
tru.run_dashboard()

Starting dashboard ...



  tru.run_dashboard()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://localhost:55219 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>