In [1]:
import os
import openai

In [2]:
from dotenv import load_dotenv

# load_dotenv() comes from python-dotenv; it is expected to copy entries from a
# local .env file into the process environment before the key is read.
load_dotenv()

# Hand the key to the openai module. The lookup yields None when the variable
# is unset, so a missing key only surfaces later, on the first API call.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [3]:
import warnings
# Install a blanket "ignore" filter: every warning category from every module
# is silenced for the rest of the kernel session. This keeps cell output clean,
# but it also hides deprecation notices from the libraries used below.
warnings.filterwarnings('ignore')

In [4]:
from trulens_eval import Tru

# Handle to the TruLens logging backend; every later `tru.*` call goes through
# it. With no arguments it reports using sqlite:///default.sqlite (see the
# cell output).
tru = Tru()
# Destructive: wipes whatever was logged by earlier runs so the records and
# leaderboard shown at the end of the notebook only reflect this session.
tru.reset_database() # reset logged tables and logs

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


# Data

In [6]:
from llama_index import Document
from llama_index import SimpleDirectoryReader

# Read the source PDF. `load_data()` returns a list of loaded pieces, each
# exposing its content through a `.text` attribute.
pdf_reader = SimpleDirectoryReader(
    input_files=["./eBook-How-to-Build-a-Career-in-AI.pdf"]
)
docs = pdf_reader.load_data()

# Stitch every piece back together, separated by a blank line, so the index
# builder further down receives the whole book as one Document.
merged_text = "\n\n".join(piece.text for piece in docs)
doc = Document(text=merged_text)

# Triad of Metrics

## Setup

In [7]:
from utils_triad_of_metrics import build_sentence_window_index

from llama_index.llms import OpenAI

# LLM passed to the index builder (and, through it, to the query engine).
# A low temperature keeps generated answers close to deterministic.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1) 

# Build the sentence-window index over the merged document using a locally
# run BGE-small embedding model.
# NOTE(review): presumably the helper persists to / reloads from the
# "sentence_index" directory - confirm in utils_triad_of_metrics.
sentence_index = build_sentence_window_index(
    doc,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index"
)

In [8]:
from utils_triad_of_metrics import get_sentence_window_query_engine

# Query engine built on top of the sentence-window index; this is the RAG
# application that gets queried and evaluated in the rest of the notebook.
sentence_window_engine = get_sentence_window_query_engine(sentence_index)

In [9]:
# Smoke test: ask a single question before any evaluation is wired up, and
# display only the generated answer text as the cell output.
sample_question = "How do you create your AI portfolio?"
response = sentence_window_engine.query(sample_question)
response.response

'To create an AI portfolio, you should showcase your expertise and experience in the field of artificial intelligence. This can include highlighting your knowledge of various AI technologies, programming languages, and algorithms. It is important to demonstrate your ability to develop AI models and solutions, as well as any relevant projects or research you have worked on. Additionally, including any certifications or courses you have completed in AI can help strengthen your portfolio.'

## Feedback Function

In [10]:
import nest_asyncio

# Jupyter already runs an asyncio event loop; nest_asyncio patches asyncio so
# code that starts its own loop can run inside it.
# NOTE(review): presumably needed by the recorder/dashboard used later - confirm.
nest_asyncio.apply() # for app

In [11]:
# Imported under an alias so it does not shadow the llama_index `OpenAI` class
# used earlier to build the application LLM.
from trulens_eval import OpenAI as fOpenAI

# Evaluator LLM: every feedback function defined below calls this provider to
# score the application's inputs, retrieved context and outputs.
provider = fOpenAI() # the model for the evaluator llm

### Answer Relevance

In [12]:
from trulens_eval import Feedback

# Answer relevance: the evaluator LLM judges the final response against the
# user's question. `on_input()` binds the record's main input and
# `on_output()` binds its main output (see the selector messages printed below).
f_qa_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input()
    .on_output()
)

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .


### Context Relevance

In [13]:
from trulens_eval import TruLlama
# Selector for the text of every retrieved source node in a record. It is
# shared by the context-relevance and groundedness feedbacks so both look at
# exactly the same retrieved chunks.
context_selection = TruLlama.select_source_nodes().node.text

In [14]:
import numpy as np

In [15]:
# Context relevance: score each retrieved chunk against the user's question,
# then average the per-chunk scores into a single value per record.
#
# This must use the question/statement relevance function. The answer-relevance
# function (`relevance_with_cot_reasons`, used for "Answer Relevance") takes
# (prompt, response) and would judge every retrieved chunk as if it were the
# application's answer, which is a different metric.
f_qs_relevance = (
    Feedback(
        provider.qs_relevance_with_cot_reasons,
        name="Context Relevance",
    )
    .on_input()               # the user's question
    .on(context_selection)    # each retrieved chunk, scored individually
    .aggregate(np.mean)       # mean over the chunks of one record
)

✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .


### Groundedness

In [16]:
from trulens_eval.feedback import Groundedness
# Groundedness helper backed by the same evaluator LLM as the other feedbacks.
# It supplies both the measure and the aggregator used in the next cell.
grounded = Groundedness(groundedness_provider = provider)

In [17]:
# Groundedness: compare the response (bound as `statement`) with the retrieved
# chunks (bound as `source`), then combine the results with the aggregator
# that the Groundedness helper provides.
f_groundedness = Feedback(
    grounded.groundedness_measure_with_cot_reasons,
    name="Groundedness",
).on(context_selection).on_output().aggregate(
    grounded.grounded_statements_aggregator
)

✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [18]:
from trulens_eval import FeedbackMode
from trulens_eval import TruLlama

# The three feedback functions defined above: answer relevance, context
# relevance and groundedness.
rag_triad_feedbacks = [f_qa_relevance, f_qs_relevance, f_groundedness]

# Wrap the query engine in a recorder registered under the app id "App_1".
# The recorder is used as a context manager around the evaluation queries.
tru_recorder = TruLlama(
    sentence_window_engine,
    app_id="App_1",
    feedbacks=rag_triad_feedbacks,
)

In [19]:
# Load the evaluation questions, one per line. Surrounding whitespace
# (including the trailing newline) is stripped, and blank lines are skipped so
# that no empty prompt is sent to the query engine and the evaluator LLM.
with open('eval_questions_careerinAI.txt', 'r') as file:
    eval_questions = [line.strip() for line in file if line.strip()]

In [None]:
# Send every evaluation question through the engine. Each query runs inside
# the recorder's context so that the call is captured by `tru_recorder`.
for eval_question in eval_questions:
    with tru_recorder as recording:
        sentence_window_engine.query(eval_question)

In [None]:
import pandas as pd

# Fetch the logged records in this cell: `records` and `feedback` were
# previously only defined by a cell further down, so a fresh
# Restart & Run All failed here with a NameError.
records, feedback = tru.get_records_and_feedback(app_ids=[])

# Show full question/answer text instead of pandas' truncated "..." preview.
pd.set_option("display.max_colwidth", None)

# One row per recorded query: the question, the answer, and one column per
# feedback name returned alongside the records.
records[["input", "output"] + feedback]

In [None]:
# Pull the logged records plus the list of feedback column names.
# NOTE(review): an empty `app_ids` list presumably means "all apps" - confirm
# against the TruLens documentation.
records, feedback = tru.get_records_and_feedback(app_ids=[])
# Preview only the first rows; the full frame is wide.
records.head()

In [None]:
# Summary table of the logged apps and their feedback results.
# NOTE(review): an empty `app_ids` list presumably selects every app - confirm.
tru.get_leaderboard(app_ids=[])

In [None]:
# Launch the TruLens dashboard to browse the logged records and feedback
# results interactively. This is a side effect only; nothing is assigned.
tru.run_dashboard()