In [5]:
import time
import pandas as pd
from langchain import LLMChain, PromptTemplate, OpenAI
from trulens_eval import Tru, TruChain, Feedback, Huggingface

# Start the local dashboard. The URL is printed so it can be opened in a browser.
Tru().start_dashboard(force=True)

# Define the LangChain app: a prompt template feeding a single LLM call.
my_chain = LLMChain(
    llm=OpenAI(model="text-davinci-003", temperature=0.7),
    prompt=PromptTemplate.from_template("Tell a {adjective} joke about {subject}")
)

# Define the feedback functions. A single Huggingface provider is shared by
# both feedbacks instead of constructing a separate provider for each one.
hugs = Huggingface()
f_not_toxic = Feedback(hugs.not_toxic).on_output()
f_lang_match = Feedback(hugs.language_match).on_input_output()

# Wrap the app with TruChain so that calls made through `tru_chain` are
# recorded and the feedbacks above are evaluated for each record.
tru_chain = TruChain(
    app=my_chain,
    app_id="JokeTeller-v1",
    metadata={"model_url": "http://192.168.178.30:8502", "run_url": "two"},
    feedbacks=[f_not_toxic, f_lang_match],
    tags="run_id"
)

# Call the wrapped app the same way the underlying chain would be called.
tru_chain(inputs={"adjective": "absurd", "subject": "goose"})

Starting dashboard ...


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.178.30:8501 .
✅ In not_toxic, input text will be set to *.__record__.main_output or `Select.RecordOutput` .
✅ In language_match, input text1 will be set to *.__record__.main_input or `Select.RecordInput` .
✅ In language_match, input text2 will be set to *.__record__.main_output or `Select.RecordOutput` .
✅ app JokeTeller-v1 -> default.sqlite
✅ feedback def. feedback_definition_hash_fc3c7fc1f43d039587a7225731270fb8 -> default.sqlite
✅ feedback def. feedback_definition_hash_9caae88670473d73a75a9046aea7a7ea -> default.sqlite


{'adjective': 'absurd',
 'subject': 'goose',
 'text': '\n\nQ: What did the goose say when it saw a strange object in the sky?\nA: "UFO-G!"'}

✅ record record_hash_d4baf66b9144ff6f6d3cbad05a192cef from JokeTeller-v1 -> default.sqlite
✅ feedback feedback_result_hash_610ce4eb96ea8c9f0c72eb15ec5fa120 on record_hash_d4baf66b9144ff6f6d3cbad05a192cef -> default.sqlite
✅ feedback feedback_result_hash_5fb80bcda7cd21936156bf7016c5ebca on record_hash_d4baf66b9144ff6f6d3cbad05a192cef -> default.sqlite


In [2]:
import time
from trulens_eval.app import App
from trulens_eval.schema import FeedbackResultStatus, Record

class FetchFeedback:
    """Callable that polls the TruLens database for one record's feedback results.

    Instances are called with a ``Record`` and return a one-row DataFrame whose
    columns are the feedback names (``fname``) and whose values are the
    corresponding ``result`` values.
    """

    def __init__(self, app: App, max_attempts: int = 5, sleep_sec: int = 1):
        """Prepare a poller for the feedbacks attached to ``app``.

        Args:
            app: The wrapped app; ``len(app.feedbacks)`` is the number of
                feedback rows to wait for per record.
            max_attempts: Maximum number of database queries per record.
            sleep_sec: Seconds to wait between two consecutive queries.
        """
        self.db = Tru().db
        self.fb_count = len(app.feedbacks)
        self.max_attempts = max_attempts
        self.sleep_sec = sleep_sec

    def __call__(self, record: Record) -> pd.DataFrame:
        """Return the feedback results of ``record`` as a single-row DataFrame.

        The database is queried up to ``max_attempts`` times for feedback rows
        with status DONE or FAILED, waiting ``sleep_sec`` seconds between
        queries, until at least ``fb_count`` rows are available.

        Args:
            record: The record whose feedback results should be fetched.

        Returns:
            A DataFrame with one row (index label ``"result"``) and one column
            per feedback name.

        Raises:
            RuntimeError: If fewer than ``fb_count`` rows are available after
                ``max_attempts`` queries.
        """
        for attempt in range(self.max_attempts):
            # Wait only *between* queries: no delay before the first query and
            # no pointless sleep after the last one, right before raising.
            if attempt > 0:
                time.sleep(self.sleep_sec)
            df = self.db.get_feedback(
                record_id=record.record_id,
                status=[FeedbackResultStatus.DONE, FeedbackResultStatus.FAILED]
            )
            if len(df) >= self.fb_count:
                # dropna=False keeps a feedback's column even when its results
                # are all NaN; by default pivot_table drops such entries, which
                # could leave a record without any row at all.
                # NOTE(review): presumably FAILED feedbacks carry no numeric
                # result - confirm against the trulens_eval schema.
                return (
                    df[["fname", "result"]]
                    .pivot_table(columns="fname", values=["result"], dropna=False)
                    .rename_axis(None, axis=1)
                )
        raise RuntimeError(f"{self.__class__} timeout after {self.max_attempts} attempts")

In [6]:
# Prompt variables for each joke to generate, one dict per call.
dataset = [
    {"adjective": "sad", "subject": "duck"},
    {"adjective": "absurd", "subject": "goose"},
]

# Every call yields a (result, record) pair; unzip the pairs into two
# parallel tuples.
results, records = zip(*map(tru_chain.call_with_record, dataset))

fetch_fb = FetchFeedback(app=tru_chain)

# One row of chain outputs per input.
df_results = pd.DataFrame(results)

# One row of feedback values per record, re-indexed 0..n-1 so the rows line
# up positionally with df_results.
df_feedback = pd.concat(
    [fetch_fb(record) for record in records],
    ignore_index=True,
)

# Chain outputs and feedback values side by side (shown as the cell output).
pd.concat([df_results, df_feedback], axis="columns")

✅ record record_hash_fa2685c9ae648e544920620cfffdf5bc from JokeTeller-v1 -> default.sqlite
✅ feedback feedback_result_hash_63223e4862870985c4014b737f758d70 on record_hash_fa2685c9ae648e544920620cfffdf5bc -> default.sqlite
✅ feedback feedback_result_hash_db9ea7d8969963202a89b5b5bc71410d on record_hash_fa2685c9ae648e544920620cfffdf5bc -> default.sqlite
✅ record record_hash_96b6cf8c1326a9e88449716336f6c1a9 from JokeTeller-v1 -> default.sqlite
✅ feedback feedback_result_hash_823c16a63dcccb38fe8d29fc980b3816 on record_hash_96b6cf8c1326a9e88449716336f6c1a9 -> default.sqlite
✅ feedback feedback_result_hash_04b2ee691c73f96d12d45a4e58e95d6c on record_hash_96b6cf8c1326a9e88449716336f6c1a9 -> default.sqlite


Unnamed: 0,adjective,subject,text,language_match,not_toxic
0,sad,duck,\n\nQ: Why did the duck feel so blue?\nA: Beca...,0.977098,0.004071
1,absurd,goose,\n\nQ: What did the goose say when he was aske...,0.068616,0.008994
