In [1]:
import time
import pandas as pd
from langchain import LLMChain, PromptTemplate, OpenAI
from trulens_eval import Tru, TruChain, Feedback, Huggingface

# Bring up the local dashboard (force=True restarts one that is already running).
# Its URL is printed in the cell output.
Tru().start_dashboard(force=True)

# The LangChain app under evaluation: an OpenAI completion model behind a prompt
# with two template variables, {adjective} and {subject}.
joke_llm = OpenAI(model="text-davinci-003", temperature=0.7)
joke_prompt = PromptTemplate.from_template("Tell a {adjective} joke about {subject}")
my_chain = LLMChain(llm=joke_llm, prompt=joke_prompt)

# Feedback function: Hugging Face `not_toxic`, applied to the app's output.
f_not_toxic = Feedback(Huggingface().not_toxic).on_output()

# Wrap the chain with TruChain, attaching the app id, metadata, feedbacks and tags.
app_metadata = {"model_url": "http://192.168.178.30:8502", "run_url": "two"}
tru_chain = TruChain(
    app=my_chain,
    app_id="JokeTeller-v1",
    metadata=app_metadata,
    feedbacks=[f_not_toxic],
    tags="run_id",
)

# Call the wrapped app the same way as the plain chain; the result is the cell's output.
tru_chain(inputs={"adjective": "absurd", "subject": "goose"})

Force stopping dashboard ...
Starting dashboard ...


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.178.30:8501 .
✅ In not_toxic, input text will be set to *.__record__.main_output or `Select.RecordOutput` .
✅ app JokeTeller-v1 -> default.sqlite
✅ feedback def. feedback_definition_hash_d16412e7b625be7dc68cec905012a209 -> default.sqlite


{'adjective': 'absurd',
 'subject': 'goose',
 'text': '\n\nQ: Why did the goose cross the playground?\nA: To get to the other slide!'}

In [2]:
import time
from trulens_eval.app import App
from trulens_eval.schema import FeedbackResultStatus, Record

class FetchFeedback:
    """Poll the TruLens database until a record's feedback results are available.

    An instance is a callable: pass it a ``Record`` and it returns the
    feedback results for that record as a small DataFrame. Results may not be
    stored yet when the app call returns (presumably because feedback is
    evaluated in the background -- confirm), so the database is queried
    repeatedly with a pause between attempts.
    """

    def __init__(self, app: App, max_attempts: int = 5, sleep_sec: float = 1):
        """Configure the poller.

        Args:
            app: Wrapped app; ``len(app.feedbacks)`` is the number of feedback
                results to wait for per record.
            max_attempts: Maximum number of database queries per record.
            sleep_sec: Seconds to pause between two consecutive queries.
        """
        self.db = Tru().db
        self.fb_count = len(app.feedbacks)
        self.max_attempts = max_attempts
        self.sleep_sec = sleep_sec

    def __call__(self, record: Record) -> pd.DataFrame:
        """Return the finished feedback results for ``record``.

        Args:
            record: Record whose ``record_id`` is used to look up feedback.

        Returns:
            The ``fname``/``result`` columns of the feedback table, pivoted so
            that each feedback function name becomes a column holding its
            result (``pivot_table``'s default aggregation, the mean, applies
            if a name occurs more than once).

        Raises:
            RuntimeError: If fewer than ``fb_count`` results are stored after
                ``max_attempts`` queries.
        """
        for attempt in range(1, self.max_attempts + 1):
            # Both DONE and FAILED count as "finished", so a failing feedback
            # function cannot keep the poller waiting until the timeout.
            # NOTE(review): a FAILED row presumably has no numeric result and
            # may therefore be missing from the pivoted frame -- confirm.
            df = self.db.get_feedback(
                record_id=record.record_id,
                status=[FeedbackResultStatus.DONE, FeedbackResultStatus.FAILED]
            )
            if len(df) >= self.fb_count:
                return df[["fname", "result"]] \
                    .pivot_table(columns="fname", values=["result"]) \
                    .rename_axis(None, axis=1)
            # Pause only when another attempt follows; sleeping after the last
            # query would merely delay the timeout error.
            if attempt < self.max_attempts:
                time.sleep(self.sleep_sec)
        raise RuntimeError(
            f"{type(self).__name__} timeout after {self.max_attempts} attempts "
            f"(record_id={record.record_id})"
        )

In [3]:
# Prompt variables for each joke to generate.
dataset = [
    dict(adjective="sad", subject="duck"),
    dict(adjective="absurd", subject="goose"),
]

# Run every input through the wrapped chain. Each call yields a (result, record)
# pair; zip(*...) splits the pairs into two parallel tuples.
call_pairs = [tru_chain.call_with_record(example) for example in dataset]
results, records = zip(*call_pairs)

fetch_fb = FetchFeedback(app=tru_chain)

# One row per app call, holding the chain's inputs and generated text.
df_results = pd.DataFrame(results)

# One feedback frame per record, stacked and re-indexed 0..n-1 so the rows
# line up with df_results.
feedback_frames = [fetch_fb(record) for record in records]
df_feedback = pd.concat(feedback_frames).reset_index(drop=True)

# Side-by-side table of app outputs and their feedback scores (cell output).
pd.concat([df_results, df_feedback], axis=1)

✅ record record_hash_d0626c3ea16a7c9ef3e3fe364c0aa9df from JokeTeller-v1 -> default.sqlite
✅ feedback feedback_result_hash_eb0b78456aa7cfaf7b690e7b21d2e9c2 on record_hash_d0626c3ea16a7c9ef3e3fe364c0aa9df -> default.sqlite
✅ record record_hash_7e192ead2b349e24d1a88b9303e9d5b6 from JokeTeller-v1 -> default.sqlite
✅ feedback feedback_result_hash_99d97097706ca7670249703d5dcf2c18 on record_hash_7e192ead2b349e24d1a88b9303e9d5b6 -> default.sqlite
✅ record record_hash_6cdc976658ea458ba108722792d1cabc from JokeTeller-v1 -> default.sqlite
✅ feedback feedback_result_hash_6ca12373d74f9b1110aadb21c6d6f8d4 on record_hash_6cdc976658ea458ba108722792d1cabc -> default.sqlite


Unnamed: 0,adjective,subject,text,not_toxic
0,sad,duck,\n\nQ: Why did the duck go to the therapist?\n...,0.034993
1,absurd,goose,\n\nQ: What did the baby goose say when it was...,0.00859
