In [1]:
import time
import pandas as pd
from langchain import LLMChain, PromptTemplate, OpenAI
from trulens_eval import Tru, TruChain, Feedback, Huggingface

# Start the local TruLens dashboard. The URL is printed in the cell output so you can open it in your browser.
# force=True is passed so that a dashboard that is already running is stopped first
# (the output shows "Force stopping dashboard ..." before "Starting dashboard ...").
Tru().start_dashboard(force=True)

Force stopping dashboard ...
Starting dashboard ...


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.178.30:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [2]:
# Define your LangChain App: a single-prompt chain that asks the LLM for a joke.
# NOTE: the original model "text-davinci-003" has been shut down by OpenAI, so
# requests to it fail. "gpt-3.5-turbo-instruct" is the documented replacement
# for the legacy completions endpoint.
my_chain = LLMChain(
    llm=OpenAI(model="gpt-3.5-turbo-instruct", temperature=0.7),
    prompt=PromptTemplate.from_template("Tell a {adjective} joke about {subject}")
)

# Smoke-test the bare chain; the returned dict (inputs plus generated "text")
# is displayed as the cell output.
my_chain(inputs={"adjective": "absurd", "subject": "ducks"})

{'adjective': 'absurd',
 'subject': 'ducks',
 'text': '\n\nQ: Why did the duck go to the movies?\nA: To see Quack Sparrow!'}

In [3]:
# Define your feedbacks.
# A single Huggingface provider is shared by both feedback functions instead of
# constructing a separate provider for each one.
hugs = Huggingface()

# Evaluated on the app output only.
f_not_toxic = Feedback(hugs.not_toxic).on_output()

# Evaluated on the app input together with the app output.
f_lang_match = Feedback(hugs.language_match).on_input_output()

# Wrap your App with TruChain
tru_chain = TruChain(
    app=my_chain,
    app_id="JokeTeller-v1",
    # NOTE(review): both URLs are the same hard-coded LAN address (port 8502,
    # while the dashboard above reports port 8501) -- confirm these are intended.
    metadata={"model_url": "http://192.168.178.30:8502", "run_url": "http://192.168.178.30:8502"},
    feedbacks=[f_not_toxic, f_lang_match],
)

# Make calls to your App as usual
tru_chain(inputs={"adjective": "sad", "subject": "ducks"})

✅ In not_toxic, input text will be set to *.__record__.main_output or `Select.RecordOutput` .
✅ In language_match, input text1 will be set to *.__record__.main_input or `Select.RecordInput` .
✅ In language_match, input text2 will be set to *.__record__.main_output or `Select.RecordOutput` .
✅ app JokeTeller-v1 -> default.sqlite
✅ feedback def. feedback_definition_hash_ac50645c2edeb4b6730d485385b3c5f9 -> default.sqlite
✅ feedback def. feedback_definition_hash_e4344b331d08ef93e2e977444b50bf89 -> default.sqlite


{'adjective': 'sad',
 'subject': 'ducks',
 'text': '\n\nQ: What did the duck say when his friend died?\nA: Quack, quack, goodbye.'}

✅ record record_hash_55f10e82bcee51d41903536f2dfac4d4 from JokeTeller-v1 -> default.sqlite


Waiting for {'error': 'Model papluca/xlm-roberta-base-language-detection is currently loading', 'estimated_time': 44.49275207519531} (44.49275207519531) second(s).
Waiting for {'error': 'Model papluca/xlm-roberta-base-language-detection is currently loading', 'estimated_time': 44.49275207519531} (44.49275207519531) second(s).


✅ feedback feedback_result_hash_52a22ea3c2854f8c326534c92c0ab4ee on record_hash_55f10e82bcee51d41903536f2dfac4d4 -> default.sqlite
✅ feedback feedback_result_hash_bd997e5de58816e0bf77bf64cb3afaa2 on record_hash_55f10e82bcee51d41903536f2dfac4d4 -> default.sqlite


In [2]:
import time
from trulens_eval.app import App
from trulens_eval.schema import FeedbackResultStatus, Record

class FetchFeedback:
    """Poll the TruLens database until the feedback results of a record are in.

    An instance is a callable: given a record it repeatedly queries the
    database for that record's feedback rows whose status is DONE or FAILED,
    and returns them once at least as many rows exist as the app has
    feedback functions.
    """

    def __init__(self, app: App, max_attempts: int = 5, sleep_sec: int = 1):
        """Remember the polling settings and the number of expected feedbacks.

        Args:
            app: The wrapped app; only ``len(app.feedbacks)`` is read from it.
            max_attempts: Maximum number of database queries per record.
            sleep_sec: Seconds to wait between two consecutive queries.
        """
        self.db = Tru().db
        self.fb_count = len(app.feedbacks)
        self.max_attempts = max_attempts
        self.sleep_sec = sleep_sec

    def __call__(self, record: Record) -> pd.DataFrame:
        """Return the feedback results of ``record`` with one column per feedback name.

        Args:
            record: The record whose ``record_id`` is looked up in the database.

        Returns:
            The "fname"/"result" columns of the fetched rows, pivoted so that
            each distinct ``fname`` becomes a column (``pivot_table``'s default
            aggregation applies if a name occurs more than once).

        Raises:
            RuntimeError: If fewer than the expected number of finished
                feedback rows are found after ``max_attempts`` queries.
        """
        finished = [FeedbackResultStatus.DONE, FeedbackResultStatus.FAILED]
        for attempt in range(1, self.max_attempts + 1):
            df = self.db.get_feedback(
                record_id=record.record_id,
                status=finished
            )
            if len(df) >= self.fb_count:
                return df[["fname", "result"]] \
                    .pivot_table(columns="fname", values=["result"]) \
                    .rename_axis(None, axis=1)
            # Only wait when another query will follow; sleeping after the
            # last attempt would just delay the timeout error.
            if attempt < self.max_attempts:
                time.sleep(self.sleep_sec)
        raise RuntimeError(f"{self.__class__} timeout after {self.max_attempts} attempts")

In [6]:
# Inputs to run through the instrumented chain, one dict per call.
dataset = [
    dict(adjective="sad", subject="duck"),
    dict(adjective="absurd", subject="goose"),
]

# Each call yields a (result, record) pair; transpose the pairs into two tuples.
results, records = zip(*(
    tru_chain.call_with_record(inputs) for inputs in dataset
))

fetch_fb = FetchFeedback(app=tru_chain)

# One row per call: the chain's result dicts ...
df_results = pd.DataFrame(results)

# ... and the matching feedback scores, renumbered 0..n-1 so the rows line up.
df_feedback = pd.concat(
    [fetch_fb(record) for record in records],
    ignore_index=True,
)

# Side-by-side table of chain results and feedback scores (displayed as cell output).
pd.concat([df_results, df_feedback], axis=1)

✅ record record_hash_fa2685c9ae648e544920620cfffdf5bc from JokeTeller-v1 -> default.sqlite
✅ feedback feedback_result_hash_63223e4862870985c4014b737f758d70 on record_hash_fa2685c9ae648e544920620cfffdf5bc -> default.sqlite
✅ feedback feedback_result_hash_db9ea7d8969963202a89b5b5bc71410d on record_hash_fa2685c9ae648e544920620cfffdf5bc -> default.sqlite
✅ record record_hash_96b6cf8c1326a9e88449716336f6c1a9 from JokeTeller-v1 -> default.sqlite
✅ feedback feedback_result_hash_823c16a63dcccb38fe8d29fc980b3816 on record_hash_96b6cf8c1326a9e88449716336f6c1a9 -> default.sqlite
✅ feedback feedback_result_hash_04b2ee691c73f96d12d45a4e58e95d6c on record_hash_96b6cf8c1326a9e88449716336f6c1a9 -> default.sqlite


Unnamed: 0,adjective,subject,text,language_match,not_toxic
0,sad,duck,\n\nQ: Why did the duck feel so blue?\nA: Beca...,0.977098,0.004071
1,absurd,goose,\n\nQ: What did the goose say when he was aske...,0.068616,0.008994
