# Dummy Provider Example and High Volume Robustness Testing

This notebook has two purposes: 

- Demonstrate the dummy feedback function provider, which behaves like the
  huggingface provider except that it does not actually perform any network
  calls and just produces constant results. It can be used to prototype
  feedback function wiring for your apps before invoking potentially slow
  (to run/to load) feedback functions.

- Test out high-volume record and feedback computation. To this end, we use the
  custom app, which is a dummy in the sense that it produces useless answers
  without making any API calls but otherwise behaves similarly to real apps,
  together with the dummy feedback function provider.

In [1]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys

# If running from a checkout of the GitHub repo (rather than an installed
# package), add the directory two levels above the current working directory
# to the import path. Presumably that is the directory containing the
# `examples` package imported in the next cell — confirm for your checkout.
sys.path.append(str(Path().cwd().parent.parent.resolve()))

In [2]:
from concurrent.futures import as_completed
from time import sleep

from examples.expositional.end2end_apps.custom_app.custom_app import CustomApp
from tqdm.auto import tqdm

from trulens_eval import Feedback
from trulens_eval import Tru
from trulens_eval.feedback.provider.hugs import Dummy
from trulens_eval.schema.feedback import FeedbackMode
from trulens_eval.tru_custom_app import TruCustomApp
from trulens_eval.utils.threading import TP

# TP instance that later cells use to submit tasks (`tp.submit(...)`).
tp = TP()

# Settings shared by both dummy providers; they differ only in `error_prob`.
_dummy_settings = dict(
    loading_prob=0.0,
    freeze_prob=0.0,  # we expect requests to have their own timeouts so freeze should never happen
    overloaded_prob=0.0,
    rpm=1000,
    alloc=0,  # how much fake data to allocate during requests
    delay=10.0,
)

# Dummy provider created with error_prob=1.0.
d_error = Dummy(error_prob=1.0, **_dummy_settings)

# Dummy provider created with error_prob=0.0.
d_noerror = Dummy(error_prob=0.0, **_dummy_settings)

tru = Tru()

# Uncomment to start from an empty database:
# tru.reset_database()

# Kept as the last expression so the cell still displays the value returned by
# `start_dashboard`. `_dev` points two directories above the working directory.
tru.start_dashboard(force=True, _dev=Path.cwd().parent.parent.resolve())

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.
Force stopping dashboard ...
Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://172.17.97.146:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [3]:
# Query `tru` for records and feedback with limit=10 and display the first
# element of the returned value (presumably the records table — confirm
# against the Tru API).
tru.get_records_and_feedback(limit=10)[0]

✅ In language_match, input text1 will be set to __record__.main_input or `Select.RecordInput` .
✅ In language_match, input text2 will be set to __record__.main_output or `Select.RecordOutput` .
✅ In output sentiment, input text will be set to __record__.main_output or `Select.RecordOutput` .


Exception in thread Thread-6 (_future_target_wrapper):
Traceback (most recent call last):
  File "/opt/miniconda3/envs/py312/lib/python3.12/threading.py", line 1052, in _bootstrap_inner
    self.run()
  File "/opt/miniconda3/envs/py312/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "/opt/miniconda3/envs/py312/lib/python3.12/threading.py", line 989, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/pmardziel/repos/truera/trulens/trulens_eval/trulens_eval/utils/python.py", line 475, in _future_target_wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/pmardziel/repos/truera/trulens/trulens_eval/trulens_eval/app.py", line 614, in _manage_pending_feedback_results
    self.records_with_pending_feedback_results.remove(record)
  File "/Users/pmardziel/repos/truera/trulens/trulens_eval/trulens_eval/utils/containers.py", line 73, in remove
    self.content.remove(item)
Ke

In [None]:
# Feedback backed by `d_error` (the provider created with error_prob=1.0),
# applied to the record's input and output.
f_dummy1 = Feedback(d_error.language_match).on_input_output()

# Feedback backed by `d_noerror` (the provider created with error_prob=0.0),
# applied to the record's output only and given an explicit display name.
f_dummy2 = Feedback(
    d_noerror.positive_sentiment,
    name="output sentiment",
).on_output()

# Create custom app:
ca = CustomApp(delay=0.0, alloc=0)

# Create trulens wrapper around the custom app, attaching both feedbacks:
ta = TruCustomApp(
    ca,
    app_id="customapp",
    feedbacks=[f_dummy1, f_dummy2],
    # feedback_mode=FeedbackMode.DEFERRED
)

In [4]:
# One invocation of the custom app inside the TruCustomApp recording context.
with ta as recorder:
    res = ca.respond_to_query("hello there")

In [5]:

# Wait on `ta` for outstanding feedback results, passing feedback_timeout=5
# (presumably a per-feedback limit in seconds — confirm against the API).
ta.wait_for_feedback_results(feedback_timeout=5)

Feedback Function exception caught: Traceback (most recent call last):
  File "/Users/pmardziel/repos/truera/trulens/trulens_eval/trulens_eval/feedback/feedback.py", line 899, in run
    result_and_meta, part_cost = mod_base_endpoint.Endpoint.track_all_costs_tally(
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/pmardziel/repos/truera/trulens/trulens_eval/trulens_eval/feedback/provider/endpoint/base.py", line 503, in track_all_costs_tally
    result, cbs = Endpoint.track_all_costs(
                  ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/pmardziel/repos/truera/trulens/trulens_eval/trulens_eval/feedback/provider/endpoint/base.py", line 483, in track_all_costs
    return Endpoint._track_costs(
           ^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/pmardziel/repos/truera/trulens/trulens_eval/trulens_eval/feedback/provider/endpoint/base.py", line 581, in _track_costs
    result: T = __func(*args, **kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^

Timeout waiting for feedback result for language_match.


[Record(record_id='record_hash_7154900aaf3c9bcb000c04edca79df33', app_id='customapp', cost=Cost(n_requests=0, n_successful_requests=0, n_classes=0, n_tokens=0, n_stream_chunks=0, n_prompt_tokens=0, n_completion_tokens=0, cost=0.0), perf=Perf(start_time=datetime.datetime(2024, 6, 27, 17, 2, 0, 579412), end_time=datetime.datetime(2024, 6, 27, 17, 2, 0, 779123)), ts=datetime.datetime(2024, 6, 27, 17, 2, 0, 779200), tags='-', meta=None, main_input='hello there', main_output="The answer to hello there is probably herp dessecorp .gnihtemos gniod m'I dneterp ot setyb 65 detacolla I :knuhc tnaveleR,dessecorp hello there :knuhc tnaveleR,dessecorp EREHT OLLEH :knuhc tnaveleR derp and 56 bytes or something ...", main_error=None, calls=[RecordAppCall(call_id='a7f3acaa-e590-44f3-a561-dd4c56216f3f', stack=[RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=examples.expositional.end2end_apps.custom_app.custom_app.CustomApp, id=13520091424, init_bindings=None), name='respond_to_query')), R

In [None]:
# Sequential app invocation: call the app one query at a time and check that
# every recording context yields a record.

for i in tqdm(range(2), desc="invoking app"):
    with ta as recorder:
        res = ca.respond_to_query(f"hello {i}")

    # The recorder must hold a record for the call made just above.
    rec = recorder.get()
    assert rec is not None

In [None]:
# ta.wait_for_feedback_results()

In [None]:
# Control retries in deferred evaluator.
# Uncomment the two assignments below to override the retry settings on `tru`
# (values presumably in seconds, going by the attribute names — confirm).
# tru.RETRY_FAILED_SECONDS = 60
# tru.RETRY_RUNNING_SECONDS = 5
# Start the evaluator with restart=True (presumably replacing an evaluator
# that is already running — confirm against the Tru API).
tru.start_evaluator(restart=True)

In [None]:
# Parallel feedback evaluation.
#
# Submits `num_tests` feedback calls through `tp` and tallies how many finish
# successfully ("good") versus raise ("bad"). The progress bar description
# shows the running "good / bad" counts.

futures = []
num_tests = 10000
good = 0
bad = 0
failure_kinds = {}  # exception class name -> number of tasks that raised it


def test_feedback(msg):
    """Run the dummy sentiment feedback on `msg`.

    Uses the `d_noerror` provider created earlier in the notebook. The previous
    version referenced an undefined name `d`, so every task raised NameError
    and was silently counted as a failure.

    Args:
        msg: Text passed to `positive_sentiment`.

    Returns:
        A `(msg, result)` tuple so the caller can match a result to its input.
    """
    return msg, d_noerror.positive_sentiment(msg)


for _ in tqdm(range(num_tests), desc="starting feedback task"):
    futures.append(tp.submit(test_feedback, msg="good"))

prog = tqdm(as_completed(futures), total=num_tests)

for f in prog:
    try:
        res = f.result()
        # Validate before counting, so a task whose payload is wrong is
        # tallied once as "bad" instead of as both "good" and "bad".
        assert res[0] == "good"
        good += 1
    except Exception as e:
        bad += 1
        # Keep failures visible: remember what kind of error occurred instead
        # of discarding it, so a systematic problem is not mistaken for load.
        kind = type(e).__name__
        failure_kinds[kind] = failure_kinds.get(kind, 0) + 1
        prog.set_postfix_str(f"last error: {kind}")

    prog.set_description_str(f"{good} / {bad}")

In [None]:
# Parallel app invocation.

def run_query(q):
    """Invoke the custom app on `q` under the `ta` recorder.

    Asserts that the recording context produced a record, then returns a
    short status string naming the query.
    """
    with ta as recording:
        ca.respond_to_query(q)

    record = recording.get()
    assert record is not None

    return f"run_query {q} result"

for i in tqdm(range(100), desc="starting app task"):
    # Rewrite the same output line with the pool's completed-task counter.
    print(tp.completed_tasks, end="\r")
    # The value returned by `submit` is not kept, so nothing here waits for
    # the tasks to finish or inspects their outcome.
    tp.submit(run_query, q=f"hello {i}")