# Lesson 5: Passive and active monitoring

## Setup

In [1]:
import pandas as pd

In [2]:
import whylogs as why

In [3]:
import helpers

In [4]:
from langkit import llm_metrics

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [5]:
# Initialise LangKit's LLM metrics. The call is the last expression of the
# cell, so its return value (a UdfSchema) is echoed as the cell output.
llm_metrics.init()

<whylogs.experimental.core.udf_schema.UdfSchema at 0x7f40c1815ae0>

In [6]:
from whylogs.experimental.core.udf_schema import register_dataset_udf

In [7]:
from whylogs.experimental.core.udf_schema import udf_schema

In [8]:
# Keep a handle on the schema returned by udf_schema().
# NOTE(review): llm_schema is not referenced again in this notebook; the
# loggers created later call udf_schema() themselves.
llm_schema = udf_schema()

In [9]:
# Simplest form: a logger that only receives the UDF schema.
# The next cell rebinds llm_logger with additional options.
llm_logger = why.logger(schema=udf_schema())

In [10]:
# Rolling logger configured with interval=1 and when="H", using the UDF schema.
# why.logger() picks the logger type from its `mode` keyword; the previous
# spelling `model=` did not select the rolling logger.
llm_logger = why.logger(
    mode = "rolling",
    interval = 1,
    when = "H",
    schema = udf_schema()
)

**Note**: To access the WhyLabs platform that was built in the previous lessons: https://hub.whylabsapp.com/resources/model-1/profiles?profile=ref-EBT5yDFL0lyq0r93&sessionToken=session-pPdc5R9m

## Building your own active monitoring guardrails

In [11]:
import openai

In [12]:
# Credentials and endpoint come from the local helpers module rather than
# being hardcoded in the notebook.
# NOTE(review): prompt_llm() below calls openai.ChatCompletion.create, the
# pre-1.0 interface, where the endpoint attribute is `api_base`; confirm that
# `base_url` actually takes effect with the installed openai version.
openai.api_key = helpers.get_openai_key()
openai.base_url = helpers.get_openai_base_url()

In [13]:
# Logger used by the request/prompt/response/reply helpers defined below.
# It is created without a schema here; a later cell rebinds this name to a
# logger whose schema carries validators.
active_llm_logger = why.logger()

In [14]:
def user_request():
    """Ask the user for an item, log it, and hand it back.

    Entering ``quit`` (in any letter case) raises ``KeyboardInterrupt`` so
    the calling loop can stop; nothing is logged in that case.

    Returns:
        The text entered by the user, exactly as typed.

    Raises:
        KeyboardInterrupt: If the user enters ``quit``.
    """
    question = "\nEnter your desired item to make a recipe(or 'quit'):"
    item = input(question)

    # KeyboardInterrupt doubles as the exit signal the chat loop catches.
    if item.lower() == "quit":
        raise KeyboardInterrupt()

    # Record the raw request under the "request" column.
    active_llm_logger.log({"request": item})
    return item

In [15]:
def prompt_llm(request):
    """Wrap the user's request in a recipe prompt, send it, and return the reply.

    The prompt and the model's response are each logged through
    ``active_llm_logger`` (columns ``"prompt"`` and ``"response"``).

    Args:
        request: The item the user wants a recipe for.

    Returns:
        The text content of the first choice returned by the chat completion.

    Raises:
        Any exception raised by the logger (for example from a validator
        action attached to its schema) or by the OpenAI call propagates
        unchanged.
    """
    # Build the prompt from adjacent string literals. The earlier
    # triple-quoted version with `"\` line endings embedded literal quote
    # characters and indentation whitespace in the text sent to the model.
    prompt = (
        "Please give me a short recipe for creating "
        "the following item in up to 6 steps. Each step of the recipe "
        "should be summarized in no more than 200 characters. "
        f"Item: {request}"
    )

    # Log prompt
    active_llm_logger.log({"prompt": prompt})

    # Collect response from LLM
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": prompt}],
    )
    response = completion["choices"][0]["message"]["content"]

    # Log response
    active_llm_logger.log({"response": response})

    return response

In [16]:
def user_reply_success(request,response):
    """Show the generated recipe to the user and log what was shown.

    Args:
        request: The item the user asked for.
        response: The recipe text returned by the model.
    """
    message = f"\nSuccess! Here is the recipe for {request}:\n{response}"
    print(message)

    # Keep a record of the exact text shown to the user.
    active_llm_logger.log({"reply": message})

In [17]:
def user_reply_failure(request="your request"):
    """Tell the user no recipe can be provided and log that message.

    Args:
        request: The item the user asked for; a generic phrase is used when
            the caller does not supply one.
    """
    template = (
        "\nUnfortunately, we are not able to provide a recipe for "
        "{item} at this time. Please try Recipe Creator 900 "
        "in the future."
    )
    message = template.format(item=request)
    print(message)

    # Keep a record of the exact text shown to the user.
    active_llm_logger.log({"reply": message})

In [18]:
class LLMApplicationValidationError(ValueError):
    """Raised when logged data is rejected by a guardrail validator.

    The chat loop catches this error to send the user a failure reply.
    """

In [19]:
# Chat loop: keep serving recipes until the user quits or an exchange is
# rejected. The logger created above was given no validators, so the
# validation branch only becomes relevant once guardrails are attached.
while True:
    try:
        request = user_request()
        response = prompt_llm(request)
        user_reply_success(request, response)
    except LLMApplicationValidationError:
        # A guardrail rejected the exchange: apologise and end the session.
        user_reply_failure(request)
        break
    except KeyboardInterrupt:
        # Raised by user_request() when the user types "quit".
        break


Enter your desired item to make a recipe(or 'quit'):turkey

Success! Here is the recipe for turkey:
1. Preheat oven to 325°F.
2. Pat dry whole turkey with paper towels, season with salt and pepper.
3. Stuff turkey cavity with aromatics like onion, garlic, and herbs.
4. Place turkey on a roasting rack in a roasting pan, breast side up.
5. Roast turkey for about 13-15 minutes per pound, basting every 30 minutes.
6. Turkey is done when thermometer inserted into the thickest part of the thigh registers 165°F. Let it rest before carving.

Enter your desired item to make a recipe(or 'quit'):quit


In [20]:
from whylogs.core.relations import Predicate
from whylogs.core.metrics.condition_count_metric import Condition
from whylogs.core.validators import ConditionValidator

In [21]:
def raise_error(validator_name, condition_name, value):
    """Validator action that aborts by raising LLMApplicationValidationError.

    Args:
        validator_name: Name of the validator that triggered the action.
        condition_name: Name of the condition involved (accepted but not
            included in the error message).
        value: The value that triggered the action.

    Raises:
        LLMApplicationValidationError: Always.
    """
    message = f"Failed {validator_name} with value {value}."
    raise LLMApplicationValidationError(message)

In [22]:
# Single named condition ("<0.3") built from a less-than-0.3 predicate; both
# validators below share it. In the recorded run further down, a value of
# about 0.58 triggered the validator's action.
low_condition = {"<0.3": Condition(Predicate().less_than(0.3))}

In [23]:
# Validator named "Toxic": checks the shared low_condition and calls
# raise_error as its action.
toxicity_validator = ConditionValidator(
    name="Toxic",
    conditions=low_condition,
    actions=[raise_error],
)

In [24]:
# Validator named "Refusal": checks the shared low_condition and calls
# raise_error as its action.
refusal_validator = ConditionValidator(
    name="Refusal",
    conditions=low_condition,
    actions=[raise_error],
)

In [25]:
# Map column names to the validators that guard them. The keys are derived
# metric columns rather than the raw logged fields: in the recorded run below,
# logging a "response" value is what triggers the "Refusal" validator.
llm_validators = {
    "prompt.toxicity": [toxicity_validator],
    "response.refusal_similarity": [refusal_validator]
}

In [26]:
# Rebind the logger used by the helper functions to a rolling logger whose
# schema carries the guardrail validators.
# why.logger() picks the logger type from its `mode` keyword; the previous
# spelling `model=` did not select the rolling logger.
active_llm_logger = why.logger(
    mode = "rolling",
    interval = 5,
    when = "M",
    base_name = "active_llm",
    schema = udf_schema(validators = llm_validators)
)

In [27]:
# Smoke-test the refusal guardrail with a canned refusal. The validation
# error is the point of this cell, so catch it and print it instead of
# letting an uncaught exception halt "Restart & Run All".
try:
    active_llm_logger.log(
        {"response":"I'm sorry, but I can't answer that."}
    )
except LLMApplicationValidationError as error:
    print(f"{type(error).__name__}: {error}")

LLMApplicationValidationError: Failed Refusal with value 0.5789185166358948.

 ⚠️ **Disclaimer**: Please be aware that the code may not capture all safety concerns and some undesired responses can still pass through. We encourage you to explore ways in which you can make the monitoring system more robust.

In [None]:
# Same chat loop as before, now running against the logger that carries the
# toxicity and refusal validators, so a rejected exchange ends the session
# with a failure reply.
while True:
    try:
        request = user_request()
        response = prompt_llm(request)
        user_reply_success(request, response)
    except LLMApplicationValidationError:
        # A guardrail rejected the prompt or response: apologise and stop.
        user_reply_failure(request)
        break
    except KeyboardInterrupt:
        # Raised by user_request() when the user types "quit".
        break