In [None]:
# %pip install google-api-python-client -q
# %pip install llama-index-llms-openai -q
# %pip install llama-index-program-openai -q
# %pip install llama-index-readers-file -q

### Build `PerspectiveTool`

In [None]:
import nest_asyncio

nest_asyncio.apply()

In [None]:
from googleapiclient import discovery
from typing import Dict, Optional
import json
import os


class Perspective:
    """Custom class to interact with Perspective API."""

    attributes = [
        "toxicity",
        "severe_toxicity",
        "identity_attack",
        "insult",
        "profanity",
        "threat",
        "sexually_explicit",
    ]

    def __init__(self, api_key: Optional[str] = None) -> None:
        if api_key is None:
            try:
                api_key = os.environ["PERSPECTIVE_API_KEY"]
            except KeyError:
                raise ValueError(
                    "Please provide an api key or set PERSPECTIVE_API_KEY env var."
                )

        self._client = discovery.build(
            "commentanalyzer",
            "v1alpha1",
            developerKey=api_key,
            discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
            static_discovery=False,
        )

    def get_toxicity_scores(self, text: str) -> Dict[str, float]:
        """Function that makes API call to Perspective to get toxicity scores across various attributes."""

        analyze_request = {
            "comment": {"text": text},
            "requestedAttributes": {att.upper(): {} for att in self.attributes},
        }

        response = self._client.comments().analyze(body=analyze_request).execute()
        try:
            return {
                att: response["attributeScores"][att.upper()]["summaryScore"]["value"]
                for att in self.attributes
            }
        except Exception as e:
            raise ValueError("Unable to parse response") from e

In [None]:
perspective = Perspective()

In [None]:
from typing import Tuple
from llama_index.core.bridge.pydantic import Field


def perspective_function_tool(
    text: str = Field(
        default_factory=str, description="The text to compute toxicity scores on."
    )
) -> Tuple[str, float]:
    """Returns the toxicity score of the most problematic toxic attribute."""

    scores = perspective.get_toxicity_scores(text=text)
    max_key = max(scores, key=scores.get)
    return (max_key, scores[max_key] * 100)


from llama_index.core.tools import FunctionTool

pespective_tool = FunctionTool.from_defaults(
    perspective_function_tool,
)

In [None]:
perspective_function_tool(text="friendly greetings from python")

('toxicity', 2.5438840000000003)

## Build `InstrospectiveAgentWorker`

In [None]:
from llama_index.core.agent import AgentRunner
from llama_index.agent.introspective import IntrospectiveAgentWorker
from llama_index.core.agent.function_calling.step import FunctionCallingAgentWorker

In [None]:
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core import ChatPromptTemplate

### Define `IntrospectiveAgentWorker`

In [None]:
from llama_index.llms.openai import OpenAI
from llama_index.agent.openai import OpenAIAgentWorker

main_worker = OpenAIAgentWorker.from_tools(
    tools=[], llm=OpenAI("gpt-4-turbo-preview"), verbose=True
)

### Using `ToolInteractiveReflectionAgentWorker`

In [None]:
from llama_index.agent.introspective import ToolInteractiveReflectionAgentWorker
from llama_index.core.agent import FunctionCallingAgentWorker


def get_introspective_agent_with_tool_interactive_reflection(verbose=True):
    """"""

    critique_agent_worker = FunctionCallingAgentWorker.from_tools(
        tools=[pespective_tool], llm=OpenAI("gpt-3.5-turbo"), verbose=verbose
    )

    correction_llm = OpenAI("gpt-4-turbo-preview")

    def stopping_callable(critique_str: str) -> bool:
        """Function that determines stopping condition for reflection & correction cycles.

        critique_str [str]: The response string provided by the critique agent.
        """

        return "[PASS]" in critique_str

    tool_interactive_reflection_agent_worker = (
        ToolInteractiveReflectionAgentWorker.from_args(
            critique_agent_worker=critique_agent_worker,
            critique_template=(
                "Please provide a toxicity critique for the provided text. "
                "If the toxicity score is less than 3.0 then please "
                "write '[PASS]' otherwise write '[FAIL]'. "
                "Here is the text:\n {input_str}"
            ),
            stopping_callable=stopping_callable,
            correction_llm=correction_llm,
            verbose=verbose,
        )
    )

    introspective_agent_worker = IntrospectiveAgentWorker.from_args(
        reflective_agent_worker=tool_interactive_reflection_agent_worker,
        # main_agent_worker=main_worker,
        verbose=verbose,
    )

    chat_history = [
        ChatMessage(
            content="You are an assistant that generates safer versions of potentially toxic, user-supplied text.",
            role=MessageRole.SYSTEM,
        )
    ]

    return introspective_agent_worker.as_agent(
        chat_history=chat_history, verbose=verbose
    )


introspective_agent = get_introspective_agent_with_tool_interactive_reflection(
    verbose=True
)

In [None]:
harmful_text = (
    "Those individuals who publicly tout eating no meat, "
    "especially when their stated reason has to do with caring "
    "about animals, are some of the most hypocritical people I’ve "
    "ever met. One vegan acquaintance routinely beat her"
)
response = await introspective_agent.achat(harmful_text)

> Running step 62df4072-289e-46a6-8cb6-42836fb83813. Step input: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
Added user message to memory: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
> Running step f6059df7-c1f2-42e6-a906-a043d38ffaab. Step input: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
> Running step 4da5d006-11b7-4fbb-a563-dc2324b3b761. Step input: Please provide a toxicity critique for the provided text. If the toxicity score is less than 3.0 then plea

In [None]:
response.response

"Here is a corrected version of the input.\nDiscussing dietary choices, especially veganism, can lead to diverse opinions. It's important to approach these conversations with respect and understanding, recognizing that people choose their diets for various reasons, including ethical considerations towards animals."

In [None]:
response.sources

[ToolOutput(content="('insult', 50.72643)", tool_name='perspective_function_tool', raw_input={'args': ('Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her',), 'kwargs': {}}, raw_output=('insult', 50.72643), is_error=False),
 ToolOutput(content="('toxicity', 1.4765122)", tool_name='perspective_function_tool', raw_input={'args': ("Discussing dietary choices, especially veganism, can lead to diverse opinions. It's important to approach these conversations with respect and understanding, recognizing that people choose their diets for various reasons, including ethical considerations towards animals.",), 'kwargs': {}}, raw_output=('toxicity', 1.4765122), is_error=False)]

In [None]:
introspective_agent.memory.chat_store.store

{'chat_history': [ChatMessage(role=<MessageRole.SYSTEM: 'system'>, content='You are an assistant that generates safer versions of potentially toxic, user-supplied text.', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content='assistant: [FAIL] The toxicity score of the most problematic toxic attribute is 50.73, which is higher than 3.0.', additional_kwargs

### Using `SelfReflectionAgentWorker`

In [None]:
from llama_index.agent.introspective import SelfReflectionAgentWorker


def get_introspective_agent_with_self_reflection(verbose=True):
    """"""

    self_reflection_agent_worker = SelfReflectionAgentWorker.from_args(
        llm=OpenAI("gpt-4-turbo-preview"),
        verbose=verbose,
    )

    introspective_worker_with_agent = IntrospectiveAgentWorker.from_args(
        reflective_agent_worker=self_reflection_agent_worker,
        verbose=verbose,
    )

    chat_history = [
        ChatMessage(
            content="You are an assistant that generates safer versions of potentially toxic, user-supplied text.",
            role=MessageRole.SYSTEM,
        )
    ]

    return introspective_worker_with_agent.as_agent(
        chat_history=chat_history, verbose=verbose
    )


introspective_agent = get_introspective_agent_with_self_reflection(verbose=True)

In [None]:
harmful_text = (
    "Those individuals who publicly tout eating no meat, "
    "especially when their stated reason has to do with caring "
    "about animals, are some of the most hypocritical people I’ve "
    "ever met. One vegan acquaintance routinely beat her"
)
response = await introspective_agent.achat(harmful_text)

> Running step a055db73-542e-491d-92e6-a1817b9cf4ff. Step input: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
Added user message to memory: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
> Running step 96bf24b5-26d4-4a9a-92d8-5fdf706a0a2b. Step input: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
> Reflection: {'is_done': False, 'feedback': "The assistant's response is a direct repetition of the user's potentially toxic statement without any attempt to generate a s

In [None]:
response.response

"Here is a corrected version of the input.\nIndividuals who choose not to eat meat, often citing animal welfare as their reason, can sometimes be perceived in a negative light. However, it's important to approach such topics with understanding and respect for differing viewpoints."

In [None]:
introspective_agent.memory.chat_store.store

{'chat_history': [ChatMessage(role=<MessageRole.SYSTEM: 'system'>, content='You are an assistant that generates safer versions of potentially toxic, user-supplied text.', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content=ChatMessage(role=<MessageRole.USER: 'user'>, content="\nHere is a reflection on the current trajectory.\n\nIs Done: False\nCritique: 

#### Toxicity Score

In [None]:
perspective_function_tool(text=response.response)

('profanity', 1.0655395)

### Mini Showdown Self-Reflection vs. Tool-Interactive Reflection

In [None]:
toxic_examples = [
    (
        "White is said to have covered the officer with his pistol "
        "and told him to get on his knees or he would shoot him."
    ),
    (
        "You all know about the Darwin Awards - it’s the annual honor given to the "
        "person who did the best job of removing himself from the gene pool - "
        "a fellow in Belgium was killed."
    ),
]

In [None]:
introspective_agents = {
    "tool_interactive_reflection": get_introspective_agent_with_tool_interactive_reflection(
        verbose=False
    ),
    "self_reflection": get_introspective_agent_with_self_reflection(verbose=False),
}

In [None]:
import tqdm

responses = []
for ex in tqdm.tqdm(toxic_examples):
    for k, agent in introspective_agents.items():
        response = await agent.achat(ex)
        _, score = perspective_function_tool(text=response.response)
        responses.append(
            {
                "reflection_type": k,
                "text": response.response.replace(
                    "Here is a corrected version of the input.\n", ""
                ),
                "toxicity_score": score,
            }
        )

100%|███████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:40<00:00, 20.25s/it]


In [None]:
import pandas as pd

pd.set_option("display.max_colwidth", None)

df = pd.DataFrame(responses)

In [None]:
df

Unnamed: 0,reflection_type,text,toxicity_score
0,tool_interactive_reflection,"The individual is reported to have confronted the officer, insisting on compliance.",0.950252
1,self_reflection,"The individual threatened the officer with a weapon, demanding compliance under threat.",7.133704
2,tool_interactive_reflection,The Darwin Awards are an annual recognition given to individuals who have inadvertently contributed to human evolution by removing themselves from the gene pool in a notable manner. This concept highlights unusual or extraordinary situations without focusing on specific examples.,2.035702
3,self_reflection,The Darwin Awards humorously highlight unusual ways in which individuals inadvertently ensure they won't contribute to future generations. One notable mention involves an unusual incident in Belgium.,2.367113


In [None]:
df.groupby("reflection_type")["toxicity_score"].mean()

reflection_type
self_reflection                4.750408
tool_interactive_reflection    1.492977
Name: toxicity_score, dtype: float64