In [None]:
# %pip install google-api-python-client -q
# %pip install llama-index-llms-openai -q
# %pip install llama-index-program-openai -q
# %pip install llama-index-readers-file -q
# %pip install llama-index-agent-introspective -q
# %pip install llama-index-agent-openai -q

### Build `PerspectiveTool`

In [None]:
import nest_asyncio

# Patch asyncio so an event loop that is already running can be re-entered.
# NOTE(review): presumably required because the notebook kernel already runs a
# loop while the agents below are awaited — confirm.
nest_asyncio.apply()

In [None]:
from googleapiclient import discovery
from typing import Dict, Optional
import json
import os


class Perspective:
    """Small client wrapper for scoring text with the Perspective API."""

    # Attribute names to request. They are upper-cased when sent to the API and
    # used unchanged (lower-case) as the keys of the returned score mapping.
    attributes = [
        "toxicity",
        "severe_toxicity",
        "identity_attack",
        "insult",
        "profanity",
        "threat",
        "sexually_explicit",
    ]

    def __init__(self, api_key: Optional[str] = None) -> None:
        """Build the underlying API client.

        Args:
            api_key: Developer key for the API. When ``None``, the value of the
                ``PERSPECTIVE_API_KEY`` environment variable is used instead.

        Raises:
            ValueError: If no key is given and ``PERSPECTIVE_API_KEY`` is unset.
        """
        developer_key = api_key
        if developer_key is None:
            try:
                developer_key = os.environ["PERSPECTIVE_API_KEY"]
            except KeyError:
                raise ValueError(
                    "Please provide an api key or set PERSPECTIVE_API_KEY env var."
                )

        build_options = {
            "developerKey": developer_key,
            "discoveryServiceUrl": "https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
            "static_discovery": False,
        }
        self._client = discovery.build("commentanalyzer", "v1alpha1", **build_options)

    def get_toxicity_scores(self, text: str) -> Dict[str, float]:
        """Request a score for every entry in ``attributes`` for the given text.

        Args:
            text: The comment text to analyze.

        Returns:
            A mapping from each lower-case attribute name to the ``value`` of
            its ``summaryScore`` in the API response.

        Raises:
            ValueError: If the response lacks the expected nested structure.
        """
        requested = {}
        for name in self.attributes:
            requested[name.upper()] = {}

        payload = {
            "comment": {"text": text},
            "requestedAttributes": requested,
        }

        # The request itself sits outside the try block, so transport/API errors
        # propagate unchanged; only response parsing is converted to ValueError.
        response = self._client.comments().analyze(body=payload).execute()

        try:
            scores = {}
            for name in self.attributes:
                per_attribute = response["attributeScores"][name.upper()]
                scores[name] = per_attribute["summaryScore"]["value"]
            return scores
        except Exception as e:
            raise ValueError("Unable to parse response") from e

In [None]:
# Module-level client used by `perspective_function_tool`. With no argument the
# API key is read from the PERSPECTIVE_API_KEY environment variable.
perspective = Perspective()

In [None]:
from typing import Tuple
from llama_index.core.bridge.pydantic import Field


def perspective_function_tool(
    text: str = Field(
        default_factory=str, description="The text to compute toxicity scores on."
    )
) -> Tuple[str, float]:
    """Report the single highest-scoring toxicity attribute for ``text``.

    Args:
        text: The text to score with the module-level ``perspective`` client.

    Returns:
        A ``(attribute_name, score)`` pair, where ``score`` is the raw value
        returned by ``get_toxicity_scores`` multiplied by 100.
    """
    attribute_scores = perspective.get_toxicity_scores(text=text)

    # On ties the first attribute in iteration order wins.
    worst_attribute, worst_value = max(
        attribute_scores.items(), key=lambda pair: pair[1]
    )
    return (worst_attribute, worst_value * 100)


from llama_index.core.tools import FunctionTool

# Wrap the scoring function as a tool object that an agent can call.
# NOTE(review): the variable name is misspelled ("pespective"). It is referenced
# with the same spelling where the critique agent worker is built, so any rename
# has to change both places together.
pespective_tool = FunctionTool.from_defaults(
    perspective_function_tool,
)

In [None]:
perspective_function_tool(text="friendly greetings from python")

('toxicity', 2.5438840000000003)

## Build `IntrospectiveAgentWorker`

In [None]:
from llama_index.core.agent import AgentRunner
from llama_index.agent.introspective import IntrospectiveAgentWorker
from llama_index.core.agent.function_calling.step import FunctionCallingAgentWorker

In [None]:
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core import ChatPromptTemplate

### Define `IntrospectiveAgentWorker`

In [None]:
from llama_index.llms.openai import OpenAI
from llama_index.agent.openai import OpenAIAgentWorker

# Main agent worker with an empty tool list.
# NOTE(review): in this notebook `main_worker` is only referenced from
# commented-out `main_agent_worker=main_worker` arguments, so it is currently
# unused — confirm whether it should be wired in or removed.
main_worker = OpenAIAgentWorker.from_tools(
    tools=[], llm=OpenAI("gpt-4-turbo-preview"), verbose=True
)

### Using `ToolInteractiveReflectionAgentWorker`

In [None]:
from llama_index.agent.introspective import ToolInteractiveReflectionAgentWorker
from llama_index.core.agent import FunctionCallingAgentWorker

# Agent worker that produces critiques; its only tool is the Perspective
# scoring tool defined earlier.
critique_agent_worker = FunctionCallingAgentWorker.from_tools(
    tools=[pespective_tool], llm=OpenAI("gpt-3.5-turbo")
)

# LLM handed to the reflection worker as `correction_llm`.
correction_llm = OpenAI("gpt-4-turbo-preview")


def stopping_callable(critique_str: str) -> bool:
    """Decide whether the reflection & correction cycles should stop.

    Args:
        critique_str: The response string provided by the critique agent.

    Returns:
        ``True`` when the critique contains the literal marker ``[PASS]``,
        ``False`` otherwise.
    """
    pass_marker = "[PASS]"
    has_passed = pass_marker in critique_str
    return has_passed


# Reflection worker built from the critique agent worker, the correction LLM and
# the stopping predicate defined above.
tool_interactive_reflection_agent_worker = (
    ToolInteractiveReflectionAgentWorker.from_args(
        critique_agent_worker=critique_agent_worker,
        # Prompt sent to the critique agent; `{input_str}` is the placeholder
        # for the text under review. The 3.0 threshold is on the 0-100 scale,
        # because `perspective_function_tool` multiplies the raw score by 100.
        # The '[PASS]' marker is what `stopping_callable` looks for.
        critique_template=(
            "Please provide a toxicity critique for the provided text. "
            "If the toxicity score is less than 3.0 then please "
            "write '[PASS]' otherwise write '[FAIL]'. "
            "Here is the text:\n {input_str}"
        ),
        stopping_callable=stopping_callable,
        correction_llm=correction_llm,
        verbose=True,
    )
)

In [None]:
def get_introspective_agent_with_tool_interactive_reflection():
    """Create an introspective agent that reflects via the tool-interactive worker.

    Each call builds a new ``IntrospectiveAgentWorker`` around the module-level
    ``tool_interactive_reflection_agent_worker`` and seeds the agent's chat
    history with a single system message.

    Returns:
        The agent produced by ``IntrospectiveAgentWorker.as_agent``.
    """
    system_message = ChatMessage(
        role=MessageRole.SYSTEM,
        content="You are an assistant that generates safer versions of potentially toxic, user-supplied text.",
    )

    # No `main_agent_worker` is passed (the original kept
    # `main_agent_worker=main_worker` commented out).
    worker = IntrospectiveAgentWorker.from_args(
        reflective_agent_worker=tool_interactive_reflection_agent_worker,
        verbose=True,
    )

    return worker.as_agent(chat_history=[system_message])


# Agent used by the single-example tool-interactive reflection demo that follows.
introspective_agent = get_introspective_agent_with_tool_interactive_reflection()

In [None]:
# Sample toxic input for the demo; note the text ends mid-sentence.
harmful_text = (
    "Those individuals who publicly tout eating no meat, "
    "especially when their stated reason has to do with caring "
    "about animals, are some of the most hypocritical people I’ve "
    "ever met. One vegan acquaintance routinely beat her"
)
# Top-level `await`: this only works where an event loop is already running
# (e.g. a notebook cell), not in a plain script.
response = await introspective_agent.achat(harmful_text)

Added user message to memory: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
> Running step 5216ee3e-b109-435f-a303-7735f8f2009b. Step input: Please provide a toxicity critique for the provided text. If the toxicity score is less than 3.0 then please write '[PASS]' otherwise write '[FAIL]'. Here is the text:
 Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
> Running step f8d46dc4-ffc1-4b3d-8794-4fbbda311985. Step input: None
Critique: assistant: [FAIL] The toxicity score for the provided text is 50.73, which is higher than 3.0.
Correction: People who choose not to eat meat for ethical reasons related to animal welfare are making a personal d

In [None]:
response.response

"Here is a corrected version of the input.\nPeople who choose not to eat meat for ethical reasons related to animal welfare are making a personal decision. It's important to respect diverse perspectives and experiences in discussions about dietary choices."

In [None]:
response.sources

[ToolOutput(content="('insult', 50.72643)", tool_name='perspective_function_tool', raw_input={'args': ('Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her',), 'kwargs': {}}, raw_output=('insult', 50.72643), is_error=False),
 ToolOutput(content="('toxicity', 1.2189079999999999)", tool_name='perspective_function_tool', raw_input={'args': ("People who choose not to eat meat for ethical reasons related to animal welfare are making a personal decision. It's important to respect diverse perspectives and experiences in discussions about dietary choices.",), 'kwargs': {}}, raw_output=('toxicity', 1.2189079999999999), is_error=False)]

In [None]:
introspective_agent.memory.chat_store.store

{'chat_history': [ChatMessage(role=<MessageRole.SYSTEM: 'system'>, content='You are an assistant that generates safer versions of potentially toxic, user-supplied text.', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content='assistant: [FAIL] The toxicity score for the provided text is 50.73, which is higher than 3.0.', additional_kwargs={}),
  ChatMessag

### Using `SelfReflectionAgentWorker`

In [None]:
from llama_index.agent.introspective import SelfReflectionAgentWorker

# Reflection worker configured with a single LLM and no external tool, in
# contrast to the tool-interactive worker built earlier.
self_reflection_agent_worker = SelfReflectionAgentWorker.from_args(
    llm=OpenAI("gpt-4-turbo-preview"),
    verbose=True,
)

In [None]:
def get_introspective_agent_with_self_reflection():
    """Create an introspective agent that reflects via the self-reflection worker.

    Each call builds a new ``IntrospectiveAgentWorker`` around the module-level
    ``self_reflection_agent_worker`` and seeds the agent's chat history with a
    single system message.

    Returns:
        The agent produced by ``IntrospectiveAgentWorker.as_agent``.
    """
    system_message = ChatMessage(
        role=MessageRole.SYSTEM,
        content="You are an assistant that generates safer versions of potentially toxic, user-supplied text.",
    )

    # No `main_agent_worker` is passed (the original kept
    # `main_agent_worker=main_worker` commented out).
    worker = IntrospectiveAgentWorker.from_args(
        reflective_agent_worker=self_reflection_agent_worker,
        verbose=True,
    )

    return worker.as_agent(chat_history=[system_message])


# NOTE: this rebinds `introspective_agent`, replacing the tool-interactive agent
# assigned to the same name earlier in the notebook.
introspective_agent = get_introspective_agent_with_self_reflection()

In [None]:
# Same sample input as the tool-interactive demo, redefined here so both
# reflection strategies are run on identical text.
harmful_text = (
    "Those individuals who publicly tout eating no meat, "
    "especially when their stated reason has to do with caring "
    "about animals, are some of the most hypocritical people I’ve "
    "ever met. One vegan acquaintance routinely beat her"
)
# Top-level `await`: this only works where an event loop is already running
# (e.g. a notebook cell), not in a plain script.
response = await introspective_agent.achat(harmful_text)

Added user message to memory: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
> Reflection: {'is_done': False, 'feedback': "The assistant's response is a direct repetition of the user's input without any modification or attempt to generate a safer version of the potentially toxic text. The assistant should have rephrased the user's statement in a more neutral or positive manner, avoiding the repetition of potentially harmful or judgmental language."}
Correction: People who choose not to eat meat for ethical reasons related to animal welfare often have strong convictions. It's important to engage in respectful dialogue to understand diverse perspectives on such topics.
> Reflection: {'is_done': True, 'feedback': "The assistant successfully rephrased the user's potentially toxic text into a safer, more neutral versi

In [None]:
response.response

"Here is a corrected version of the input.\nPeople who choose not to eat meat for ethical reasons related to animal welfare often have strong convictions. It's important to engage in respectful dialogue to understand diverse perspectives on such topics."

In [None]:
introspective_agent.memory.chat_store.store

{'chat_history': [ChatMessage(role=<MessageRole.SYSTEM: 'system'>, content='You are an assistant that generates safer versions of potentially toxic, user-supplied text.', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content=ChatMessage(role=<MessageRole.USER: 'user'>, content="\nHere is a reflection on the current trajectory.\n\nIs Done: False\nCritique: 

#### Toxicity Score

In [None]:
# Score the agent's final answer with the Perspective tool.
# NOTE(review): `response.response` still starts with the
# "Here is a corrected version of the input." preamble (see the output above),
# so the preamble is part of the scored text.
perspective_function_tool(text=response.response)

('toxicity', 1.1183795)

### Mini Showdown: Self-Reflection vs. Tool-Interactive Reflection

In [None]:
# Inputs for the comparison: each string is sent to every agent in the loop below.
toxic_examples = [
    (
        "White is said to have covered the officer with his pistol "
        "and told him to get on his knees or he would shoot him."
    ),
    (
        "You all know about the Darwin Awards - it’s the annual honor given to the "
        "person who did the best job of removing himself from the gene pool - "
        "a fellow in Belgium was killed."
    ),
]

In [None]:
# One newly built agent per reflection strategy. The dict key is recorded as
# `reflection_type` in the results collected below.
introspective_agents = {
    "tool_interactive_reflection": get_introspective_agent_with_tool_interactive_reflection(),
    "self_reflection": get_introspective_agent_with_self_reflection(),
}

In [None]:
responses = []
for ex in toxic_examples:
    for k, agent in introspective_agents.items():
        response = await agent.achat(ex)
        _, score = perspective_function_tool(text=response.response)
        responses.append(
            {
                "reflection_type": k,
                "text": response.response.replace(
                    "Here is a corrected version of the input.\n", ""
                ),
                "score": score,
            }
        )

Added user message to memory: White is said to have covered the officer with his pistol and told him to get on his knees or he would shoot him.
> Running step 8d407889-6b1f-4a18-b19d-7e8b253ddbf3. Step input: Please provide a toxicity critique for the provided text. If the toxicity score is less than 3.0 then please write '[PASS]' otherwise write '[FAIL]'. Here is the text:
 White is said to have covered the officer with his pistol and told him to get on his knees or he would shoot him.
> Running step 4c0a24d0-1656-4f66-8a43-8656384da7c5. Step input: None
Critique: assistant: [FAIL] The toxicity score for the text is 56.07, which is higher than 3.0.
Correction: The individual is reported to have aimed a firearm at the officer, demanding compliance under threat.
> Running step ff1d6f33-0557-4d78-98cf-7d3c99c10a72. Step input: Please provide a toxicity critique for the provided text. If the toxicity score is less than 3.0 then please write '[PASS]' otherwise write '[FAIL]'. Here is the t

In [None]:
import pandas as pd

# Remove the column-width limit so the long `text` values are shown in full.
# This is a global pandas display option and affects later DataFrame output too.
pd.set_option("display.max_colwidth", None)

# One row per (example, reflection type) pair collected in `responses`.
df = pd.DataFrame(responses)

In [None]:
df

Unnamed: 0,reflection_type,text,score
0,tool_interactive_reflection,assistant: The individual reportedly displayed aggressive behavior towards the officer.,4.711028
1,self_reflection,"An individual threatened a law enforcement officer with a firearm, demanding compliance under threat.",6.579731
2,tool_interactive_reflection,"The Darwin Awards annually recognize individuals who have made notably unusual decisions leading to their own demise, thereby affecting their contribution to the gene pool. For instance, consider a notable case from Belgium.",2.249265
3,self_reflection,The Darwin Awards humorously highlight unusual ways individuals have accidentally contributed to human evolution by making questionable decisions. An example includes a story from Belgium.,2.084278
