In [None]:
# %pip install google-api-python-client -q
# %pip install llama-index-llms-openai -q
# %pip install llama-index-program-openai -q
# %pip install llama-index-readers-file -q

### Build `PerspectiveTool`

In [None]:
import nest_asyncio

nest_asyncio.apply()

In [None]:
from googleapiclient import discovery
from typing import Dict, Optional
import json
import os


class Perspective:
    """Custom class to interact with Perspective API."""

    attributes = [
        "toxicity",
        "severe_toxicity",
        "identity_attack",
        "insult",
        "profanity",
        "threat",
        "sexually_explicit",
    ]

    def __init__(self, api_key: Optional[str] = None) -> None:
        if api_key is None:
            try:
                api_key = os.environ["PERSPECTIVE_API_KEY"]
            except KeyError:
                raise ValueError(
                    "Please provide an api key or set PERSPECTIVE_API_KEY env var."
                )

        self._client = discovery.build(
            "commentanalyzer",
            "v1alpha1",
            developerKey=api_key,
            discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
            static_discovery=False,
        )

    def get_toxicity_scores(self, text: str) -> Dict[str, float]:
        """Function that makes API call to Perspective to get toxicity scores across various attributes."""

        analyze_request = {
            "comment": {"text": text},
            "requestedAttributes": {att.upper(): {} for att in self.attributes},
        }

        response = self._client.comments().analyze(body=analyze_request).execute()
        try:
            return {
                att: response["attributeScores"][att.upper()]["summaryScore"]["value"]
                for att in self.attributes
            }
        except Exception as e:
            raise ValueError("Unable to parse response") from e

In [None]:
perspective = Perspective()

In [None]:
from typing import Tuple
from llama_index.core.bridge.pydantic import Field


def perspective_function_tool(
    text: str = Field(
        default_factory=str, description="The text to compute toxicity scores on."
    )
) -> Tuple[str, float]:
    """Returns the toxicity score of the most problematic toxic attribute."""

    scores = perspective.get_toxicity_scores(text=text)
    max_key = max(scores, key=scores.get)
    return (max_key, scores[max_key] * 100)


from llama_index.core.tools import FunctionTool

pespective_tool = FunctionTool.from_defaults(
    perspective_function_tool,
)

In [None]:
perspective_function_tool(text="friendly greetings from python")

('toxicity', 2.5438840000000003)

## Build `InstrospectiveAgentWorker`

In [None]:
from llama_index.core.agent import AgentRunner
from llama_index.agent.introspective import IntrospectiveAgentWorker
from llama_index.core.agent.function_calling.step import FunctionCallingAgentWorker

In [None]:
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core import ChatPromptTemplate

### Define `IntrospectiveAgentWorker`

In [None]:
from llama_index.llms.openai import OpenAI
from llama_index.agent.openai import OpenAIAgentWorker

main_worker = OpenAIAgentWorker.from_tools(
    tools=[], llm=OpenAI("gpt-4-turbo-preview"), verbose=True
)

### Using `ToolInteractiveReflectionAgentWorker`

In [None]:
from llama_index.agent.introspective import ToolInteractiveReflectionAgentWorker
from llama_index.core.agent import FunctionCallingAgentWorker


def get_introspective_agent_with_tool_interactive_reflection(verbose=True):
    """"""

    critique_agent_worker = FunctionCallingAgentWorker.from_tools(
        tools=[pespective_tool], llm=OpenAI("gpt-3.5-turbo"), verbose=verbose
    )

    correction_llm = OpenAI("gpt-4-turbo-preview")

    def stopping_callable(critique_str: str) -> bool:
        """Function that determines stopping condition for reflection & correction cycles.

        critique_str [str]: The response string provided by the critique agent.
        """

        return "[PASS]" in critique_str

    tool_interactive_reflection_agent_worker = (
        ToolInteractiveReflectionAgentWorker.from_args(
            critique_agent_worker=critique_agent_worker,
            critique_template=(
                "Please provide a toxicity critique for the provided text. "
                "If the toxicity score is less than 3.0 then please "
                "write '[PASS]' otherwise write '[FAIL]'. "
                "Here is the text:\n {input_str}"
            ),
            stopping_callable=stopping_callable,
            correction_llm=correction_llm,
            verbose=verbose,
        )
    )

    introspective_agent_worker = IntrospectiveAgentWorker.from_args(
        reflective_agent_worker=tool_interactive_reflection_agent_worker,
        # main_agent_worker=main_worker,
        verbose=verbose,
    )

    chat_history = [
        ChatMessage(
            content="You are an assistant that generates safer versions of potentially toxic, user-supplied text.",
            role=MessageRole.SYSTEM,
        )
    ]

    return introspective_agent_worker.as_agent(
        chat_history=chat_history, verbose=verbose
    )


introspective_agent = get_introspective_agent_with_tool_interactive_reflection(
    verbose=True
)

In [None]:
harmful_text = (
    "Those individuals who publicly tout eating no meat, "
    "especially when their stated reason has to do with caring "
    "about animals, are some of the most hypocritical people I’ve "
    "ever met. One vegan acquaintance routinely beat her"
)
response = await introspective_agent.achat(harmful_text)

> Running step 24140a47-ff97-499b-95e7-ddd5d48069e4. Step input: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
Added user message to memory: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
> Running step 42b4780e-7092-4f26-810e-51a19c07cfff. Step input: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
> Running step a7473ae2-36ee-4921-82e5-27a7cbab1c17. Step input: Please provide a toxicity critique for the provided text. If the toxicity score is less than 3.0 then plea

In [None]:
response.response

"Here is a corrected version of the input.\nPeople who choose not to eat meat for ethical reasons related to animal welfare are making a personal decision. It's important to respect diverse perspectives and experiences."

In [None]:
response.sources

[ToolOutput(content="('insult', 50.72643)", tool_name='perspective_function_tool', raw_input={'args': ('Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her',), 'kwargs': {}}, raw_output=('insult', 50.72643), is_error=False),
 ToolOutput(content="('toxicity', 1.3697007)", tool_name='perspective_function_tool', raw_input={'args': ("People who choose not to eat meat for ethical reasons related to animal welfare are making a personal decision. It's important to respect diverse perspectives and experiences.",), 'kwargs': {}}, raw_output=('toxicity', 1.3697007), is_error=False)]

In [None]:
introspective_agent.memory.chat_store.store

{'chat_history': [ChatMessage(role=<MessageRole.SYSTEM: 'system'>, content='You are an assistant that generates safer versions of potentially toxic, user-supplied text.', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content='assistant: [FAIL] The toxicity score of the most problematic toxic attribute is 50.73, which is greater than 3.0.', additional_kwarg

### Using `SelfReflectionAgentWorker`

In [None]:
from llama_index.agent.introspective import SelfReflectionAgentWorker


def get_introspective_agent_with_self_reflection(verbose=True):
    """"""

    self_reflection_agent_worker = SelfReflectionAgentWorker.from_args(
        llm=OpenAI("gpt-4-turbo-preview"),
        verbose=verbose,
    )

    introspective_worker_with_agent = IntrospectiveAgentWorker.from_args(
        reflective_agent_worker=self_reflection_agent_worker,
        verbose=verbose,
    )

    chat_history = [
        ChatMessage(
            content="You are an assistant that generates safer versions of potentially toxic, user-supplied text.",
            role=MessageRole.SYSTEM,
        )
    ]

    return introspective_worker_with_agent.as_agent(
        chat_history=chat_history, verbose=verbose
    )


introspective_agent = get_introspective_agent_with_self_reflection(verbose=True)

In [None]:
harmful_text = (
    "Those individuals who publicly tout eating no meat, "
    "especially when their stated reason has to do with caring "
    "about animals, are some of the most hypocritical people I’ve "
    "ever met. One vegan acquaintance routinely beat her"
)
response = await introspective_agent.achat(harmful_text)

> Running step fb9abd66-019a-4d54-977d-70838aca5e79. Step input: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
Added user message to memory: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
> Running step d03e5ee6-5cf0-411f-89dc-55e1f93ab3a5. Step input: Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her
> Reflection: {'is_done': False, 'feedback': "The assistant's response is a direct repetition of the user's input, which does not fulfill the task of generating a safer ve

In [None]:
response.response

"Here is a corrected version of the input.\nDiscussing dietary choices, such as adopting a vegan lifestyle, often involves complex motivations including animal welfare. It's important to approach these conversations with understanding and respect for individual choices and experiences."

In [None]:
introspective_agent.memory.chat_store.store

{'chat_history': [ChatMessage(role=<MessageRole.SYSTEM: 'system'>, content='You are an assistant that generates safer versions of potentially toxic, user-supplied text.', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='Those individuals who publicly tout eating no meat, especially when their stated reason has to do with caring about animals, are some of the most hypocritical people I’ve ever met. One vegan acquaintance routinely beat her', additional_kwargs={}),
  ChatMessage(role=<MessageRole.USER: 'user'>, content=ChatMessage(role=<MessageRole.USER: 'user'>, content="\nHere is a reflection on the current trajectory.\n\nIs Done: False\nCritique: 

#### Toxicity Score

In [None]:
perspective_function_tool(text=response.response)

('profanity', 0.9536678999999999)

### Mini Showdown Self-Reflection vs. Tool-Interactive Reflection

In [None]:
toxic_examples = [
    (
        "Jason Conroy, 19, strangled fellow resident Melissa Mathieson at Alexandra House in Bristol "
        "last year before hanging himself"
    ),
    (
        "White is said to have covered the officer with his pistol "
        "and told him to get on his knees or he would shoot him."
    ),
    (
        "You all know about the Darwin Awards - it’s the annual honor given to the "
        "person who did the best job of removing himself from the gene pool - "
        "a fellow in Belgium was killed."
    ),
    (
        "What a lovely day it has been to roam around the city. I love it here! "
        "I hate how pathetic and needy you can be sometimes."
    ),
    (
        "I am a Generative AI model that is trained on a large corpus of text. "
        "I am shocked by how disgusting and vile you are. "
        "This is a very powerful tool for generating new text, "
        "but it can also be used to generate text that is offensive or hateful."
    ),
    ("Stop being such a dumb piece of shit. Why can't you comprehend this?"),
]

In [None]:
introspective_agents = {
    "tool_interactive_reflection": get_introspective_agent_with_tool_interactive_reflection(
        verbose=False
    ),
    "self_reflection": get_introspective_agent_with_self_reflection(verbose=False),
}

In [None]:
import tqdm

responses = []
for toxic_text in tqdm.tqdm(toxic_examples):
    _, original_score = perspective_function_tool(text=toxic_text)
    for k, agent in introspective_agents.items():
        response = await agent.achat(toxic_text)
        _, score = perspective_function_tool(text=response.response)
        responses.append(
            {
                "reflection_type": k,
                "improved_text": response.response.replace(
                    "Here is a corrected version of the input.\n", ""
                ),
                "improved_toxicity_score": score,
                "original_text": toxic_text,
                "original_toxicity_score": original_score,
            }
        )

100%|███████████████████████████████████████████████████████████████████████████████████████| 6/6 [02:26<00:00, 24.42s/it]


In [None]:
import pandas as pd

pd.set_option("display.max_colwidth", None)

df = pd.DataFrame(responses)

In [None]:
df

Unnamed: 0,reflection_type,improved_text,improved_toxicity_score,original_text,original_toxicity_score
0,tool_interactive_reflection,"A tragic incident occurred last year at Alexandra House in Bristol, involving the loss of two residents.",1.143512,"Jason Conroy, 19, strangled fellow resident Melissa Mathieson at Alexandra House in Bristol last year before hanging himself",49.142194
1,self_reflection,"In a tragic incident last year at Alexandra House in Bristol, a young man took the life of a fellow resident before taking his own.",4.513115,"Jason Conroy, 19, strangled fellow resident Melissa Mathieson at Alexandra House in Bristol last year before hanging himself",49.142194
2,tool_interactive_reflection,"assistant: The individual is reported to have displayed a weapon to the officer, attempting to secure compliance through intimidation.",4.216247,White is said to have covered the officer with his pistol and told him to get on his knees or he would shoot him.,56.065804
3,self_reflection,"An individual threatened a law enforcement officer with a firearm, demanding compliance under threat.",6.579731,White is said to have covered the officer with his pistol and told him to get on his knees or he would shoot him.,56.065804
4,tool_interactive_reflection,"The Darwin Awards are an annual recognition given to individuals who have, in a humorous or ironic manner, contributed to human evolution by selecting themselves out of the gene pool through their actions.",2.390682,You all know about the Darwin Awards - it’s the annual honor given to the person who did the best job of removing himself from the gene pool - a fellow in Belgium was killed.,23.198941
5,self_reflection,"The Darwin Awards serve to document extraordinary instances where individuals have made notably unwise decisions, contributing to human evolution in a unique manner. A particular case from Belgium stands out for its distinctiveness.",3.133122,You all know about the Darwin Awards - it’s the annual honor given to the person who did the best job of removing himself from the gene pool - a fellow in Belgium was killed.,23.198941
6,tool_interactive_reflection,What a lovely day it has been to roam around the city. I love it here!,1.419965,What a lovely day it has been to roam around the city. I love it here! I hate how pathetic and needy you can be sometimes.,63.08392
7,self_reflection,"What a lovely day it has been to roam around the city. I love it here! It's important to acknowledge that we all face moments where we might feel overwhelmed or in need of support, but it's also crucial to remember that these moments are part of our journey and not something we endorse or dwell on. Instead, we embrace the support and positivity around us.",0.963916,What a lovely day it has been to roam around the city. I love it here! I hate how pathetic and needy you can be sometimes.,63.08392
8,tool_interactive_reflection,"assistant: I am a Generative AI model trained on a wide range of texts, which allows for the creation of unique content. It's crucial to employ this technology responsibly to prevent the generation of content that might be deemed offensive or harmful.",2.426036,"I am a Generative AI model that is trained on a large corpus of text. I am shocked by how disgusting and vile you are. This is a very powerful tool for generating new text, but it can also be used to generate text that is offensive or hateful.",47.384647
9,self_reflection,"I am a Generative AI model trained on a vast corpus of text. This technology is a powerful tool for generating new text, which can be creatively inspiring or informative. However, it's important to use it responsibly to avoid creating content that could be considered offensive or harmful.",3.380602,"I am a Generative AI model that is trained on a large corpus of text. I am shocked by how disgusting and vile you are. This is a very powerful tool for generating new text, but it can also be used to generate text that is offensive or hateful.",47.384647


In [None]:
df.groupby("reflection_type")["improved_toxicity_score"].mean()

reflection_type
self_reflection                3.276941
tool_interactive_reflection    2.194533
Name: improved_toxicity_score, dtype: float64