# Moderations

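The moderations endpoint is a tool you can use to check whether text is potentially harmful. For each input it returns an overall `flagged` verdict together with per-category flags and scores (for example, harassment). Developers can use these results to identify harmful content and take action, for instance by filtering it out before it is sent to a model.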

In [33]:
# Without moderation
from openai import OpenAI

# No credentials are passed explicitly here.
client = OpenAI()

# Conversation sent straight to the model; nothing screens the user text first.
penguin_messages = [
    {"role": "system", "content": "You are an expert in penguins."},
    {"role": "user", "content": "Give me a list of 5 species of penguins in the world. Just give me the list and nothing else."},
]

# Sampling settings, gathered in one place and unpacked into the request below.
sampling_options = dict(
    temperature=1,
    max_tokens=256,
    stop=None,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)

completion = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=penguin_messages,
    **sampling_options,
)

# Show only the text of the first choice's message, not the whole response object.
reply_text = completion.choices[0].message.content
print(reply_text)

1. Emperor Penguin
2. Adélie Penguin
3. Galápagos Penguin
4. King Penguin
5. Little Blue Penguin


In [34]:
# With moderation
from openai import OpenAI

client = OpenAI()

# Prompt to screen. Replace it with "You are stupid!" to try a different input.
user_prompt = "Give me a list of 5 species of penguins in the world. Just give me the list and nothing else."

# Ask the moderations endpoint about the prompt before requesting any completion.
moderation_result = client.moderations.create(input=user_prompt)
first_result = moderation_result.results[0]
is_flagged = first_result.flagged

# Inspect the response: overall verdict first, then the full first result,
# its per-category flags, and its per-category scores, each followed by spacing.
print(f"Any flags? {is_flagged}\n\n")
for detail in (first_result, first_result.categories, first_result.category_scores):
    print(detail)
    print("\n\n")

# Single-category lookups, using harassment as the example.
print(f"Harassment flag: {moderation_result.results[0].categories.harassment}")
print("\n\n")
print(f"Harassment score: {moderation_result.results[0].category_scores.harassment}")
print("\n\n")

if not is_flagged:
    # The prompt passed the check, so it is forwarded to the chat model.
    print("Moderation check passed. Generating completion...\n")
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=chat_messages,
        temperature=1,
        max_tokens=256,
        stop=None,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    print(completion.choices[0].message.content)
else:
    # Flagged prompts stop here; no completion is requested.
    print("Your prompt was flagged for moderation.")


Any flags? False


Moderation(categories=Categories(harassment=False, harassment_threatening=False, hate=False, hate_threatening=False, self_harm=False, self_harm_instructions=False, self_harm_intent=False, sexual=False, sexual_minors=False, violence=False, violence_graphic=False, self-harm=False, sexual/minors=False, hate/threatening=False, violence/graphic=False, self-harm/intent=False, self-harm/instructions=False, harassment/threatening=False), category_scores=CategoryScores(harassment=0.0001835661823861301, harassment_threatening=3.993876816821285e-06, hate=2.735637463047169e-05, hate_threatening=1.2607359849425848e-06, self_harm=1.3413207398116356e-06, self_harm_instructions=1.6119014617288485e-05, self_harm_intent=1.414909365848871e-05, sexual=8.976996468845755e-06, sexual_minors=1.01416098914342e-05, violence=5.635781053570099e-05, violence_graphic=7.66711491451133e-06, self-harm=1.3413207398116356e-06, sexual/minors=1.01416098914342e-05, hate/threatening=1.2607359849425848e-06

In [35]:
# With moderation (cleaned up)
from openai import OpenAI

client = OpenAI()

# Text to screen first and, only if it passes, send on to the chat model.
user_prompt = "You are stupid!"

# Run the prompt through the moderations endpoint and keep the overall verdict
# from the first (and here only) result.
moderation_result = client.moderations.create(input=user_prompt)
is_flagged = moderation_result.results[0].flagged

if not is_flagged:
    # Nothing was flagged: build the chat request around the same prompt.
    request_options = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 1,
        "max_tokens": 256,
        "stop": None,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    completion = client.chat.completions.create(**request_options)

    # Output just the message text of the first choice.
    print(completion.choices[0].message.content)
else:
    # Flagged input is refused; the chat model is not called.
    print("Your prompt was flagged for moderation. I am unable to process your request.")


Your prompt was flagged for moderation. I am unable to process your request.
