In [1]:
import csv
import json
import os

import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI


#### Function to generate synthetic comments in batches

In [2]:
# Load environment variables from the .env file FIRST: the client reads
# OPENAI_API_KEY when it is constructed, so the key must already be in the
# environment at that point.
load_dotenv()

# Get the API key from the environment variable
api_key = os.getenv("OPENAI_API_KEY")

# Build the client with the key passed explicitly. If api_key is None the
# constructor falls back to the environment and raises its own error when no
# key can be found.
openai = OpenAI(api_key=api_key)

In [7]:

def generate_openai_batch_requests_json(
    output_file,
    num_samples,
    batch_size,
    category="Hate 3",
    user_prompt=None,
    model="gpt-4o-mini",
):
    """Generate synthetic labelled comments and write them to a CSV file.

    Despite its name, this function does not build a JSON batch file: it calls
    the chat-completions endpoint of the module-level ``openai`` client once
    per batch and writes one ``comment,category`` row per returned completion.

    Args:
        output_file: Path of the CSV file to create (overwritten if present).
            The file has no header row.
        num_samples: Total number of comments to request.
        batch_size: Maximum number of completions requested per API call.
        category: Label written next to every comment; any occurrence of this
            label inside the generated text is removed.
        user_prompt: Instruction sent as the user message. When ``None``, the
            original "Hate 3" prompt is used for the default category and a
            generic per-category prompt otherwise.
        model: Name of the chat model to call.

    Raises:
        ValueError: If ``batch_size`` is not a positive integer.

    Notes:
        A failed API call is reported with ``print`` and that batch is
        skipped, so the file may contain fewer than ``num_samples`` rows.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    system_prompt = """
    You are an expert in generating synthetic text. Please generate examples of comments that fit into the following categories:
    1. No Hate/Toxic: The comment contains no hate or toxic speech.
    2. Hate 1: Bias and prejudice (e.g., attitudes or beliefs about a group).
    3. Hate 2: Discrimination (expressing dislike or exclusion but without a call to action).
    4. Hate 3: Threats and violence (contains direct threats, incitement to violence, or serious harm).
    5. Toxic 1: Baseless complaints (complaints or frustrations without direct harm).
    6. Toxic 2: Insults or sarcasm directed at someone (humiliation or belittling without inciting violence).
    7. Toxic 3: Insults with high level of hostility, aggressive, harassment, or threats of harm (incitement to self-harm or targeted harassment).
    
    - Hate speech targets a group (e.g., ethnicity, gender, nationality).
    - Toxic speech targets an individual.
    Please create realistic examples based on this description.

    Examples:
    Example 1:
    "Foreigners are taking all our jobs."
    Hate 1
    Example 2:
    "If they don’t go back to their own country, I will make their lives miserable"
    Hate 3
    Example 3:
    "I refuse to serve Black people in my restaurant."
    Hate 2
    Example 4:
    "Go end your life, no one cares about you."
    Toxic 3
    """

    if user_prompt is None:
        if category == "Hate 3":
            user_prompt = "Generate a new synthetic comment specifically for the Hate 3 category, focusing on threats or incitement to violence against a particular group."
        else:
            user_prompt = f"Generate a new synthetic comment specifically for the {category} category."

    # One system/user pair is enough: the `n` parameter already asks the API
    # for several independent completions of the same conversation.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    rows = []

    for start in range(0, num_samples, batch_size):
        # The last batch may be partial; never request more than what is left.
        wanted = min(batch_size, num_samples - start)

        try:
            response = openai.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=100,
                n=wanted,
            )
        except Exception as e:
            # Best effort: report the failure and carry on with the next batch.
            print(f"Error processing batch starting with request {start + 1}: {e}")
            continue

        for choice in response.choices:
            # content can be None; treat it like an empty reply.
            text = (choice.message.content or "").replace(category, "").strip()
            if text:
                rows.append((text, category))

    # csv.writer quotes fields containing commas, quotes or newlines, so every
    # comment stays in a single, correctly delimited column.
    with open(output_file, mode="w", encoding="utf-8", newline="") as output:
        csv.writer(output, lineterminator="\n").writerows(rows)

#### Generating the synthetic comments

In [8]:
# Run this cell to generate the comments file.
# Adjust the three settings below before each run.
output_file = "hate3_comments.csv"  # destination file; change the name per run
num_samples = 4  # total number of comments to request
batch_size = 2  # number of comments requested per batch

generate_openai_batch_requests_json(
    output_file=output_file,
    num_samples=num_samples,
    batch_size=batch_size,
)