In [1]:
!pip install pandas openai requests
!pip install tqdm
!pip install python-docx



In [1]:
import os
import time

import docx
import openai
import pandas as pd
import requests
from tqdm import tqdm

# Prefer the OPENAI_API_KEY environment variable so the secret key never has to be written into the notebook.
# The "MY_API_KEY" placeholder is only used when the variable is not set. IMPORTANT - if you replace the
# placeholder with a real key, don't share this notebook online or anyone will be able to use your OpenAI account.
openai.api_key = os.environ.get("OPENAI_API_KEY", "MY_API_KEY")

In [30]:
def analyze_my_review(text, max_retries=3, retry_delay=0.5, request_delay=0.5):
    """Classify ``text`` as hate speech, offensive language or neither via the OpenAI chat API.

    The model is asked to answer with a single word. The reply is normalized
    (surrounding whitespace and punctuation removed, then upper-cased) before it
    is validated, so answers such as "Neutral" or "HATE." are accepted instead of
    being retried and finally discarded.

    Args:
        text: The text to classify.
        max_retries: Maximum number of API requests made for this text.
        retry_delay: Seconds to wait before asking again after an unusable reply.
        request_delay: Seconds to wait before returning, to space out consecutive
            calls and stay below the API rate limit.

    Returns:
        One of "HATE", "OFFENSIVE" or "NEUTRAL". "NEUTRAL" is also the fallback
        when no attempt produced one of the three allowed labels.

    Raises:
        Exceptions raised by ``openai.ChatCompletion.create`` are not caught here
        and propagate to the caller.
    """
    allowed_labels = ("HATE", "OFFENSIVE", "NEUTRAL")

    # The prompt does not change between attempts, so it is built once.
    messages = [
        {"role": "system", "content": "You are an AI language model trained to analyze and detect hate speech."},
        {"role": "user", "content": f"Analyze the following text and determine if the text is: hate speech, offensive language or none of both. Return only a single word, either HATE, OFFENSIVE or NEUTRAL respectively:\n{text}"}
    ]

    # Fallback used when every attempt returns something other than an allowed label.
    result = "NEUTRAL"

    for attempt in range(max_retries):
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            # Only a single-word label is wanted, so the reply is capped at 3 tokens (roughly one word).
            # A higher max_tokens lets the model append explanatory text to every answer.
            max_tokens=3,
            n=1,
            stop=None,
            temperature=0
        )

        response_text = completion.choices[0].message.content or ""

        # Tolerate harmless formatting differences ("Neutral", "HATE.", " OFFENSIVE\n").
        label = response_text.strip(" \t\r\n.,;:!?\"'`").upper()

        if label in allowed_labels:
            result = label
            break

        # Unusable reply: pause briefly before the next attempt, if there is one.
        if attempt < max_retries - 1:
            time.sleep(retry_delay)

    # OpenAI limits how often the API can be called, especially on free-tier accounts.
    # On the free tier, raise request_delay to a few seconds (e.g. 4) and analyze only
    # small batches (<100 texts at a time) to avoid hitting the rate limit.
    time.sleep(request_delay)

    return result

In [31]:
# Location of the labelled tweets, relative to this notebook.
input_file = "../hate-speech-detection-using-chatgpt/csv/labeled_data.csv"
# Load the CSV and keep only its first 10 rows (a small subset keeps the number of API calls made later low).
# To draw the rows in random order instead, chain .sample(frac=1) before .head(10).
df = pd.read_csv(input_file).head(10)

In [33]:
# Analyze each review using ChatGPT and save the results in a list called sentiments so we can access the results later
results = []

# Here we loop through all of the reviews in our dataset and send them to the openAI API using our custom function from above
for review in tqdm(df["tweet"]):
    result = analyze_my_review(review)
    if result == 'HATE':
        result = 0
    elif result == 'OFFENSIVE':
        result = 1
    else:
        result = 2
    results.append(result)

100%|██████████| 10/10 [00:13<00:00,  1.39s/it]


In [34]:
# Display the numeric class collected for each tweet (0 = HATE, 1 = OFFENSIVE, 2 = any other answer).
results

[0, 0, 0, 0, 0, 1, 0, 0, 1, 1]

In [35]:
# Now let's save the openAI API results as an additional column in our original dataset
# Attach the OpenAI API results to the dataset as an additional column at position 1.
column = 'prediction'
# DataFrame.insert raises ValueError when the column already exists, so drop a column left over
# from an earlier run of this cell first; this keeps the cell safe to re-run.
if column in df.columns:
    df = df.drop(columns=column)
df.insert(1, column, results)

# Save the dataset together with the predictions to a new CSV file.
output_file = "../hate-speech-detection-using-chatgpt/csv/labeled_data_and_prediction.csv"
df.to_csv(output_file, index=False)