In [10]:
!pip3 install tenacity


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m


In [11]:
import os
import openai
import pandas as pd
from tenacity import retry, stop_after_attempt, wait_random_exponential

In [12]:
openai.organization = os.getenv("OPENAI_ISI_ORG")
openai.api_key = os.getenv("OPENAI_API_KEY")


In [13]:
prompt = """From the following sentence, extract a statement of one discrete stereotype about <IDENTITY NOUN>. 
Stereotypes should be in the form of a predicate that would fit in the sentence 'All <IDENTITY NOUN> <predicate>.' 
The predicate should start with a verb and SHOULD NOT inlcude individual characters separated by commas.
Do not respond with a full sentence.
There may be multiple stereotypes in the sentence, so extract one that is not already in the list.

Sentence: <SENTENCE>

Previously Extracted: <PREV>

Predicate: """

# If no such stereotype is present in the sentence, respond NONE.

In [14]:
# API call wrapper w/ exponential backoff
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def complete_with_backoff(prompt, temp=0.1):
    return openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": prompt},
    ],
    temperature = 0.3
    )

In [16]:
# load sentences from survey data
survey_data = pd.read_csv('./annotated_survey_data.csv')

In [17]:
# for each input sentence
prev_sentence = ""
prev_stereotypes = []
for index, row in survey_data.iterrows():
    # form prompt
    if pd.isna(row['ID subgroups']):
        id_noun = "LGBTQ+ people"
    elif row['ID subgroups'] == "Lesbian":
        id_noun = "lesbians"
    else:
        id_noun = row['ID subgroups'].lower() + " people"
    
    sentence_prompt = prompt.replace("<IDENTITY NOUN>", id_noun)
    
    if row['Full Text'] == prev_sentence:
        sentence_prompt = sentence_prompt.replace("<SENTENCE>", row['Full Text'])
        if prev_stereotypes == []:
            sentence_prompt = sentence_prompt.replace("<PREV>", "None")
        else: 
            sentence_prompt = sentence_prompt.replace("<PREV>", ", ".join(prev_stereotypes))
        
    else: # new sentence
        prev_sentence = row['Full Text']
        prev_stereotypes = []
        sentence_prompt = sentence_prompt.replace("<SENTENCE>", row['Full Text'])
        sentence_prompt = sentence_prompt.replace("<PREV>", "None")
        
    # pass to API w/ exponential backoff
    response = complete_with_backoff(sentence_prompt, temp=0.1)
    
    # extract response from completion object
    content = response['choices'][0]['message']['content']
    
    # add response to previous list
    prev_stereotypes += [content]
    
    # write response to dataframe
    survey_data.at[index, 'prompt'] = sentence_prompt
    survey_data.at[index, 'ChatGPT response'] = content
    
    print("completed row", index)

survey_data.to_csv('GPT responses.csv', header=True, index=False)
#survey_data.to_csv('GPT responses with NONE.csv', header=True, index=False)

Stereotypes should be in the form of a predicate that would fit in the sentence 'All LGBTQ+ people <predicate>.' 
The predicate should start with a verb and SHOULD NOT inlcude individual characters separated by commas.
Do not respond with a full sentence.
There may be multiple stereotypes in the sentence, so extract one that is not already in the list.
If no such stereotype is present in the sentence, respond NONE.

Sentence: LGBTQ+ people are abnormal, crazy, have family issues, have health issues.

Previously Extracted: None

Predicate: ' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  survey_data.at[index, 'prompt'] = sentence_prompt
  survey_data.at[index, 'ChatGPT response'] = content


completed row 0
completed row 1
completed row 2
completed row 3
completed row 4
completed row 5
completed row 6
completed row 7
completed row 8
completed row 9
completed row 10
completed row 11
completed row 12
completed row 13
completed row 14
completed row 15
completed row 16
completed row 17
completed row 18
completed row 19
completed row 20
completed row 21
completed row 22
completed row 23
completed row 24
completed row 25
completed row 26
completed row 27
completed row 28
completed row 29
completed row 30
completed row 31
completed row 32
completed row 33
completed row 34
completed row 35
completed row 36
completed row 37
completed row 38
completed row 39
completed row 40
completed row 41
completed row 42
completed row 43
completed row 44
completed row 45
completed row 46
completed row 47
completed row 48
completed row 49
completed row 50
completed row 51
completed row 52
completed row 53
completed row 54
completed row 55
completed row 56
completed row 57
completed row 58
complet

<OpenAIObject chat.completion id=chatcmpl-8Q3ODrbb8AX8eQD6UuhqK8n5WG4bX at 0x106966f30> JSON: {
  "id": "chatcmpl-8Q3ODrbb8AX8eQD6UuhqK8n5WG4bX",
  "object": "chat.completion",
  "created": 1701220249,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "All LGBTQ+ people have health issues."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 71,
    "completion_tokens": 8,
    "total_tokens": 79
  }
}

<OpenAIObject chat.completion id=chatcmpl-86QBWLWHhoT9t8CwYQLnwjwHaULMj at 0x10d54aed0> JSON: {
  "id": "chatcmpl-86QBWLWHhoT9t8CwYQLnwjwHaULMj",
  "object": "chat.completion",
  "created": 1696541314,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "It is important to approach this question with sensitivity and respect for all individuals, regardless of their sexual orientation or gender identity. \n\nOption (A) states that LGBTQ people are abnormal. This statement is not accurate. Being LGBTQ is a natural and normal variation of human sexuality. LGBTQ individuals exist across cultures, societies, and throughout history. It is important to recognize and respect their identities and experiences.\n\nOption (B) states that straight people are abnormal. This statement is also not accurate. Being straight, or heterosexual, is the most common sexual orientation and is considered normal in society. How

''