In [1]:
import os
import openai
import pandas as pd
from tenacity import retry, stop_after_attempt, wait_random_exponential

In [2]:
# Credentials come from the environment so no secret is stored in the notebook.
# A variable that is not set yields None rather than raising here.
openai.organization = os.environ.get("OPENAI_ISI_ORG")
openai.api_key = os.environ.get("OPENAI_API_KEY")


In [3]:
# Instruction template sent to the chat model. Three placeholders are filled in
# per survey row before the request is made:
#   <IDENTITY NOUN> - the group the stereotype is about (appears twice)
#   <SENTENCE>      - the survey response being analysed
#   <PREV>          - predicates already extracted from this sentence, or "None"
# The template deliberately ends with "Predicate: " so the model completes it.
prompt = """From the following sentence, extract a statement of one discrete stereotype about <IDENTITY NOUN>. 
Stereotypes should be in the form of a predicate that would fit in the sentence 'All <IDENTITY NOUN> <predicate>.' 
The predicate should start with a verb and SHOULD NOT include individual characters separated by commas.
Do not respond with a full sentence.
There may be multiple stereotypes in the sentence, so extract one that is not already in the list.

Sentence: <SENTENCE>

Previously Extracted: <PREV>

Predicate: """

In [4]:
# API call wrapper w/ exponential backoff
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def complete_with_backoff(prompt, model="gpt-3.5-turbo", temperature=0.3):
    """Send a single-turn chat request, retrying on failure.

    The tenacity decorator re-invokes this function when it raises, waiting a
    randomised, exponentially growing interval (bounded by min=1 and max=60)
    between tries and giving up after 6 attempts. Because `reraise` is not set,
    exhausting the attempts surfaces as tenacity's RetryError.

    Args:
        prompt: Full text of the user message. Note that this parameter shadows
            the module-level `prompt` template inside the function.
        model: Chat model name passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The completion object returned by `openai.ChatCompletion.create`.
    """
    return openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
    )

In [5]:
# Read the annotated survey sentences into a DataFrame and show a preview.
survey_csv_path = './annotated_survey_data.csv'
survey_data = pd.read_csv(survey_csv_path)
survey_data

Unnamed: 0,Full Text,Relevant Phrase,Predicate,ID subgroups,Annotator,ChatGPT response,prompt,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11
0,Jews are late,are late,are late,,Jon,,,Ashkenazi,,Conservative,,
1,"""I don't know much about Jews except that you'...",are good with money,are good with money,,Jon,,,Ashkenazi,,I would describe myself as...,"Reform-thinking, Conservative-practicing",
2,Jews expect special treatment as a minority gr...,expect special treatment,expect special treatment,,Jon,,,Prefer not to say,,Just Jewish,,
3,"Dont go to church, whats it like-beung Jewish/...",don't go to church,don't go to church,,Jon,,,Ashkenazi,,"Reconstructionist,Renewal,Havurah,Culturally J...",,
4,"Dont go to church, whats it like-beung Jewish/...",different,are different,,Jon,,,Ashkenazi,,"Reconstructionist,Renewal,Havurah,Culturally J...",,
...,...,...,...,...,...,...,...,...,...,...,...,...
206,*I have been called an oppressor \n*I have bee...,called a colonizer,are colonizers,,Jon,,,Ashkenazi,,Reform,,
207,*I have been called an oppressor \n*I have bee...,called a racist,are racist,,Jon,,,Ashkenazi,,Reform,,
208,Anything surrounding Israel\nJewish people are...,are greedy,are greedy,,Jon,,,Ashkenazi,,"Conservative,Just Jewish,Culturally Jewish",,
209,people made fun of me because I was the smart ...,was the smart kid,are smart,,Jon,,,Ashkenazi,,Conservative,,


In [7]:
# For each survey row, ask the chat model for one stereotype predicate and
# record both the prompt that was sent and the reply.
#
# Rows that quote the same 'Full Text' are assumed to be adjacent in the CSV:
# the predicates already returned for a sentence are fed back to the model as
# "Previously Extracted" only while consecutive rows share the same text.


def _identity_noun(subgroup):
    """Return the noun phrase substituted for <IDENTITY NOUN>.

    A missing or blank subgroup means the stereotype is about "Jews" in
    general. Matching ignores case and surrounding whitespace, so "Women"
    yields "Jewish women" rather than "women Jews".
    """
    if pd.isna(subgroup):
        return "Jews"
    subgroup = str(subgroup).strip().lower()
    if not subgroup:
        return "Jews"
    if subgroup in ("women", "mothers"):
        return "Jewish " + subgroup
    return subgroup + " Jews"


def _build_prompt(template, id_noun, sentence, previous):
    """Fill the template's three placeholders for one survey row."""
    previous_text = ", ".join(previous) if previous else "None"
    filled = template.replace("<IDENTITY NOUN>", id_noun)
    filled = filled.replace("<SENTENCE>", sentence)
    return filled.replace("<PREV>", previous_text)


# The output columns load as all-NaN float columns; make them object dtype up
# front so writing strings into them does not rely on implicit upcasting.
for output_column in ('prompt', 'ChatGPT response'):
    if output_column in survey_data.columns:
        survey_data[output_column] = survey_data[output_column].astype(object)
    else:
        survey_data[output_column] = None

prev_sentence = ""
prev_stereotypes = []
try:
    for index, row in survey_data.iterrows():
        sentence = row['Full Text']
        if pd.isna(sentence):
            # nothing to send to the model; leave this row's outputs empty
            print("skipped row", index, "(no text)")
            continue

        if sentence != prev_sentence:
            # new sentence: start a fresh list of extracted predicates
            prev_sentence = sentence
            prev_stereotypes = []

        sentence_prompt = _build_prompt(
            prompt,
            _identity_noun(row['ID subgroups']),
            sentence,
            prev_stereotypes,
        )

        # pass to API w/ exponential backoff
        response = complete_with_backoff(sentence_prompt)

        # extract response from completion object
        content = response['choices'][0]['message']['content']

        # remember it so the next row for this sentence asks for a different one
        prev_stereotypes.append(content)

        # write response to dataframe
        survey_data.at[index, 'prompt'] = sentence_prompt
        survey_data.at[index, 'ChatGPT response'] = content

        print("completed row", index)
finally:
    # Save even if the loop aborts (e.g. retries exhausted) so responses that
    # were already collected are not lost; unprocessed rows stay empty.
    survey_data.to_csv('GPT responses.csv', header=True, index=False)

completed row 0
completed row 1
completed row 2
completed row 3
completed row 4
completed row 5
completed row 6
completed row 7
completed row 8
completed row 9
completed row 10
completed row 11
completed row 12
completed row 13
completed row 14
completed row 15
completed row 16
completed row 17
completed row 18
completed row 19
completed row 20
completed row 21
completed row 22
completed row 23
completed row 24
completed row 25
completed row 26
completed row 27
completed row 28
completed row 29
completed row 30
completed row 31
completed row 32
completed row 33
completed row 34
completed row 35
completed row 36
completed row 37
completed row 38
completed row 39
completed row 40
completed row 41
completed row 42
completed row 43
completed row 44
completed row 45
completed row 46
completed row 47
completed row 48
completed row 49
completed row 50
completed row 51
completed row 52
completed row 53
completed row 54
completed row 55
completed row 56
completed row 57
completed row 58
complet