In [27]:
import openai
import random
import numpy as np
import pandas as pd

from numpy import nan
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

from nltk import WordNetLemmatizer
LEMMATIZER = WordNetLemmatizer()

In [132]:
import json

In [2]:
glove_path = 'data/glove.6B.300d.txt'
missing_verbs_path = 'data/missing_verbs.txt'
lexicon_path = 'FramesAgencyPower/agency_power.csv'

In [3]:
def load_glove(glove_file):
    """Load GloVe vectors in numpy array"""
    with open(glove_file, 'r') as f:
        model = {}
        for line in f:
            split_line = line.split()
            word = split_line[0]
            embedding = np.array([float(val) for val in split_line[1:]])
            model[word] = embedding
    return model

In [4]:
# find k examples from lexicon which are similar to given word (to be used as demonstrations to GPT);
# will use cosine similarity between glove embeddings
def find_k_similar_words(word: str, lexicon: dict, glove_embeddings: dict, k=5):
    if word in glove_embeddings:
        word_embedding = glove_embeddings[word]
        similarities = []
        for lexicon_word in lexicon:
            if lexicon_word in glove_embeddings:
                lexicon_word_embedding = glove_embeddings[lexicon_word]
                similarity = cosine_similarity([word_embedding], [lexicon_word_embedding])[0][0]
                similarities.append((lexicon_word, similarity))
        
        similarities.sort(key=lambda x: x[1], reverse=True)
        return [x[0] for x in similarities[:k]]
    
    # if word not in glove embeddings, return k random words from lexicon
    else:
        return random.choices(list(lexicon.keys()), k=k)
    

In [5]:
def get_formatted_prompt(word: str, examples: list, lexicon: dict) -> str:
    examples_str = ''
    for example in examples:
        examples_str += "{'" + example + "': " + str(lexicon[example]) + '}\n'
    
    prompt = f"""I will give you a verb, and you will assign it two labels.
The first label represents the agency that the verb gives its grammatical subject. This label can be one of the following:
    "agency_pos": the verb gives its subject agency, 
    "agency_neg": the verb takes away agency from its subject,
    "agency_equal": the verb does not affect the agency of its subject.

The second label represents whether the verb gives power to its subject, object, or both. This label is only defined for transitive verbs. This label can be one of the following:
    "power_agent": the verb gives power to its subject,
    "power_theme": the verb gives power to its object,
    "power_equal": the verb does not affect the power of its subject or object, or it affects both equally.
    "nan": the verb is intransitive, so power is not defined.

Here are some examples of verbs and their labels:
{examples_str}
Now, please label the following verb: {word}

Format your answer as a JSON object in the same format as the examples above.

Answer: """

    return prompt


Have chatgpt classify which missing 'verbs' are actually verbs (because the spacy parser isn't totally accurate)

In [85]:
with open('data/missing_verbs.txt', 'r') as f:
    missing_verbs = f.read().splitlines()

In [122]:
len(missing_verbs)

2625

In [124]:
temperature = 0.0
p = 0.0
responses = {}
i = 688
    
while i < len(missing_verbs):
    responses = {}
    
    for verb in missing_verbs[i:]:
        print(i, end=' ')
        prompt = f'Can the word "{verb}" be used as a verb? Give a one word, yes or no answer.'
        
        try:
            response = openai.ChatCompletion.create(
                        model="gpt-3.5-turbo",
                        n=1,
                        #temperature=temperature,
                        top_p=p,
                        messages=[
                            {"role": "user", "content": prompt}]
                        )
            responses[verb] = response.choices[0].message.content
            i += 1
        except:
            print('error')
            old_responses.update(responses)
            break
    

688 689 690 691 692 error
692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 error
744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 error
824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 93

In [126]:
len(responses)

25

In [129]:
missing_verbs[-25:]

['bitch',
 'eke',
 'insights',
 'channel',
 'texted',
 'wretched',
 'keeo',
 'undereat',
 'trading',
 'trek',
 'psyches',
 'debunking',
 'morose',
 'veggie',
 'helpful',
 'certify',
 'upchuck',
 'undiagnosed',
 'wnt',
 'smh',
 'suicidal',
 'thanking',
 'life',
 'seep',
 'happier']

In [127]:
responses

{'bitch': 'Yes.',
 'eke': 'Yes.',
 'insights': 'No.',
 'channel': 'Yes.',
 'texted': 'Yes.',
 'wretched': 'No.',
 'keeo': 'No.',
 'undereat': 'Yes.',
 'trading': 'Yes.',
 'trek': 'Yes.',
 'psyches': 'No.',
 'debunking': 'Yes.',
 'morose': 'No.',
 'veggie': 'No.',
 'helpful': 'No.',
 'certify': 'Yes.',
 'upchuck': 'Yes.',
 'undiagnosed': 'No.',
 'wnt': 'No.',
 'smh': 'No.',
 'suicidal': 'No.',
 'thanking': 'Yes.',
 'life': 'No.',
 'seep': 'Yes.',
 'happier': 'No.'}

In [91]:
#old_responses = responses

In [130]:
old_responses.update(responses)

In [131]:
len(old_responses)

2625

In [None]:
old_responses

In [123]:
backup = old_responses.copy()

In [133]:
with open('data/missing_verbs_responses.json', 'w') as f:
    json.dump(old_responses, f, indent=4)

In [136]:
np.unique(list(old_responses.values()), return_counts=True)

(array(['No.', 'Yes.'], dtype='<U4'), array([1193, 1432]))

In [137]:
only_verbs = [verb for verb in old_responses if old_responses[verb] == 'Yes.']

In [139]:
LEMMATIZER.lemmatize('slapped', pos='v').lower()

'slap'

In [140]:
with open('data/missing_verbs_only.txt', 'w') as f:
    f.write('\n'.join(only_verbs))
with open('data/missing_verbs_only_lemmatized.txt', 'w') as f:
    f.write('\n'.join([LEMMATIZER.lemmatize(verb, pos='v').lower() for verb in only_verbs]))

Load the agency power lexicon

In [6]:
lexicon_df = pd.read_csv(lexicon_path)

# replace verb column with lemmas
lexicon_df['verb'] = lexicon_df['verb'].apply(lambda x: LEMMATIZER.lemmatize(x, pos='v').lower())

# conver to dict
lexicon = {}
for i, row in lexicon_df.iterrows():
    verb = row['verb']
    agency = row['agency']
    power = row['power']

    lexicon[verb] = {'agency': agency, 'power': power}

Load the verbs missing from the power agency lexicon

In [7]:
with open(missing_verbs_path, 'r') as f:
    missing_verbs = f.read().split('\n')

Load glove embeddings

In [8]:
glove_embeddings = load_glove(glove_path)

Have chatgpt label the missing verbs

In [42]:
missing_verb = missing_verbs[0]
examples = find_k_similar_words(missing_verb, lexicon, glove_embeddings, k=10)

In [43]:
examples

['anger',
 'fear',
 'disgust',
 'envy',
 'arouse',
 'concern',
 'provoke',
 'desire',
 'resent',
 'regret']

In [40]:
for example in examples:
    print( "{'" + example + "': " + str(lexicon[example]) + '}')

{'sentence': {'agency': 'agency_pos', 'power': 'power_agent'}}
{'jog': {'agency': 'agency_pos', 'power': nan}}
{'travel': {'agency': 'agency_pos', 'power': 'power_agent'}}
{'pardon': {'agency': 'agency_pos', 'power': 'power_agent'}}
{'stroll': {'agency': 'agency_equal', 'power': nan}}
{'terminate': {'agency': 'agency_pos', 'power': 'power_agent'}}
{'taxis': {'agency': 'agency_pos', 'power': nan}}
{'hike': {'agency': 'agency_equal', 'power': 'power_equal'}}
{'trip': {'agency': 'agency_pos', 'power': 'power_agent'}}
{'endure': {'agency': 'agency_neg', 'power': 'power_theme'}}


In [44]:
print(get_formatted_prompt(missing_verb, examples, lexicon))

I will give you a verb, and you will assign it two labels.
The first label represents the agency that the verb gives its grammatical subject. This label can be one of the following:
    "agency_pos": the verb gives its subject agency, 
    "agency_neg": the verb takes away agency from its subject,
    "agency_equal": the verb does not affect the agency of its subject.

The second label represents whether the verb gives power to its subject, object, or both. This label is only defined for transitive verbs. This label can be one of the following:
    "power_agent": the verb gives power to its subject,
    "power_theme": the verb gives power to its object,
    "power_equal": the verb does not affect the power of its subject or object, or it affects both equally.
    "nan": the verb is intransitive, so power is not defined.

Here are some examples of verbs and their labels:
{'anger': {'agency': 'agency_equal', 'power': 'power_equal'}}
{'fear': {'agency': 'agency_neg', 'power': 'power_theme

Test chatgpt's accuracy on subset of the lexicon

In [9]:
train, test = train_test_split(lexicon_df, test_size=0.2, random_state=42)

In [10]:
train_dict = {}
test_dict = {}

for i, row in train.iterrows():
    verb = row['verb']
    agency = row['agency']
    power = row['power']

    train_dict[verb] = {'agency': agency, 'power': power}

for i, row in test.iterrows():
    verb = row['verb']
    agency = row['agency']
    power = row['power']

    test_dict[verb] = {'agency': agency, 'power': power}

In [30]:
len(train_dict), len(test_dict)

(1718, 431)

In [11]:
gpt_prompts = {}
for verb in test_dict:
    examples = find_k_similar_words(verb, train_dict, glove_embeddings, k=10)
    prompt = get_formatted_prompt(verb, examples, train_dict)
    gpt_prompts[verb] = prompt

In [13]:
# save prompts as json
import json
with open('data/gpt_prompts.json', 'w') as f:
    json.dump(gpt_prompts, f, indent=4)

In [19]:
# get openai key
with open('openai_key.txt', 'r') as f:
    openai.api_key = f.read()

In [20]:
verb = 'rest'
prompt = gpt_prompts[verb]
response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                #n=n,
                #temperature=temperature,
                messages=[
                    {"role": "user", "content": prompt}]
                )

In [28]:
result = response['choices'][0].message.content
result = eval(result)

In [29]:
result[verb]

{'agency': 'agency_equal', 'power': nan}

In [83]:
gpt_responses = {}
i = 0
n = 5 # number of responses to generate
for verb, prompt in list(gpt_prompts.items())[268:]:
    print(i, end=' ')
    response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                n=n,
                #temperature=temperature,
                messages=[
                    {"role": "user", "content": prompt}]
                )
    gpt_responses[verb] = []
    for j in range(n):
        result = response['choices'][j].message.content
        gpt_responses[verb].append(result)
    i += 1

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 

RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID b14bc88b8db68555a8953dd350a3dcce in your message.)

In [82]:
first_gpt_responses = gpt_responses

In [81]:
list(gpt_responses.items())[267]

('enjoy',
 ["{'enjoy': {'agency': 'agency_pos', 'power': 'power_theme'}}",
  '{"enjoy": {"agency": "agency_equal", "power": "power_theme"}}',
  "{'enjoy': {'agency': 'agency_equal', 'power': 'power_theme'}}",
  "{'enjoy': {'agency': 'agency_equal', 'power': 'power_theme'}}",
  "{'enjoy': {'agency': 'agency_equal', 'power': 'power_theme'}}"])

In [73]:
len(gpt_responses)

268

In [74]:
# save gpt responses as json
with open('data/gpt_responses_multiple5_INCOMPLETE.json', 'w') as f:
    json.dump(gpt_responses, f, indent=4)

In [77]:
gpt_preds = {}
for verb, responses in gpt_responses.items():
    new_responses = []
    for res in responses:
        try: 
            new_responses.append(eval(res))
        except:
            continue
    responses = new_responses
    all_agencies = [res[verb]['agency'] for res in responses]
    all_powers = [res[verb]['power'] for res in responses]
    
    # get most common agency
    majority_agency = max(set(all_agencies), key=all_agencies.count)
    majority_power = max(set(all_powers), key=all_powers.count)
    
    final_ans = {'agency': majority_agency, 'power': majority_power}
    gpt_preds[verb] = final_ans

In [63]:
verb = 'rest'
responses = gpt_responses[verb]
responses = [eval(res) for res in responses]

all_agencies = [res[verb]['agency'] for res in responses]
all_powers = [res[verb]['power'] for res in responses]

In [66]:
all_agencies = ['agency_equal', 'agency_pos', 'agency_pos']

In [67]:
max(set(all_agencies), key=all_agencies.count)

'agency_pos'

In [62]:
responses

["{'rest': {'agency': 'agency_equal', 'power': nan}}",
 "{'rest': {'agency': 'agency_equal', 'power': 'nan'}}",
 "{'rest': {'agency': 'agency_equal', 'power': nan}}"]

In [78]:
# compute accuracy
agency_correct = 0
power_correct = 0
total = 0
for verb in gpt_preds:
    total += 1
    agency_correct += gpt_preds[verb]['agency'] == test_dict[verb]['agency']
    
    # if verb is intransitive, power is not defined; need to check separately for nan values
    if test_dict[verb]['power'] is nan and gpt_preds[verb]['power'] is nan:
        power_correct += 1
    else:
        power_correct += gpt_preds[verb]['power'] == test_dict[verb]['power']
    
    #print(verb, gpt_preds[verb], test_dict[verb])

In [79]:
print(f'Agency accuracy: {agency_correct/total} ({agency_correct}/{total})')
print(f'Power accuracy: {power_correct/total} ({power_correct}/{total})')

Agency accuracy: 0.8134328358208955 (218/268)
Power accuracy: 0.4253731343283582 (114/268)
