# Preprocessing

In [182]:
import pandas as pd
import os
import openai
import math
import numpy as np
from gensim import matutils
import gensim.downloader
import time
openai.api_key = openai.api_key_path="GPT3.txt"

In [None]:
word_vectors = gensim.downloader.load('glove-wiki-gigaword-300')
word_vectors.similarity('table', 'spain')

In [64]:
# read csv dataset
df = pd.read_csv('Data/crawled_metaphors_selected_three_examples.csv', sep=";", index_col=0)
df_non_metaphoric = pd.read_csv('Data/non_metaphoric_examples.csv', sep=";")
df

Unnamed: 0,Source Domain,Target Domain,Title,Metaphor,Example
2,moving object,force,A Force Is A Moving Object,The Object May Have An Intention,Greed is the strongest evil force at work in t...
7,moving object,force,A Force Is A Moving Object,The Object Can Hold/control The Affected Party,The force of his hypnotic words had me in its ...
8,moving object,force,A Force Is A Moving Object,The Strength Of The Object Determines Its Abil...,The force of gravity on the moon is much weaker.
11,body of water,problem,A Problem Is A Body Of Water,Investigating Problem Is Exploring Water,He dived right into the problem.
14,body of water,problem,A Problem Is A Body Of Water,Difficulty In Solving Is Difficulty In Explori...,The murky waters of the investigation frustrat...
...,...,...,...,...,...
1704,well-being,wealth,Well-being Is Wealth,Well-being Is Wealth,It is a poor man who has no love of himself.
1705,well-being,wealth,Well-being Is Wealth,Well-being Is Wealth,The cynics of this world lead impoverished lives.
1706,weapons,words,Words Are Weapons,Words Are Weapons,She used some sharp words.
1707,weapons,words,Words Are Weapons,Words Are Weapons,That was pretty cutting language.


In [65]:
# how many different unique combinations of source and target
combinations = df.groupby(['Source Domain', 'Target Domain']).size().reset_index(name='counts').sort_values(by='counts', ascending=False)
combinations

Unnamed: 0,Source Domain,Target Domain,counts
38,destruction,intoxication,4
129,size,amount,4
108,plants,beliefs,4
53,goal,success,4
6,areas,subjects,4
...,...,...,...
97,object,ideas,2
65,insanity,strong emotion,2
14,burden,intoxication,2
143,up,rational,2


In [66]:
# split combinations randomly into train of size 40 and test set
train = combinations.sample(n=40, random_state=1)
test = combinations.drop(train.index)
print(len(train), len(test))
test

40 113


Unnamed: 0,Source Domain,Target Domain,counts
38,destruction,intoxication,4
129,size,amount,4
108,plants,beliefs,4
53,goal,success,4
6,areas,subjects,4
...,...,...,...
97,object,ideas,2
65,insanity,strong emotion,2
14,burden,intoxication,2
143,up,rational,2


In [67]:
# create train, val, and test tests 
train_df = pd.DataFrame(columns=["Source Domain", "Target Domain", "Title", "Metaphor", "Example"])
for i, row in train.iterrows():
    samples = df[(df["Source Domain"]==row["Source Domain"]) & (df["Target Domain"]==row["Target Domain"])] 
    train_df = train_df.append(samples.sample(n=min(3, len(samples)), random_state=1))

test_df = pd.DataFrame(columns=["Source Domain", "Target Domain", "Title", "Metaphor", "Example"])
valid_df = pd.DataFrame(columns=["Source Domain", "Target Domain", "Title", "Metaphor", "Example"])
for i, row in test.iterrows():
    samples = df[(df["Source Domain"]==row["Source Domain"]) & (df["Target Domain"]==row["Target Domain"])] 
    # split into samples for test and val; 1 sample for val, 2 for test
    test_samples = samples.sample(n=min(2, len(samples)), random_state=1)
    test_df = test_df.append(test_samples)
    valid_df = valid_df.append(samples.drop(test_samples.index))
valid_df


Unnamed: 0,Source Domain,Target Domain,Title,Metaphor,Example
952,destruction,intoxication,Intoxication Is Getting Destroyed,Intoxication Is Getting Destroyed,He is stoned.
953,destruction,intoxication,Intoxication Is Getting Destroyed,Intoxication Is Getting Destroyed,He is bombed.
1340,size,amount,Properties Are Physical Properties,Amount Is Size,Which of the two classes is bigger?
1341,size,amount,Properties Are Physical Properties,Amount Is Size,He only has a little money.
228,plants,beliefs,Beliefs Are Beings With A Life Cycle,Development Of A Belief Is Growth Of A Plant,This is just the seed of a belief.
...,...,...,...,...,...
783,food,ideas,Ideas Are Food,Thinking Is Preparing Food,Let me chew on that for a while.
460,force,desire,Desires Are Forces Between The Desired And The...,The Force May Be Within The Desirer,Something in me pulls me toward the wrong kind...
369,writing,communication,Communication Is Linguistic Communication,Communication Is Linguistic Communication,I read more into his actions than actually was...
1355,force,influence,Psychological Forces Are Physical Forces.,Influence Is A Force,She could bend his will.


In [68]:
train_df = train_df.append(df_non_metaphoric[:15])
valid_df = valid_df.append(df_non_metaphoric[15:30])
test_df = test_df.append(df_non_metaphoric[30:])
valid_df

Unnamed: 0,Source Domain,Target Domain,Title,Metaphor,Example
952,destruction,intoxication,Intoxication Is Getting Destroyed,Intoxication Is Getting Destroyed,He is stoned.
953,destruction,intoxication,Intoxication Is Getting Destroyed,Intoxication Is Getting Destroyed,He is bombed.
1340,size,amount,Properties Are Physical Properties,Amount Is Size,Which of the two classes is bigger?
1341,size,amount,Properties Are Physical Properties,Amount Is Size,He only has a little money.
228,plants,beliefs,Beliefs Are Beings With A Life Cycle,Development Of A Belief Is Growth Of A Plant,This is just the seed of a belief.
...,...,...,...,...,...
25,not metaphoric,missing,No Metaphor,No Metaphor,We are trying to find our missing scientist now.
26,not metaphoric,bells,No Metaphor,No Metaphor,The doorbell sounded.
27,not metaphoric,time,No Metaphor,No Metaphor,Four hours of instruction a day for two weeks.
28,not metaphoric,listening,No Metaphor,No Metaphor,The inspector was not listening.


In [91]:
print("Lenght of train: ", len(train_df))
print("Lenght of valid: ", len(valid_df))
print("Lenght of test: ", len(test_df))

Lenght of train:  132
Lenght of valid:  120
Lenght of test:  244


# Prompt Creation

In [73]:
def create_source_prompt(row):
    prompt="Extract the conceptual metaphor from the following sentence:\n"
    prompt+="Sentence: "+row['Example']+"\n"
    prompt+="Target Domain: "+str(row['Target Domain'])+"\n"
    prompt+="Source Domain:"
    completion=str(row['Source Domain'])
    return prompt, completion

def create_target_prompt(row):
    prompt = "Extract the conceptual metaphor from the following sentence:\n"
    prompt+="Sentence: "+row['Example']+"\n"
    prompt+="Source Domain: "+str(row['Source Domain'])+"\n"
    prompt+="Target Domain:"
    completion = str(row['Target Domain'])
    return prompt, completion

def create_full_prompt(row):
    prompt = "Extract the conceptual metaphor from the following sentence:\n"
    prompt+="Sentence: "+row['Example']+"\n"
    completion = "Target Domain: "+str(row['Target Domain'])+"\n"
    completion+="Source Domain: "+str(row['Source Domain'])
    return prompt, completion

In [75]:
# create source prompts
list_of_source_prompts_train = []
list_of_source_completions_train = []
list_of_source_prompts_test = []
list_of_source_completions_test = []
list_of_source_prompts_valid = []
list_of_source_completions_valid = []

for i, row in train_df.iterrows():
    list_of_source_prompts_train.append(create_source_prompt(row)[0])
    list_of_source_completions_train.append(create_source_prompt(row)[1])

for i, row in test_df.iterrows():
    list_of_source_prompts_test.append(create_source_prompt(row)[0])
    list_of_source_completions_test.append(create_source_prompt(row)[1])

for i, row in valid_df.iterrows():
    list_of_source_prompts_valid.append(create_source_prompt(row)[0])
    list_of_source_completions_valid.append(create_source_prompt(row)[1])
    
print("Prompt:", list_of_source_prompts_train[2])
print("Completion:", list_of_source_completions_train[2])

Prompt: Extract the conceptual metaphor from the following sentence:
Sentence: He recovered his hopes for a peace on earth.
Target Domain: hope
Source Domain:
Completion: possessions


In [76]:
# create full prompts
list_of_full_prompts_train = []
list_of_full_completions_train = []
list_of_full_prompts_test = []
list_of_full_completions_test = []
list_of_full_prompts_valid = []
list_of_full_completions_valid = []
for i, row in train_df.iterrows():
    list_of_full_prompts_train.append(create_full_prompt(row)[0])
    list_of_full_completions_train.append(create_full_prompt(row)[1])
for i, row in test_df.iterrows():
    list_of_full_prompts_test.append(create_full_prompt(row)[0])
    list_of_full_completions_test.append(create_full_prompt(row)[1])
for i, row in valid_df.iterrows():
    list_of_full_prompts_valid.append(create_full_prompt(row)[0])
    list_of_full_completions_valid.append(create_full_prompt(row)[1])
print("Prompt:", list_of_full_prompts_train[2])
print("Completion:", list_of_full_completions_train[2])

Prompt: Extract the conceptual metaphor from the following sentence:
Sentence: He recovered his hopes for a peace on earth.

Completion: Target Domain: hope
Source Domain: possessions


In [201]:
def create_prompts(prompts_train, completions_train, prompts_test, completions_test, train_indices, extra_def):
    prompts = []
    completions = []
    train_prompt = ""
    if extra_def:
        train_prompt = 'In cognitive linguistics, conceptual metaphor, or cognitive metaphor, refers to the understanding of one idea, or conceptual domain, in terms of another. An example of this is the understanding of quantity in terms of directionality (e.g. "the price of peace is rising") or the understanding of time in terms of money (e.g. "I spent time at work today"). This idea, and a detailed examination of the underlying processes, was first extensively explored by George Lakoff and Mark Johnson in their work Metaphors We Live By in 1980.\n'
    for i in train_indices:
        train_prompt+=prompts_train[i]+" "+completions_train[i]+"\n"
    for i in range(len(prompts_test)):
        prompts.append(train_prompt+prompts_test[i])
        completions.append(completions_test[i])
    return prompts, completions
    

In [203]:
train_indices=[0,15,30,-1]
prompts, completions = create_prompts(prompts_train = list_of_source_prompts_train, completions_train = list_of_source_completions_train, 
                prompts_test = list_of_source_prompts_valid, completions_test = list_of_source_completions_valid, train_indices=train_indices, extra_def=True)
print(len(prompts))

120


In [204]:
print(prompts[5])
print(completions[5])

In cognitive linguistics, conceptual metaphor, or cognitive metaphor, refers to the understanding of one idea, or conceptual domain, in terms of another. An example of this is the understanding of quantity in terms of directionality (e.g. "the price of peace is rising") or the understanding of time in terms of money (e.g. "I spent time at work today"). This idea, and a detailed examination of the underlying processes, was first extensively explored by George Lakoff and Mark Johnson in their work Metaphors We Live By in 1980.
Extract the conceptual metaphor from the following sentence:
Sentence: I've lost all hope of a solution.
Target Domain: hope
Source Domain: possessions
Extract the conceptual metaphor from the following sentence:
Sentence: Time heals all wounds.
Target Domain: time
Source Domain: changer
Extract the conceptual metaphor from the following sentence:
Sentence: He's really high.
Target Domain: euphoria
Source Domain: up
Extract the conceptual metaphor from the followin

In [100]:
GPT3_completions=[]
for p in prompts[:5]:
    c = openai.Completion.create(
            model="text-davinci-002",
            prompt=p,
            max_tokens=14,
            temperature=0
        )
    GPT3_completions.append(c)

In [104]:
df_results = valid_df.copy().reset_index()
# append GPT3_completions to df_results [trainings_units:len(GPT3_completions)]
for i in range(len(GPT3_completions)):
    df_results.loc[i, 'GPT3 Source Completion'] = GPT3_completions[i].choices[0].text
df_results[:len(GPT3_completions)][["Target Domain", "Source Domain","GPT3 Source Completion", "Example"]]

Unnamed: 0,Target Domain,Source Domain,GPT3 Source Completion,Example
0,intoxication,destruction,stone,He is stoned.
1,intoxication,destruction,explosives,He is bombed.
2,amount,size,size,Which of the two classes is bigger?
3,amount,size,size,He only has a little money.
4,beliefs,plants,plants,This is just the seed of a belief.
5,beliefs,plants,plants,This belief has taken root in my mind.
6,success,goal,travel,She finally reached the end of the dissertatio...
7,success,goal,light,"After working on the application for days, I f..."
8,subjects,areas,physical locations,He is in a difficult field.
9,subjects,areas,land,The scientists have covered a lot of ground in...


# Prompt Tuning

In [208]:
def get_GPT3_completions(train_indices, test, temperature, model, result_samples, extra_def):
    '''
    train_indices: indices of the examples to be used for training
    test: True if evaluation is on test set, False if evaluation is on validation set
    temperature: temperature of the GPT3 model
    model: GPT3 model to be used for evaluation
    result_samples: number of samples to be evaluated
    extra_def: True if extra definition is to be added to the beginning of the prompt
    '''

    if test:
        prompts_test = list_of_source_prompts_test
        completions_test = list_of_source_completions_test
    else:
        prompts_test = list_of_source_prompts_valid
        completions_test = list_of_source_completions_valid

    prompts, completions = create_prompts(prompts_train = list_of_source_prompts_train, completions_train = list_of_source_completions_train, 
                    prompts_test = prompts_test, completions_test = completions_test, train_indices=train_indices, extra_def=extra_def)
    GPT3_completions=[]
    for p in prompts[:result_samples]:
        c = openai.Completion.create(
                model=model,
                prompt=p,
                max_tokens=14,
                temperature=temperature
            )
        GPT3_completions.append(c)
        # wait for 1.5 second to avoid rate limit
        time.sleep(1.5)

    if test:
        df_results = test_df.copy().reset_index()
    else:
        df_results = valid_df.copy().reset_index()
    # append GPT3_completions to df_results [trainings_units:len(GPT3_completions)]
    for i in range(len(GPT3_completions)):
        df_results.loc[i, 'GPT3 Completion'] = GPT3_completions[i].choices[0].text
    return df_results[:len(GPT3_completions)][["Target Domain", "Source Domain","GPT3 Completion", "Example"]]

In [263]:
# compute the average similarity between the GPT3 completion and the source domain
def compute_similarity(df_results, task_type):
    '''
    df_results: dataframe containing the GPT3 completions
    result_samples: number of samples to be evaluated
    '''
    for i in range(len(df_results)):
        if task_type == "Source":
            gold = df_results.loc[i, 'Source Domain']
        elif task_type == "Target":
            gold = df_results.loc[i, 'Target Domain']
        elif task_type == "Finetuned":
            gold = df_results.loc[i, 'completion'].strip()
            # remove "END" token
            gold = gold[:gold.rfind("END")].strip()
        predicted = df_results.loc[i, 'GPT3 Completion'].strip()
        # check if predicted or gold is multiple words
        try: 
            if len(predicted.split()) > 1:
                predicted = matutils.unitvec(word_vectors.get_mean_vector(keys=predicted.split()))
            else:
                predicted = word_vectors.get_vector(predicted, norm=True)
            if len(gold.split()) > 1:
                print(gold)
                gold = matutils.unitvec(word_vectors.get_mean_vector(keys=gold.split()))
            else:
                gold = word_vectors.get_vector(gold, norm=True)
            similarity = np.dot(predicted, gold)
        except:
            print("Error in computing similarity for example: ", predicted, gold)
            similarity = 0
        df_results.loc[i, 'similarity'] = similarity
        mean = df_results['similarity'].mean()
        std = df_results['similarity'].std()
    return mean, std, df_results

In [209]:
len(list_of_source_completions_train)

132

In [210]:
# define training parameters 
train_indices=[0,15,30,45,-1]
model = "text-davinci-002"
# model = "text-curie-001"  # smaller/faster/cheaper model
temperature = 0
result_samples = 999 # 999 for all
extra_def = True 
test=False


run_name=model[5:]+"_temp-"+str(temperature)+"_"+str(train_indices)+"_("+str(len(train_indices))+"-train-samples)"+"_def-"+str(extra_def)
if test:
    run_name=run_name+"_test"
else:
    run_name=run_name+"_valid"

# prompt 
df_results = get_GPT3_completions(train_indices, test=test, temperature=temperature, model=model, result_samples=result_samples, extra_def=extra_def)
# evaluate 
mean, std, df_results = compute_similarity(df_results, "Source")
run_name+="_score-"+str(round(mean, 2))[2:]
print("Mean similarity: ", mean)
print("Standard deviation: ", std)
df_results.to_csv("Validation Results/Source Completion/Few Shot/"+run_name+".csv")
df_results

Mean similarity:  0.46337338117009497
Standard deviation:  0.37928598321938106


Unnamed: 0,Target Domain,Source Domain,GPT3 Completion,Example,similarity
0,intoxication,destruction,up,He is stoned.,0.144491
1,intoxication,destruction,up,He is bombed.,0.144491
2,amount,size,size,Which of the two classes is bigger?,1.000000
3,amount,size,size,He only has a little money.,1.000000
4,beliefs,plants,plants,This is just the seed of a belief.,1.000000
...,...,...,...,...,...
115,missing,not metaphoric,looking,We are trying to find our missing scientist now.,0.308055
116,bells,not metaphoric,not metaphoric,The doorbell sounded.,1.000000
117,time,not metaphoric,money,Four hours of instruction a day for two weeks.,0.275181
118,listening,not metaphoric,not metaphoric,The inspector was not listening.,1.000000


# Finetuning

In [243]:
def create_finetuning_dataset(raw_prompts, raw_completions, name):
    prompts = []
    completions = []
    for i in range(len(raw_prompts)):
        prompts.append(raw_prompts[i][71:])
        completions.append(" "+raw_completions[i]+" END")
    df = pd.DataFrame({'prompt': prompts, 'completion': completions})
    df.to_csv(name, index=False)

create_finetuning_dataset(list_of_source_prompts_train, list_of_source_completions_train, "Finetuning/source-finetuning-train.csv")
create_finetuning_dataset(list_of_source_prompts_test, list_of_source_completions_test, "Finetuning/source-finetuning-test.csv")
create_finetuning_dataset(list_of_source_prompts_valid, list_of_source_completions_valid, "Finetuning/source-finetuning-valid.csv")

In [244]:
def get_GPT3_completions_finetune(test, temperature, model, result_samples):
    if test:
        data=pd.read_csv("Finetuning/source-finetuning-test.csv")
    else:
        data=pd.read_csv("Finetuning/source-finetuning-valid.csv")
    prompts = data['prompt']
    completions = data['completion']

    GPT3_completions=[]
    for p in prompts[:result_samples]:
        c = openai.Completion.create(
                model=model,
                prompt=p,
                max_tokens=14,
                temperature=temperature,
                stop=[" END"]

            )
        GPT3_completions.append(c)
        # wait for 1.5 second to avoid rate limit
        time.sleep(1.5)

    df_results = data.copy().reset_index()

    # append GPT3_completions to df_results [trainings_units:len(GPT3_completions)]
    for i in range(len(GPT3_completions)):
        gpt3_completion = GPT3_completions[i].choices[0].text
        # strip everything after the first newline
        #gpt3_completion = gpt3_completion.split("\n")[0]
        df_results.loc[i, 'GPT3 Completion'] = gpt3_completion
    return df_results[:len(GPT3_completions)]

In [247]:
model = "davinci:ft-personal-2022-08-27-11-18-24"#"davinci:ft-personal-2022-08-27-10-12-16"#"davinci:ft-personal-2022-08-27-09-05-45"
temperature=0
result_samples=999
test=False

run_name="Finetuned_temp-"+str(temperature)+model
if test:
    run_name=run_name+"_test"
else:
    run_name=run_name+"_valid"

df_results = get_GPT3_completions_finetune(test, temperature, model, result_samples)

mean, std, df_results = compute_similarity(df_results, "Finetuned")
run_name+="_score-"+str(round(mean, 2))[2:]
print("Mean similarity: ", mean)
print("Standard deviation: ", std)
df_results.to_csv("Validation Results/Source Completion/Finetuned/"+run_name+".csv")
df_results

Error in computing similarity for example:  part-whole possession END
Error in computing similarity for example:  part-whole possessions END
Error in computing similarity for example:  part-whole body END
Mean similarity:  0.3406552021226768
Standard deviation:  0.3290585217908446


Unnamed: 0,index,prompt,completion,GPT3 Completion,similarity
0,0,He is stoned.\nTarget Domain: intoxication\nSo...,destruction END,injury,0.165798
1,1,He is bombed.\nTarget Domain: intoxication\nSo...,destruction END,injury,0.165798
2,2,Which of the two classes is bigger?\nTarget Do...,size END,size,1.000000
3,3,He only has a little money.\nTarget Domain: am...,size END,size,1.000000
4,4,This is just the seed of a belief.\nTarget Dom...,plants END,seed,0.301230
...,...,...,...,...,...
115,115,We are trying to find our missing scientist no...,not metaphoric END,not metaphoric,1.000000
116,116,The doorbell sounded.\nTarget Domain: bells \n...,not metaphoric END,not metaphoric,1.000000
117,117,Four hours of instruction a day for two weeks....,not metaphoric END,money,0.275181
118,118,The inspector was not listening.\nTarget Domai...,not metaphoric END,hearing,0.208229


In [264]:
mean, std, df_results = compute_similarity(df_results, "Finetuned")
run_name+="_score-"+str(round(mean, 2))[2:]
print("Mean similarity: ", mean)
print("Standard deviation: ", std)
df_results.to_csv("Validation Results/Source Completion/Finetuned/"+run_name+".csv")
df_results

musical harmony
moving object
moving object
moving object
Error in computing similarity for example:  part-whole possession
Error in computing similarity for example:  part-whole possessions
Error in computing similarity for example:  part-whole body
body of water
not metaphoric
not metaphoric
not metaphoric
not metaphoric
not metaphoric
not metaphoric
not metaphoric
not metaphoric
not metaphoric
not metaphoric
not metaphoric
not metaphoric
not metaphoric
not metaphoric
not metaphoric
Mean similarity:  0.3406552044199392
Standard deviation:  0.32905852537693336


Unnamed: 0,index,prompt,completion,GPT3 Completion,similarity
0,0,He is stoned.\nTarget Domain: intoxication\nSo...,destruction END,injury,0.165798
1,1,He is bombed.\nTarget Domain: intoxication\nSo...,destruction END,injury,0.165798
2,2,Which of the two classes is bigger?\nTarget Do...,size END,size,1.000000
3,3,He only has a little money.\nTarget Domain: am...,size END,size,1.000000
4,4,This is just the seed of a belief.\nTarget Dom...,plants END,seed,0.301230
...,...,...,...,...,...
115,115,We are trying to find our missing scientist no...,not metaphoric END,not metaphoric,1.000000
116,116,The doorbell sounded.\nTarget Domain: bells \n...,not metaphoric END,not metaphoric,1.000000
117,117,Four hours of instruction a day for two weeks....,not metaphoric END,money,0.275181
118,118,The inspector was not listening.\nTarget Domai...,not metaphoric END,hearing,0.208229


# Model Application 

In [109]:
from datasets import load_dataset
text_dataset = load_dataset("SocialGrep/the-reddit-covid-dataset", "posts")

Downloading and preparing dataset the-reddit-covid-dataset/posts to /Users/lennartwachowiak/.cache/huggingface/datasets/SocialGrep___the-reddit-covid-dataset/posts/1.0.0/35698a78e6ebe9f3da4d0d354139c89d8097b4498816cab987639ad00dbe4a92...


Downloading data: 100%|██████████| 580M/580M [01:01<00:00, 9.36MB/s]
Downloading data files: 100%|██████████| 1/1 [01:02<00:00, 62.77s/it]
Extracting data files: 100%|██████████| 1/1 [00:08<00:00,  8.89s/it]
                                                                     

Dataset the-reddit-covid-dataset downloaded and prepared to /Users/lennartwachowiak/.cache/huggingface/datasets/SocialGrep___the-reddit-covid-dataset/posts/1.0.0/35698a78e6ebe9f3da4d0d354139c89d8097b4498816cab987639ad00dbe4a92. Subsequent calls will reuse this data.


100%|██████████| 1/1 [00:00<00:00,  6.18it/s]


In [130]:
sentences_to_analyse=[]
for sentence in text_dataset["train"]["title"][0:10000]:
    if "restriction" in sentence:
        sentences_to_analyse.append(sentence)
        #print(sentence)
    

In [121]:
# remove duplicates from sentences_to_analyse
sentences_to_analyse = list(set(sentences_to_analyse))
print(len(sentences_to_analyse))

128


In [159]:
def create_long_prompt_new_text(list_of_prompts, list_of_completions, start_index, end_index, query_text, query_target):
    prompt=""
    for j in range(start_index, end_index):
        prompt+=list_of_prompts[j]+list_of_completions[j]+"\n"
    prompt+="Extract the conceptual metaphor from the following sentence:\nSentence: "
    prompt+=query_text
    prompt+="\nTarget Domain: "+query_target
    prompt+="\nSource Domain:"
    return prompt

In [160]:
analysis_prompts = []
for s in sentences_to_analyse:
    analysis_prompts.append(create_long_prompt_new_text(list_of_source_prompts, list_of_source_completions, 0, trainings_units, s, "restriction"))

In [161]:
print(analysis_prompts[0])

Extract the conceptual metaphor from the following sentence:
Sentence: She spends her time unwisely.
Target Domain: time
Source Domain:money
Extract the conceptual metaphor from the following sentence:
Sentence: Even small magnets are sources of magnetism that can erase credit cards.
Target Domain: force
Source Domain:object, mover
Extract the conceptual metaphor from the following sentence:
Sentence: I'm all charged up and full of energy
Target Domain: people
Source Domain:batteries
Extract the conceptual metaphor from the following sentence:
Sentence: He turned all funny.
Target Domain: change
Source Domain:direction, motion, movement
Extract the conceptual metaphor from the following sentence:
Sentence: Daughter broke Covid restrictions against the rules of the house
Target Domain: restriction
Source Domain:


In [162]:
GPT3_analysis_completions=[]
for p in analysis_prompts[:10]:
    c = openai.Completion.create(
            model="text-davinci-002",
            prompt=p,
            max_tokens=14,
            temperature=0
        )
    GPT3_analysis_completions.append(c)

In [164]:
for i in range(len(GPT3_analysis_completions)):
    print(sentences_to_analyse[i])
    print("Predicted Source Domain:", GPT3_analysis_completions[i].choices[0].text)
    print()

Daughter broke Covid restrictions against the rules of the house
Predicted Source Domain: prison

Farmers Mother of all protests still going ahead next month despite Covid-19 restrictions
Predicted Source Domain: containment, enclosure

Illinois trying to legislate Covid restrictions... ?
Predicted Source Domain:  law

@Reuters: Biden issuing new order lifting COVID-19 travel restrictions, imposing vaccine rules https://t.co/pZogUEwyLV https://t.co/EoRYNpqFlr
Predicted Source Domain:  travel

COVID restrictions in Prague?
Predicted Source Domain: prison

Parents, has covid restrictions added more or less pressure on you to be a Super Parent?
Predicted Source Domain: weight, pressure

Advice for friend circumventing COVID restrictions
Predicted Source Domain: obstacle, blockage

[World] - Slovakia extends COVID-19 restrictions amid infection surge | Toronto Star
Predicted Source Domain: containment

[World] - Slovakia extends COVID-19 restrictions amid infection surge
Predicted Source D

TODOS:

--> do this for 5 different terms such as restriction and create nice charts showing the most common source domains 
- use LCC paper

- make the model predict nothing  if the term is not used metaphorically --> take sentences from VUA dataset that are tagged as all non-metaphoric

- compare effectivness of different prompt designs
- compare with Bloom model 
- add languages: german + portuguese + ? --> how to get them? Metanet has russian and spanish
- add examples that have 2 metaphors ---> see if it confuses the model. Give nthe target domain, does it say the correct of the 2 possible source domains?
- can i publish on arxiv first and then somewhere else? all the conferences seem rly far away right now?
- manuelle evaluation regel: wenn etwas preziser ist, ist es ok; wenn es breiter ist (objekt anstatt container) ist es falsch. 
- get explanation, i.e., "which word made you think of this source domain?"

In [137]:
df

Unnamed: 0,Source Domain,Target Domain,Title,Metaphor,Example
0,"object, mover",force,A Force Is A Moving Object,The Object Has A Source,Even small magnets are sources of magnetism th...
1,body of water,problem,A Problem Is A Body Of Water,Investigating Problem Is Exploring Water,He dived right into the problem.
2,container,"problem, solving",A Problem Is A Locked Container For Its Solution,The Solution Is Contained In The Problem,We have to look deeply into this problem for i...
3,location,problem,A Problem Is A Region In A Landscape,Investigating A Problem Is Exploring A Landscape,We've got to explore this problem.
4,"moving object, mover",schedule,A Schedule Is A Moving Object,A Schedule Is A Moving Object,He was behind (the) schedule.
...,...,...,...,...,...
107,pursuer,time,Time Is A Pursuer,Time Is A Pursuer,Time will catch up with him.
108,"resource, commodity",time,Time Is A Resource,Time Is A Resource,We're almost out of time.
109,money,time,Time Is Money,Time Is Money,She spends her time unwisely.
110,"something moving, mover",time,Time Is Something Moving Toward You,Time Is Something Moving Toward You,When Tuesday comes. . .


In [138]:
# save df to csv
df.to_csv("1ExamplePerMetaphor.csv")