# Preprocessing

In [None]:
import pandas as pd
import os
import openai
openai.api_key = openai.api_key_path="GPT3.txt"

In [35]:
# read csv dataset
df = pd.read_csv('crawled_metaphors_cleaned.csv', sep=";", index_col=0)
df

Unnamed: 0,Source Domain,Target Domain,Title,Metaphor,Example
0,"object, mover",force,A Force Is A Moving Object,The Object Has A Source,Even small magnets are sources of magnetism th...
1,"object, mover",force,A Force Is A Moving Object,The Object Has A Goal And Direction,The attractive force of the females' pheromone...
2,"object, mover",force,A Force Is A Moving Object,The Object May Have An Intention,Greed is the strongest evil force at work in t...
3,"object, mover",force,A Force Is A Moving Object,The Object Can Hit An Affected Party,It takes a long time for force applied at one ...
4,"object, mover",force,A Force Is A Moving Object,The Object Can Move An Affected Party In Its O...,The magnetic force pulled at the horseshoe.
...,...,...,...,...,...
1706,,,Words Are Weapons,Words Are Weapons,She used some sharp words.
1707,,,Words Are Weapons,Words Are Weapons,That was pretty cutting language.
1708,,,Words Are Weapons,Words Are Weapons,It was a barrage of insults.
1709,,,Words Are Weapons,Words Are Weapons,He was bombarded by insults.


In [43]:
# remove lines with NaNs
df = df.dropna(axis=0, how='any')
# keep 1 row per Title
df = df.drop_duplicates(subset='Title', keep='first')
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,Source Domain,Target Domain,Title,Metaphor,Example
0,"object, mover",force,A Force Is A Moving Object,The Object Has A Source,Even small magnets are sources of magnetism th...
11,body of water,problem,A Problem Is A Body Of Water,Investigating Problem Is Exploring Water,He dived right into the problem.
19,container,"problem, solving",A Problem Is A Locked Container For Its Solution,The Solution Is Contained In The Problem,We have to look deeply into this problem for i...
27,location,problem,A Problem Is A Region In A Landscape,Investigating A Problem Is Exploring A Landscape,We've got to explore this problem.
54,"moving object, mover",schedule,A Schedule Is A Moving Object,A Schedule Is A Moving Object,He was behind (the) schedule.
...,...,...,...,...,...
1676,pursuer,time,Time Is A Pursuer,Time Is A Pursuer,Time will catch up with him.
1677,"resource, commodity",time,Time Is A Resource,Time Is A Resource,We're almost out of time.
1680,money,time,Time Is Money,Time Is Money,She spends her time unwisely.
1683,"something moving, mover",time,Time Is Something Moving Toward You,Time Is Something Moving Toward You,When Tuesday comes. . .


In [143]:
df=pd.read_csv("1ExamplePerMetaphor_cleaned.csv", sep=";", index_col=0)
# shuffle df
df = df.sample(frac=1).reset_index(drop=True)
df

Unnamed: 0,Source Domain,Target Domain,Title,Metaphor,Example
0,money,time,Time Is Money,Time Is Money,She spends her time unwisely.
1,"object, mover",force,A Force Is A Moving Object,The Object Has A Source,Even small magnets are sources of magnetism th...
2,batteries,people,People Are Batteries,People Are Batteries,I'm all charged up and full of energy
3,"direction, motion, movement",change,Change Of State Is Change Of Direction,Change Of State Is Change Of Direction,He turned all funny.
4,substance,laughter,Laughter Is A Substance,Potential Laughter Is A Substance Inside The P...,You can't get a single joke out of him.
...,...,...,...,...,...
107,part-whole,love,Love Is A Unity (of Two Complementary Parts),EMOTIONAL INTIMACY IS PHYSICAL CLOSENESS,We were made for each other
108,"motion, change of location",change of state,Change Is Motion (location),Change Is Motion (location),He went from innocent to worldly.
109,line,light,Light Is A Line,Light Is A Line,Sunbeams
110,person,machine,Machines Are People,Calculation Is Thinking,"""ROM looks for purged menus."""


# Prompt Creation

In [144]:
def create_source_prompt(row):
    prompt="Extract the conceptual metaphor from the following sentence:\n"
    prompt+="Sentence: "+row['Example']+"\n"
    prompt+="Target Domain: "+str(row['Target Domain'])+"\n"
    prompt+="Source Domain:"
    completion=str(row['Source Domain'])
    return prompt, completion

def create_target_prompt(row):
    prompt = "Extract the conceptual metaphor from the following sentence:\n"
    prompt+="Sentence: "+row['Example']+"\n"
    prompt+="Source Domain: "+str(row['Source Domain'])+"\n"
    prompt+="Target Domain:"
    completion = str(row['Target Domain'])
    return prompt, completion

def create_full_prompt(row):
    prompt = "Extract the conceptual metaphor from the following sentence:\n"
    prompt+="Sentence: "+row['Example']+"\n"
    completion = "Target Domain: "+str(row['Target Domain'])+"\n"
    completion+="Source Domain: "+str(row['Source Domain'])
    return prompt, completion

In [145]:
# create source prompts
list_of_source_prompts = []
list_of_source_completions = []
for i, row in df.iterrows():
    list_of_source_prompts.append(create_source_prompt(row)[0])
    list_of_source_completions.append(create_source_prompt(row)[1])
print("Prompt:", list_of_source_prompts[2])
print("Completion:", list_of_source_completions[2])

Prompt: Extract the conceptual metaphor from the following sentence:
Sentence: I'm all charged up and full of energy
Target Domain: people
Source Domain:
Completion: batteries


In [146]:
# create full prompts
list_of_full_prompts = []
list_of_full_completions = []
for i, row in df.iterrows():
    list_of_full_prompts.append(create_full_prompt(row)[0])
    list_of_full_completions.append(create_full_prompt(row)[1])
print("Prompt:", list_of_full_prompts[2])
print("Completion:", list_of_full_completions[2])

Prompt: Extract the conceptual metaphor from the following sentence:
Sentence: I'm all charged up and full of energy

Completion: Target Domain: people
Source Domain: batteries


In [150]:
def create_long_prompt(list_of_prompts, list_of_completions, start_index, end_index, query_index):
    '''
    Creates a prompt with a lot of examples for a single completion
    start_index: index of the first full example added to prompt
    end_index: index of the last example added to prompt
    query_index: index of the example that needs to be completed
    '''
    prompt=""
    for j in range(start_index, end_index): # add training prompts
        prompt+=list_of_prompts[j]+" "+list_of_completions[j]+"\n"
    prompt+=list_of_prompts[i]  # add final prompt
    return prompt, list_of_completions[i]
    

In [151]:
# create finetuning prompts 
prompts = []
completions = []
trainings_units=4
for i in range(trainings_units, len(list_of_source_prompts)):
    # create a prompt based on sample 1 to 5 and an additional sample i 
    p, c = create_long_prompt(list_of_source_prompts, list_of_source_completions, 0, trainings_units, i)
    prompts.append(p)
    completions.append(c)

In [154]:
print(prompts[0])
print(completions[0])

Extract the conceptual metaphor from the following sentence:
Sentence: She spends her time unwisely.
Target Domain: time
Source Domain: money
Extract the conceptual metaphor from the following sentence:
Sentence: Even small magnets are sources of magnetism that can erase credit cards.
Target Domain: force
Source Domain: object, mover
Extract the conceptual metaphor from the following sentence:
Sentence: I'm all charged up and full of energy
Target Domain: people
Source Domain: batteries
Extract the conceptual metaphor from the following sentence:
Sentence: He turned all funny.
Target Domain: change
Source Domain: direction, motion, movement
Extract the conceptual metaphor from the following sentence:
Sentence: You can't get a single joke out of him.
Target Domain: laughter
Source Domain:
substance


In [155]:
GPT3_completions=[]
for p in prompts[:25]:
    c = openai.Completion.create(
            model="text-davinci-002",
            prompt=p,
            max_tokens=14,
            temperature=0
        )
    GPT3_completions.append(c)

In [158]:
df_results = df.copy()
# append GPT3_completions to df_results [trainings_units:len(GPT3_completions)]
for i in range(len(GPT3_completions)):
    df_results.loc[i+trainings_units, 'GPT3 Source Completion'] = GPT3_completions[i].choices[0].text
df_results[trainings_units:len(GPT3_completions)][["Target Domain", "Source Domain","GPT3 Source Completion", "Example"]]

Unnamed: 0,Target Domain,Source Domain,GPT3 Source Completion,Example
4,laughter,substance,water,You can't get a single joke out of him.
5,means of change,"motion, path",journey,He went from fat to thin through an intensive ...
6,problem,"constructed object, manufactured object","object, thing",Let's look at the make-up of this problem.
7,anger,heat,heat,She's a real hothead.
8,intoxication,burden,"weight, heaviness",She was loaded.
9,problem,location,"land, territory",We've got to explore this problem.
10,communication,speech,movement,Body language
11,problem,container,object,We have to look deeply into this problem for i...
12,moral deeds,"debt, accounting, balance",physical work,He worked hard on that account: they owe him a...
13,harm,"possessions, transfer","bad luck, misfortune",I have the worst luck


# Model Application 

In [109]:
from datasets import load_dataset
text_dataset = load_dataset("SocialGrep/the-reddit-covid-dataset", "posts")

Downloading and preparing dataset the-reddit-covid-dataset/posts to /Users/lennartwachowiak/.cache/huggingface/datasets/SocialGrep___the-reddit-covid-dataset/posts/1.0.0/35698a78e6ebe9f3da4d0d354139c89d8097b4498816cab987639ad00dbe4a92...


Downloading data: 100%|██████████| 580M/580M [01:01<00:00, 9.36MB/s]
Downloading data files: 100%|██████████| 1/1 [01:02<00:00, 62.77s/it]
Extracting data files: 100%|██████████| 1/1 [00:08<00:00,  8.89s/it]
                                                                     

Dataset the-reddit-covid-dataset downloaded and prepared to /Users/lennartwachowiak/.cache/huggingface/datasets/SocialGrep___the-reddit-covid-dataset/posts/1.0.0/35698a78e6ebe9f3da4d0d354139c89d8097b4498816cab987639ad00dbe4a92. Subsequent calls will reuse this data.


100%|██████████| 1/1 [00:00<00:00,  6.18it/s]


In [130]:
sentences_to_analyse=[]
for sentence in text_dataset["train"]["title"][0:10000]:
    if "restriction" in sentence:
        sentences_to_analyse.append(sentence)
        #print(sentence)
    

In [121]:
# remove duplicates from sentences_to_analyse
sentences_to_analyse = list(set(sentences_to_analyse))
print(len(sentences_to_analyse))

128


In [159]:
def create_long_prompt_new_text(list_of_prompts, list_of_completions, start_index, end_index, query_text, query_target):
    prompt=""
    for j in range(start_index, end_index):
        prompt+=list_of_prompts[j]+list_of_completions[j]+"\n"
    prompt+="Extract the conceptual metaphor from the following sentence:\nSentence: "
    prompt+=query_text
    prompt+="\nTarget Domain: "+query_target
    prompt+="\nSource Domain:"
    return prompt

In [160]:
analysis_prompts = []
for s in sentences_to_analyse:
    analysis_prompts.append(create_long_prompt_new_text(list_of_source_prompts, list_of_source_completions, 0, trainings_units, s, "restriction"))

In [161]:
print(analysis_prompts[0])

Extract the conceptual metaphor from the following sentence:
Sentence: She spends her time unwisely.
Target Domain: time
Source Domain:money
Extract the conceptual metaphor from the following sentence:
Sentence: Even small magnets are sources of magnetism that can erase credit cards.
Target Domain: force
Source Domain:object, mover
Extract the conceptual metaphor from the following sentence:
Sentence: I'm all charged up and full of energy
Target Domain: people
Source Domain:batteries
Extract the conceptual metaphor from the following sentence:
Sentence: He turned all funny.
Target Domain: change
Source Domain:direction, motion, movement
Extract the conceptual metaphor from the following sentence:
Sentence: Daughter broke Covid restrictions against the rules of the house
Target Domain: restriction
Source Domain:


In [162]:
GPT3_analysis_completions=[]
for p in analysis_prompts[:10]:
    c = openai.Completion.create(
            model="text-davinci-002",
            prompt=p,
            max_tokens=14,
            temperature=0
        )
    GPT3_analysis_completions.append(c)

In [164]:
for i in range(len(GPT3_analysis_completions)):
    print(sentences_to_analyse[i])
    print("Predicted Source Domain:", GPT3_analysis_completions[i].choices[0].text)
    print()

Daughter broke Covid restrictions against the rules of the house
Predicted Source Domain: prison

Farmers Mother of all protests still going ahead next month despite Covid-19 restrictions
Predicted Source Domain: containment, enclosure

Illinois trying to legislate Covid restrictions... ?
Predicted Source Domain:  law

@Reuters: Biden issuing new order lifting COVID-19 travel restrictions, imposing vaccine rules https://t.co/pZogUEwyLV https://t.co/EoRYNpqFlr
Predicted Source Domain:  travel

COVID restrictions in Prague?
Predicted Source Domain: prison

Parents, has covid restrictions added more or less pressure on you to be a Super Parent?
Predicted Source Domain: weight, pressure

Advice for friend circumventing COVID restrictions
Predicted Source Domain: obstacle, blockage

[World] - Slovakia extends COVID-19 restrictions amid infection surge | Toronto Star
Predicted Source Domain: containment

[World] - Slovakia extends COVID-19 restrictions amid infection surge
Predicted Source D

TODOS:

--> do this for 5 different terms such as restriction and create nice charts showing the most common source domains 
- use LCC paper

- make the model predict nothing  if the term is not used metaphorically --> take sentences from VUA dataset that are tagged as all non-metaphoric

- compare effectivness of different prompt designs
- compare with Bloom model 
- add languages: german + portuguese + ? --> how to get them? Metanet has russian and spanish
- add examples that have 2 metaphors ---> see if it confuses the model. Give nthe target domain, does it say the correct of the 2 possible source domains?
- can i publish on arxiv first and then somewhere else? all the conferences seem rly far away right now?
- manuelle evaluation regel: wenn etwas preziser ist, ist es ok; wenn es breiter ist (objekt anstatt container) ist es falsch. 
- get explanation, i.e., "which word made you think of this source domain?"

In [137]:
df

Unnamed: 0,Source Domain,Target Domain,Title,Metaphor,Example
0,"object, mover",force,A Force Is A Moving Object,The Object Has A Source,Even small magnets are sources of magnetism th...
1,body of water,problem,A Problem Is A Body Of Water,Investigating Problem Is Exploring Water,He dived right into the problem.
2,container,"problem, solving",A Problem Is A Locked Container For Its Solution,The Solution Is Contained In The Problem,We have to look deeply into this problem for i...
3,location,problem,A Problem Is A Region In A Landscape,Investigating A Problem Is Exploring A Landscape,We've got to explore this problem.
4,"moving object, mover",schedule,A Schedule Is A Moving Object,A Schedule Is A Moving Object,He was behind (the) schedule.
...,...,...,...,...,...
107,pursuer,time,Time Is A Pursuer,Time Is A Pursuer,Time will catch up with him.
108,"resource, commodity",time,Time Is A Resource,Time Is A Resource,We're almost out of time.
109,money,time,Time Is Money,Time Is Money,She spends her time unwisely.
110,"something moving, mover",time,Time Is Something Moving Toward You,Time Is Something Moving Toward You,When Tuesday comes. . .


In [138]:
# save df to csv
df.to_csv("1ExamplePerMetaphor.csv")