In [4]:
import os
import re
import pickle
import numpy as np
from dotenv import dotenv_values
from langchain import PromptTemplate, LLMChain, OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage, AIMessage

In [5]:
# Read the OpenAI credential from the .env file one directory up and publish it
# both as an environment variable and as a module-level constant.
config = dotenv_values("../.env")
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY = config["OPENAI_API_KEY"]

In [6]:
# Dataset label -> human-readable text used inside the prompts.
# Insertion order matters: it determines the order of the label list shown to the model.
labels_to_text = {
    "time.event.locations": "time event locations",
    "music.artist.album": "music artist album",
    "sports.sports_team.sport": "sports team",
    "baseball.baseball_team.league": "baseball team league",
    "tv.tv_program.country_of_origin": "tv program origin country",
    "music.album.artist": "music album artist",
    "sports.sports_team.location": "sports team location",
    "time.event.instance_of_recurring_event": "instance of recurring event",
    "aviation.airline.hubs": "airline hubs",
    "sports.sports_championship_event.champion": "sports championship event",
    "sports.sports_facility.teams": "sports facility teams",
    "baseball.baseball_player.position_s": "baseball player positions",
    "sports.sports_league.teams-sports.sports_league_participation.team": "sports league participation team",
    "tv.tv_network.programs-tv.tv_network_duration.program": "tv duration program",
    "sports.sports_league_season.league": "sports season league",
    "olympics.olympic_athlete.country-olympics.olympic_athlete_affiliation.country": "olympic athlete affiliation country",
    "american_football.football_player.position_s": "american football player position",
    "music.composer.compositions": "music composer compositions",
    "meteorology.tropical_cyclone.tropical_cyclone_season": "tropical cyclone season",
    "cvg.computer_videogame.developer": "cvg videogame developer",
    "tv.tv_character.appeared_in_tv_program-tv.regular_tv_appearance.actor": "tv actor appearance",
    "cvg.computer_videogame.publisher": "cvg videogame publisher",
    "soccer.football_player.position_s": "football player position",
    "tv.tv_program.original_network-tv.tv_network_duration.network": "tv duration network",
    "music.composition.composer": "music composition composer",
    "ice_hockey.hockey_player.hockey_position": "ice hockey player position",
    "book.author.works_written": "authors written books",
    "film.film.genre": "film genre",
    "film.film.directed_by": "film directed by",
    "film.film.produced_by": "film produced by",
    "film.film.language": "film language",
    "broadcast.broadcast.area_served": "broadcast area served",
    "award.award_category.category_of": "award category",
    "location.location.nearby_airports": "location of nearby airports",
    "location.country.official_language": "country official language",
}

In [7]:
# Reverse lookup (prompt text -> dataset label). It is derived from
# `labels_to_text` instead of being maintained by hand, so the two directions
# cannot drift apart when a label is added or reworded. Order is preserved.
text_to_label = {text: label for label, text in labels_to_text.items()}

# If two labels shared one text, the later one would silently overwrite the
# earlier one above and answers could not be mapped back unambiguously.
assert len(text_to_label) == len(labels_to_text), "label texts must be unique"

## Load test (and training) set

In [8]:
# NOTE: pickle.load can execute code embedded in the file; only load these
# .pkl files when they come from a trusted source.
with open('wiki-cpa-train-table.pkl', "rb") as train_file:
    train = pickle.load(train_file)
with open('wiki-cpa-test-table.pkl', "rb") as test_file:
    test = pickle.load(test_file)

# Element 1 of each record is what gets inserted into the prompts;
# element 2 holds the labels, one entry per column.
examples = [record[1] for record in test]
# Flattened across all test tables: one entry per column.
labels = [column_labels for record in test for column_labels in record[2]]

train_examples = [record[1] for record in train]
# Demonstration answers for few-shot prompts: one "Column <n>: <texts>" line per
# column (1-based), with surrounding whitespace stripped from the whole answer.
train_example_labels = [
    "\n".join(
        f"Column {position}: {', '.join([labels_to_text[name] for name in column_labels])}"
        for position, column_labels in enumerate(record[2], start=1)
    ).strip()
    for record in train
]

In [None]:
# Every human-readable label, comma-separated, in dictionary order.
# This string is interpolated into the system prompt of each experiment.
labels_joined = ", ".join(labels_to_text.values())
labels_joined

In [14]:
# Chat model shared by all prompt cells below (temperature fixed at 0).
model_name = 'gpt-3.5-turbo-1106'
chat = ChatOpenAI(
    model=model_name,
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

## Choose setup: zero-shot, one-shot or five-shot

CPA TABLE

ZERO SHOT

In [13]:
# Zero-shot, ROLE only: a single system message carrying the role and the label set.
nr = "zero"
prompt_name = "r"

# The role text does not depend on the example, so it is built once.
role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [29]:
# Zero-shot, ROLE variant 2: the role asks for the relation between the first
# column and the remaining columns.
nr = "zero"
prompt_name = "r2"

# The role text does not depend on the example, so it is built once.
role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [42]:
# Zero-shot, ROLE variant 3: the role is phrased as "correlation of each column
# with the initial column" and has no explicit "do not classify Column 1" note.
nr = "zero"
prompt_name = "r3"

# The role text does not depend on the example, so it is built once.
role_prompt = f"You are a great Table Annotation Specialist and your task is to identify the correlation of each column with the initial column in the table and label these relations with one or more of the following labels that are separated with a comma: {labels_joined}."

preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [None]:
# Zero-shot, ROLE variant 4: same task wording as "r", but the persona is a
# "great Data Scientist" instead of a Table Annotation Specialist.
nr = "zero"
prompt_name = "r4"

# The role text does not depend on the example, so it is built once.
role_prompt = f"You are a great Data Scientist, the best in your field and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [69]:
# Zero-shot, ROLE variant 5: the "do not classify Column 1" remark is moved
# into a parenthesis inside the task sentence.
nr = "zero"
prompt_name = "r5"

# The role text does not depend on the example, so it is built once.
role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns (do not classify Column 1) of a given table with one or more of the following labels that are separated with comma: {labels_joined}."

preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [10]:
# Zero-shot, ROLE + INSTRUCTIONS (first, five-step instruction text).
nr = "zero"
prompt_name = "r+i"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label or more if needed, that best represents the relationship between that column and the first column of the table. 4.Answer with your selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [95]:
# Zero-shot, ROLE + INSTRUCTIONS 2 (seven-step instruction text).
nr = "zero"
prompt_name = "r+i2"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [112]:
# Zero-shot, ROLE + INSTRUCTIONS 2 + "step by step" as a separate, final system message.
nr = "zero"
prompt_name = "r+i2+s_b_s"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
    "Let's think step by step.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [125]:
# Zero-shot, ROLE + INSTRUCTIONS 2 where "Let's think step by step." is the
# opening sentence of the instruction message itself (not a separate message).
nr = "zero"
prompt_name = "r+i2(s_b_s)"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Let's think step by step. Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [140]:
# Zero-shot, ROLE + "step by step" + INSTRUCTIONS 2: the step-by-step message
# is sent before the instruction message.
nr = "zero"
prompt_name = "r+s_b_s+i2"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Let's think step by step.",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [154]:
# Zero-shot, ROLE + INSTRUCTIONS 2 + MOTIVATION ("Your answer is very important ...").
nr = "zero"
prompt_name = "r+i2+m"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
    "Your answer is very important. Take your time and think well before answering!",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [166]:
# Zero-shot, ROLE + INSTRUCTIONS 2 + "step by step" + MOTIVATION, each as its own system message.
nr = "zero"
prompt_name = "r+i2+s_b_s+m"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
    "Let's think step by step.",
    "Your answer is very important. Take your time and think well before answering!",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [181]:
# Zero-shot, ROLE + INSTRUCTIONS + CONTEXT: the seven-step instruction text
# followed by a one-sentence definition of Column Property Annotation.
nr = "zero"
prompt_name = "r+i+c"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
    "CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [192]:
# Zero-shot, ROLE + INSTRUCTIONS + CONTEXT 2: the context message additionally
# restates the task (relate each column to the initial column).
nr = "zero"
prompt_name = "r+i+c1"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
    "CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the relation of each column with the initial column in the table.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [204]:
# Zero-shot, ROLE + INSTRUCTIONS + CONTEXT 3: same context sentence as "r+i+c"
# but without the leading word "CONTEXT:".
nr = "zero"
prompt_name = "r+i+c2"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
    "Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [217]:
# Zero-shot, ROLE + INSTRUCTIONS 2 + "step by step" + MOTIVATION + CONTEXT,
# sent as five separate system messages in that order.
nr = "zero"
prompt_name = "r+i2+s_b_s+m+c"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
    "Let's think step by step.",
    "Your answer is very important. Take your time and think well before answering!",
    "CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [225]:
# Zero-shot, ROLE + INSTRUCTIONS 2 + "step by step" + CONTEXT.
nr = "zero"
prompt_name = "r+i2+s_b_s+c"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
    "Let's think step by step.",
    "CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
# Zero-shot, ROLE + INSTRUCTIONS 2 + MOTIVATION + CONTEXT.
nr = "zero"
prompt_name = "r+i2+m+c"

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
    "Your answer is very important. Take your time and think well before answering!",
    "CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [246]:
# Zero-shot, ROLE + INSTRUCTIONS + MOTIVATION + CONTEXT + worked EXAMPLE.
# (The header now lists MOTIVATION as well, matching prompt_name and the messages sent.)
nr = "zero"
prompt_name = "r+i+m+c+example"

# Context plus a worked toy example, assembled from adjacent string literals.
# Python joins adjacent literals with NO separator, so every fragment must carry
# its own trailing space; without it the model received fused text such as
# "columns.Here", "Dog.Now" and "Column 1Answer:".
# This changes the prompt text, so results produced with the fused version will
# not be reproduced exactly.
# NOTE(review): the spelling "Columm1" and the double space after "Dog." are kept
# exactly as in the original prompt - confirm whether they should be corrected too.
context_with_example = (
    "CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns. "
    "Here is an example how you could solve a CPA task: 'Classify the relationship between these two columns: Columm1: Dog, Cat, Dog.  Column2: lis, moli, brauni.' "
    "First we check Columm1: Dog, Cat, Dog. "
    "Now we check Column2: lis, moli, brauni. Analyze Column 2 in relation to Column 1. Predict the relation between Column 2 and Column 1. "
    "Answer: Column 2: animal name, pet name"
)

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
    "Your answer is very important. Take your time and think well before answering!",
    context_with_example,
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [95]:
# Generating tables to use for "Generated Knowledge Prompt"
import openai
import random

# How many synthetic tables to request for each label.
TABLES_PER_LABEL = 5

generated_examples = []          # raw model output, one string per generated table
generated_examples_labels = []   # label text each table was generated for (parallel list)

# Iterate over the label vocabulary, not over `labels`: the data-loading cell
# fills `labels` with one entry per test-table column, so looping over it would
# issue TABLES_PER_LABEL requests for every test column (repeating labels many
# times) and interpolate those raw entries into the prompt.
# NOTE(review): the human-readable text is used here, like in the system
# prompts - confirm downstream cells do not expect the dotted dataset label.
for label in labels_to_text.values():
    # The prompt only depends on the label, so build it once per label.
    prompt = f"Generate 1 table with 4 random columns and 5 rows. Include one column about {label} and one of the other columns to be related to the {label}. Please return only the values, no need to explain."

    for _ in range(TABLES_PER_LABEL):
        response = openai.ChatCompletion.create(
            model='gpt-3.5-turbo-0301',
            messages=[{"role": "system", "content": prompt}],
        )

        generated_content = response['choices'][0]['message']['content'].strip()
        generated_examples.append(generated_content)
        generated_examples_labels.append(label)


In [96]:
# Persist the generated tables. The original code was parked inside a string
# literal; it is now real code behind an opt-in flag, so re-running the notebook
# still never overwrites the cached file unless the flag is set to True.
SAVE_GENERATED_TABLES = False
if SAVE_GENERATED_TABLES:
    # `with` closes the file even if pickling fails.
    with open('GKP-Input/Generated-Tables.pkl', 'wb') as out_file:
        pickle.dump(generated_examples, out_file)

In [97]:
# Persist the labels belonging to the generated tables. The original code was
# parked inside a string literal; it is now real code behind an opt-in flag, so
# re-running the notebook still never overwrites the cached file by accident.
SAVE_GENERATED_TABLE_LABELS = False
if SAVE_GENERATED_TABLE_LABELS:
    # `with` closes the file even if pickling fails.
    with open('GKP-Input/Generated-Table-Labels.pkl', 'wb') as out_file:
        pickle.dump(generated_examples_labels, out_file)

In [None]:
# Reload the cached generated tables from disk and display them.
with open('GKP-Input/Generated-Tables.pkl', "rb") as tables_file:
    tables = pickle.load(tables_file)
tables

In [None]:
# Reload the cached labels of the generated tables from disk and display them.
# NOTE(review): this rebinds `labels`, which the data-loading cell already set
# from the test set - confirm later cells expect the generated-table labels here.
with open('GKP-Input/Generated-Table-Labels.pkl', "rb") as labels_file:
    labels = pickle.load(labels_file)
labels

ONE SHOT CPA TABLE

In [261]:
# One-shot, ROLE + INSTRUCTIONS 2: before the actual question the model sees one
# randomly chosen training table together with its gold answer.
import random

nr = "one"
prompt_name = "r+i2"

# Dedicated, seeded generator so the same demonstrations are drawn on every run
# (the global `random` module is unseeded, which made results non-reproducible).
DEMO_SEED = 42
demo_rng = random.Random(DEMO_SEED)

# System messages in the order they are sent; none of them depends on the example.
system_prompts = [
    f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!",
    "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set.",
]

preds = []
for example in examples:
    messages = [SystemMessage(content=text) for text in system_prompts]

    # Demonstration turn: a training table as the question, its labels as the answer.
    index = demo_rng.randrange(len(train_examples))
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=train_example_labels[index]))

    # Actual test table to annotate.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [275]:
# Prompt variant: ROLE + INSTRUCTIONS + STEP BY STEP (one-shot).
import random

nr = "one"
prompt_name = "r+i2+s_b_s"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions, step-by-step cue.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Let's think step by step."),
    ]

    # One demonstration (question + expected answer) drawn at random from train_examples.
    index = random.randint(0, len(train_examples) - 1)
    messages.extend([
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
    ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [286]:
# Prompt variant: ROLE + INSTRUCTIONS + MOTIVATION (one-shot).
import random

nr = "one"
prompt_name = "r+i2+m"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions, motivation.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # One demonstration (question + expected answer) drawn at random from train_examples.
    index = random.randint(0, len(train_examples) - 1)
    messages.extend([
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
    ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [302]:
# Prompt variant: ROLE + INSTRUCTIONS + STEP BY STEP + MOTIVATION (one-shot).
import random

nr = "one"
prompt_name = "r+i2+s_b_s+m"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions,
    # step-by-step cue, motivation.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # One demonstration (question + expected answer) drawn at random from train_examples.
    index = random.randint(0, len(train_examples) - 1)
    messages.extend([
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
    ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [317]:
# Prompt variant: ROLE + INSTRUCTIONS + CONTEXT (one-shot).
import random

nr = "one"
# NOTE(review): sibling cells tag the same instruction text as "i2"; this tag uses "i" — confirm it is intentional.
prompt_name = "r+i+c"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions, task context.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns."),
    ]

    # One demonstration (question + expected answer) drawn at random from train_examples.
    index = random.randint(0, len(train_examples) - 1)
    messages.extend([
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
    ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [15]:
# Prompt variant: ROLE + INSTRUCTIONS + STEP BY STEP + MOTIVATION + CONTEXT (one-shot).
import random

nr = "one"
prompt_name = "r+i2+s_b_s+m+c"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions,
    # step-by-step cue, motivation, task context.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns."),
    ]

    # One demonstration (question + expected answer) drawn at random from train_examples.
    index = random.randint(0, len(train_examples) - 1)
    messages.extend([
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
    ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [345]:
# Prompt variant: ROLE + INSTRUCTIONS + STEP BY STEP + CONTEXT (one-shot).
import random

nr = "one"
prompt_name = "r+i2+s_b_s+c"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions,
    # step-by-step cue, task context.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns."),
    ]

    # One demonstration (question + expected answer) drawn at random from train_examples.
    index = random.randint(0, len(train_examples) - 1)
    messages.extend([
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
    ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [359]:
# Prompt variant: ROLE + INSTRUCTIONS + MOTIVATION + CONTEXT + EXAMPLE (one-shot).
import random

nr = "one"
prompt_name = "r+i+m+c+example"

# Context plus a worked example, assembled from adjacent string literals.
# Python joins adjacent literals with nothing in between, so every fragment
# except the last ends with an explicit space; without it the sentences run
# together in the prompt ("columns.Here is", "Dog.Now we", "Column 1Answer:").
# NOTE(review): this changes the exact prompt text sent to the model, so
# results are not byte-comparable with runs made before the fix.
# NOTE(review): the "Columm1" spelling is kept verbatim from the original prompt.
context_with_example = (
    "CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns. "
    "Here is an example how you could solve a CPA task: 'Classify the relationship between these two columns: Columm1: Dog, Cat, Dog.  Column2: lis, moli, brauni.' "
    "First we check Columm1: Dog, Cat, Dog. "
    "Now we check Column2: lis, moli, brauni. Analyze Column 2 in relation to Column 1. Predict the relation between Column 2 and Column 1. "
    "Answer: Column 2: animal name, pet name"
)

preds = []
for example in examples:
    messages = []

    # System turns: role (with the allowed label set), numbered instructions,
    # motivation, then context with the worked example.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))
    messages.append(SystemMessage(content=context_with_example))

    # One demonstration (question + expected answer) drawn at random from train_examples.
    index = random.randint(0, len(train_examples) - 1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [375]:
# Load the generated tables and their labels used by the GKP prompt cells.
# NOTE: pickle.load can execute code embedded in the file; only load pickles
# that were produced locally.
with open('GKP-Input/Generated-Tables.pkl', mode="rb") as tables_file:
    train_tables = pickle.load(tables_file)

with open('GKP-Input/Generated-Table-Labels.pkl', mode="rb") as labels_file:
    train_labels = pickle.load(labels_file)

In [378]:
# Prompt variant: ROLE + INSTRUCTIONS + MOTIVATION + Generated Knowledge (GKP), one-shot.
import random

nr = "one"
prompt_name = "r+i2+m+GKP"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions, motivation.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Generated knowledge: one randomly chosen generated table, introduced with its label.
    index_gen = random.randint(0, len(train_tables) - 1)
    messages.append(HumanMessage(content=f"This is an example of a table that includes the label {train_labels[index_gen]}:\n{train_tables[index_gen]}"))

    # One demonstration (question + expected answer) drawn at random from train_examples.
    index = random.randint(0, len(train_examples) - 1)
    messages.extend([
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
    ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [396]:
# Prompt variant: ROLE + INSTRUCTIONS + STEP BY STEP + Generated Knowledge (GKP), one-shot.
import random

nr = "one"
prompt_name = "r+i2+s_b_s+GKP"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions, step-by-step cue.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Let's think step by step."),
    ]

    # Generated knowledge: one randomly chosen generated table, introduced with its label.
    index_gen = random.randint(0, len(train_tables) - 1)
    messages.append(HumanMessage(content=f"This is an example of a table that includes the label {train_labels[index_gen]}:\n{train_tables[index_gen]}"))

    # One demonstration (question + expected answer) drawn at random from train_examples.
    index = random.randint(0, len(train_examples) - 1)
    messages.extend([
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
    ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

FIVE SHOT CPA TABLE

In [416]:
# Prompt variant: ROLE + INSTRUCTIONS (five-shot).
import random

nr = "five"
prompt_name = "r+i2"

preds = []
for example in examples:
    # System turns: the role (including the allowed label set) and the numbered instructions.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
    ]

    # Five demonstrations; each index is drawn independently, so the same
    # training example can appear more than once.
    for i in range(5):
        index = random.randint(0, len(train_examples) - 1)
        messages.extend([
            HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [430]:
# Prompt variant: ROLE + INSTRUCTIONS + STEP BY STEP (five-shot).
import random

nr = "five"
prompt_name = "r+i2+s_b_s"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions, step-by-step cue.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Let's think step by step."),
    ]

    # Five demonstrations; each index is drawn independently, so the same
    # training example can appear more than once.
    for i in range(5):
        index = random.randint(0, len(train_examples) - 1)
        messages.extend([
            HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [444]:
# Prompt variant: ROLE + INSTRUCTIONS + MOTIVATION (five-shot).
import random

nr = "five"
prompt_name = "r+i2+m"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions, motivation.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Five demonstrations; each index is drawn independently, so the same
    # training example can appear more than once.
    for i in range(5):
        index = random.randint(0, len(train_examples) - 1)
        messages.extend([
            HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [457]:
# Prompt variant: ROLE + INSTRUCTIONS + STEP BY STEP + MOTIVATION (five-shot).
import random

nr = "five"
prompt_name = "r+i2+s_b_s+m"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions,
    # step-by-step cue, motivation.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Five demonstrations; each index is drawn independently, so the same
    # training example can appear more than once.
    for i in range(5):
        index = random.randint(0, len(train_examples) - 1)
        messages.extend([
            HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [469]:
# Prompt variant: ROLE + INSTRUCTIONS + CONTEXT (five-shot).
import random

nr = "five"
# NOTE(review): sibling cells tag the same instruction text as "i2"; this tag uses "i" — confirm it is intentional.
prompt_name = "r+i+c"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions, task context.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns."),
    ]

    # Five demonstrations; each index is drawn independently, so the same
    # training example can appear more than once.
    for i in range(5):
        index = random.randint(0, len(train_examples) - 1)
        messages.extend([
            HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [486]:
# Prompt variant: ROLE + INSTRUCTIONS + STEP BY STEP + MOTIVATION + CONTEXT (five-shot).
import random

nr = "five"
prompt_name = "r+i2+s_b_s+m+c"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions,
    # step-by-step cue, motivation, task context.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns."),
    ]

    # Five demonstrations; each index is drawn independently, so the same
    # training example can appear more than once.
    for i in range(5):
        index = random.randint(0, len(train_examples) - 1)
        messages.extend([
            HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# Prompt variant: ROLE + INSTRUCTIONS + STEP BY STEP + CONTEXT (five-shot).
import random

nr = "five"
prompt_name = "r+i2+s_b_s+c"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions,
    # step-by-step cue, task context.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns."),
    ]

    # Five demonstrations; each index is drawn independently, so the same
    # training example can appear more than once.
    for i in range(5):
        index = random.randint(0, len(train_examples) - 1)
        messages.extend([
            HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [497]:
# Prompt variant: ROLE + INSTRUCTIONS + STEP BY STEP + MOTIVATION + CONTEXT + EXAMPLE (five-shot).
import random

nr = "five"
prompt_name = "r+i2+s_b_s+m+c+example"

# Context plus a worked example, assembled from adjacent string literals.
# Python joins adjacent literals with nothing in between, so every fragment
# except the last ends with an explicit space; without it the sentences run
# together in the prompt ("columns.Here is", "Dog.Now we", "Column 1Answer:").
# NOTE(review): this changes the exact prompt text sent to the model, so
# results are not byte-comparable with runs made before the fix.
# NOTE(review): the "Columm1" spelling is kept verbatim from the original prompt.
context_with_example = (
    "CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns. "
    "Here is an example how you could solve a CPA task: 'Classify the relationship between these two columns: Columm1: Dog, Cat, Dog.  Column2: lis, moli, brauni.' "
    "First we check Columm1: Dog, Cat, Dog. "
    "Now we check Column2: lis, moli, brauni. Analyze Column 2 in relation to Column 1. Predict the relation between Column 2 and Column 1. "
    "Answer: Column 2: animal name, pet name"
)

preds = []
for example in examples:
    messages = []

    # System turns: role (with the allowed label set), numbered instructions,
    # step-by-step cue, motivation, then context with the worked example.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."))
    messages.append(SystemMessage(content="Let's think step by step."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))
    messages.append(SystemMessage(content=context_with_example))

    # Five demonstrations; each index is drawn independently, so the same
    # training example can appear more than once.
    for i in range(0, 5):
        index = random.randint(0, len(train_examples) - 1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [508]:
# Load the generated tables and their labels used by the GKP prompt cells.
# NOTE: pickle.load can execute code embedded in the file; only load pickles
# that were produced locally.
with open('GKP-Input/Generated-Tables.pkl', mode="rb") as tables_file:
    train_tables = pickle.load(tables_file)

with open('GKP-Input/Generated-Table-Labels.pkl', mode="rb") as labels_file:
    train_labels = pickle.load(labels_file)

In [None]:
# Prompt variant: ROLE + INSTRUCTIONS + MOTIVATION + Generated Knowledge (GKP), five-shot.
import random

nr = "five"
prompt_name = "r+i2+m+GKP"

preds = []
for example in examples:
    # System turns: role (with the allowed label set), numbered instructions, motivation.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Five rounds, each adding one generated table with its label followed by
    # one demonstration. Indices are drawn independently, so repeats are possible.
    for i in range(5):
        index_gen = random.randint(0, len(train_tables) - 1)
        messages.append(HumanMessage(content=f"This is an example of a table that includes the label {train_labels[index_gen]}:\n{train_tables[index_gen]}"))

        index = random.randint(0, len(train_examples) - 1)
        messages.extend([
            HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ])

    # The table to annotate goes last; the text of the reply is stored as the prediction.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
# Prompt variant: role + instructions + step by step + motivation + context
# + context example + generated knowledge (GKP)
import random

nr = "five"
prompt_name = "r+i2+s_b_s+m+c+example+GKP"

preds = []
for example in examples:
    # Fixed system preamble: role (with the label set), numbered instructions,
    # step-by-step cue, motivation.
    role_text = f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with one or more of the following labels that are separated with comma: {labels_joined}. Note, you must not classify Column 1!"
    instruction_text = "Your instructions are: 1. Review the provided input. 2. Carefully examine the values within each cell of the table. 3. Select one label or more only if needed, that best represents the relationship between the base column with the rest. 4. Do not assign a label for the base column. 5. Answer with your final selected labels, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct. 7. Ensure that you answer using only the labels from the provided label-set."
    step_by_step_text = "Let's think step by step."
    motivation_text = "Your answer is very important. Take your time and think well before answering!"

    # NOTE(review): the adjacent literals below are joined without any separating
    # space, so the sentences run together in the prompt. Left as-is so the prompt
    # text stays unchanged.
    context_text = (f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic relation between two or more columns."
                    "Here is an example how you could solve a CPA task: 'Classify the relationship between these two columns: Columm1: Dog, Cat, Dog.  Column2: lis, moli, brauni.'"
                    "First we check Columm1: Dog, Cat, Dog."
                    "Now we check Column2: lis, moli, brauni. Analyze Column 2 in relation to Column 1. Predict the relation between Column 2 and Column 1"
                    "Answer: Column 2: animal name, pet name")

    messages = [
        SystemMessage(content=role_text),
        SystemMessage(content=instruction_text),
        SystemMessage(content=step_by_step_text),
        SystemMessage(content=motivation_text),
        SystemMessage(content=context_text),
    ]

    # Five rounds: one randomly drawn generated table, followed by one randomly
    # drawn training demonstration (question + expected answer).
    for _ in range(0, 5):
        index_gen = random.randint(0, len(train_tables) - 1)
        generated_hint = f"This is an example of a table that includes the label {train_labels[index_gen]}:\n{train_tables[index_gen]}"
        messages.append(HumanMessage(content=generated_hint))

        index = random.randint(0, len(train_examples) - 1)
        demo_question = f"Classify the relationship between these table columns: {train_examples[index]}"
        demo_answer = f"{train_example_labels[index]}"
        messages.append(HumanMessage(content=demo_question))
        messages.append(AIMessage(content=demo_answer))

    # The table to annotate goes last; keep only the text of the reply.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

Save predictions

In [None]:
# Show the raw answer strings collected by the prompt cell that was run last.
preds

In [525]:
# Persist the raw model answers under a path derived from the model name, the shot
# count (`nr`) and the prompt variant (`prompt_name`).
file_name=f'Predictions/{model_name}/table/{nr}-shot/chat-table-{prompt_name}-{nr}-shot.pkl'
# The context manager closes the handle even when pickling fails part-way,
# which the previous open()/close() pair did not guarantee.
with open(file_name, 'wb') as f:
    pickle.dump(preds, f)

## Evaluation

In [None]:
# Turn every raw answer in `preds` into one list of labels per annotated column.
# Anything that cannot be mapped through `text_to_label` is recorded as '-'.
predictions = []
i = 0  # running column counter, only used in the "out of label space" message
for j, table_preds in enumerate(preds):

    # Number of columns for which a prediction is expected for this table.
    table_number = len(test[j][2])

    # Drop any preamble that precedes the actual answer.
    if "Class:" in table_preds:
        table_preds = table_preds.split("Class:")[1]
    if ":\n" in table_preds:
        table_preds = table_preds.split(":\n")[1]

    # With ':' or '-' the first chunk is the text before the first separator
    # and is skipped; a plain comma list starts at the first chunk.
    if ":" in table_preds:
        separator, start = ":", 1
    elif "-" in table_preds:
        separator, start = "-", 1
    else:
        separator, start = ",", 0
    col_preds = table_preds.split(separator)[start:start + table_number]

    for pred in col_preds:
        i += 1
        column_predictions = []

        # A column may carry several comma-separated labels; without a comma
        # the split simply yields the single candidate.
        for candidate in pred.split(","):
            # Cut trailing text after a newline, comma, parenthesis or period.
            for delimiter in ("\n", ",", "(", "."):
                if delimiter in candidate:
                    candidate = candidate.split(delimiter)[0].strip()
            candidate = candidate.strip().lower()

            if candidate in text_to_label:
                column_predictions.append(text_to_label[candidate])
            else:
                print(f"For test example {i} out of label space prediction: {candidate}")
                column_predictions.append('-')

        predictions.append(column_predictions)

    # Pad with '-' when the answer covered fewer columns than expected.
    missing = table_number - len(col_preds)
    if missing > 0:
        predictions.extend(["-"] for _ in range(missing))
        i += missing

In [None]:
# Peek at the first ten parsed entries (each entry is the label list of one column).
predictions[:10]

### Calculate Precision, Recall, Macro-F1 and Micro-F1

In [530]:
def calculate_f1_scores(y_tests, y_preds, num_classes, types):
    """Compute Micro-F1, Macro-F1, precision and recall for multi-label predictions.

    The last entry of ``types`` (index ``num_classes - 1``) is treated as a
    catch-all class: its row and column of the count matrix collect the false
    negatives and false positives of the other classes, and it is excluded
    from every average and from the per-class report.

    Args:
        y_tests: list of ground-truth label lists, one per annotated column.
        y_preds: list of predicted label lists, aligned with ``y_tests``.
        num_classes: number of entries in ``types`` (catch-all included).
        types: list of label names; every label in ``y_tests`` / ``y_preds``
            must occur in it.

    Returns:
        ``[evaluation, per_class_eval]`` where ``evaluation`` holds
        "Micro-F1", "Macro-F1", "Precision" and "Recall" (the last three are
        macro averages) and ``per_class_eval`` maps every label except the
        last one to its "Precision", "Recall" and "F1".

    Raises:
        ValueError: if a label is not contained in ``types``.
    """

    def safe_ratio(numerator, denominator):
        # A class without any counts scores 0 instead of producing NaN (and a
        # NumPy RuntimeWarning) from a 0/0 division.
        return numerator / denominator if denominator else 0.0

    y_tests = [[types.index(l) for l in y] for y in y_tests]
    y_preds = [[types.index(l) for l in y] for y in y_preds]

    cm = np.zeros(shape=(num_classes, num_classes))

    for i, true_labels in enumerate(y_tests):
        for label in true_labels:
            if label not in y_preds[i]:
                cm[-1][label] += 1  # FN, stored in the catch-all row
            else:
                cm[label][label] += 1  # TP

    for i, pred_labels in enumerate(y_preds):
        for label in pred_labels:
            if label not in y_tests[i]:
                cm[label][-1] += 1  # FP, stored in the catch-all column

    report = {}
    for j in range(num_classes):
        tp = cm[j][j]
        fn = cm[:, j].sum() - tp  # column j without the diagonal
        fp = cm[j, :].sum() - tp  # row j without the diagonal

        precision = safe_ratio(tp, tp + fp)
        recall = safe_ratio(tp, tp + fn)
        f1 = safe_ratio(2 * precision * recall, precision + recall)

        report[j] = {
            'FN': fn,
            'FP': fp,
            'TP': tp,
            'p': precision,
            'r': recall,
            'f1': f1,
        }

    # Pool the counts of all classes except the catch-all for the micro average.
    scored = [report[c] for c in report if c != num_classes - 1]
    all_fn = sum(entry['FN'] for entry in scored)
    all_tp = sum(entry['TP'] for entry in scored)
    all_fp = sum(entry['FP'] for entry in scored)

    class_f1s = [report[class_]['f1'] for class_ in report]
    class_p = [report[class_]['p'] for class_ in report]
    class_r = [report[class_]['r'] for class_ in report]

    macro_f1 = sum(class_f1s[:-1]) / (num_classes - 1)
    p = sum(class_p[:-1]) / (num_classes - 1)
    r = sum(class_r[:-1]) / (num_classes - 1)
    micro_f1 = safe_ratio(all_tp, all_tp + (1 / 2 * (all_fp + all_fn)))

    per_class_eval = {}
    for index, t in enumerate(types[:-1]):
        per_class_eval[t] = {"Precision": class_p[index], "Recall": class_r[index], "F1": class_f1s[index]}

    evaluation = {
        "Micro-F1": micro_f1,
        "Macro-F1": macro_f1,
        "Precision": p,
        "Recall": r
    }

    return [evaluation, per_class_eval]

In [None]:
list_set_labels = list(labels_to_text.keys())
# calculate_f1_scores uses the LAST entry of `types` as its FN/FP bucket and
# leaves it out of all averages, so the '-' placeholder must always be there.
# Appending it only when an exact ["-"] prediction exists would otherwise turn
# the last real label into that bucket, and a mixed prediction such as
# ["some.label", "-"] would raise ValueError because '-' is missing from `types`.
types = list_set_labels + ["-"]
evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)

In [None]:
# Aggregate scores returned by calculate_f1_scores: Micro-F1, Macro-F1, Precision, Recall.
evaluation

In [None]:
# Precision / Recall / F1 for every label in `types` except the last one.
per_class_eval

## Error Analysis

In [None]:
# Print every column whose predicted label set differs from the ground truth and
# count how many ground-truth labels were not predicted.
errors = 0
for idx, predicted in enumerate(predictions):
    label_set = set(labels[idx])
    prediction_set = set(predicted)

    if label_set == prediction_set:
        continue

    # Only ground-truth labels missing from the prediction add to the count;
    # surplus predicted labels are reported but not counted.
    errors += len(label_set - prediction_set)
    print(f"Predicted as {predicted} when it was {label_set}")
errors

### Re-load previously saved predictions

In [224]:
# Read back the pickled answers for the current model_name / nr / prompt_name.
# NOTE(review): pickle.load can execute arbitrary code; only load files you created yourself.
saved_preds_path = f'Predictions/{model_name}/table/{nr}-shot/chat-table-{prompt_name}-{nr}-shot.pkl'
with open(saved_preds_path, "rb") as saved_preds_file:
    preds = pickle.load(saved_preds_file)

In [None]:
# Show the answers that were just re-loaded from disk.
preds