In [1]:
import os
import re
import pickle
import numpy as np
from dotenv import dotenv_values
from langchain import PromptTemplate, LLMChain, OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage, AIMessage

In [2]:
# Read the OpenAI API key from the .env file one directory up and expose it both
# as a notebook-level constant and as a process environment variable.
config = dotenv_values("../.env")
OPENAI_API_KEY = config["OPENAI_API_KEY"]
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [3]:
# Maps each dataset label identifier ("domain.type") to the plain-text name
# that is shown to the model in the prompts. Insertion order matters: the
# comma-separated label list used in the prompts is built by iterating this dict.
# Each identifier must appear exactly once; a repeated key in a dict literal is
# silently overwritten by the later entry.
labels_to_text = {
    "soccer.football_league": "football league",
    "government.government_office_or_title": "government office or title",
    "organization.non_profit_organization": "non profit organization",
    "olympics.olympic_games": "olympic games",
    "cvg.cvg_genre": "cvg genre",
    "ice_hockey.hockey_position": "ice hockey position",
    "tv.tv_network": "tv network",
    "aviation.airline": "aviation airline",
    "american_football.football_conference": "american football conference",
    "soccer.football_world_cup": "football world cup",
    "american_football.football_coach": "american football coach",
    "military.military_unit": "military unit",
    "military.military_post": "military post",
    "music.media_format": "music media format",
    "tv.tv_personality": "tv personality",
    "baseball.baseball_team": "baseball team",
    "cvg.cvg_developer": "cvg developer",
    "soccer.football_award": "football award",
    "ice_hockey.hockey_team": "ice hockey team",
    "tv.tv_writer": "tv writer",
    "meteorology.tropical_cyclone_season": "tropical cyclone season",
    "soccer.fifa": "soccer fifa",
    "cvg.cvg_publisher": "cvg publisher",
    "baseball.baseball_player": "baseball player",
    "sports.sports_championship": "sports championship",
    "soccer.football_team_manager": "football team manager",
    "sports.golfer": "sports golfer",
    "baseball.baseball_position": "baseball position",
    "military.rank": "military rank",
    "cvg.cvg_platform": "cvg platform",
    "music.musical_group": "musical group",
    "amusement_parks.ride": "amusement parks ride",
    "music.genre": "music genre",
    "music.lyricist": "music lyricist",
    "music.record_label": "music record label",
    "meteorology.tropical_cyclone": "tropical cyclone",
    "aviation.airport": "airport",
}

In [4]:
# Reverse lookup: plain-text label name (as it appears in prompts and model
# answers) -> dataset label identifier. Built from ordered (text, identifier)
# pairs so the resulting dict keeps the same insertion order.
text_to_label = dict([
    ("football league", "soccer.football_league"),
    ("government office or title", "government.government_office_or_title"),
    ("non profit organization", "organization.non_profit_organization"),
    ("olympic games", "olympics.olympic_games"),
    ("cvg genre", "cvg.cvg_genre"),
    ("ice hockey position", "ice_hockey.hockey_position"),
    ("tv network", "tv.tv_network"),
    ("aviation airline", "aviation.airline"),
    ("american football conference", "american_football.football_conference"),
    ("football world cup", "soccer.football_world_cup"),
    ("american football coach", "american_football.football_coach"),
    ("military unit", "military.military_unit"),
    ("military post", "military.military_post"),
    ("music media format", "music.media_format"),
    ("tv personality", "tv.tv_personality"),
    ("baseball team", "baseball.baseball_team"),
    ("cvg developer", "cvg.cvg_developer"),
    ("football award", "soccer.football_award"),
    ("ice hockey team", "ice_hockey.hockey_team"),
    ("tv writer", "tv.tv_writer"),
    ("tropical cyclone season", "meteorology.tropical_cyclone_season"),
    ("soccer fifa", "soccer.fifa"),
    ("cvg publisher", "cvg.cvg_publisher"),
    ("baseball player", "baseball.baseball_player"),
    ("sports championship", "sports.sports_championship"),
    ("football team manager", "soccer.football_team_manager"),
    ("sports golfer", "sports.golfer"),
    ("baseball position", "baseball.baseball_position"),
    ("military rank", "military.rank"),
    ("cvg platform", "cvg.cvg_platform"),
    ("musical group", "music.musical_group"),
    ("amusement parks ride", "amusement_parks.ride"),
    ("music genre", "music.genre"),
    ("music lyricist", "music.lyricist"),
    ("music record label", "music.record_label"),
    ("tropical cyclone", "meteorology.tropical_cyclone"),
    ("airport", "aviation.airport"),
])

## Load test (and training) set

In [5]:
# Load the pickled train and test tables from the working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point this at files you produced yourself or otherwise trust.
with open('wiki-cta-train-table.pkl', "rb") as f:
    train = pickle.load(f)
with open('wiki-cta-test-table.pkl', "rb") as f:
    test = pickle.load(f)

# Element [1] of every entry is used as the table text, element [2] as the
# list of per-column label lists.
examples = [table[1] for table in test]
labels = [column_labels for table in test for column_labels in table[2]]

train_examples = [table[1] for table in train]

# For every training table build the expected answer text: one
# "Column <n>: <label>, <label>" line per column, using the plain-text names.
train_example_labels = []
for table in train:
    answer_lines = [
        "Column {}: {}".format(position, ", ".join(labels_to_text[m] for m in column_labels))
        for position, column_labels in enumerate(table[2], start=1)
    ]
    train_example_labels.append("\n".join(answer_lines).strip())

In [None]:
# Comma-separated list of all plain-text label names, in dict order; this is
# the label set interpolated into the system prompts.
labels_joined = ", ".join(labels_to_text.values())
labels_joined

In [7]:
# Chat model used by the prompt cells; temperature 0 is requested for every call.
model_name = 'gpt-3.5-turbo-1106'
chat = ChatOpenAI(
    model=model_name,
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

## Choose setup: zero-shot, one-shot or five-shot

CTA TABLE

ZERO SHOT

In [33]:
# Zero-shot, prompt variant "r": role-only system message.
nr = "zero"
prompt_name = "r"

preds = []
for example in examples:
    # One fresh conversation per table: the role prompt, then the table to classify.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [51]:
# Zero-shot, prompt variant "r2": role prompt with an extra sentence asking for
# multiple labels only when needed; the user message ends with a period.
nr = "zero"
prompt_name = "r2"

preds = []
for example in examples:
    # One fresh conversation per table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}. Answer with more than one label only if needed!"),
        HumanMessage(content=f"Classify these table columns: {example}."),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [64]:
# Zero-shot, prompt variant "r3": role prompt phrased as "great Data Scientist".
nr = "zero"
prompt_name = "r3"

preds = []
for example in examples:
    # One fresh conversation per table.
    messages = [
        SystemMessage(content=f"You are a great Data Scientist, the best in your field and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [140]:
# Zero-shot, prompt variant "r+i": role prompt followed by numbered instructions.
nr = "zero"
prompt_name = "r+i"

preds = []
for example in examples:
    # Conversation order: role, instructions, then the table to classify.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# Zero-shot, prompt variant "r+i2": role prompt plus instructions that
# explicitly restrict answers to the label-set.
nr = "zero"
prompt_name = "r+i2"

preds = []
for example in examples:
    # Conversation order: role, instructions, then the table to classify.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label from the label-set that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, use only the labels from the label-set and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [217]:
# Zero-shot, prompt variant "r+i3": role prompt plus instructions that ask for
# distinct labels and label-only output.
nr = "zero"
prompt_name = "r+i3"

preds = []
for example in examples:
    # Conversation order: role, instructions, then the table to classify.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1. Review the provided input and organize it into a table format. 2. Carefully examine the values within each cell of the table. 3. For every column in the table, choose one label or more if needed, that best represents the meaning of the cells in that column. 4. Avoid duplicate labels when responding. Provide a single unique label, or if multiple, ensure they are distinct and follow the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...' 5. Ensure that your answer contains ONLY labels from the set and no additional text or characters."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# Zero-shot, prompt variant "r+i+s_b_s": role, instructions, then the
# "step by step" cue.
nr = "zero"
prompt_name = "r+i+s_b_s"

preds = []
for example in examples:
    # Conversation order: role, instructions, step-by-step cue, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [152]:
# Zero-shot, prompt variant "r+s_b_s+i": role, the "step by step" cue, then
# the instructions.
nr = "zero"
prompt_name = "r+s_b_s+i"

preds = []
for example in examples:
    # Conversation order: role, step-by-step cue, instructions, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [163]:
# Zero-shot, prompt variant "r+i(s_b_s)": the "step by step" cue is placed
# inside the instructions message instead of being sent separately.
nr = "zero"
prompt_name = "r+i(s_b_s)"

preds = []
for example in examples:
    # Conversation order: role, combined cue + instructions, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [179]:
# Zero-shot, prompt variant "r+i+m": role, instructions, then a motivation message.
nr = "zero"
prompt_name = "r+i+m"

preds = []
for example in examples:
    # Conversation order: role, instructions, motivation, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [234]:
# Zero-shot, prompt variant "r+m+i": role, motivation message, then the instructions.
nr = "zero"
prompt_name = "r+m+i"

preds = []
for example in examples:
    # Conversation order: role, motivation, instructions, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [9]:
# Zero-shot, prompt variant "r+i+s_b_s+m": role, instructions, "step by step"
# cue, then the motivation message.
nr = "zero"
prompt_name = "r+i+s_b_s+m"

preds = []
for example in examples:
    # Conversation order: role, instructions, step-by-step cue, motivation, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [262]:
# Zero-shot, prompt variant "r+i+c": role, instructions, then a CONTEXT message
# describing Column Type Annotation as identifying the semantic type of a column.
nr = "zero"
prompt_name = "r+i+c"

preds = []
for example in examples:
    # Conversation order: role, instructions, context, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and refers to the process of identifying the semantic type of the values of a column in a table."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [272]:
# Zero-shot, prompt variant "r+i+c1": same as "r+i+c" but the CONTEXT message
# describes the task as categorizing each column based on its content.
nr = "zero"
prompt_name = "r+i+c1"

preds = []
for example in examples:
    # Conversation order: role, instructions, context, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [283]:
# Zero-shot, prompt variant "r+i+c2": the CONTEXT message additionally restates
# the task (predict the column type using labels from the label-set).
nr = "zero"
prompt_name = "r+i+c2"

preds = []
for example in examples:
    # Conversation order: role, instructions, context, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set!"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [377]:
# Zero-shot, prompt variant "r+i+c2.1": same text as "r+i+c2" with the leading
# word "CONTEXT:" removed from the third system message.
nr = "zero"
prompt_name = "r+i+c2.1"

preds = []
for example in examples:
    # Conversation order: role, instructions, context (unlabelled), table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set!"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [None]:
# Zero-shot, prompt variant "r+i+s_b_s+m+c": every component in one prompt —
# role, instructions, "step by step" cue, motivation, and context.
nr = "zero"
prompt_name = "r+i+s_b_s+m+c"

preds = []
for example in examples:
    # Conversation order: role, instructions, step-by-step cue, motivation, context, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set!"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [305]:
# Zero-shot, prompt variant "r+i+c.example": role, instructions, and a CONTEXT
# message that walks through a toy worked example.
nr = "zero"
prompt_name = "r+i+c.example"

# The worked example is split over adjacent string literals, which Python joins
# with NO separator. Each piece therefore ends with explicit punctuation and a
# space so the sentences do not run into each other in the prompt
# (previously "...parrot.'First check ... parrotCarefully think ...").
# The "Columm1" typo is corrected as well.
# NOTE: this changes the prompt text, so predictions from earlier runs of this
# variant are not directly comparable.
context_example = (
    "CONTEXT: Here is an example of how Column Type Annotation task is solved: 'Classify the given column: Column1: Dog, Cat, Dog, parrot.' "
    "First check the values of Column1: Dog, Cat, Dog, parrot. "
    "Carefully think and analyze the values, decide and predict the label that best fits. "
    "Answer: Column 1 : animal, pet "
)

preds = []
for example in examples:
    # Conversation order: role, instructions, worked example, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content=context_example),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [319]:
# Zero-shot, prompt variant "r+i+c+example": role, instructions, and a CONTEXT
# message that combines the task description with a toy worked example.
nr = "zero"
prompt_name = "r+i+c+example"

# The context is split over adjacent string literals, which Python joins with
# NO separator. Each piece therefore ends with explicit punctuation and a space
# so the sentences do not run into each other in the prompt
# (previously "...parrot.'First check ... parrotCarefully think ...").
# The "Columm1" typo is corrected as well.
# NOTE: this changes the prompt text, so predictions from earlier runs of this
# variant are not directly comparable.
context_with_example = (
    "CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set. Here is an example of how Column Type Annotation task is solved: 'Classify the given column: Column1: Dog, Cat, Dog, parrot.' "
    "First check the values of Column1: Dog, Cat, Dog, parrot. "
    "Carefully think and analyze the values, decide and predict the label that best fits. "
    "Answer: Column 1 : animal, pet "
)

preds = []
for example in examples:
    # Conversation order: role, instructions, context + worked example, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content=context_with_example),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [94]:
# Generating tables to use for "Generated Knowledge Prompt".
# Five generation requests are issued per entry of `labels`; each generated
# table is stored together with the entry it was requested for.
import openai

generated_examples = []
generated_examples_labels = []

# NOTE(review): `labels` is whatever the kernel currently holds under that name.
# The data-loading cell binds it to the per-column label lists of the test set
# (one entry per test column, so lists rather than strings, with repeats), and a
# later cell rebinds it from GKP-Input/Generated-Table-Labels.pkl. Confirm which
# iterable is intended here before re-running.
for label in labels:

    # The counter only controls the number of requests; its value is not used.
    for table_number in range(1, 6):
        prompt = f"Generate 1 table with 4 random columns and 5 rows. Include one column about {label}. Please return only the values, no need to explain."

        # The whole request is a single system-role message.
        messages = [{"role": "system", "content": prompt}]

        response = openai.ChatCompletion.create(
            messages=messages,
            model='gpt-3.5-turbo-0301',
        )

        generated_content = response['choices'][0]['message']['content'].strip()
        generated_examples.append(generated_content)
        generated_examples_labels.append(label)


In [95]:
# Disabled save step, kept as a bare string literal so it does not execute:
# it would pickle `generated_examples` to GKP-Input/Generated-Tables.pkl.
""" file_name=f'GKP-Input/Generated-Tables.pkl'
f = open(file_name,'wb')
pickle.dump(generated_examples, f)
f.close() """

In [96]:
# Disabled save step, kept as a bare string literal so it does not execute:
# it would pickle `generated_examples_labels` to GKP-Input/Generated-Table-Labels.pkl.
""" file_name=f'GKP-Input/Generated-Table-Labels.pkl'
f = open(file_name,'wb')
pickle.dump(generated_examples_labels, f)
f.close() """

In [None]:
# Load the previously generated tables from disk and display them.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('GKP-Input/Generated-Tables.pkl', "rb") as f:
    tables = pickle.load(f)
tables

In [None]:
# Load the labels that belong to the generated tables and display them.
# NOTE(review): this rebinds `labels`, which the data-loading cell uses for the
# test-set column labels; cells run after this one see the generated-table
# labels instead. pickle.load can execute arbitrary code; only load trusted files.
with open('GKP-Input/Generated-Table-Labels.pkl', "rb") as f:
    labels = pickle.load(f)
labels

ONE SHOT

In [331]:
# One-shot, prompt variant "r": role prompt plus one randomly drawn training
# table with its expected answer as a demonstration.
import random

nr = "one"
prompt_name = "r"

# Cell-local, seeded generator: the demonstration drawn for the i-th table is
# the same on every re-run, independent of any other use of the global
# `random` state in the kernel.
demo_rng = random.Random(42)

preds = []
for example in examples:
    # Pick the demonstration (training table + its answer text) for this request.
    index = demo_rng.randrange(len(train_examples))

    # Conversation order: role, demonstration question, demonstration answer, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [11]:
# One-shot, prompt variant "r+i": role and instructions plus one randomly drawn
# training table with its expected answer as a demonstration.
import random

nr = "one"
prompt_name = "r+i"

# Cell-local, seeded generator: the demonstration drawn for the i-th table is
# the same on every re-run, independent of any other use of the global
# `random` state in the kernel.
demo_rng = random.Random(42)

preds = []
for example in examples:
    # Pick the demonstration (training table + its answer text) for this request.
    index = demo_rng.randrange(len(train_examples))

    # Conversation order: role, instructions, demonstration question/answer, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [351]:
# One-shot, prompt variant "r+i+s_b_s": role, instructions, and the "step by
# step" cue, plus one randomly drawn training table as a demonstration.
import random

nr = "one"
prompt_name = "r+i+s_b_s"

# Cell-local, seeded generator: the demonstration drawn for the i-th table is
# the same on every re-run, independent of any other use of the global
# `random` state in the kernel.
demo_rng = random.Random(42)

preds = []
for example in examples:
    # Pick the demonstration (training table + its answer text) for this request.
    index = demo_rng.randrange(len(train_examples))

    # Conversation order: role, instructions, step-by-step cue,
    # demonstration question/answer, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [364]:
# One-shot, prompt variant "r+s_b_s+i": role, the "step by step" cue, then the
# instructions, plus one randomly drawn training table as a demonstration.
import random

nr = "one"
prompt_name = "r+s_b_s+i"

# Cell-local, seeded generator: the demonstration drawn for the i-th table is
# the same on every re-run, independent of any other use of the global
# `random` state in the kernel.
demo_rng = random.Random(42)

preds = []
for example in examples:
    # Pick the demonstration (training table + its answer text) for this request.
    index = demo_rng.randrange(len(train_examples))

    # Conversation order: role, step-by-step cue, instructions,
    # demonstration question/answer, table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [390]:
# One-shot prompt "r+i+m": role, instructions and a motivation sentence (system messages).
import random

nr = "one"
prompt_name = "r+i+m"

preds = []
for example in examples:
    # System part of the conversation: role (with the label set), instructions, motivation.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # One demonstration drawn at random from train_examples, then the table to classify.
    demo_idx = random.randrange(len(train_examples))
    messages.extend([
        HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
        AIMessage(content=f"{train_example_labels[demo_idx]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ])

    # Keep only the text of the model's answer.
    reply = chat(messages)
    preds.append(reply.content)
    

In [None]:
# One-shot prompt "r+m+i": role, then the motivation sentence, then the instructions (system messages).
import random

nr = "one"
prompt_name = "r+m+i"

preds = []
for example in examples:
    # Same system messages as "r+i+m", but the motivation comes before the instructions.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
    ]

    # One demonstration drawn at random from train_examples, then the table to classify.
    demo_idx = random.randrange(len(train_examples))
    messages.extend([
        HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
        AIMessage(content=f"{train_example_labels[demo_idx]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ])

    # Keep only the text of the model's answer.
    reply = chat(messages)
    preds.append(reply.content)
    

In [398]:
# One-shot prompt "r+i+s_b_s+m": role, instructions, step-by-step cue and motivation (system messages).
import random

nr = "one"
prompt_name = "r+i+s_b_s+m"

preds = []
for example in examples:
    # System part of the conversation, in the order named by prompt_name.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # One demonstration drawn at random from train_examples, then the table to classify.
    demo_idx = random.randrange(len(train_examples))
    messages.extend([
        HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
        AIMessage(content=f"{train_example_labels[demo_idx]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ])

    # Keep only the text of the model's answer.
    reply = chat(messages)
    preds.append(reply.content)
    

In [407]:
# One-shot prompt "r+i+c2": role + instructions + context (labelled "CONTEXT P3" in the original note).
import random

nr = "one"
prompt_name = "r+i+c2"

preds = []
for example in examples:
    # System part of the conversation: role (with the label set), instructions, task context.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set!"),
    ]

    # One demonstration drawn at random from train_examples, then the table to classify.
    demo_idx = random.randrange(len(train_examples))
    messages.extend([
        HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
        AIMessage(content=f"{train_example_labels[demo_idx]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ])

    # Keep only the text of the model's answer.
    reply = chat(messages)
    preds.append(reply.content)

In [418]:
# One-shot prompt "r+i+s_b_s+m+c": role, instructions, step-by-step cue, motivation and context.
import random

nr = "one"
prompt_name = "r+i+s_b_s+m+c"

preds = []
for example in examples:
    # System part of the conversation, in the order named by prompt_name.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set!"),
    ]

    # One demonstration drawn at random from train_examples, then the table to classify.
    demo_idx = random.randrange(len(train_examples))
    messages.extend([
        HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
        AIMessage(content=f"{train_example_labels[demo_idx]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ])

    # Keep only the text of the model's answer.
    reply = chat(messages)
    preds.append(reply.content)

In [431]:
# One-shot prompt "r+i+c.example": role + instructions + a worked example given as context.
import random 

nr="one"
prompt_name = "r+i+c.example"

preds = []
for example in examples:
    messages = []
    
    # System part of the conversation: role (with the label set) and instructions.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."))
    # The worked example is built from adjacent string literals, which Python joins
    # with nothing in between. Each fragment therefore ends with its own separator;
    # without it the sentences run together ("...parrotCarefully think...").
    # NOTE(review): "Columm1" is misspelled in the prompt text; left unchanged here.
    messages.append(SystemMessage(content="CONTEXT: Here is an example of how Column Type Annotation task is solved: 'Classify the given column: Columm1: Dog, Cat, Dog, parrot.' "
                                  "First check the values of Columm1: Dog, Cat, Dog, parrot. "
                                  "Carefully think and analyze the values, decide and predict the label that best fits. "
                                  "Answer: Column 1 : animal, pet "))
    
    # One demonstration drawn at random from train_examples, then the table to classify.
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [446]:
# One-shot prompt "r+i+c+example": role + instructions + context followed by a worked example.
import random 

nr="one"
prompt_name = "r+i+c+example"

preds = []
for example in examples:
    messages = []
    
    # System part of the conversation: role (with the label set) and instructions.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."))
    # The context + worked example is built from adjacent string literals, which Python
    # joins with nothing in between. Each fragment therefore ends with its own separator;
    # without it the sentences run together ("...parrotCarefully think...").
    # NOTE(review): "Columm1" is misspelled in the prompt text; left unchanged here.
    messages.append(SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set. Here is an example of how Column Type Annotation task is solved: 'Classify the given column: Columm1: Dog, Cat, Dog, parrot.' "
                                  "First check the values of Columm1: Dog, Cat, Dog, parrot. "
                                  "Carefully think and analyze the values, decide and predict the label that best fits. "
                                  "Answer: Column 1 : animal, pet "))
    
    # One demonstration drawn at random from train_examples, then the table to classify.
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [469]:
# Load the generated tables and their labels that the GKP prompts below index into.
# NOTE(review): pickle.load can execute arbitrary code; only use files from a trusted source.
with open('GKP-Input/Generated-Tables.pkl', "rb") as tables_file:
    train_tables = pickle.load(tables_file)

with open('GKP-Input/Generated-Table-Labels.pkl', "rb") as labels_file:
    train_labels = pickle.load(labels_file)

In [None]:
# One-shot prompt "r+i+s_b_s+m+GKP": role + instructions + step by step + motivation
# + Generated Knowledge (GKP).
import random

nr="one"
prompt_name = "r+i+s_b_s+m+GKP"

preds = []
for example in examples:
    messages = []

    # System part of the conversation, in the order named by prompt_name.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."))
    messages.append(SystemMessage(content="Let's think step by step."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))

    # Generated knowledge: one randomly chosen generated table together with its label.
    index_gen = random.randint(0, len(train_tables)-1)
    messages.append(HumanMessage(content=f"This is an example of a table that includes a column with the label {train_labels[index_gen]}:\n{train_tables[index_gen]}"))

    # The demonstration question is sent as a human turn (it was a SystemMessage with a
    # stray leading space) so that the demonstration is a Human/AI pair phrased exactly
    # like the final query, as in the non-GKP prompts.
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to classify; only the text of the model's answer is kept.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
# One-shot prompt "r+i+c2+GKP": role + instructions + context + Generated Knowledge (GKP).
import random

nr="one"
prompt_name = "r+i+c2+GKP"

preds = []
for example in examples:
    messages = []
    
    # System part of the conversation: role (with the label set), instructions, task context.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."))
    messages.append(SystemMessage(content=f"CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set!")) 
  
    # Generated knowledge: one randomly chosen generated table together with its label.
    index_gen = random.randint(0, len(train_tables)-1)
    messages.append(HumanMessage(content=f"This is an example of a table that includes a column with the label {train_labels[index_gen]}:\n{train_tables[index_gen]}"))

    # The demonstration question is sent as a human turn (it was a SystemMessage with a
    # stray leading space) so that the demonstration is a Human/AI pair phrased exactly
    # like the final query, as in the non-GKP prompts.
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to classify; only the text of the model's answer is kept.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

FIVE SHOT

In [8]:
# Five-shot prompt "r": only the role/task system message.
import random

nr = "five"
prompt_name = "r"

preds = []
for example in examples:
    # System part of the conversation: role together with the label set.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
    ]

    # Five demonstrations, each drawn independently from train_examples (repeats are possible).
    for shot in range(5):
        demo_idx = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
            AIMessage(content=f"{train_example_labels[demo_idx]}"),
        ]

    # The table to classify; only the text of the model's answer is kept.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    reply = chat(messages)
    preds.append(reply.content)

In [493]:
# Five-shot prompt "r+i": role and instructions as system messages.
import random

nr = "five"
prompt_name = "r+i"

preds = []
for example in examples:
    # System part of the conversation: role (with the label set) and instructions.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
    ]

    # Five demonstrations, each drawn independently from train_examples (repeats are possible).
    for shot in range(5):
        demo_idx = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
            AIMessage(content=f"{train_example_labels[demo_idx]}"),
        ]

    # The table to classify; only the text of the model's answer is kept.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    reply = chat(messages)
    preds.append(reply.content)

In [506]:
# Five-shot prompt "r+i+s_b_s": role, instructions and the step-by-step cue (system messages).
import random

nr = "five"
prompt_name = "r+i+s_b_s"

preds = []
for example in examples:
    # System part of the conversation: role (with the label set), instructions, step-by-step cue.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
    ]

    # Five demonstrations, each drawn independently from train_examples (repeats are possible).
    for shot in range(5):
        demo_idx = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
            AIMessage(content=f"{train_example_labels[demo_idx]}"),
        ]

    # The table to classify; only the text of the model's answer is kept.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    reply = chat(messages)
    preds.append(reply.content)

In [24]:
# Five-shot prompt "r+i+m": role, instructions and a motivation sentence (system messages).
import random

nr = "five"
prompt_name = "r+i+m"

preds = []
for example in examples:
    # System part of the conversation: role (with the label set), instructions, motivation.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Five demonstrations, each drawn independently from train_examples (repeats are possible).
    for shot in range(5):
        demo_idx = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
            AIMessage(content=f"{train_example_labels[demo_idx]}"),
        ]

    # The table to classify; only the text of the model's answer is kept.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    reply = chat(messages)
    preds.append(reply.content)

In [54]:
# Five-shot prompt "r+i+s_b_s+m": role, instructions, step-by-step cue and motivation (system messages).
import random

nr = "five"
prompt_name = "r+i+s_b_s+m"

preds = []
for example in examples:
    # System part of the conversation, in the order named by prompt_name.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Five demonstrations, each drawn independently from train_examples (repeats are possible).
    for shot in range(5):
        demo_idx = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
            AIMessage(content=f"{train_example_labels[demo_idx]}"),
        ]

    # The table to classify; only the text of the model's answer is kept.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    reply = chat(messages)
    preds.append(reply.content)
    

In [44]:
# Five-shot prompt "r+i+s_b_s+m.1": variant of "r+i+s_b_s+m" in which the step-by-step cue
# and the motivation are human messages; the motivation is sent after the table to classify.
import random

nr = "five"
prompt_name = "r+i+s_b_s+m.1"

preds = []
for example in examples:
    # Role and instructions stay system messages; the step-by-step cue is a human turn.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        HumanMessage(content="Let's think step by step."),
    ]

    # Five demonstrations, each drawn independently from train_examples (repeats are possible).
    for shot in range(5):
        demo_idx = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
            AIMessage(content=f"{train_example_labels[demo_idx]}"),
        ]

    # The table to classify, immediately followed by the motivation sentence.
    messages += [
        HumanMessage(content=f"Classify these table columns: {example}"),
        HumanMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]
    reply = chat(messages)
    preds.append(reply.content)
    

In [529]:
# Five-shot prompt "r+i+c2": role + instructions + context (labelled "CONTEXT P1" in the original note).
import random

nr = "five"
prompt_name = "r+i+c2"

preds = []
for example in examples:
    # System part of the conversation: role (with the label set), instructions, task context.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set!"),
    ]

    # Five demonstrations, each drawn independently from train_examples (repeats are possible).
    for shot in range(5):
        demo_idx = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
            AIMessage(content=f"{train_example_labels[demo_idx]}"),
        ]

    # The table to classify; only the text of the model's answer is kept.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    reply = chat(messages)
    preds.append(reply.content)

In [34]:
# Five-shot prompt "r+i+c": role + instructions + context (labelled "CONTEXT P2" in the original note).
# NOTE(review): the context message below is word-for-word the one used by the "r+i+c2"
# cell; confirm that "r+i+c" is really meant to use this wording.
import random

nr = "five"
prompt_name = "r+i+c"

preds = []
for example in examples:
    # System part of the conversation: role (with the label set), instructions, task context.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set!"),
    ]

    # Five demonstrations, each drawn independently from train_examples (repeats are possible).
    for shot in range(5):
        demo_idx = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[demo_idx]}"),
            AIMessage(content=f"{train_example_labels[demo_idx]}"),
        ]

    # The table to classify; only the text of the model's answer is kept.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    reply = chat(messages)
    preds.append(reply.content)

In [None]:
# Five-shot prompt "r+i+c+example": role + instructions + context followed by a worked example.
import random 

nr="five"
prompt_name = "r+i+c+example"

preds = []
for example in examples:
    messages = []
    
    # System part of the conversation: role (with the label set) and instructions.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."))
    # The context + worked example is built from adjacent string literals, which Python
    # joins with nothing in between. Each fragment therefore ends with its own separator;
    # without it the sentences run together ("...parrotCarefully think...").
    # NOTE(review): "Columm1" is misspelled in the prompt text; left unchanged here.
    messages.append(SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set. Here is an example of how Column Type Annotation task is solved: 'Classify the given column: Columm1: Dog, Cat, Dog, parrot.' "
                                  "First check the values of Columm1: Dog, Cat, Dog, parrot. "
                                  "Carefully think and analyze the values, decide and predict the label that best fits. "
                                  "Answer: Column 1 : animal, pet "))
    
    # Five demonstrations, each drawn independently from train_examples (repeats are possible).
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    # The table to classify; only the text of the model's answer is kept.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [548]:
# Load the generated tables and their labels that the GKP prompt below indexes into.
# NOTE(review): pickle.load can execute arbitrary code; only use files from a trusted source.
with open('GKP-Input/Generated-Tables.pkl', "rb") as tables_file:
    train_tables = pickle.load(tables_file)

with open('GKP-Input/Generated-Table-Labels.pkl', "rb") as labels_file:
    train_labels = pickle.load(labels_file)

In [None]:
# Five-shot prompt "r+i+c2+GKP": role + instructions + context + Generated Knowledge (GKP).
import random

nr="five"
prompt_name = "r+i+c2+GKP"

preds = []
for example in examples:
    messages = []
    
    # System part of the conversation: role (with the label set), instructions, task context.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the given columns of a table with one or more of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a label that best captures the overall meaning of the cells in that column. 4.Respond with your selected labels for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Answer with more than one label for a column only if needed, and separate your responses with commas."))
    messages.append(SystemMessage(content=f"CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column of a table based on its content.  Your task is the same, to analyze and then predict the column type with one or more of the provided labels from the label-set!")) 
  
    # Five rounds, each made of a generated-knowledge table followed by one demonstration;
    # both are drawn independently at random, so repeats are possible.
    for i in range(0,5):
        index_gen = random.randint(0, len(train_tables)-1)
        messages.append(HumanMessage(content=f"This is an example of a table that includes a column with the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}"))
        # The demonstration question is sent as a human turn (it was a SystemMessage) so
        # that each demonstration is a Human/AI pair phrased exactly like the final query,
        # as in the non-GKP prompts.
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    # The table to classify; only the text of the model's answer is kept.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
# Display the raw model responses collected in `preds`.
preds

In [47]:
# Persist the raw model responses so they can be re-loaded and re-evaluated
# later without querying the model again.
file_name = f'Predictions/{model_name}/table/{nr}-shot/chat-table-{prompt_name}-{nr}-shot.pkl'

# The context manager closes the file even if pickling fails part-way.
with open(file_name, 'wb') as f:
    pickle.dump(preds, f)

## Evaluation

In [None]:
# Turn each raw model response into one list of label ids per table column.
# Text that cannot be mapped through `text_to_label` becomes the "-" placeholder.
predictions = []
i = 0  # running column counter, only used in the diagnostic messages
for j, table_preds in enumerate(preds):

    # Number of column predictions expected for table j
    # (presumably its column count -- verify against how `test` is built).
    table_number = len(test[j][2])

    # Drop any preamble the model put before the actual answer.
    if "Class:" in table_preds:
        table_preds = table_preds.split("Class:")[1]

    if ":\n" in table_preds:
        table_preds = table_preds.split(":\n")[1]

    # Pick the delimiter between columns. With ":" or "-" the first chunk is
    # the text before the first delimiter (e.g. "Column1"), so it is skipped.
    if ":" in table_preds:
        separator, start = ":", 1
    elif "-" in table_preds:
        separator, start = "-", 1
    else:
        separator, start = ",", 0
    end = start + table_number

    col_preds = table_preds.split(separator)[start:end]

    for pred in col_preds:
        column_predictions = []
        i += 1

        # A column answer may hold several comma-separated labels; a
        # single-label answer simply yields one candidate.
        for candidate in pred.split(","):
            # Keep only the text before a newline, an opening parenthesis or
            # a full stop (in that order), then normalise for the lookup.
            for cut in ("\n", "(", "."):
                if cut in candidate:
                    candidate = candidate.split(cut)[0].strip()
            candidate = candidate.strip().lower()

            if candidate in text_to_label:
                column_predictions.append(text_to_label[candidate])
            else:
                print(f"For test example {i} out of label space prediction: {candidate}")
                column_predictions.append('-')

        predictions.append(column_predictions)

    # Pad with "-" when the model answered fewer columns than expected.
    for _ in range(table_number - len(col_preds)):
        predictions.append(["-"])
        i += 1

In [None]:
# Display the parsed predictions (one list of labels per column).
predictions

### Calculate Precision, Recall, Macro-F1 and Micro-F1

In [58]:
def calculate_f1_scores(y_tests, y_preds, num_classes, types):
    """Compute multi-label precision, recall, macro-F1 and micro-F1.

    The last class (index ``num_classes - 1``, i.e. the last entry of
    ``types``) is treated as a placeholder bucket: its row/column of the
    confusion matrix collects false negatives / false positives of the other
    classes, and it is excluded from every reported score.

    Args:
        y_tests: list with one list of gold labels per example.
        y_preds: list with one list of predicted labels per example, aligned
            with ``y_tests``.
        num_classes: size of the confusion matrix, placeholder included.
        types: ordered label names; every label in ``y_tests`` / ``y_preds``
            must occur in it.

    Returns:
        ``[evaluation, per_class_eval]`` where ``evaluation`` holds
        "Micro-F1", "Macro-F1", "Precision" and "Recall" (the last two are
        macro averages) and ``per_class_eval`` maps every type except the
        last one to its "Precision", "Recall" and "F1". Scores whose
        denominator is zero are reported as 0.

    Raises:
        ValueError: if a label is not contained in ``types``.
        IndexError: if ``y_tests`` and ``y_preds`` differ in length.
    """

    def safe_divide(numerator, denominator):
        # Element-wise division that yields 0 (without a RuntimeWarning)
        # wherever the denominator is 0.
        result = np.zeros_like(denominator, dtype=float)
        np.divide(numerator, denominator, out=result, where=denominator != 0)
        return result

    gold_indices = [[types.index(label) for label in sample] for sample in y_tests]
    pred_indices = [[types.index(label) for label in sample] for sample in y_preds]

    cm = np.zeros(shape=(num_classes, num_classes))

    for i, gold in enumerate(gold_indices):
        predicted = pred_indices[i]
        for label in gold:
            if label in predicted:
                cm[label][label] += 1  # TP
            else:
                cm[-1][label] += 1  # FN

    for i, predicted in enumerate(pred_indices):
        gold = gold_indices[i]
        for label in predicted:
            if label not in gold:
                cm[label][-1] += 1  # FP

    # Per class: TP on the diagonal, FN in the rest of its column,
    # FP in the rest of its row.
    tp = np.diag(cm).copy()
    fn = cm.sum(axis=0) - tp
    fp = cm.sum(axis=1) - tp

    precision = safe_divide(tp, tp + fp)
    recall = safe_divide(tp, tp + fn)
    f1 = safe_divide(2 * precision * recall, precision + recall)

    class_p = precision.tolist()
    class_r = recall.tolist()
    class_f1s = f1.tolist()

    # Macro averages over every class except the trailing placeholder.
    macro_f1 = sum(class_f1s[:-1]) / (num_classes-1)
    p = sum(class_p[:-1]) / (num_classes-1)
    r = sum(class_r[:-1]) / (num_classes-1)

    # Micro-F1 from the pooled counts of the same classes.
    all_tp = float(tp[:-1].sum())
    all_fp = float(fp[:-1].sum())
    all_fn = float(fn[:-1].sum())
    micro_denominator = all_tp + (1/2 * (all_fp + all_fn))
    micro_f1 = all_tp / micro_denominator if micro_denominator else 0.0

    per_class_eval = {
        t: {"Precision": class_p[index], "Recall": class_r[index], "F1": class_f1s[index]}
        for index, t in enumerate(types[:-1])
    }

    evaluation = {
        "Micro-F1": micro_f1,
        "Macro-F1": macro_f1,
        "Precision": p,
        "Recall": r
    }

    return [ evaluation, per_class_eval]

In [None]:
# Label space for the evaluation. calculate_f1_scores treats the LAST entry
# of `types` as the out-of-label-space bucket and leaves it out of every
# score, so the "-" placeholder must always be present at the end.
# Otherwise the last real label would be dropped from the metrics, and a "-"
# that occurs only inside a multi-label prediction would not be found in `types`.
list_set_labels = list(labels_to_text.keys())
types = list_set_labels if "-" in list_set_labels else list_set_labels + ["-"]
evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)

In [None]:
# Display the aggregate scores returned by calculate_f1_scores.
evaluation

In [None]:
# Display the per-label scores returned by calculate_f1_scores.
per_class_eval

## Error Analysis

In [None]:
# Print every example whose predicted label set differs from the gold set
# and count the gold labels that were not predicted.
errors = 0
for i in range(len(predictions)):
    label_set = set(labels[i])
    prediction_set = set(predictions[i])

    # Identical sets mean the example was predicted exactly right.
    if label_set == prediction_set:
        continue

    # Each gold label missing from the prediction counts as one error.
    errors += len(label_set - prediction_set)
    print(f"Predicted as {predictions[i]} when it was {label_set}")
errors

### Reload previously saved prediction files

In [None]:
# Restore the raw model responses of an earlier run for the current
# model_name / nr / prompt_name. Pickle files must come from a trusted source.
saved_preds_path = f'Predictions/{model_name}/table/{nr}-shot/chat-table-{prompt_name}-{nr}-shot.pkl'

with open(saved_preds_path, "rb") as saved_preds_file:
    preds = pickle.load(saved_preds_file)

In [None]:
# Display the contents of `preds`.
preds