In [1]:
import os
import re
import pickle
import numpy as np
from dotenv import dotenv_values
from langchain import PromptTemplate, LLMChain, OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage, AIMessage

In [2]:
# Read key/value settings from the .env file one directory above the notebook.
config = dotenv_values("../.env")

# Keep the key in a module-level name (passed explicitly to the chat client later)
# and also export it through the process environment.
OPENAI_API_KEY = config["OPENAI_API_KEY"]
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [3]:
# Maps every column-type label identifier to the lower-case phrase that is shown to
# the model inside the prompts. Insertion order is preserved from the pair list.
labels_to_text = dict(
    [
        ("Date", "date"),
        ("Book/name", "name of book"),
        ("Language", "language"),
        ("Person/name", "name of person"),
        ("BookFormatType", "book format type"),
        ("OfferItemCondition", "offer item condition"),
        ("ItemAvailability", "item availability"),
        ("price", "price"),
        ("currency", "currency"),
        ("Review", "review"),
        ("Number", "number"),
        ("IdentifierAT", "identifier"),
        ("URL", "url"),
        ("Place/name", "name of place"),
        ("Event/name", "name of event"),
        ("EventStatusType", "event status"),
        ("EventAttendanceModeEnumeration", "event attendance mode"),
        ("telephone", "telephone"),
        ("email", "email"),
        ("category", "category"),
        ("Duration", "duration"),
        ("streetAddress", "street address"),
        ("addressLocality", "locality of address"),
        ("LocalBusiness/name", "name of local business"),
        ("priceRange", "price range"),
        ("openingHours", "opening hours"),
        ("faxNumber", "fax number"),
        ("Country", "country"),
        ("postalCode", "postal code"),
        ("addressRegion", "region of address"),
        ("Photograph", "photograph"),
        ("Movie/name", "name of movie"),
        ("Rating", "rating"),
        ("MusicArtistAT", "music artist"),
        ("MusicAlbum/name", "name of music album"),
        ("MusicRecording/name", "name of music recording"),
        ("weight", "weight"),
        ("GenderType", "gender type"),
        ("Product/name", "name of product"),
        ("DeliveryMethod", "delivery method"),
        ("Organization", "organization"),
        ("Book/description", "description of book"),
        ("CreativeWork", "creative work"),
        ("Boolean", "boolean"),
        ("DateTime", "date and time"),
        ("CreativeWork/name", "name of creative work"),
        ("Event/description", "description of event"),
        ("PostalAddress", "postal address"),
        ("Time", "time"),
        ("Hotel/name", "name of hotel"),
        ("CoordinateAT", "coordinate"),
        ("Hotel/description", "description of hotel"),
        ("LocationFeatureSpecification", "location feature"),
        ("paymentAccepted", "payment accepted"),
        ("Brand", "brand"),
        ("MonetaryAmount", "monetary amount"),
        ("JobPosting/name", "name of job posting"),
        ("OccupationalExperienceRequirements", "occupational experience requirements"),
        ("EducationalOccupationalCredential", "educational occupational credential"),
        ("workHours", "work hours"),
        ("CategoryCode", "category code"),
        ("JobPosting/description", "description of job posting"),
        ("DayOfWeek", "day of week"),
        ("Movie/description", "description of movie"),
        ("Museum/name", "name of museum"),
        ("ItemList", "item list"),
        ("Distance", "distance"),
        ("unitCode", "unit code"),
        ("ProductModel", "product model"),
        ("unitText", "unit text"),
        ("QuantitativeValue", "quantitative value"),
        ("Product/description", "description of product"),
        ("Recipe/name", "name of recipe"),
        ("Mass", "mass"),
        ("Energy", "energy"),
        ("RestrictedDiet", "restricted diet"),
        ("Recipe/description", "description of recipe"),
        ("Restaurant/name", "name of restaurant"),
        ("SportsEvent/name", "name of sports event"),
        ("SportsTeam", "sports team"),
        ("TVEpisode/name", "name of TV episode"),
        ("CreativeWorkSeries", "creative work series"),
    ]
)

In [4]:
# Reverse lookup: prompt phrase -> label identifier.
# Derived from labels_to_text instead of being maintained by hand, so the two
# mappings cannot drift apart when a label is added or renamed. Entry order matches
# labels_to_text, which is the order the hand-written table used.
text_to_label = {text: label for label, text in labels_to_text.items()}

# Two labels sharing one phrase would silently collapse into a single entry above.
assert len(text_to_label) == len(labels_to_text), "label phrases must be unique"

## Load test (and training) set

In [5]:
# Load the pickled training and test tables from the working directory.
# NOTE(review): pickle.load can execute arbitrary code while deserialising; only
# open these files if they come from a trusted source.
with open('sotabv2-cta-train-table.pkl', "rb") as f:
    train = pickle.load(f)
with open('sotabv2-cta-test-table.pkl', "rb") as f:
    test = pickle.load(f)

# Each record is indexed positionally: element 1 is used as the prompt input and
# element 2 is iterated as that table's per-column labels.
examples = [record[1] for record in test]
labels = [label for record in test for label in record[2]]

train_examples = [record[1] for record in train]

# For every training table, render its labels as "Column N: <phrase>" lines.
train_example_labels = []
for table in train:
    col_labels = "\n".join(
        f"Column {position}: {labels_to_text[label]}"
        for position, label in enumerate(table[2], start=1)
    )
    train_example_labels.append(col_labels.strip())

In [None]:
# Comma-separated list of the phrases for every label that occurs in the test set;
# this string is interpolated into the system prompt of each experiment cell.
# The distinct labels are sorted because iterating a set of strings follows hash
# order, which changes between kernel sessions and would make the prompt text
# (and therefore the experiment) non-reproducible.
labels_joined = ", ".join(labels_to_text[label] for label in sorted(set(labels)))
labels_joined

In [9]:
# Chat client used by the prompt-variant cells; temperature is fixed at 0.
model_name = 'gpt-3.5-turbo-1106'
chat = ChatOpenAI(
    model=model_name,
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

## Choose setup: zero-shot, one-shot or five-shot

CTA TABLE

ZERO-SHOT

In [43]:
# Zero-shot, role only ("r"): one system message with a data-scientist persona and
# the allowed label list, followed by the table to classify.
nr = "zero"
prompt_name = "r"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great data scientist. The best at what you do and your task is to "
                "annotate a given table with only one of the following labels that are "
                f"separated with comma: {labels_joined}. "
                "Answer only with labels from the provided label set!"
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)

In [198]:
# Zero-shot, role only ("r4"): "Table Annotation Specialist" persona with a closing
# reminder to answer with a label from the set.
nr = "zero"
prompt_name = "r4"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with only one of the following labels that are separated "
                f"with comma: {labels_joined}. "
                "Answer with one of the labels from the provided label set!"
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# Zero-shot, role only ("r4.1"): same persona as "r4", with "ONLY ONE" emphasised in
# both the task statement and the closing reminder inside the system message.
nr = "zero"
prompt_name = "r4.1"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}. "
                "Answer with ONLY ONE label from the provided label set for each Column!"
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # The leftover debugging print of `messages` was removed: it dumped the whole
    # prompt (including the full label list) once per test table, and no other
    # variant cell prints its messages.
    res = chat(messages)
    preds.append(res.content)
    

In [253]:
# Zero-shot, role only ("r4.2"): specialist persona and label list, without any
# closing reminder sentence.
nr = "zero"
prompt_name = "r4.2"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [270]:
# Zero-shot, role only ("r4.1.DHM"): the "ONLY ONE label" reminder is moved out of
# the system message and appended to the human message instead.
nr = "zero"
prompt_name = "r4.1.DHM"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        HumanMessage(
            content=(
                f"Classify these table columns: {example}. "
                "Answer with ONLY ONE label from the provided label set for each Column!"
            )
        ),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# Zero-shot, role + instructions ("r+i"): task statement and a four-step instruction
# list, sent as two separate system messages.
nr = "zero"
prompt_name = "r+i"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "Your task is to classify the columns of a given table with only one of the "
                f"following classes that are separated with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1. Look at the input given to you and make a table out of it. "
                "2. Look at the cell values in detail. "
                "3. For each column, select a class that best represents the meaning of all cells in the column. "
                "4. Answer with the selected class for each columns with the format Column1: class."
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)


In [318]:
# Zero-shot, role + instructions ("r+i2.2"): specialist persona, five-step
# instruction list, and a human message that ends with the "ONLY ONE label" reminder.
nr = "zero"
# Disabled alternative name: prompt_name = "r+i2"
prompt_name = "r+i2.2"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        # Disabled alternative human message (no trailing reminder):
        # HumanMessage(content=f"Classify these table columns: {example}")
        HumanMessage(
            content=(
                f"Classify these table columns: {example}. "
                "Answer with ONLY ONE label from the provided label set for each Column!"
            )
        ),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [56]:
# Zero-shot, role + instructions ("r+i2H"): the five-step instruction list is sent
# as a human message rather than a system message.
nr = "zero"
prompt_name = "r+i2H"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        HumanMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [307]:
# Zero-shot, role + instructions + step-by-step ("r+i+s_b_s.2"): persona,
# instructions and the step-by-step cue are three system messages.
nr = "zero"
# Disabled alternative name: prompt_name = "r+i+s_b_s"
prompt_name = "r+i+s_b_s.2"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        SystemMessage(content="Let's think step by step."),
        # Disabled alternative human message (with trailing reminder):
        # HumanMessage(content=f"Classify these table columns: {example}. Answer with ONLY ONE label from the provided label set for each Column!")
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [122]:
# Zero-shot, role + instructions + step-by-step ("r+i+s_b_s.3"): the step-by-step
# cue is a human message placed before the table.
nr = "zero"
prompt_name = "r+i+s_b_s.3"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        HumanMessage(content="Let's think step by step."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [86]:
# Zero-shot, role + instructions + step-by-step ("r+i+s_b_s.3.1"): instructions and
# the step-by-step cue are both human messages placed before the table.
nr = "zero"
prompt_name = "r+i+s_b_s.3.1"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        HumanMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        HumanMessage(content="Let's think step by step."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [98]:
# Zero-shot, role + instructions + step-by-step ("r+i+s_b_s.3.2"): the step-by-step
# cue is a human message placed after the table.
nr = "zero"
prompt_name = "r+i+s_b_s.3.2"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
        HumanMessage(content="Let's think step by step."),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [107]:
# Zero-shot, role + step-by-step + instructions ("r+s_b_s+i"): the human
# step-by-step cue is sent before the system instruction list.
nr = "zero"
prompt_name = "r+s_b_s+i"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        HumanMessage(content="Let's think step by step."),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [17]:
# Zero-shot, role + instructions ("r+i3.1"): step 5 of the instruction list is
# replaced by a tie-breaking rule for columns where several labels seem applicable.
nr = "zero"
# Disabled alternative name: prompt_name = "r+i3"
prompt_name = "r+i3.1"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5.Ensure that you use only one label from the provided set for each column. "
                "If multiple labels seem applicable, prioritize the one that best represents "
                "the overall content of the column."
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [389]:
# Zero-shot, role + instructions + step-by-step + motivation ("r+i2+s_b_s+m.1"):
# the step-by-step cue and the motivation sentence share one system message.
nr = "zero"
# Disabled alternative names:
#   prompt_name = "r+i2+m"
#   prompt_name = "r+i2+s_b_s+m"
prompt_name = "r+i2+s_b_s+m.1"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        # Disabled alternatives that sent the cue and the motivation separately:
        # SystemMessage(content="Your answer is very important. Take your time and think well before answering!")
        # SystemMessage(content="Let's think step by step.")
        # SystemMessage(content="Your answer is very important. Take your time and think well before answering!")
        SystemMessage(
            content=(
                "Let's think step by step. "
                "Your answer is very important. Take your time and think well before answering!"
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [138]:
# Zero-shot, role + instructions + motivation ("r+i2+m.2"): the motivation sentence
# is a human message placed before the table. No step-by-step message is sent here.
nr = "zero"
prompt_name = "r+i2+m.2"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        HumanMessage(
            content="Your answer is very important. Take your time and think well before answering!"
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [153]:
# Zero-shot, role + instructions + motivation ("r+i2+m.3"): the motivation sentence
# is a human message placed after the table. No step-by-step message is sent here.
nr = "zero"
prompt_name = "r+i2+m.3"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
        HumanMessage(
            content="Your answer is very important. Take your time and think well before answering!"
        ),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [418]:
# Zero-shot, role + instructions + step-by-step + motivation ("r+i2+s_b_s.2+m.2"):
# human step-by-step cue before the table, human motivation sentence after it.
nr = "zero"
prompt_name = "r+i2+s_b_s.2+m.2"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to annotate "
                "a given table with ONLY ONE of the following labels that are separated "
                f"with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        HumanMessage(content="Let's think step by step."),
        HumanMessage(content=f"Classify these table columns: {example}"),
        HumanMessage(
            content="Your answer is very important. Take your time and think well before answering!"
        ),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)
    

In [889]:
# Zero-shot, role + context ("r+c"): persona message plus a one-sentence system
# message describing the Column Type Annotation task.
nr = "zero"
prompt_name = "r+c"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to classify "
                "the columns of a given table with ONLY ONE of the following labels that "
                f"are separated with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves "
                "categorizing each column in a table based on its content."
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)

In [922]:
# Zero-shot, role + context ("r+c1"): the context message adds a sentence about
# classification and a restatement of the task.
nr = "zero"
prompt_name = "r+c1"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to classify "
                "the columns of a given table with ONLY ONE of the following labels that "
                f"are separated with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves "
                "categorizing each column in a table based on its content. "
                "Furthermore, classification involves assigning predefined categories or labels "
                "to data based on its features or attributes.  "
                "Your task is the same, to predict the column types of a given table "
                "with only one label per column from the provided label-set!"
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)

In [979]:
# Zero-shot, role + context ("r+c1.1.1"): context message with the task restatement
# but without the general sentence about classification.
nr = "zero"
prompt_name = "r+c1.1.1"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to classify "
                "the columns of a given table with ONLY ONE of the following labels that "
                f"are separated with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves "
                "categorizing each column in a table based on its content.  "
                "Your task is the same, to predict the column types of a given table "
                "with only one label per column from the provided label-set!"
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)

In [1021]:
# Zero-shot, role + instructions + context + motivation ("r+i2+c1.1.1+m"): four
# system messages followed by the table.
nr = "zero"
# Disabled alternative name: prompt_name = "r+i2+c1+m"
prompt_name = "r+i2+c1.1.1+m"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to classify "
                "the columns of a given table with ONLY ONE of the following labels that "
                f"are separated with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        # Disabled alternative context message:
        # SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes.")
        SystemMessage(
            content=(
                "CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves "
                "categorizing each column in a table based on its content.  "
                "Your task is the same, to predict the column types of a given table "
                "with only one label per column from the provided label-set!"
            )
        ),
        SystemMessage(
            content="Your answer is very important. Take your time and think well before answering!"
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)

In [950]:
# Zero-shot, role + instructions + context ("r+i2+c1"): persona, instruction list
# and context are three system messages.
nr = "zero"
prompt_name = "r+i2+c1"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to classify "
                "the columns of a given table with ONLY ONE of the following labels that "
                f"are separated with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        SystemMessage(
            content=(
                "CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves "
                "categorizing each column in a table based on its content. "
                "Furthermore, classification involves assigning predefined categories or labels "
                "to data based on its features or attributes."
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)

In [173]:
# Zero-shot, role + instructions + context ("r+i2+c"): the context is sent as a
# human message placed before the table.
nr = "zero"
prompt_name = "r+i2+c"

preds = []
for example in examples:
    messages = [
        SystemMessage(
            content=(
                "You are a great Table Annotation Specialist and your task is to classify "
                "the columns of a given table with ONLY ONE of the following labels that "
                f"are separated with comma: {labels_joined}."
            )
        ),
        SystemMessage(
            content=(
                "Your instructions are: "
                "1.Review the provided input and organize it into a table format. "
                "2.Carefully examine the values within each cell of the table. "
                "3.For every column in the table, choose a single label that best captures "
                "the overall meaning of the cells in that column. "
                "4.Respond with your selected label for each column, following the format: "
                "'Column1: SelectedLabel, Column2: SelectedLabel, ...'. "
                "5. Ensure that you use only one label from the provided set, "
                "and separate your responses with commas."
            )
        ),
        HumanMessage(
            content=(
                "CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves "
                "categorizing each column in a table based on its content. "
                "Furthermore, classification involves assigning predefined categories or labels "
                "to data based on its features or attributes."
            )
        ),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # One chat call per test table; the raw reply text is collected in preds.
    res = chat(messages)
    preds.append(res.content)

In [187]:
# Zero-shot: role + instructions (v2) + context.
# Variant "c2": the CONTEXT text is sent as a user message AFTER the task message.
nr = "zero"
prompt_name = "r+i2+c2"

preds = []
for example in examples:
    # Fresh conversation per table; note that the task comes first and the context last.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {example}"),
        HumanMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [1179]:
# Zero-shot: role + worked-example context (context sent as a system message).
# nr / prompt_name tag this run (presumably consumed when results are stored -- not visible in this cell).
nr = "zero"
prompt_name = "r+c.example"

# Worked example shown to the model. The pieces are adjacent string literals, which
# Python joins with NO separator, so each piece ends with an explicit space; otherwise
# the sentences run together (e.g. "pets.Now we check", "foodAnswer:").
example_context = (
    "CONTEXT: Here is an example of how Column Type Annotation task is solved: "
    "'Classify these table columns: Column1: Dog, Cat, Dog.  Column2: bread, pasta, meat, beef.' "
    "First we check Column1: Dog, Cat, Dog. Analyze, and predict: pets. "
    "Now we check Column2: bread, pasta, meat, beef. Analyze, and predict: food. "
    "Answer: Column 1 Name : Pets, Column 2 Name: Food"
)

preds = []
for example in examples:
    # Fresh conversation per table: role, worked example, then the table to classify.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content=example_context),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [204]:
# Zero-shot: role + worked-example context.
# Variant ".2": the worked example is sent as a user message instead of a system message.
nr = "zero"
prompt_name = "r+c.example.2"

# Worked example shown to the model. The pieces are adjacent string literals, which
# Python joins with NO separator, so each piece ends with an explicit space; otherwise
# the sentences run together (e.g. "pets.Now we check", "foodAnswer:").
example_context = (
    "CONTEXT: Here is an example of how Column Type Annotation task is solved: "
    "'Classify these table columns: Column1: Dog, Cat, Dog.  Column2: bread, pasta, meat, beef.' "
    "First we check Column1: Dog, Cat, Dog. Analyze, and predict: pets. "
    "Now we check Column2: bread, pasta, meat, beef. Analyze, and predict: food. "
    "Answer: Column 1 Name : Pets, Column 2 Name: Food"
)

preds = []
for example in examples:
    # Fresh conversation per table: role, worked example (user turn), then the table to classify.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        HumanMessage(content=example_context),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [None]:
# Zero-shot: role + "Let's think step by step." (both sent as system messages).
nr = "zero"
prompt_name = "r+s_b_s"

preds = []
for example in examples:
    # Fresh conversation per table: role, step-by-step cue, then the table to classify.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)
    

In [12]:
# Build the synthetic tables used for the "Generated Knowledge Prompt" experiments.
# For every entry of `labels`, five requests are sent; each asks for one small table
# that contains a column about that label. Replies and their labels are stored in two
# parallel lists (same index = same table).
import openai

generated_examples = []
generated_examples_labels = []

for label in labels:
    # table_number only counts the five repetitions per label; it is not used in the prompt.
    for table_number in range(1, 6):
        prompt = f"Generate 1 table with 4 random columns and 5 rows. Include one column about {label}. Please return only the values, no need to explain."

        # The whole request is a single system-role message.
        messages = [{"role": "system", "content": prompt}]

        response = openai.ChatCompletion.create(
            model='gpt-3.5-turbo-0301',
            messages=messages,
        )

        # First choice only, with surrounding whitespace removed.
        generated_content = response['choices'][0]['message']['content'].strip()

        generated_examples.append(generated_content)
        generated_examples_labels.append(label)


In [13]:
# Disabled code: everything below sits inside a bare triple-quoted string, so nothing
# is executed. The wrapped statements would pickle `generated_examples` to
# GKP-Input/Generated-Tables.pkl.
""" file_name=f'GKP-Input/Generated-Tables.pkl'
f = open(file_name,'wb')
pickle.dump(generated_examples, f)
f.close() """

In [14]:
# Disabled code: everything below sits inside a bare triple-quoted string, so nothing
# is executed. The wrapped statements would pickle `generated_examples_labels` to
# GKP-Input/Generated-Table-Labels.pkl.
""" file_name=f'GKP-Input/Generated-Table-Labels.pkl'
f = open(file_name,'wb')
pickle.dump(generated_examples_labels, f)
f.close() """

In [None]:
# Read the pickled generated tables back into `tables` and echo them as the cell output.
# NOTE(review): pickle.load runs arbitrary code from the file -- only load files you produced yourself.
with open('GKP-Input/Generated-Tables.pkl', 'rb') as f:
    tables = pickle.load(f)
tables

In [None]:
# Read the pickled labels of the generated tables and echo them as the cell output.
# NOTE(review): this rebinds `labels`, the same name the table-generation loop iterates
# over -- confirm the overwrite is intended before re-running cells out of order.
with open('GKP-Input/Generated-Table-Labels.pkl', 'rb') as f:
    labels = pickle.load(f)
labels

ONE SHOT

In [529]:
# One-shot: role only. One random training table is shown as a user/assistant
# demonstration pair; the final task message also repeats the single-label constraint.
import random

nr = "one"
#prompt_name = "r4.1.DHM"
prompt_name = "r4.1.DHM.1"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        # Disabled variant of the demonstration prompt:
        #HumanMessage(content=f"Classify these table columns: {train_examples[index]}.Answer with ONLY ONE label from the provided label set for each Column!"),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}. Answer with ONLY ONE label from the provided label set for each Column!"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)
    

In [24]:
# One-shot: role + instructions (first wording, answers as "Column1: class").
import random

nr = "one"
prompt_name = "r+i"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    # Role, instructions, one demonstration pair, then the table to classify.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a class that best represents the meaning of all cells in the column. 4. Answer with the selected class for each columns with the format Column1: class."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [502]:
# One-shot: role + instructions (second wording, five numbered steps).
import random

nr = "one"
prompt_name = "r+i2"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    # Role, instructions, one demonstration pair, then the table to classify.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [78]:
# One-shot: role + "Let's think step by step." (both sent as system messages).
import random

nr = "one"
prompt_name = "r+s_b_s"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    # Role, step-by-step cue, one demonstration pair, then the table to classify.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [569]:
# One-shot: role + instructions (second wording) + "Let's think step by step.",
# all three sent as system messages.
import random

nr = "one"
prompt_name = "r+i+s_b_s.2"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    # Role, instructions, step-by-step cue, one demonstration pair, then the task.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# One-shot: role + instructions (second wording), with "Let's think step by step:"
# prefixed to both the demonstration prompt and the task prompt (user messages).
import random

nr = "one"
prompt_name = "r+i2+SbS"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content=f"Let's think step by step: Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Let's think step by step: Classify these table columns: {example}"),
    ]

    res = chat(messages)
    preds.append(res.content)

    # Per-table trace of the raw reply and the full conversation that produced it.
    print(res)
    print(messages)

In [None]:
# One-shot: role + instructions (second wording) + motivation.
# The motivation sentence is the third system message.
import random

nr = "one"
prompt_name = "r+i2+m"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# One-shot: role + instructions (second wording) + motivation.
# Variant "m2": the motivation sentence is a user message placed before the demonstration.
import random

nr = "one"
prompt_name = "r+i2+m2"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content="Your answer is very important. Take your time and think well before answering!"),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)
    

In [43]:
# One-shot: role + instructions (second wording) + motivation.
# Variant "m3": the motivation sentence is a user message placed between the
# demonstration pair and the task message.
import random

nr = "one"
prompt_name = "r+i2+m3"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content="Your answer is very important. Take your time and think well before answering!"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)
    

In [228]:
# One-shot: role + instructions (second wording) + motivation.
# Variant "m4": the motivation sentence is the very last user message, after the task.
import random

nr = "one"
prompt_name = "r+i2+m4"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
        HumanMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)
    

In [370]:
# One-shot: role + instructions (second wording) + context.
# The CONTEXT text is the third system message (no motivation sentence is sent here).
import random

nr = "one"
prompt_name = "r+i2+c-f"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [387]:
# One-shot: role + instructions (second wording) + context.
# Variant ".1": the CONTEXT text is a user message placed before the demonstration
# (no motivation sentence is sent here).
import random

nr = "one"
prompt_name = "r+i2+c-f.1"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [861]:
# One-shot: role + short context (one-sentence CONTEXT text, sent as a system message).
import random

nr = "one"
prompt_name = "r+c"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [216]:
# One-shot: role + short context.
# Variant ".1": the one-sentence CONTEXT text is sent as a user message.
import random

nr = "one"
prompt_name = "r+c.1"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        HumanMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [242]:
# One-shot: role + instructions (second wording) + context.
# All three are system messages; no motivation sentence is sent in this cell.
import random

nr = "one"
prompt_name = "r+i2+c"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [252]:
# One-shot: role + instructions (second wording) + context.
# Variant ".2": the CONTEXT text is a user message placed before the demonstration;
# no motivation sentence is sent in this cell.
import random

nr = "one"
prompt_name = "r+i2+c.2"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [263]:
# One-shot: role + instructions (second wording) + context.
# Variant ".3": the CONTEXT text is a user message placed between the demonstration
# pair and the task message; no motivation sentence is sent in this cell.
import random

nr = "one"
prompt_name = "r+i2+c.3"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [1032]:
# One-shot: role + demonstration given as context.
# The demonstration prompt is a SYSTEM message prefixed with "CONTEXT:", followed
# directly by the assistant message holding its labels.
import random

nr = "one"
prompt_name = "r+ctest"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content=f"CONTEXT: Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [11]:
# One-shot: role + demonstration given as context, variant 2.
# Same as "r+ctest" except the demonstration system message has no "CONTEXT:" prefix.
import random

nr = "one"
prompt_name = "r+ctest.2"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [1048]:
# One-shot: role + instructions (second wording) + context + motivation.
# All four are system messages, in that order.
import random

nr = "one"
prompt_name = "r+i2+c1+m"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [None]:
# One-shot: role + instructions (second wording) + context + motivation, variant ".2".
# Here the context system message has no "CONTEXT:" prefix, and the motivation
# sentence is a user message placed between the demonstration pair and the task.
import random

nr = "one"
prompt_name = "r+i2+c1+m.2"

preds = []
for example in examples:
    # Pick the demonstration uniformly at random (a new draw for every test table).
    index = random.randint(0, len(train_examples) - 1)

    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content="Your answer is very important. Take your time and think well before answering!"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    # Keep only the text of the reply.
    res = chat(messages)
    preds.append(res.content)

In [276]:
# Load the inputs for the "GKP" prompt cells. Those cells index train_tables and
# train_labels with the same random position, so the two objects are presumably
# parallel sequences of equal length — TODO confirm.
# NOTE(review): pickle.load can execute arbitrary code while deserialising; only
# load these files if they come from a trusted source.
with open('GKP-Input/Generated-Tables.pkl', "rb") as f:
    train_tables = pickle.load(f)
with open('GKP-Input/Generated-Table-Labels.pkl', "rb") as f:
    train_labels = pickle.load(f)

In [17]:
# Prompt "r+i2+c1+m(GKP)": role + instructions + context + motivation as system
# messages, then one generated table (GKP) in place of a labelled demonstration.
import random

nr = "one"
prompt_name = "r+i2+c1+m(GKP)"

preds = []
for example in examples:
    # Fixed system preamble.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content=f"CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Here `index` addresses train_tables / train_labels, not train_examples.
    index = random.randrange(len(train_tables))
    messages += [
        HumanMessage(content=f"This is an example of a table that includes a column with the label '{train_labels[index]}':\n{train_tables[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    res = chat(messages)
    preds.append(res.content)

In [19]:
# One-shot prompt "r+i2+c1+m+GKP": system preamble (role, instructions, context,
# motivation), one generated table (GKP), one labelled demonstration, then the query.
import random

nr = "one"
prompt_name = "r+i2+c1+m+GKP"

preds = []
for example in examples:
    # Fixed system preamble.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content=f"CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Generated-knowledge table, drawn independently of the demonstration below.
    index_gen = random.randrange(len(train_tables))
    messages.append(HumanMessage(content=f"This is an example of a table that includes a column with the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}"))

    # Labelled demonstration followed by the item under test.
    index = random.randrange(len(train_examples))
    messages += [
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    res = chat(messages)
    preds.append(res.content)

In [277]:
# Prompt "r+i2+c1+m+GKP.1": role + instructions as system messages; context,
# generated table (GKP), query and motivation as human messages, in that order.
# This variant includes no labelled demonstration from train_examples.
import random

nr = "one"
prompt_name = "r+i2+c1+m+GKP.1"

preds = []
for example in examples:
    # Two system messages, then the context as a human message.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content=f"CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
    ]

    # Generated table, query, and the motivation sentence as the final turn.
    index_gen = random.randrange(len(train_tables))
    messages += [
        HumanMessage(content=f"This is an example of a table that includes a column with the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
        HumanMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    res = chat(messages)
    preds.append(res.content)

In [288]:
# One-shot prompt "r+i2+c1+m+GKP.1.1": role + instructions as system messages;
# context, generated table (GKP), one labelled demonstration, query and
# motivation follow, with the motivation sentence as the final human turn.
import random

nr = "one"
prompt_name = "r+i2+c1+m+GKP.1.1"

preds = []
for example in examples:
    # Two system messages, then the context as a human message.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content=f"CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
    ]

    # Generated-knowledge table (drawn first, so the random sequence is unchanged).
    index_gen = random.randrange(len(train_tables))
    messages.append(HumanMessage(content=f"This is an example of a table that includes a column with the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}"))

    # Labelled demonstration, query, then the motivation sentence.
    index = random.randrange(len(train_examples))
    messages += [
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
        HumanMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    res = chat(messages)
    preds.append(res.content)

In [298]:
# One-shot prompt "r+i2+m4+GKP": role ("annotate" wording) + instructions as
# system messages, one generated table (GKP), one labelled demonstration, the
# query, and finally the motivation sentence as a human message.
import random

nr = "one"
prompt_name = "r+i2+m4+GKP"

preds = []
for example in examples:
    # System preamble: role and instructions only.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
    ]

    # Generated-knowledge table.
    index_gen = random.randrange(len(train_tables))
    messages.append(HumanMessage(content=f"This is an example of a table that includes a column with the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}"))

    # Labelled demonstration, query, motivation (motivation is the last turn).
    index = random.randrange(len(train_examples))
    messages += [
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
        HumanMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    res = chat(messages)
    preds.append(res.content)
    

In [408]:
# One-shot prompt "r+i2+m4.1+GKP": same building blocks as "r+i2+m4+GKP", but
# the motivation sentence is placed before the query instead of after it.
import random

nr = "one"
prompt_name = "r+i2+m4.1+GKP"

preds = []
for example in examples:
    # System preamble: role ("annotate" wording) and instructions.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
    ]

    # Generated-knowledge table.
    index_gen = random.randrange(len(train_tables))
    messages.append(HumanMessage(content=f"This is an example of a table that includes a column with the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}"))

    # Labelled demonstration, motivation, then the query as the final turn.
    index = random.randrange(len(train_examples))
    messages += [
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content="Your answer is very important. Take your time and think well before answering!"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]

    res = chat(messages)
    preds.append(res.content)
    

FIVE SHOT

In [644]:
# Five-shot prompt "r4.1.DHM.1": a single role system message ("annotate"
# wording), five labelled demonstrations, then the query.
import random

nr = "five"
# Earlier variant name, kept for reference:
#prompt_name = "r4.1.DHM"
prompt_name = "r4.1.DHM.1"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
    ]

    # Each demonstration is drawn independently, so the same entry of
    # train_examples can appear more than once in a prompt.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    # Alternative query with a trailing reminder, kept for reference:
    #messages.append(HumanMessage(content=f"Classify these table columns: {example}. Answer with ONLY ONE label from the provided label set for each Column!"))
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)

In [792]:
# Five-shot prompt "r6": a single role system message ("THE BEST Data
# Scientist" wording), five labelled demonstrations, then the query.
import random

nr = "five"
# Variant name that goes with the commented-out role message below:
#prompt_name = "r5"
prompt_name = "r6"

preds = []
for example in examples:
    # Alternative role message, kept for reference:
    #messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages = [
        SystemMessage(content=f"You are THE BEST Data Scientist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)

In [None]:
# Five-shot prompt "r6+i2": role ("THE BEST Data Scientist" wording) +
# instructions as system messages, five labelled demonstrations, then the query.
import random

nr = "five"
# Other variant names, kept for reference:
#prompt_name = "r+i2"
#prompt_name = "r5+i2"
prompt_name = "r6+i2"

preds = []
for example in examples:
    # Alternative role messages, kept for reference:
    #messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    #messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages = [
        SystemMessage(content=f"You are THE BEST Data Scientist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)

In [None]:
# Five-shot prompt "r5+i+s_b_s.2": role + instructions + "Let's think step by
# step." (all system messages), five labelled demonstrations, then the query.
import random

nr = "five"
# Other variant name, kept for reference:
#prompt_name = "r+i+s_b_s.2"
prompt_name = "r5+i+s_b_s.2"

preds = []
for example in examples:
    # Alternative role message ("annotate" wording), kept for reference:
    #messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# Five-shot prompt "r+i+s_b_s.3": role + instructions as system messages,
# "Let's think step by step." as a human message ahead of the demonstrations,
# five labelled demonstrations, then the query.
import random

nr = "five"
prompt_name = "r+i+s_b_s.3"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        # The step-by-step cue is a human turn in this variant.
        HumanMessage(content="Let's think step by step."),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# Five-shot prompt "r5+i2+m": role + instructions + motivation (all system
# messages), five labelled demonstrations, then the query.
import random

nr = "five"
# Other variant names, kept for reference:
#prompt_name = "r+i2+m"
prompt_name = "r5+i2+m"
#prompt_name = "r6+i2+m"

preds = []
for example in examples:
    # Alternative role messages, kept for reference:
    #messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    #messages.append(SystemMessage(content=f"You are THE BEST Data Scientist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# Five-shot prompt "r+i2+m.2.1": role ("annotate" wording) + instructions as
# system messages, five labelled demonstrations, the query, and the motivation
# sentence as the final human message.
import random

nr = "five"
prompt_name = "r+i2+m.2.1"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to annotate a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    # Query first, motivation after it.
    messages += [
        HumanMessage(content=f"Classify these table columns: {example}"),
        HumanMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# Five-shot prompt "r+c": role + short context sentence (both system messages),
# five labelled demonstrations, then the query.
import random

nr = "five"
prompt_name = "r+c"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content=f"CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content."),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)

In [None]:
# Five-shot prompt "r+ctest": role system message, then five demonstrations
# whose questions are sent as system messages prefixed with "CONTEXT:" (instead
# of human messages), then the query.
import random

nr = "five"
prompt_name = "r+ctest"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            SystemMessage(content=f"CONTEXT: Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)

In [None]:
# Five-shot prompt "r+i2+c": role + instructions as system messages, the
# context paragraph as a human message, five labelled demonstrations, then the query.
import random

nr = "five"
prompt_name = "r+i2+c"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        # Context is a human turn in this variant.
        HumanMessage(content=f"CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)

In [322]:
# Five-shot prompt "r+i2+c.1": role + instructions + context paragraph, all as
# system messages, five labelled demonstrations, then the query.
import random

nr = "five"
prompt_name = "r+i2+c.1"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        # Context is a system turn in this variant.
        SystemMessage(content=f"CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)

In [None]:
# Five-shot prompt "r+i2+c1+m": role + instructions + context + motivation (all
# system messages), five labelled demonstrations, then the query.
import random

nr = "five"
prompt_name = "r+i2+c1+m"

preds = []
for example in examples:
    # Fixed system preamble; only the label list is interpolated.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content=f"CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)

In [None]:
# Five-shot prompt "r+i2+ctest+m": role + instructions + motivation as system
# messages, then five demonstrations whose questions are system messages
# prefixed with "CONTEXT:", then the query.
import random

nr = "five"
prompt_name = "r+i2+ctest+m"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Five independent random draws; repeats are possible.
    for i in range(5):
        index = random.randrange(len(train_examples))
        messages += [
            SystemMessage(content=f"CONTEXT: Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)

In [None]:
# Five-shot prompt "r5+i+s_b_s.2+GKP": role + instructions + "Let's think step
# by step." as system messages; then five rounds, each consisting of one
# generated table (GKP) followed by one labelled demonstration; then the query.
import random

nr = "five"
prompt_name = "r5+i+s_b_s.2+GKP"

preds = []
for example in examples:
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
    ]

    # Draws are independent, so tables and demonstrations may repeat.
    for i in range(5):
        index_gen = random.randrange(len(train_tables))
        messages.append(HumanMessage(content=f"This is an example of a table that includes a column with the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}"))

        index = random.randrange(len(train_examples))
        # Fix: the demonstration question was sent as a SystemMessage here.
        # The non-GKP "r5+i+s_b_s.2" prompt uses a HumanMessage followed by
        # the AIMessage answer, and this message lacks the "CONTEXT:" prefix
        # of the "ctest" variants, so the human role is used.
        messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
# Prompt variant: role + instructions + "step by step" (sent as a human
# message in this variant) + generated-knowledge tables shown next to each
# demonstration.
import random

nr="five"                            # number of demonstrations, used in the output file name
prompt_name = "r5+i+s_b_s.2.1+GKP"   # identifier of this prompt variant, used in the output file name

preds = []
for example in examples:
    # Preamble: role and numbered instructions as system messages, followed by
    # the step-by-step cue with the human role.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        HumanMessage(content="Let's think step by step."),
    ]

    # Each round adds one knowledge table and one solved demonstration; the two
    # are sampled independently (with replacement) from their own pools.
    # NOTE(review): the demonstration question is sent with the system role here,
    # unlike the final query, which uses the human role - confirm this is intended.
    for i in range(5):
        index_gen = random.randint(0, len(train_tables) - 1)
        knowledge = HumanMessage(content=f"This is an example of a table that includes a column with the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}")
        messages.append(knowledge)

        index = random.randint(0, len(train_examples) - 1)
        messages += [
            SystemMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    # The actual query comes last; the raw answer text is kept for later parsing.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
# Prompt variant: role + instructions + task context + motivation, combined
# with generated-knowledge tables shown next to each demonstration.
import random

nr="five"                       # number of demonstrations, used in the output file name
prompt_name = "r+i2+c1+m+GKP"   # identifier of this prompt variant, used in the output file name

preds = []
for example in examples:
    # Fixed system preamble: role, numbered instructions, context, motivation.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Each round adds one knowledge table and one solved demonstration; the two
    # are sampled independently (with replacement) from their own pools.
    # NOTE(review): the demonstration question is sent with the system role here,
    # unlike the final query, which uses the human role - confirm this is intended.
    for i in range(5):
        index_gen = random.randint(0, len(train_tables) - 1)
        knowledge = HumanMessage(content=f"This is an example of a table that includes a column with the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}")
        messages.append(knowledge)

        index = random.randint(0, len(train_examples) - 1)
        messages += [
            SystemMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    # The actual query comes last; the raw answer text is kept for later parsing.
    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
# Prompt variant: role + instructions + task context + generated-knowledge
# tables; in this variant the motivation sentence is sent as the final human
# message, after the query.
import random

nr="five"                         # number of demonstrations, used in the output file name
prompt_name = "r+i2+c1+m+GKP.1"   # identifier of this prompt variant, used in the output file name

preds = []
for example in examples:
    # Fixed system preamble: role, numbered instructions, context.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best captures the overall meaning of the cells in that column. 4.Respond with your selected label for each column, following the format: 'Column1: SelectedLabel, Column2: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Type Annotation is a sub-task of Table Annotation and involves categorizing each column in a table based on its content. Furthermore, classification involves assigning predefined categories or labels to data based on its features or attributes."),
    ]

    # Each round adds one knowledge table and one solved demonstration; the two
    # are sampled independently (with replacement) from their own pools.
    # NOTE(review): the demonstration question is sent with the system role here,
    # unlike the final query, which uses the human role - confirm this is intended.
    for i in range(5):
        index_gen = random.randint(0, len(train_tables) - 1)
        knowledge = HumanMessage(content=f"This is an example of a table that includes a column with the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}")
        messages.append(knowledge)

        index = random.randint(0, len(train_examples) - 1)
        messages += [
            SystemMessage(content=f"Classify these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    # Query first, motivation second - both with the human role.
    messages += [
        HumanMessage(content=f"Classify these table columns: {example}"),
        HumanMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    res = chat(messages)
    preds.append(res.content)

In [205]:
#zero-shot
# Persist the raw model answers so they can be re-evaluated without calling
# the API again. This layout writes straight into Predictions/<model_name>/.
file_name = f'Predictions/{model_name}/chat-table-{prompt_name}-{nr}-shot.pkl'
# Create the target folder if it does not exist yet, so a fresh checkout does
# not fail inside open().
os.makedirs(os.path.dirname(file_name), exist_ok=True)
# The context manager closes the handle even when pickling raises, which the
# manual open()/close() pair did not guarantee.
with open(file_name, 'wb') as f:
    pickle.dump(preds, f)

In [455]:
# Persist the raw model answers so they can be re-evaluated without calling
# the API again. This layout writes into Predictions/<model_name>/<nr>-shot/.
file_name = f'Predictions/{model_name}/{nr}-shot/chat-table-{prompt_name}-{nr}-shot.pkl'
# Create the target folder if it does not exist yet, so a fresh checkout does
# not fail inside open().
os.makedirs(os.path.dirname(file_name), exist_ok=True)
# The context manager closes the handle even when pickling raises, which the
# manual open()/close() pair did not guarantee.
with open(file_name, 'wb') as f:
    pickle.dump(preds, f)

## Evaluation

In [None]:
# Convert every raw model answer in `preds` into a flat list of labels,
# emitting exactly `table_number` entries per answer. Anything that cannot be
# mapped through `text_to_label` becomes the placeholder '-'.
predictions = []
i = 0  # running count of emitted entries; only used in the diagnostic message
for j, table_preds in enumerate(preds):

    # Number of entries expected for this answer.
    # presumably test[j][2] holds the gold labels of table j - TODO confirm
    table_number = len(test[j][2])

    # Keep only the text after the first "Class:" marker (up to the next
    # marker, if there is one).
    if "Class:" in table_preds:
        table_preds = table_preds.split("Class:")[1]

    # Pick how to cut the answer into per-column chunks. With ':' or '-' the
    # chunk before the first separator is skipped; a plain comma list has no
    # such leading chunk.
    if ":" in table_preds:
        separator, start = ":", 1
    elif "-" in table_preds:
        separator, start = "-", 1
    else:
        separator, start = ",", 0
    end = start + table_number

    col_preds = table_preds.split(separator)[start:end]

    for pred in col_preds:
        i += 1

        # Trim trailing material in a fixed order: rest of the line, the next
        # list item, a parenthesised remark, a trailing sentence.
        for delimiter in ("\n", ",", "(", "."):
            if delimiter in pred:
                pred = pred.split(delimiter)[0].strip()
        pred = pred.strip().lower()

        if pred not in text_to_label:
            print(f"For test example {i} out of label space prediction: {pred}")
            predictions.append('-')
        else:
            predictions.append(text_to_label[pred])

    # Pad with '-' when the answer contained fewer chunks than expected
    # (the range is empty when nothing is missing).
    for m in range(table_number - len(col_preds)):
        predictions.append('-')
        i += 1

In [None]:
# Show the first 15 parsed predictions as a quick sanity check.
predictions[:15]

### Calculate Precision, Recall, Macro-F1 and Micro-F1

In [451]:
def calculate_f1_scores(y_tests, y_preds, num_classes, types, ignore_label="-"):
    """Compute micro/macro F1, precision and recall from gold and predicted labels.

    A confusion matrix indexed as ``cm[predicted][gold]`` is built first; the
    per-class true positives, false positives and false negatives are read off
    it. Any ratio whose denominator is zero is reported as 0.

    Parameters
    ----------
    y_tests : sequence
        Gold labels.
    y_preds : sequence
        Predicted labels, aligned one-to-one with ``y_tests``.
    num_classes : int
        Side length of the confusion matrix; normally ``len(types)``.
    types : list
        Every label that can occur in ``y_tests`` or ``y_preds``. The position
        of a label in this list is its class index.
    ignore_label : hashable, default "-"
        Placeholder class that is left out of all aggregate scores and of the
        per-class report. Only a class with exactly this label is left out;
        when it is not in ``types`` every class is scored, so a real class is
        never dropped just because it is listed last.

    Returns
    -------
    list
        ``[evaluation, per_class_eval]`` where ``evaluation`` holds the keys
        "Micro-F1", "Macro-F1", "Precision" and "Recall" (the last two are
        macro averages) and ``per_class_eval`` maps each scored label to its
        "Precision", "Recall" and "F1".

    Raises
    ------
    ValueError
        If the two label sequences differ in length, or if a label is not
        contained in ``types``.
    """
    if len(y_tests) != len(y_preds):
        # A silent truncation here would hide misaligned predictions.
        raise ValueError(
            f"y_tests and y_preds must have the same length "
            f"({len(y_tests)} != {len(y_preds)})"
        )

    # Label -> class index; the first occurrence wins, like list.index().
    class_index = {}
    for position, label in enumerate(types):
        class_index.setdefault(label, position)

    try:
        gold = [class_index[y] for y in y_tests]
        predicted = [class_index[y] for y in y_preds]
    except KeyError as exc:
        raise ValueError(f"{exc.args[0]!r} is not in types") from exc

    cm = np.zeros(shape=(num_classes, num_classes))
    for pred_idx, gold_idx in zip(predicted, gold):
        cm[pred_idx][gold_idx] += 1

    tp = np.diag(cm).copy()
    fp = cm.sum(axis=1) - tp  # row j: predicted j although the gold label differs
    fn = cm.sum(axis=0) - tp  # column j: gold j although something else was predicted

    def _ratio(numerator, denominator):
        # Element-wise division that yields 0 instead of NaN (and a warning)
        # wherever the denominator is 0.
        result = np.zeros_like(numerator, dtype=float)
        np.divide(numerator, denominator, out=result, where=denominator > 0)
        return result

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    f1 = _ratio(2 * precision * recall, precision + recall)

    # Mask of the classes that take part in the aggregate scores.
    scored = np.ones(num_classes, dtype=bool)
    ignored = class_index.get(ignore_label)
    if ignored is not None and ignored < num_classes:
        scored[ignored] = False
    n_scored = int(scored.sum())

    if n_scored:
        macro_f1 = float(f1[scored].sum() / n_scored)
        p = float(precision[scored].sum() / n_scored)
        r = float(recall[scored].sum() / n_scored)
    else:
        macro_f1 = p = r = 0.0

    all_tp = tp[scored].sum()
    all_fp = fp[scored].sum()
    all_fn = fn[scored].sum()
    micro_denominator = all_tp + 0.5 * (all_fp + all_fn)
    micro_f1 = float(all_tp / micro_denominator) if micro_denominator > 0 else 0.0

    per_class_eval = {
        label: {
            "Precision": float(precision[idx]),
            "Recall": float(recall[idx]),
            "F1": float(f1[idx]),
        }
        for label, idx in class_index.items()
        if idx < num_classes and scored[idx]
    }

    evaluation = {
        "Micro-F1": micro_f1,
        "Macro-F1": macro_f1,
        "Precision": p,
        "Recall": r
    }

    return [evaluation, per_class_eval]


In [None]:
# Class list for the confusion matrix. calculate_f1_scores leaves the LAST
# entry of `types` out of the micro/macro scores, so the placeholder '-' must
# always be that last entry. Appending it only when it was predicted would
# drop an arbitrary real class whenever every prediction is in the label space.
# Sorting keeps class indices stable between kernel restarts (set order is not).
# NOTE(review): assumes all gold labels are strings (sortable) - confirm.
types = sorted(set(labels) - {"-"}) + ["-"]
evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)

In [None]:
# Aggregate scores: Micro-F1, Macro-F1 and macro-averaged Precision / Recall.
evaluation

In [None]:
# Precision / Recall / F1 for each individual label.
per_class_eval

## Error Analysis

In [None]:
# List every position where the parsed prediction disagrees with the gold
# label and display the total number of disagreements.
errors = 0
for i, predicted_label in enumerate(predictions):
    expected_label = labels[i]
    if predicted_label == expected_label:
        continue
    errors += 1
    print(f"Predicted as {predicted_label} when it was {expected_label}")
errors

### Re-load previous preds files

In [None]:
# Restore raw model answers stored by an earlier run (flat layout:
# Predictions/<model_name>/...). `model_name`, `prompt_name` and `nr` must be
# set to the run that should be loaded before executing this cell.
# NOTE(review): pickle.load can execute arbitrary code - only open files that
# this notebook produced itself.
with open(f'Predictions/{model_name}/chat-table-{prompt_name}-{nr}-shot.pkl', "rb") as f:
    preds = pickle.load(f)

In [112]:
# Restore raw model answers stored by an earlier run (per-shot layout:
# Predictions/<model_name>/<nr>-shot/...). `model_name`, `prompt_name` and `nr`
# must be set to the run that should be loaded before executing this cell.
# NOTE(review): pickle.load can execute arbitrary code - only open files that
# this notebook produced itself.
with open(f'Predictions/{model_name}/{nr}-shot/chat-table-{prompt_name}-{nr}-shot.pkl', "rb") as f:
    preds = pickle.load(f)

In [None]:
preds