In [1]:
import os
import re
import pickle
import numpy as np
from dotenv import dotenv_values
from langchain import PromptTemplate, LLMChain, OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage, AIMessage

In [2]:
# Read the OpenAI key from the project-level .env file once, keep it in a
# module-level name for explicit use, and export it through the process
# environment as well.
config = dotenv_values("../.env")
OPENAI_API_KEY = config["OPENAI_API_KEY"]
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [3]:
# Maps each label identifier (camelCase key) to the human-readable phrase that
# is written into prompt text (see the cells that build `labels_joined` and
# `train_example_labels`). Most phrases are the identifier split into words,
# but not all (e.g. "addressRegion" -> "region of address"), so the mapping is
# spelled out explicitly.
# NOTE(review): the keys look like schema.org property names -- confirm.
labels_to_text = {
    "datePublished": "date published",
    "isbn": "isbn",
    "numberOfPages": "number of pages",
    "worstRating": "worst rating",
    "priceCurrency": "price currency",
    "publisher": "publisher",
    "author": "author",
    "bookFormat": "book format",
    "inLanguage": "in language",
    "ratingValue": "rating value",
    "description": "description",
    "price": "price",
    "image": "image",
    "url": "url",
    "availability": "availability",
    "genre": "genre",
    "bestRating": "best rating",
    "itemCondition": "item condition",
    "review": "review",
    "startDate": "start date",
    "endDate": "end date",
    "location": "location",
    "validFrom": "valid from",
    "eventStatus": "event status",
    "eventAttendanceMode": "event attendance mode",
    "organizer": "organizer",
    "category": "category",
    "validThrough": "valid through",
    "telephone": "telephone",
    "duration": "duration",
    "email": "email",
    "streetAddress": "street address",
    "addressRegion": "region of address",
    "addressLocality": "locality of address",
    "priceRange": "price range",
    "postalCode": "postal code",
    "addressCountry": "address country",
    "faxNumber": "fax number",
    "dateCreated": "date created",
    "director": "director",
    "contentRating": "content rating",
    "actor": "actor",
    "inAlbum": "in album",
    "byArtist": "by artist",
    "jobTitle": "job title",
    "birthDate": "birth date",
    "gender": "gender",
    "nationality": "nationality",
    "weight": "weight",
    "releaseDate": "release date",
    "copyrightYear": "copyright year",
    "bookEdition": "book edition",
    "headline": "headline",
    "text": "text",
    "address": "address",
    "performer": "performer",
    "longitude": "longitude",
    "latitude": "latitude",
    "currenciesAccepted": "currencies accepted",
    "checkoutTime": "checkout time",
    "checkInTime": "check-in time",
    "amenityFeature": "amenity feature",
    "paymentAccepted": "payment accepted",
    "availableLanguage": "available language",
    "brand": "brand",
    "openingHours": "opening hours",
    "datePosted": "date posted",
    "employmentType": "employment type",
    "hiringOrganization": "hiring organization",
    "opens": "opens",
    "dayOfWeek": "day of week",
    "closes": "closes",
    "productionCompany": "production company",
    "countryOfOrigin": "country of origin",
    "numTracks": "number of tracks",
    "track": "track",
    "givenName": "given name",
    "familyName": "family name",
    "birthPlace": "birth place",
    "honorificSuffix": "honorific suffix",
    "alumniOf": "alumni of",
    "deathDate": "death date",
    "measurements": "measurements",
    "unitCode": "unit code",
    "productID": "product ID",
    "unitText": "unit text",
    "availableDeliveryMethod": "available delivery method",
    "model": "model",
    "manufacturer": "manufacturer",
    "color": "color",
    "gtin": "gtin",
    "material": "material",
    "servingSize": "serving size",
    "recipeInstructions": "recipe instructions",
    "recipeIngredient": "recipe ingredient",
    "cookTime": "cook time",
    "prepTime": "prep time",
    "totalTime": "total time",
    "nutrition": "nutrition",
    "recipeCategory": "recipe category",
    "recipeCuisine": "recipe cuisine",
    "cookingMethod": "cooking method",
    "suitableForDiet": "suitable for diet",
    "servesCuisine": "serves cuisine",
    "awayTeam": "away team",
    "homeTeam": "home team",
    "partOfSeries": "part of series",
    "episodeNumber": "episode number"
}

In [4]:
# Maps the human-readable phrase used in prompt text back to its label
# identifier (the reverse direction of the label -> phrase mapping defined in
# the previous cell).
#
# Fix: "price" previously mapped to "priceRange", which collided with the
# "price range" entry, so a predicted "price" was converted to the wrong label.
text_to_label = {
    "date published": "datePublished",
    "isbn": "isbn",
    "number of pages": "numberOfPages",
    "worst rating": "worstRating",
    "price currency": "priceCurrency",
    "publisher": "publisher",
    "author": "author",
    "book format": "bookFormat",
    "in language": "inLanguage",
    "rating value": "ratingValue",
    "description": "description",
    "price": "price",
    "image": "image",
    "url": "url",
    "availability": "availability",
    "genre": "genre",
    "best rating": "bestRating",
    "item condition": "itemCondition",
    "review": "review",
    "start date": "startDate",
    "end date": "endDate",
    "location": "location",
    "valid from": "validFrom",
    "event status": "eventStatus",
    "event attendance mode": "eventAttendanceMode",
    "organizer": "organizer",
    "category": "category",
    "valid through": "validThrough",
    "telephone": "telephone",
    "duration": "duration",
    "email": "email",
    "street address": "streetAddress",
    "region of address": "addressRegion",
    "locality of address": "addressLocality",
    "price range": "priceRange",
    "postal code": "postalCode",
    "address country": "addressCountry",
    "fax number": "faxNumber",
    "date created": "dateCreated",
    "director": "director",
    "content rating": "contentRating",
    "actor": "actor",
    "in album": "inAlbum",
    "by artist": "byArtist",
    "job title": "jobTitle",
    "birth date": "birthDate",
    "gender": "gender",
    "nationality": "nationality",
    "weight": "weight",
    "release date": "releaseDate",
    "copyright year": "copyrightYear",
    "book edition": "bookEdition",
    "headline": "headline",
    "text": "text",
    "address": "address",
    "performer": "performer",
    "longitude": "longitude",
    "latitude": "latitude",
    "currencies accepted": "currenciesAccepted",
    "checkout time": "checkoutTime",
    "check-in time": "checkInTime",
    "amenity feature": "amenityFeature",
    "payment accepted": "paymentAccepted",
    "available language": "availableLanguage",
    "brand": "brand",
    "opening hours": "openingHours",
    "date posted": "datePosted",
    "employment type": "employmentType",
    "hiring organization": "hiringOrganization",
    "opens": "opens",
    "day of week": "dayOfWeek",
    "closes": "closes",
    "production company": "productionCompany",
    "country of origin": "countryOfOrigin",
    "number of tracks": "numTracks",
    "track": "track",
    "given name": "givenName",
    "family name": "familyName",
    "birth place": "birthPlace",
    "honorific suffix": "honorificSuffix",
    "alumni of": "alumniOf",
    "death date": "deathDate",
    "measurements": "measurements",
    "unit code": "unitCode",
    "product ID": "productID",
    "unit text": "unitText",
    "available delivery method": "availableDeliveryMethod",
    "model": "model",
    "manufacturer": "manufacturer",
    "color": "color",
    "gtin": "gtin",
    "material": "material",
    "serving size": "servingSize",
    "recipe instructions": "recipeInstructions",
    "recipe ingredient": "recipeIngredient",
    "cook time": "cookTime",
    "prep time": "prepTime",
    "total time": "totalTime",
    "nutrition": "nutrition",
    "recipe category": "recipeCategory",
    "recipe cuisine": "recipeCuisine",
    "cooking method": "cookingMethod",
    "suitable for diet": "suitableForDiet",
    "serves cuisine": "servesCuisine",
    "away team": "awayTeam",
    "home team": "homeTeam",
    "part of series": "partOfSeries",
    "episode number": "episodeNumber"
}

# Guard against two phrases resolving to the same label identifier.
assert len(set(text_to_label.values())) == len(text_to_label), "duplicate label identifier in text_to_label"

## Load test (and training) set

In [5]:
# Load the pickled training and test tables.
# NOTE(review): pickle.load can execute arbitrary code -- only load files from
# a trusted source.
with open('sotabv2-cpa-train-table.pkl', "rb") as train_file:
    train = pickle.load(train_file)
with open('sotabv2-cpa-sample-test-table.pkl', "rb") as test_file:
    test = pickle.load(test_file)

# Entries are indexed positionally: [1] is the value later interpolated into
# the prompt, [2] is an iterable of label identifiers.
examples = [entry[1] for entry in test]
labels = [label for entry in test for label in entry[2]]

train_examples = [entry[1] for entry in train]
# One "Column N: <label text>" line per label, numbered from Column 2.
train_example_labels = [
    "\n".join(
        f"""Column {position}: {labels_to_text[label]}"""
        for position, label in enumerate(entry[2], start=2)
    ).strip()
    for entry in train
]

In [None]:
# Comma-separated label set that is interpolated into every prompt.
# sorted() pins the order: iterating a set of strings directly can yield a
# different order in every kernel session (string hashing is randomised per
# process), which would change the prompt text between otherwise identical runs.
labels_joined = ", ".join(labels_to_text[label] for label in sorted(set(labels)))
labels_joined

In [7]:
# Chat model used by the prompt cells in this notebook.
# temperature=0 is presumably chosen to keep replies as repeatable as possible.
model_name = 'gpt-3.5-turbo-1106'
chat = ChatOpenAI(
    model=model_name,
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

## Choose setup: zero-shot, one-shot or five-shot

ZERO SHOT CPA TABLE

In [50]:
# Zero-shot, ROLE-only prompt.
# Run names: "r" was annotated "two columns ...", "r1" is annotated "two or more ...".
nr = "zero"
# prompt_name = "r"
prompt_name = "r1"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}. Answer with ONLY ONE of the labels from the provided label-set! Notice, you must not classify Column 1!"

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [99]:
# Zero-shot, ROLE-only prompt "r2": the answer-format reminder is moved from
# the system message into the human message.
nr = "zero"
prompt_name = "r2"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}. Answer with ONLY ONE of the labels from the provided label-set for each column! Notice, you must not classify Column 1"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [175]:
# Zero-shot, ROLE-only prompt "r3": the task is phrased as predicting the
# relationship of the first column with the rest.
nr = "zero"
prompt_name = "r3"

role_prompt = f"You are a great Table Annotation Specialist and your task is to predict the relationship between the first column with the rest of the columns from a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}. Notice, do not annotate Column 1"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [194]:
# Zero-shot, ROLE-only prompt "r4": the role text itself carries the
# "must not annotate Column 1" note.
nr = "zero"
prompt_name = "r4"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}."),
    ]
    res = chat(messages)
    preds.append(res.content)

In [847]:
# Zero-shot, ROLE-only prompt "r5": the task is phrased as identifying the
# correlation of each column with the initial column.
nr = "zero"
prompt_name = "r5"

role_prompt = f"You are a great Table Annotation Specialist and your task is to identify the correlation of each column with the initial column in the table and label these relations with ONLY ONE of the following labels that are separated with a comma: {labels_joined}."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}."),
    ]
    res = chat(messages)
    preds.append(res.content)

In [212]:
# Zero-shot, ROLE + INSTRUCTIONS prompt "r+i": role text with the Column 1
# note, followed by the five-step instruction list.
nr = "zero"
prompt_name = "r+i"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
instructions_prompt = "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set for each column, and separate your responses with commas."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=instructions_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [234]:
# Zero-shot, ROLE + INSTRUCTIONS prompt "r2+i2": role text without the
# Column 1 note, followed by the six-step instruction list (which carries the
# note as step 4).
nr = "zero"
prompt_name = "r2+i2"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
instructions_prompt = "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=instructions_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [249]:
# Zero-shot, ROLE + INSTRUCTIONS prompt "r+i2": role text with the Column 1
# note, followed by the six-step instruction list.
nr = "zero"
prompt_name = "r+i2"  # originally annotated "1+2"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
instructions_prompt = "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=instructions_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [None]:
# Zero-shot, ROLE + INSTRUCTIONS + step-by-step prompt "r2+i2+s_b_s":
# three separate system messages, with the step-by-step cue last.
nr = "zero"
prompt_name = "r2+i2+s_b_s"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
instructions_prompt = "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."
step_by_step_prompt = "Let's think step by step."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=instructions_prompt),
        SystemMessage(content=step_by_step_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [13]:
# Zero-shot, ROLE + step-by-step + INSTRUCTIONS prompt "r2+s_b_s+i2":
# three separate system messages, with the step-by-step cue placed before the
# instruction list.
nr = "zero"
prompt_name = "r2+s_b_s+i2"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
step_by_step_prompt = "Let's think step by step."
instructions_prompt = "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=step_by_step_prompt),
        SystemMessage(content=instructions_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [367]:
# Zero-shot prompt "r2+i2+s_b_s.1": ROLE, then a single system message that
# opens with the step-by-step cue and continues with the instruction list.
# NOTE(review): the original header read "ROLE + INSTRUCTIONS + StepByStep",
# but in the message text the step-by-step cue comes first.
nr = "zero"
prompt_name = "r2+i2+s_b_s.1"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
step_and_instructions_prompt = "Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=step_and_instructions_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [456]:
# Zero-shot, ROLE + INSTRUCTIONS + MOTIVATION prompt "r2+i2+m":
# the motivation sentence is sent as a third system message.
nr = "zero"
prompt_name = "r2+i2+m"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
instructions_prompt = "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."
motivation_prompt = "Your answer is very important. Take your time and think well before answering!"

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=instructions_prompt),
        SystemMessage(content=motivation_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [24]:
# Zero-shot, ROLE + INSTRUCTIONS + MOTIVATION prompt "r2+i2+m.1":
# the motivation sentence is sent as a second human message, after the table.
nr = "zero"
prompt_name = "r2+i2+m.1"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
instructions_prompt = "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."
motivation_prompt = "Your answer is very important. Take your time and think well before answering!"

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=instructions_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
        HumanMessage(content=motivation_prompt),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [474]:
# Zero-shot, ROLE + INSTRUCTIONS + step-by-step + MOTIVATION prompt
# "r2+i2+s_b_s+m": four separate system messages.
nr = "zero"
prompt_name = "r2+i2+s_b_s+m"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
instructions_prompt = "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."
step_by_step_prompt = "Let's think step by step."
motivation_prompt = "Your answer is very important. Take your time and think well before answering!"

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=instructions_prompt),
        SystemMessage(content=step_by_step_prompt),
        SystemMessage(content=motivation_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [498]:
# Zero-shot, ROLE + INSTRUCTIONS + step-by-step + MOTIVATION prompt, second
# layout ("P2"): the step-by-step cue and the motivation sentence share one
# system message.
# NOTE(review): the run name starts with "r", yet the role text is the one
# other cells call "r2" (no Column 1 note) -- confirm the intended name.
nr = "zero"
prompt_name = "r+i2+s_b_s+m.1"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
instructions_prompt = "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."
step_and_motivation_prompt = "Let's think step by step. Your answer is very important. Take your time and think well before answering!"

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=instructions_prompt),
        SystemMessage(content=step_and_motivation_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [None]:
# Zero-shot, ROLE + step-by-step + MOTIVATION + INSTRUCTIONS prompt
# "r+s_b_s+m+i2": cue, motivation and instruction list share one system message.
# NOTE(review): the motivation part here is only "Your answer is very
# important." -- the "Take your time ..." sentence used elsewhere is absent.
nr = "zero"
prompt_name = "r+s_b_s+m+i2"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
combined_prompt = "Let's think step by step. Your answer is very important. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=combined_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [547]:
# Zero-shot, ROLE + INSTRUCTIONS prompt where the step-by-step cue and the
# motivation sentence are appended to the instruction list as steps 7 and 8.
# Earlier run "r+i2(sbs-m)" used the same list without the sentence
# "Do not explain your answer." in step 6 and with "5.Answer" (no space).
nr = "zero"
# prompt_name = "r+i2(sbs-m)"
prompt_name = "r+i2(sbs-m)2"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
instructions_prompt = "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5. Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. Do not explain your answer. 7. Let's think step by step. 8. Note, your answer is very important. Take your time and think well before answering!"

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=instructions_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [562]:
# Zero-shot, ROLE + INSTRUCTIONS prompt "r+i2(m)": the motivation sentence is
# appended to the instruction list as step 7.
nr = "zero"
prompt_name = "r+i2(m)"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
instructions_prompt = "Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=instructions_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [596]:
# Zero-shot, ROLE + MOTIVATION prompt.
# Run "r2+m" (active) uses the role text that includes the "must not annotate
# Column 1" note; earlier run "r1+m" used the role text without that note.
nr = "zero"
# prompt_name = "r1+m"
prompt_name = "r2+m"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
motivation_prompt = "Your answer is very important. Take your time and think well before answering!"

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=motivation_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [712]:
# Zero-shot, ROLE + MOTIVATION + INSTRUCTIONS prompt "r+m+i2": the motivation
# sentence opens the same system message that carries the instruction list.
nr = "zero"
prompt_name = "r+m+i2"

role_prompt = f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."
motivation_and_instructions_prompt = "Your answer is very important. Take your time and think well before answering! Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."

# One chat request per entry of `examples`; the raw reply text is collected in `preds`.
preds = []
for example in examples:
    messages = [
        SystemMessage(content=role_prompt),
        SystemMessage(content=motivation_and_instructions_prompt),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)
    

In [820]:
# Prompt variant without a demonstration: role, then a short task-context sentence.
nr = "zero"
prompt_name = "r+c"

preds = []
for example in examples:
    # One conversation per table: role and context as system messages, then the table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns."),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    # Collect the content of each reply, one entry per table.
    preds.append(res.content)
    

In [876]:
# Prompt variant without a demonstration: role, then the extended task-context paragraph.
nr = "zero"
prompt_name = "r+c1"

preds = []
for example in examples:
    # One conversation per table: role and context as system messages, then the table.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    # Collect the content of each reply, one entry per table.
    preds.append(res.content)
    

In [889]:
# Prompt variant without a demonstration: role, instructions (motivation as step 7), context.
nr = "zero"
prompt_name = "r+i2(m)+c1"

preds = []
for example in examples:
    # One conversation per table: three system messages, then the table to annotate.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    # Collect the content of each reply, one entry per table.
    preds.append(res.content)
    

In [905]:
# Prompt variant without a demonstration: role, step-by-step cue fused with instructions, context.
nr = "zero"
prompt_name = "r2+i2+s_b_s.1+c1"

preds = []
for example in examples:
    # One conversation per table: three system messages, then the table to annotate.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    # Collect the content of each reply, one entry per table.
    preds.append(res.content)

In [12]:
# ROLE + INSTRUCTIONS + CONTEXT
# (No step-by-step cue is part of this variant; the prompt is sent without a demonstration.)
nr="zero"
prompt_name = "r2+i2+c1"

preds = []
for example in examples:
    messages = []

    # Role: states the task and the allowed label set.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    # Numbered instructions, including the expected answer format.
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    # CONTEXT is background guidance, so it is sent as a system message, exactly as in
    # the other "+c1" prompt variants. Sending it as a human turn would put two
    # consecutive user messages in front of the model and make this variant
    # incomparable with its siblings.
    messages.append(SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."))

    # The table to annotate is the only human turn.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    # Collect the content of each reply, one entry per table.
    preds.append(res.content)

In [10]:
# ROLE + INSTRUCTIONS(MOTIVATION) + CONTEXT + EXAMPLE
# The context message carries a small worked example of the task.
nr="zero"
prompt_name = "r+i2(m)+c.example"


preds = []
for example in examples:
    messages = []

    # Role: states the task and the allowed label set.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    # Numbered instructions with the motivation sentence as step 7.
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"))
    # Adjacent string literals are concatenated with nothing in between, so each
    # segment ends with an explicit space; without it the sentences run together
    # ("Dog.Now we check", "Column 1Answer:"). The "Columm1" typo is corrected too.
    messages.append(SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, here is an example how you could solve a CPA task: 'Classify the relationship between these table columns: Column1: Dog, Cat, Dog.  Column2: lis, moli, brauni. ' "
                                  "First we check Column1: Dog, Cat, Dog. "
                                  "Now we check Column2: lis, moli, brauni. Analyze Column 2 in relation to Column 1. Predict the relation between Column 2 and Column 1. "
                                  "Answer: Column 2: pet names"))

    # The table to annotate is the only human turn.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    # Collect the content of each reply, one entry per table.
    preds.append(res.content)

In [12]:
# Build the tables used for the "Generated Knowledge Prompt": five generated tables
# per label, with the label each table was requested for stored in a parallel list.
import openai

generated_examples = []
generated_examples_labels = []

for label in labels:
    for table_number in range(1, 6):
        prompt = f"Generate 1 table with 4 random columns and 5 rows. Include one column about {label} and one of the other columns to be related to the {label}. Please return only the values, no need to explain."

        # The whole request is a single system message.
        messages = [{"role": "system", "content": prompt}]
        response = openai.ChatCompletion.create(model='gpt-3.5-turbo-0301', messages=messages)

        # Keep the stripped text of the first choice and remember which label it belongs to.
        generated_content = response['choices'][0]['message']['content'].strip()
        generated_examples.append(generated_content)
        generated_examples_labels.append(label)


In [13]:
# Disabled cell: the statements below sit inside a string literal, so nothing is executed.
# If re-enabled, they would pickle `generated_examples` to GKP-Input/Generated-Tables.pkl.
""" file_name=f'GKP-Input/Generated-Tables.pkl'
f = open(file_name,'wb')
pickle.dump(generated_examples, f)
f.close() """

In [14]:
# Disabled cell: the statements below sit inside a string literal, so nothing is executed.
# If re-enabled, they would pickle `generated_examples_labels` to GKP-Input/Generated-Table-Labels.pkl.
""" file_name=f'GKP-Input/Generated-Table-Labels.pkl'
f = open(file_name,'wb')
pickle.dump(generated_examples_labels, f)
f.close() """

In [None]:
# Read the pickled generated tables back from disk and show them as the cell output.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this notebook.
with open("GKP-Input/Generated-Tables.pkl", mode="rb") as f:
    tables = pickle.load(f)
tables

In [None]:
# Read the pickled per-table labels back from disk and show them as the cell output.
# NOTE(review): this rebinds `labels`, the same name the table-generation loop iterates
# (`for label in labels`); re-running that loop after this cell would iterate the loaded
# list instead. Confirm this is intended.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this notebook.
with open("GKP-Input/Generated-Table-Labels.pkl", mode="rb") as f:
    labels = pickle.load(f)
labels

ONE SHOT CPA TABLE

In [69]:
import random
# One-shot prompt variant: role (with the Column 1 notice) + first instruction set,
# plus one randomly drawn training table as demonstration.

nr = "one"
prompt_name = "r1+i"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}. Notice, you must not classify Column 1!"),
        SystemMessage(content="Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. 4. Answer with only one selected relationship for each column with the format Column2: relationship. Don't return any relationship for the first column! 5. Answer only with labels from the provided label set!"),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo-1106 in organization org-JnRe4IF9kM1kFkJzhKlurHyV on tokens per min (TPM): Limit 60000, Used 58585, Requested 1711. Please try again in 296ms. Visit https://platform.openai.com/account/rate-limits to learn more..


In [627]:
# One-shot prompt variant: role only ("two or more columns" wording),
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r2"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [612]:
# One-shot prompt variant: role only ("predict ... first column with the rest" wording),
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r3"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to predict the relationship between the first column with the rest of the columns from a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [303]:
# One-shot prompt variant: role only (wording that forbids annotating Column 1),
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r4"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [262]:
# One-shot prompt variant: role + numbered instructions,
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r2+i2"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [385]:
# One-shot prompt variant: role, numbered instructions, then a separate step-by-step cue,
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r2+i2+s_b_s"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [28]:
# One-shot prompt variant: role, a separate step-by-step cue, then numbered instructions,
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r2+s_b_s+i2"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [401]:
# One-shot prompt variant: role, then the step-by-step cue fused into the instructions message,
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r2+i2+s_b_s.1"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [657]:
# One-shot prompt variant: role, numbered instructions, then a separate motivation message,
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r2+i2+m"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [696]:
# One-shot prompt variant: role, numbered instructions, step-by-step cue and motivation
# as four separate system messages, plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r2+i2+s_b_s+m"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [727]:
# One-shot prompt variant: role, then one message combining the step-by-step cue,
# motivation and numbered instructions, plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r+s_b_s+m+i2"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step. Your answer is very important. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [744]:
# One-shot prompt variant: role + numbered instructions with the motivation as step 7,
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r+i2(m)"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [943]:
# One-shot prompt variant: role + extended task-context paragraph,
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r+c1"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: Bad gateway. {"error":{"code":502,"message":"Bad gateway.","param":null,"type":"cf_bad_gateway"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} {'Date': 'Fri, 26 Jan 2024 19:02:44 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '84bb0300cee22c0f-STR', 'alt-svc': 'h3=":443"; ma=86400'}.


In [958]:
# One-shot prompt variant: role, instructions (motivation as step 7) and context,
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r+i2(m)+c1"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [974]:
# One-shot prompt variant: role, step-by-step cue fused with instructions, and context,
# plus one randomly drawn training table as demonstration.
import random

nr = "one"
prompt_name = "r2+i2+s_b_s.1+c1"

preds = []
for example in examples:
    # Pick the demonstration for this query uniformly from the training tables.
    index = random.randrange(len(train_examples))
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."),
        # Demonstration turn: training table as the question, its stored labels as the answer.
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        # The table that has to be annotated.
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)


In [27]:
#ROLE + INSTRUCTIONS(MOTIVATION) + CONTEXT + EXAMPLE
# One-shot run: every test table is preceded by a single solved demonstration
# picked at random from train_examples / train_example_labels. The context
# message additionally walks through a small hand-written example.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="one"
prompt_name = "r+i2(m)+c.example"

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set and the numbered
    # instructions (the motivation sentence is item 7).
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"))

    # Adjacent string literals are joined with nothing in between, so every
    # fragment except the last ends with an explicit space; otherwise the
    # sentences run together in the prompt ("...Dog.Now we check...",
    # "...Column 1Answer: ..."). The "Columm1" misspelling is corrected too.
    messages.append(SystemMessage(content="CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, here is an example how you could solve a CPA task: 'Classify the relationship between these table columns: Column1: Dog, Cat, Dog.  Column2: lis, moli, brauni. ' "
                                  "First we check Column1: Dog, Cat, Dog. "
                                  "Now we check Column2: lis, moli, brauni. Analyze Column 2 in relation to Column 1. Predict the relation between Column 2 and Column 1. "
                                  "Answer: Column 2: pet names"))

    # One random demonstration (question + expected answer).
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).


In [1002]:
#ROLE + (StepByStep)INSTRUCTIONS + EXAMPLE CONTEXT
# One-shot variant in which the demonstration question is delivered as a
# system "CONTEXT" message (instead of a human turn), followed by its answer
# as an assistant turn.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr = "one"
prompt_name = "r2+i2+s_b_s.1+cTEST"

preds = []
for example in examples:
    # System turns: role with the allowed label set and the instructions
    # prefixed with the step-by-step cue.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
    ]

    # Random demonstration: question as system context, answer as assistant
    # turn, then the table that actually has to be annotated.
    index = random.randint(0, len(train_examples)-1)
    messages.extend([
        SystemMessage(content=f"CONTEXT: Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ])

    # Keep only the text of the model reply as the prediction.
    res = chat(messages)
    preds.append(res.content)

In [1022]:
#ROLE + (StepByStep)INSTRUCTIONS + EXAMPLE CONTEXT  (2)
# One-shot variant in which the demonstration question and its answer are
# packed together into a single system "CONTEXT" message.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr = "one"
prompt_name = "r2+i2+s_b_s.1+cTEST2"

preds = []
for example in examples:
    # System turns: role with the allowed label set and the instructions
    # prefixed with the step-by-step cue.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
    ]

    # Random demonstration (table and labels in one system message), then the
    # table that actually has to be annotated.
    index = random.randint(0, len(train_examples)-1)
    messages.extend([
        SystemMessage(content=f"CONTEXT: Classify the relationship between these table columns: {train_examples[index]}, {train_example_labels[index]}"),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ])

    # Keep only the text of the model reply as the prediction.
    res = chat(messages)
    preds.append(res.content)

In [10]:
# Load the generated tables and their labels for the Generated Knowledge (GKP)
# prompt. The GKP cell indexes both objects with the same position, so they
# are presumably parallel sequences (verify against the file that wrote them).
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load pickles produced by this project.
with open('GKP-Input/Generated-Tables.pkl', "rb") as f:
    train_tables = pickle.load(f)
with open('GKP-Input/Generated-Table-Labels.pkl', "rb") as f:
    train_labels = pickle.load(f)

In [11]:
#Role + instructions + step by step + Generated Knowledge (GKP)
# One-shot run that additionally shows one generated table together with the
# label it illustrates before the solved demonstration.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr = "one"
prompt_name = "r2+i2+s_b_s+GKP"

preds = []
for example in examples:
    # System turns: role with the allowed label set, numbered instructions and
    # the step-by-step cue as its own message.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
    ]

    # Generated knowledge: one random generated table with its label.
    index_gen = random.randint(0, len(train_tables)-1)
    messages.append(
        HumanMessage(content=f"This is an example of a table that includes the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}")
    )

    # One random demonstration (question + expected answer), then the table
    # that actually has to be annotated.
    index = random.randint(0, len(train_examples)-1)
    messages.extend([
        HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify the relationship between these table columns: {example}"),
    ])

    # Keep only the text of the model reply as the prediction.
    res = chat(messages)
    preds.append(res.content)

FIVE SHOT CPA TABLE

In [None]:
import random
#Five-shot table + instructions + roles
# Five-shot run: every test table is preceded by five solved demonstrations
# drawn from train_examples / train_example_labels.

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r1+i"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set, then the numbered instructions.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}. Notice, you must not classify Column 1!"))
    messages.append(SystemMessage(content="Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. 4. Answer with only one selected relationship for each column with the format Column2: relationship. Don't return any relationship for the first column! 5. Answer only with labels from the provided label set!"))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#ROLE
# Five-shot run with the role message only ("predict the relationship" wording).
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r3"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # Single system turn: role with the allowed label set.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to predict the relationship between the first column with the rest of the columns from a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#ROLE
# Five-shot run with the role message only (wording that also forbids
# annotating Column 1).
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r4"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # Single system turn: role with the allowed label set.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#ROLE + INSTRUCTIONS
# Five-shot run: every test table is preceded by five solved demonstrations
# drawn from train_examples / train_example_labels.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r2+i2"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set, then the numbered instructions.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#ROLE + INSTRUCTIONS + StepByStep
# Five-shot run; the step-by-step cue is sent as its own system message.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r2+i2+s_b_s"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set, numbered instructions,
    # step-by-step cue.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Let's think step by step."))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + INSTRUCTIONS + StepByStep
# Five-shot run; the step-by-step cue is prepended to the instructions message.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r2+i2+s_b_s.1"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set, then step-by-step cue
    # plus numbered instructions in one message.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + INSTRUCTIONS + MOTIVATION
# Five-shot run; the motivation sentence is sent as its own system message.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r2+i2+m"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set, numbered instructions,
    # motivation.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + INSTRUCTIONS + MOTIVATION
# Five-shot run; the motivation sentence is sent as a human message placed
# before the demonstrations.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r2+i2+m.1"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set and numbered instructions;
    # the motivation follows as a human turn.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(HumanMessage(content="Your answer is very important. Take your time and think well before answering!"))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + INSTRUCTIONS + MOTIVATION
# Five-shot run; the motivation sentence is sent as a human message after the
# table to annotate, i.e. as the very last turn.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r2+i2+m.2"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set, then the numbered instructions.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # Table to annotate, then the motivation as the closing human turn; only
    # the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    messages.append(HumanMessage(content="Your answer is very important. Take your time and think well before answering!"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + INSTRUCTIONS + StepByStep + MOTIVATION
# Five-shot run; step-by-step cue and motivation are each sent as their own
# system message.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r2+i2+s_b_s+m"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set, numbered instructions,
    # step-by-step cue, motivation.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Let's think step by step."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + StepByStep + MOTIVATION + INSTRUCTIONS
# Five-shot run; step-by-step cue and motivation are prepended to the
# instructions inside a single system message.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r+s_b_s+m+i2"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set, then cue + motivation +
    # numbered instructions in one message.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your answer is very important. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + INSTRUCTIONS (motivation)
# Five-shot run; the motivation sentence is item 7 of the numbered instructions.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r+i2(m)"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set, then the numbered
    # instructions including the motivation.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + CONTEXT
# Five-shot run with role and task-context system messages (no instructions).
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r+c1"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set, then the task context.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + (StepByStep)INSTRUCTIONS + CONTEXT
# Five-shot run with role, step-by-step instructions and task context.
import random

# Run identifiers (presumably used when results are saved; not shown in this cell).
nr="five"
prompt_name = "r2+i2+s_b_s.1+c1"

# Never ask for more demonstrations than the training pool holds.
shot_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = []

    # System turns: role with the allowed label set, step-by-step cue plus
    # numbered instructions, task context.
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."))

    # random.sample draws distinct indices, so no demonstration is repeated
    # within one prompt (independent random.randint draws could repeat one).
    for index in random.sample(range(len(train_examples)), shot_count):
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    # The table to annotate goes last; only the reply text is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + INSTRUCTIONS(MOTIVATION) + CONTEXT + EXAMPLE
import random

# Both names are interpolated into the file name used by the "Save predictions" cell.
nr="five"
prompt_name = "r+i2(m)+c.example"

preds = []
for example in examples:
    # NOTE(review): the literals below are joined without any separator, so the
    # prompt reads "...brauni. 'First we check Columm1: Dog, Cat, Dog.Now we check
    # ... Column 1Answer: ..." (and "Columm1" is misspelled). Left exactly as is,
    # because editing the prompt text changes the experiment - confirm before fixing.
    cpa_context = (
        f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, here is an example how you could solve a CPA task: 'Classify the relationship between these table columns: Columm1: Dog, Cat, Dog.  Column2: lis, moli, brauni. '"
        "First we check Columm1: Dog, Cat, Dog."
        "Now we check Column2: lis, moli, brauni. Analyze Column 2 in relation to Column 1. Predict the relation between Column 2 and Column 1"
        "Answer: Column 2: pet names"
    )

    # System part of the prompt: role with label set, instructions ending in the
    # motivation sentence (item 7), and the context with a worked example.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"),
        SystemMessage(content=cpa_context),
    ]

    # Five demonstrations, each drawn independently (repeats are possible).
    for _ in range(5):
        index = random.randint(0, len(train_examples)-1)
        messages += [
            HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    # The table to annotate goes last; only the text of the reply is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    preds.append(chat(messages).content)


In [None]:
#Role + instructions + step by step + Generated Knowledge (GKP)
import random

# Both names are interpolated into the file name used by the "Save predictions" cell.
nr="five"
prompt_name = "r2+i2+s_b_s+GKP"

preds = []
for example in examples:
    # System part of the prompt: role with label set, instructions, step-by-step cue.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        SystemMessage(content="Let's think step by step."),
    ]

    # Each of the five rounds adds one table from train_tables together with its
    # label, followed by one question/answer demonstration. Both picks are
    # independent random draws, so repeats are possible.
    for _ in range(5):
        index_gen = random.randint(0, len(train_tables)-1)
        index = random.randint(0, len(train_examples)-1)
        messages.extend([
            HumanMessage(content=f"This is an example of a table that includes the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}"),
            HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ])

    # The table to annotate goes last; only the text of the reply is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    preds.append(chat(messages).content)

In [None]:
#Role + instructions + motivation + Generated Knowledge (GKP)
import random

# Both names are interpolated into the file name used by the "Save predictions" cell.
nr="five"
prompt_name = "r2+i2+m+GKP"

preds = []
for example in examples:
    # System part of the prompt: role with label set, instructions, motivation sentence.
    messages = [
        SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."),
        SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."),
        SystemMessage(content="Your answer is very important. Take your time and think well before answering!"),
    ]

    # Each of the five rounds adds one table from train_tables together with its
    # label, followed by one question/answer demonstration. Both picks are
    # independent random draws, so repeats are possible.
    for _ in range(5):
        index_gen = random.randint(0, len(train_tables)-1)
        index = random.randint(0, len(train_examples)-1)
        messages += [
            HumanMessage(content=f"This is an example of a table that includes the label '{train_labels[index_gen]}':\n{train_tables[index_gen]}"),
            HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"),
            AIMessage(content=f"{train_example_labels[index]}"),
        ]

    # The table to annotate goes last; only the text of the reply is kept.
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    preds.append(chat(messages).content)

Save predictions

In [None]:
# Show the raw model replies (one string per element of `examples`).
preds

In [49]:
# Persist the raw model replies so they can be re-loaded and re-evaluated later
# without querying the model again.
file_name=f'Predictions/{model_name}/table/{nr}-shot/chat-table-{prompt_name}-{nr}-shot.pkl'

# The folder depends on model_name and nr, so it may not exist on the first run
# of a new configuration; create it instead of failing after the (expensive)
# prediction loop has already finished.
os.makedirs(os.path.dirname(file_name), exist_ok=True)

# `with` closes the file even if pickling raises.
with open(file_name,'wb') as f:
    pickle.dump(preds,f)

## Evaluation

In [None]:
# Turn every raw model reply into a flat list with one label per expected
# column prediction; '-' marks an answer that is missing or not in the label space.
predictions = []
column_count = 0  # running number of column predictions, only used in the log message
for j, table_preds in enumerate(preds):

    # Expected number of predictions for table j.
    table_number = len(test[j][2])

    # Keep only what follows a "Class:" prefix, if the reply has one.
    if "Class:" in table_preds:
        table_preds = table_preds.split("Class:")[1]

    # Choose how to cut the reply into per-column chunks. With ':' or '-' the
    # chunk before the first separator is skipped (start = 1); a plain
    # comma-separated reply is used from its first chunk on.
    if ":" in table_preds:
        separator, start = ":", 1
    elif "-" in table_preds:
        separator, start = "-", 1
    else:
        separator, start = ",", 0
    end = start + table_number

    col_preds = table_preds.split(separator)[start:end]

    for pred in col_preds:
        column_count += 1

        # Cut the chunk at the first newline, comma, '(' and '.', in that order.
        for delimiter in ("\n", ",", "(", "."):
            if delimiter in pred:
                pred = pred.split(delimiter)[0].strip()
        pred = pred.strip().lower()

        label = text_to_label.get(pred) if pred in text_to_label else None
        if pred in text_to_label:
            predictions.append(label)
        else:
            print(f"For test example {column_count} out of label space prediction: {pred}")
            predictions.append('-')

    # Pad with '-' when the reply contained fewer answers than expected.
    missing = table_number - len(col_preds)
    if missing > 0:
        predictions.extend(['-'] * missing)
        column_count += missing

In [None]:
# Show the parsed labels ('-' marks out-of-label or missing answers).
predictions

### Calculate Precision, Recall, Macro-F1 and Micro-F1

In [14]:
def calculate_f1_scores(y_tests, y_preds, num_classes, types):
    """Compute micro-F1, macro-F1, precision and recall from gold and predicted labels.

    The LAST entry of ``types`` is treated as a placeholder class (this notebook
    uses "-" for out-of-label answers). It gets its own row/column in the
    confusion matrix, so confusions with it still count as false positives /
    false negatives of the other classes, but it is excluded from every
    aggregate score and from the per-class report.

    A class with an undefined score (no predictions and/or no gold instances)
    is scored 0 for that metric instead of NaN.

    Args:
        y_tests: gold labels, each one an element of ``types``.
        y_preds: predicted labels aligned with ``y_tests``, each one an element
            of ``types``.
        num_classes: size of the confusion matrix; expected to equal
            ``len(types)``.
        types: list of class labels, placeholder class last.

    Returns:
        ``[evaluation, per_class_eval]`` where ``evaluation`` has the keys
        "Micro-F1", "Macro-F1", "Precision", "Recall" (precision and recall are
        macro averages) and ``per_class_eval`` maps every label in
        ``types[:-1]`` to its "Precision", "Recall" and "F1".

    Raises:
        ValueError: if a label is not contained in ``types`` or if
            ``num_classes`` is smaller than 2.
    """
    if num_classes < 2:
        raise ValueError("num_classes must be at least 2 (one real class plus the placeholder)")

    # First-occurrence position of every label: same result as types.index(),
    # without rescanning the list for each sample.
    type_to_index = {}
    for position, label in enumerate(types):
        type_to_index.setdefault(label, position)

    try:
        true_idx = [type_to_index[y] for y in y_tests]
        pred_idx = [type_to_index[y] for y in y_preds]
    except KeyError as err:
        # Keep the exception type that types.index() used to raise.
        raise ValueError(f"{err.args[0]!r} is not in types") from None

    # Confusion matrix: rows are predicted classes, columns are gold classes.
    cm = np.zeros(shape=(num_classes, num_classes))
    for i in range(len(true_idx)):
        cm[pred_idx[i]][true_idx[i]] += 1

    class_p = []
    class_r = []
    class_f1s = []
    all_tp = 0.0
    all_fp = 0.0
    all_fn = 0.0

    for j in range(num_classes):
        tp = float(cm[j][j])
        fp = float(cm[j, :].sum()) - tp   # predicted j, gold is something else
        fn = float(cm[:, j].sum()) - tp   # gold j, predicted as something else

        # Guard every division so empty classes yield 0 rather than NaN plus a
        # numpy RuntimeWarning.
        precision = tp / (tp + fp) if tp + fp > 0 else 0.0
        recall = tp / (tp + fn) if tp + fn > 0 else 0.0
        if precision + recall > 0:
            f1 = 2 * precision * recall / (precision + recall)
        else:
            f1 = 0.0

        class_p.append(precision)
        class_r.append(recall)
        class_f1s.append(f1)

        # The placeholder (last) class does not contribute to micro counts.
        if j != num_classes - 1:
            all_tp += tp
            all_fp += fp
            all_fn += fn

    evaluated_classes = num_classes - 1
    macro_f1 = sum(class_f1s[:-1]) / evaluated_classes
    p = sum(class_p[:-1]) / evaluated_classes
    r = sum(class_r[:-1]) / evaluated_classes

    micro_denominator = all_tp + 0.5 * (all_fp + all_fn)
    micro_f1 = all_tp / micro_denominator if micro_denominator > 0 else 0.0

    per_class_eval = {}
    for index, t in enumerate(types[:-1]):
        per_class_eval[t] = {"Precision": class_p[index], "Recall": class_r[index], "F1": class_f1s[index]}

    evaluation = {
        "Micro-F1": micro_f1,
        "Macro-F1": macro_f1,
        "Precision": p,
        "Recall": r
    }

    return [evaluation, per_class_eval]


In [None]:
# Class list for the confusion matrix: every gold label followed by the "-"
# placeholder for out-of-label answers.
# - calculate_f1_scores always leaves the LAST entry of `types` out of its
#   averages and per-class report, so "-" must be appended unconditionally.
#   Appending it only when a "-" prediction exists would silently drop a real
#   class from the scores whenever every prediction is inside the label space.
# - sorted() gives a stable class order across runs (set order is not).
types = sorted(set(labels))
types.append("-")
evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)

In [None]:
# Aggregate scores: Micro-F1, Macro-F1, Precision and Recall.
evaluation

In [None]:
# Precision / Recall / F1 for every label in `types` except the last entry.
per_class_eval

## Error Analysis

In [None]:
# Print every position where the parsed prediction differs from the gold label;
# the total number of mismatches is shown as the cell output.
errors = 0
for position, predicted in enumerate(predictions):
    expected = labels[position]
    if predicted != expected:
        errors += 1
        print(f"Predicted as {predicted} when it was {expected}")
errors

### Re-load previous preds files

In [1110]:
# Re-load the predictions saved for the current model_name / nr / prompt_name.
# Only unpickle files you created yourself: unpickling can execute arbitrary code.
saved_preds_path = f'Predictions/{model_name}/table/{nr}-shot/chat-table-{prompt_name}-{nr}-shot.pkl'
with open(saved_preds_path, "rb") as f:
    preds = pickle.load(f)

In [8]:
# Re-load one specific earlier run (five-shot, prompt "r2+i2+m"). The path
# ignores the current nr / prompt_name values; only model_name is interpolated.
# Only unpickle files you created yourself: unpickling can execute arbitrary code.
fixed_preds_path = f'Predictions/{model_name}/table/five-shot/chat-table-r2+i2+m-five-shot.pkl'
with open(fixed_preds_path, "rb") as f:
    preds = pickle.load(f)

In [None]:
# Show the current content of `preds` (e.g. after re-loading a saved file).
preds