In [164]:
import os
import re
import pickle
import numpy as np
from dotenv import dotenv_values
from langchain import PromptTemplate, LLMChain, OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage, AIMessage

In [165]:
config = dotenv_values("../.env")
os.environ['OPENAI_API_KEY'] = config["OPENAI_API_KEY"]
OPENAI_API_KEY = config["OPENAI_API_KEY"]

In [166]:
labels_to_text = {
    "datePublished": "date published",
    "isbn": "isbn",
    "numberOfPages": "number of pages",
    "worstRating": "worst rating",
    "priceCurrency": "price currency",
    "publisher": "publisher",
    "author": "author",
    "bookFormat": "book format",
    "inLanguage": "in language",
    "ratingValue": "rating value",
    "description": "description",
    "price": "price",
    "image": "image",
    "url": "url",
    "availability": "availability",
    "genre": "genre",
    "bestRating": "best rating",
    "itemCondition": "item condition",
    "review": "review",
    "startDate": "start date",
    "endDate": "end date",
    "location": "location",
    "validFrom": "valid from",
    "eventStatus": "event status",
    "eventAttendanceMode": "event attendance mode",
    "organizer": "organizer",
    "category": "category",
    "validThrough": "valid through",
    "telephone": "telephone",
    "duration": "duration",
    "email": "email",
    "streetAddress": "street address",
    "addressRegion": "region of address",
    "addressLocality": "locality of address",
    "priceRange": "price range",
    "postalCode": "postal code",
    "addressCountry": "address country",
    "faxNumber": "fax number",
    "dateCreated": "date created",
    "director": "director",
    "contentRating": "content rating",
    "actor": "actor",
    "inAlbum": "in album",
    "byArtist": "by artist",
    "jobTitle": "job title",
    "birthDate": "birth date",
    "gender": "gender",
    "nationality": "nationality",
    "weight": "weight",
    "releaseDate": "release date",
    "copyrightYear": "copyright year",
    "bookEdition": "book edition",
    "headline": "headline",
    "text": "text",
    "address": "address",
    "performer": "performer",
    "longitude": "longitude",
    "latitude": "latitude",
    "currenciesAccepted": "currencies accepted",
    "checkoutTime": "checkout time",
    "checkInTime": "check-in time",
    "amenityFeature": "amenity feature",
    "paymentAccepted": "payment accepted",
    "availableLanguage": "available language",
    "brand": "brand",
    "openingHours": "opening hours",
    "datePosted": "date posted",
    "employmentType": "employment type",
    "hiringOrganization": "hiring organization",
    "opens": "opens",
    "dayOfWeek": "day of week",
    "closes": "closes",
    "productionCompany": "production company",
    "countryOfOrigin": "country of origin",
    "numTracks": "number of tracks",
    "track": "track",
    "givenName": "given name",
    "familyName": "family name",
    "birthPlace": "birth place",
    "honorificSuffix": "honorific suffix",
    "alumniOf": "alumni of",
    "deathDate": "death date",
    "measurements": "measurements",
    "unitCode": "unit code",
    "productID": "product ID",
    "unitText": "unit text",
    "availableDeliveryMethod": "available delivery method",
    "model": "model",
    "manufacturer": "manufacturer",
    "color": "color",
    "gtin": "gtin",
    "material": "material",
    "servingSize": "serving size",
    "recipeInstructions": "recipe instructions",
    "recipeIngredient": "recipe ingredient",
    "cookTime": "cook time",
    "prepTime": "prep time",
    "totalTime": "total time",
    "nutrition": "nutrition",
    "recipeCategory": "recipe category",
    "recipeCuisine": "recipe cuisine",
    "cookingMethod": "cooking method",
    "suitableForDiet": "suitable for diet",
    "servesCuisine": "serves cuisine",
    "awayTeam": "away team",
    "homeTeam": "home team",
    "partOfSeries": "part of series",
    "episodeNumber": "episode number"
}

In [167]:
text_to_label = {
    "date published": "datePublished",
    "isbn": "isbn",
    "number of pages": "numberOfPages",
    "worst rating": "worstRating",
    "price currency": "priceCurrency",
    "publisher": "publisher",
    "author": "author",
    "book format": "bookFormat",
    "in language": "inLanguage",
    "rating value": "ratingValue",
    "description": "description",
    "price": "priceRange",
    "image": "image",
    "url": "url",
    "availability": "availability",
    "genre": "genre",
    "best rating": "bestRating",
    "item condition": "itemCondition",
    "review": "review",
    "start date": "startDate",
    "end date": "endDate",
    "location": "location",
    "valid from": "validFrom",
    "event status": "eventStatus",
    "event attendance mode": "eventAttendanceMode",
    "organizer": "organizer",
    "category": "category",
    "valid through": "validThrough",
    "telephone": "telephone",
    "duration": "duration",
    "email": "email",
    "street address": "streetAddress",
    "region of address": "addressRegion",
    "locality of address": "addressLocality",
    "price range": "priceRange",
    "postal code": "postalCode",
    "address country": "addressCountry",
    "fax number": "faxNumber",
    "date created": "dateCreated",
    "director": "director",
    "content rating": "contentRating",
    "actor": "actor",
    "in album": "inAlbum",
    "by artist": "byArtist",
    "job title": "jobTitle",
    "birth date": "birthDate",
    "gender": "gender",
    "nationality": "nationality",
    "weight": "weight",
    "release date": "releaseDate",
    "copyright year": "copyrightYear",
    "book edition": "bookEdition",
    "headline": "headline",
    "text": "text",
    "address": "address",
    "performer": "performer",
    "longitude": "longitude",
    "latitude": "latitude",
    "currencies accepted": "currenciesAccepted",
    "checkout time": "checkoutTime",
    "check-in time": "checkInTime",
    "amenity feature": "amenityFeature",
    "payment accepted": "paymentAccepted",
    "available language": "availableLanguage",
    "brand": "brand",
    "opening hours": "openingHours",
    "date posted": "datePosted",
    "employment type": "employmentType",
    "hiring organization": "hiringOrganization",
    "opens": "opens",
    "day of week": "dayOfWeek",
    "closes": "closes",
    "production company": "productionCompany",
    "country of origin": "countryOfOrigin",
    "number of tracks": "numTracks",
    "track": "track",
    "given name": "givenName",
    "family name": "familyName",
    "birth place": "birthPlace",
    "honorific suffix": "honorificSuffix",
    "alumni of": "alumniOf",
    "death date": "deathDate",
    "measurements": "measurements",
    "unit code": "unitCode",
    "product ID": "productID",
    "unit text": "unitText",
    "available delivery method": "availableDeliveryMethod",
    "model": "model",
    "manufacturer": "manufacturer",
    "color": "color",
    "gtin": "gtin",
    "material": "material",
    "serving size": "servingSize",
    "recipe instructions": "recipeInstructions",
    "recipe ingredient": "recipeIngredient",
    "cook time": "cookTime",
    "prep time": "prepTime",
    "total time": "totalTime",
    "nutrition": "nutrition",
    "recipe category": "recipeCategory",
    "recipe cuisine": "recipeCuisine",
    "cooking method": "cookingMethod",
    "suitable for diet": "suitableForDiet",
    "serves cuisine": "servesCuisine",
    "away team": "awayTeam",
    "home team": "homeTeam",
    "part of series": "partOfSeries",
    "episode number": "episodeNumber"
}

## Load test (and training) set

In [168]:
with open('sotabv2-cpa-train-table.pkl', "rb") as f:
    train = pickle.load(f)
with open('sotabv2-cpa-sample-test-table.pkl', "rb") as f:
    test = pickle.load(f)

examples = [example[1] for example in test ]
labels = [l for example in test for l in example[2]]

train_examples = [ example[1] for example in train ]
train_example_labels = []
for table in train:
    col_labels = """"""
    for i, l in enumerate(table[2]):
        col_labels += f"""Column {i+2}: {labels_to_text[l]}\n"""
    train_example_labels.append(col_labels.strip())

In [169]:
test[0]

['Book_antipodean.com_September2020_CPA.json.gz',
 ["Column 1 || Column 2 || Column 3 || Column 4 || \nOn Coxalgia, or Hip Disease. || Collins, || InStock || 65.0||\nSimonds Saws & Knives, Gold Medals and Highest Awards Everywhere. Atlanta 1895. Art Nouveau poster. || Livermore & Knight Co, ||  || ||\nNorth Island, New Zealand, Sheet 3, northeast corner from Tauranga to Dannevirke, folding map on linen. || Marcus F. Marks, Government Printer. || InStock || 85.0||\nTrade Receipts for wine and beer from Perth, Australia merchants 'H. Sherwood & Co., Wine & Spirit Merchants' and 'D. Nathan & Co., Bottle and Case Merchant'. ||  || InStock || 75.0||\nThe American Trans-Continental Route via New York Central & Hudson River R. R. and Connections, the Only 4 Track Line. New York to Omaha; the Union and Central Pacific Railways, Omaha to San Francisco; and the Pacific Mail Steamship Company's Lines, from San Francisco to destination. ||  || InStock || 375.0||\n"],
 ['publisher', 'availability',

In [170]:
labels[0]

'publisher'

In [1064]:
train_examples[0]

['Column 1 || Column 2 || Column 3 || Column 4 || \nA Cidade Perdida || 2020-07-10 || 9789722539739 || 728||\nA Cúpula - livro 1 || 2016-04-08 || 9789722531924 || 704||\nMisery || 2013-09-13 || 9789722527118 || 480||\nA Cúpula - Livro 2 || 2016-08-05 || 9789722532457 || 656||\nO Olho de Deus || 2019-05-10 || 9789722537636 || 528||\n']

In [172]:
train_example_labels[0]

'Column 2: date published\nColumn 3: isbn\nColumn 4: number of pages'

#also a way to read the data but when '' is added to the first column

with open('sotabv2-cpa-train-table.pkl', "rb") as f:
    train = pickle.load(f)
with open('sotabv2-cpa-sample-test-table.pkl', "rb") as f:
    test = pickle.load(f)

examples = [example[1] for example in test ]
labels = [l for example in test for l in example[2]]

train_examples = [ example[1] for example in train ]
train_example_labels = []
for table in train:
    col_labels = """"""
    col_labels += f"""Column 1: name\n"""
    for i, l in enumerate(table[2]):
        if i != 0:
            col_labels += f"""Column {i+1}: {labels_to_text[l]}\n"""
    train_example_labels.append(col_labels.strip())

In [173]:
labels_joined = ", ".join([labels_to_text[label] for label in list(set(labels))]) 
labels_joined

'cook time, event status, recipe instructions, employment type, currencies accepted, number of tracks, valid from, check-in time, start date, headline, best rating, episode number, worst rating, date published, unit text, manufacturer, color, part of series, genre, availability, rating value, death date, book edition, opens, family name, product ID, item condition, in language, closes, material, checkout time, cooking method, event attendance mode, region of address, isbn, longitude, review, suitable for diet, number of pages, in album, price, weight, location, given name, address, recipe category, nationality, book format, recipe ingredient, brand, email, price currency, available delivery method, date posted, price range, by artist, copyright year, performer, duration, hiring organization, unit code, locality of address, payment accepted, end date, organizer, home team, description, away team, honorific suffix, amenity feature, birth place, date created, content rating, birth date, p

In [174]:
model_name = 'gpt-3.5-turbo-1106'
chat = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model=model_name)

## Choose setup: zero-shot, one-shot or five-shot

add comment: skip or do not annotate or classify first column (column 1)

ZERO SHOT CPA TABLE

In [50]:
#ROLE
nr="zero"
#prompt_name = "r"  #two columns ...
prompt_name = "r1"  #two or more ...

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}. Answer with ONLY ONE of the labels from the provided label-set! Notice, you must not classify Column 1!"))
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))

    res = chat(messages)
    preds.append(res.content)
    

In [99]:
#ROLE
nr="zero"
prompt_name = "r2"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}. Answer with ONLY ONE of the labels from the provided label-set for each column! Notice, you must not classify Column 1"))

    res = chat(messages)
    preds.append(res.content)

In [175]:
#ROLE
nr="zero"
prompt_name = "r3"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to predict the relationship between the first column with the rest of the columns from a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}. Notice, do not annotate Column 1"))

    res = chat(messages)
    preds.append(res.content)

In [194]:
#ROLE
nr="zero"
prompt_name = "r4"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}."))

    res = chat(messages)
    preds.append(res.content)

In [847]:
#ROLE
nr="zero"
prompt_name = "r5"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to identify the correlation of each column with the initial column in the table and label these relations with ONLY ONE of the following labels that are separated with a comma: {labels_joined}."))
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}."))

    res = chat(messages)
    preds.append(res.content)

In [212]:
#ROLE + INSTRUCTIONS
nr="zero"
prompt_name = "r+i"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 5. Ensure that you use only one label from the provided set for each column, and separate your responses with commas."))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [234]:
#ROLE + INSTRUCTIONS
nr="zero"
prompt_name = "r2+i2"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [249]:
#ROLE + INSTRUCTIONS
nr="zero"
prompt_name = "r+i2"   #1+2

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [349]:
#ROLE + INSTRUCTIONS + StepByStep
nr="zero"
prompt_name = "r2+i2+s_b_s"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Let's think step by step."))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised ServiceUnavailableError: The server is overloaded or not ready yet..


In [367]:
#ROLE + INSTRUCTIONS + StepByStep
nr="zero"
prompt_name = "r2+i2+s_b_s.1"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [456]:
#ROLE + INSTRUCTIONS + MOTIVATION
nr="zero"
prompt_name = "r2+i2+m"

preds = []
for example in examples:
   messages = []
    
   messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
   messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
   messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))

    
   messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
   res = chat(messages)
   preds.append(res.content)
    

In [474]:
#ROLE + INSTRUCTIONS + StepByStep + MOTIVATION
nr="zero"
prompt_name = "r2+i2+s_b_s+m"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Let's think step by step."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))
   
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [498]:
#ROLE + INSTRUCTIONS + StepByStep + MOTIVATION  P2
nr="zero"
prompt_name = "r+i2+s_b_s+m.1"


preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Let's think step by step. Your answer is very important. Take your time and think well before answering!"))
    
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [515]:
#ROLE + StepByStep + MOTIVATION + INSTRUCTIONS 
nr="zero"
prompt_name = "r+s_b_s+m+i2"


preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your answer is very important. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised ServiceUnavailableError: The server is overloaded or not ready yet..


In [547]:
#ROLE + INSTRUCTIONS (StepByStep, motivation) 
nr="zero"
#prompt_name = "r+i2(sbs-m)"
prompt_name = "r+i2(sbs-m)2"


preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    #messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Let's think step by step. 8. Note, your answer is very important. Take your time and think well before answering!"))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5. Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. Do not explain your answer. 7. Let's think step by step. 8. Note, your answer is very important. Take your time and think well before answering!"))


    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [562]:
#ROLE + INSTRUCTIONS (motivation) 
nr="zero"
prompt_name = "r+i2(m)"


preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [596]:
#ROLE + MOTIVATION 
nr="zero"
#prompt_name = "r1+m"
prompt_name = "r2+m"


preds = []
for example in examples:
    messages = []
    
    #messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [712]:
#ROLE + MOTIVATION + INSTRUCTIONS 
nr="zero"
prompt_name = "r+m+i2"


preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering! Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [820]:
#ROLE + CONTEXT
nr="zero"
prompt_name = "r+c"


preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns."))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [876]:
#ROLE + CONTEXT
nr="zero"
prompt_name = "r+c1"


preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [889]:
#ROLE + INSTRUCTIONS(MOTIVATION) + CONTEXT
nr="zero"
prompt_name = "r+i2(m)+c1"


preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)
    

In [905]:
#ROLE + (StepByStep)INSTRUCTIONS + CONTEXT
nr="zero"
prompt_name = "r2+i2+s_b_s.1+c1"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

RE RUN DIS

In [924]:
#ROLE + INSTRUCTIONS(MOTIVATION) + CONTEXT + EXAMPLE
nr="zero"
prompt_name = "r+i2(m)+c.example"


preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, here is an example how you could solve a CPA task: 'Classify the relationship between these table columns: Columm1: Dog, Cat, Dog.  Column2: lis, moli, brauni. '"
                                  "First we check Columm1: Dog, Cat, Dog."   
                                  "Now we check Column2: lis, moli, brauni. Analyze Column 2 in relation to Column 1. Predict the relation between Column 2 and Column 1"
                                  "Answer: Column 2: pet names"))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

ONE SHOT CPA TABLE

In [69]:
import random
#One-shot table + instructions + roles   #NOT M

nr="one"
prompt_name = "r1+i"

preds = []
for example in examples:
    messages = []
    

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}. Notice, you must not classify Column 1!"))
    messages.append(SystemMessage(content="Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. 4. Answer with only one selected relationship for each column with the format Column2: relationship. Don't return any relationship for the first column! 5. Answer only with labels from the provided label set!"))
 
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo-1106 in organization org-JnRe4IF9kM1kFkJzhKlurHyV on tokens per min (TPM): Limit 60000, Used 58585, Requested 1711. Please try again in 296ms. Visit https://platform.openai.com/account/rate-limits to learn more..


In [627]:
#ROLE
import random 

nr="one"
prompt_name = "r2"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [612]:
#ROLE
import random 

nr="one"
prompt_name = "r3"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to predict the relationship between the first column with the rest of the columns from a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [303]:
#ROLE
import random 

nr="one"
prompt_name = "r4"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [262]:
#ROLE + INSTRUCTIONS
import random

nr="one"
prompt_name = "r2+i2"

preds = []
for example in examples:
    messages = []
    

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [385]:
#ROLE + INSTRUCTIONS + StepByStep
import random

nr="one"
prompt_name = "r2+i2+s_b_s"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Let's think step by step."))
    
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [401]:
#ROLE + INSTRUCTIONS + StepByStep
import random

nr="one"
prompt_name = "r2+i2+s_b_s.1"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [657]:
#ROLE + INSTRUCTIONS + MOTIVATION
import random

nr="one"
prompt_name = "r2+i2+m"

preds = []
for example in examples:
    messages = []
     
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))

    
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [696]:
#ROLE + INSTRUCTIONS + StepByStep + MOTIVATION
import random

nr="one"
prompt_name = "r2+i2+s_b_s+m"

preds = []
for example in examples:
    messages = []
     
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Let's think step by step."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))

    
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [727]:
#ROLE + StepByStep + MOTIVATION + INSTRUCTIONS 
import random

nr="one"
prompt_name = "r+s_b_s+m+i2"

preds = []
for example in examples:
    messages = []
     
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your answer is very important. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [744]:
#ROLE + INSTRUCTIONS (motivation) 
import random

nr="one"
prompt_name = "r+i2(m)"

preds = []
for example in examples:
    messages = []
     
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"))
    
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [943]:
#ROLE + CONTEXT
import random

nr="one"
prompt_name = "r+c1"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."))

   
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: Bad gateway. {"error":{"code":502,"message":"Bad gateway.","param":null,"type":"cf_bad_gateway"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} {'Date': 'Fri, 26 Jan 2024 19:02:44 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '84bb0300cee22c0f-STR', 'alt-svc': 'h3=":443"; ma=86400'}.


In [958]:
#ROLE + INSTRUCTIONS(MOTIVATION) + CONTEXT
import random

nr="one"
prompt_name = "r+i2(m)+c1"

preds = []
for example in examples:
    messages = []
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."))
     
   
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [974]:
#ROLE + (StepByStep)INSTRUCTIONS + CONTEXT
import random

nr="one"
prompt_name = "r2+i2+s_b_s.1+c1"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."))
     
   
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


and dis

In [986]:
#ROLE + INSTRUCTIONS(MOTIVATION) + CONTEXT + EXAMPLE
import random

nr="one"
prompt_name = "r+i2(m)+c.example"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, here is an example how you could solve a CPA task: 'Classify the relationship between these table columns: Columm1: Dog, Cat, Dog. Column2: lis, moli, brauni. '"
                                  "First we check Columm1: Dog, Cat, Dog."   
                                  "Now we check Column2: lis, moli, brauni. Analyze Column 2 in relation to Column 1. Predict the relation between Column 2 and Column 1"
                                  "Answer: Column 2: pet names"))
   
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [1002]:
#ROLE + (StepByStep)INSTRUCTIONS + EXAMPLE CONTEXT
import random

nr="one"
prompt_name = "r2+i2+s_b_s.1+cTEST"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    index = random.randint(0, len(train_examples)-1)
    messages.append(SystemMessage(content=f"CONTEXT: Classify the relationship between these table columns: {train_examples[index]}"))
    
    messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [1022]:
#ROLE + (StepByStep)INSTRUCTIONS + EXAMPLE CONTEXT  (2)
import random

nr="one"
prompt_name = "r2+i2+s_b_s.1+cTEST2"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    index = random.randint(0, len(train_examples)-1)
    messages.append(SystemMessage(content=f"CONTEXT: Classify the relationship between these table columns: {train_examples[index]}, {train_example_labels[index]}"))

    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

FIVE SHOT CPA TABLE

In [None]:
import random
#Five-shot table + instructions + roles   # NOT M

nr="five"
prompt_name = "r1+i"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two or more columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}. Notice, you must not classify Column 1!"))
    messages.append(SystemMessage(content="Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a relationship that best represents the relationship between that column and the first column of the table. 4. Answer with only one selected relationship for each column with the format Column2: relationship. Don't return any relationship for the first column! 5. Answer only with labels from the provided label set!"))
    

    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#ROLE
import random

nr="five"
prompt_name = "r3"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to predict the relationship between the first column with the rest of the columns from a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#ROLE
import random

nr="five"
prompt_name = "r4"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table, note that you must not annotate Column 1 and must annotate the rest with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#ROLE + INSTRUCTIONS
import random

nr="five"
prompt_name = "r2+i2"

preds = []
for example in examples:
    messages = []
    

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#ROLE + INSTRUCTIONS + StepByStep
import random

nr="five"
prompt_name = "r2+i2+s_b_s"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Let's think step by step."))
    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + INSTRUCTIONS + StepByStep
import random

nr="five"
prompt_name = "r2+i2+s_b_s.1"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + INSTRUCTIONS + MOTIVATION
import random

nr="five"
prompt_name = "r2+i2+m"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))
    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + INSTRUCTIONS + StepByStep + MOTIVATION
import random

nr="five"
prompt_name = "r2+i2+s_b_s+m"

preds = []
for example in examples:
    messages = []
     
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content="Let's think step by step."))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))

    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + StepByStep + MOTIVATION + INSTRUCTIONS 
import random

nr="five"
prompt_name = "r+s_b_s+m+i2"

preds = []
for example in examples:
    messages = []
     
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your answer is very important. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))

    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + INSTRUCTIONS (motivation) 
import random

nr="five"
prompt_name = "r+i2(m)"

preds = []
for example in examples:
    messages = []
     
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"))
    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + CONTEXT 
import random

nr="five"
prompt_name = "r+c1"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."))
    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


In [None]:
#ROLE + (StepByStep)INSTRUCTIONS + CONTEXT
import random

nr="five"
prompt_name = "r2+i2+s_b_s.1+c1"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas."))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to label the columns of a given table, aiming to identify the correlation of each column with the initial column in the table."))
     
    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


and dis

In [None]:
#ROLE + INSTRUCTIONS(MOTIVATION) + CONTEXT + EXAMPLE
import random

nr="five"
prompt_name = "r+i2(m)+c.example"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1.Review the provided input and organize it into a table format. 2.Carefully examine the values within each cell of the table. 3.For every column in the table, choose a single label that best represents the relationship between that column and the first column of the table. 4. Note, you must not annotate Column 1!. 5.Answer with your selected label for each column, following the format: 'Column2: SelectedLabel, Column3: SelectedLabel, ...'. 6. Ensure that you use ONLY ONE label from the provided label set for each column, and separate your responses with commas. 7. Your answer is very important. Take your time and think well before answering!"))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, here is an example how you could solve a CPA task: 'Classify the relationship between these table columns: Columm1: Dog, Cat, Dog.  Column2: lis, moli, brauni. '"
                                  "First we check Columm1: Dog, Cat, Dog."   
                                  "Now we check Column2: lis, moli, brauni. Analyze Column 2 in relation to Column 1. Predict the relation between Column 2 and Column 1"
                                  "Answer: Column 2: pet names"))
     

    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify the relationship between these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)


Save predictions

In [1088]:
preds

['Column 2: manufacturer\nColumn 3: availability\nColumn 4: price',
 'Column 2: event title\nColumn 3: event details',
 'Column 2: headline\nColumn 3: organizer\nColumn 4: email',
 'Column 2: contact number\nColumn 3: address country\nColumn 4: postal code',
 'Column 2: director\nColumn 3: performer\nColumn 4: image',
 'Column 2: rating value\nColumn 3: content rating\nColumn 4: genre\nColumn 5: director',
 'Column 2: nationality\nColumn 3: job title',
 'Column 2: birth date\nColumn 3: gender\nColumn 4: job title',
 'Column 2: job title\nColumn 3: telephone\nColumn 4: gender',
 'Column 2: description\nColumn 3: in album\nColumn 4: item condition',
 'Column 2: author\nColumn 3: price currency\nColumn 4: publisher\nColumn 5: item condition\nColumn 6: price',
 'Column 2: author\nColumn 3: description\nColumn 4: headline',
 'Column 2: start date\nColumn 3: location\nColumn 4: event attendance mode\nColumn 5: valid from\nColumn 6: event status',
 'Column 2: description, Column 3: descriptio

In [44]:
""" import os
directory = f'Predictions/{model_name}/column/zero-shot'
os.makedirs(directory, exist_ok=True) """

In [1089]:
file_name=f'Predictions/{model_name}/table/{nr}-shot/chat-table-{prompt_name}-{nr}-shot.pkl'
f = open(file_name,'wb')
pickle.dump(preds,f)
f.close()

## Evaluation

In [1091]:
predictions = []
i=0
for j, table_preds in enumerate(preds):
   
    table_number = len(test[j][2])
    
    if "Class:" in table_preds:
        table_preds = table_preds.split("Class:")[1]
      

    if ":" in table_preds or "-" in table_preds:
        if ":" in table_preds:
            separator = ":"
            start = 1
            end = table_number+1
        else:
            separator = "-"  
            start = 1
            end = table_number+1
    else:
        separator = ","
        start = 0
        end = table_number
        
    col_preds = table_preds.split(separator)[start:end]
    
    for pred in col_preds:
        i+=1
     
        if "\n" in pred:
            pred = pred.split('\n')[0].strip()
        if "," in pred:
            pred = pred.split(",")[0].strip()
   
        if '(' in pred:
            pred = pred.split("(")[0].strip()
       
        if '.' in pred:
            pred = pred.split(".")[0].strip()
        pred = pred.strip().lower()
        
        if pred in text_to_label:
            predictions.append(text_to_label[pred])
        else:
            print(f"For test example {i} out of label space prediction: {pred}")
            predictions.append('-')
  
    if len(col_preds) < table_number:
        for m in range(0, table_number-len(col_preds)):
            predictions.append('-')
            i+=1

For test example 4 out of label space prediction: event title
For test example 5 out of label space prediction: event details
For test example 9 out of label space prediction: contact number
For test example 86 out of label space prediction: alma mater
For test example 88 out of label space prediction: product id
For test example 90 out of label space prediction: width
For test example 91 out of label space prediction: height
For test example 150 out of label space prediction: product id
For test example 153 out of label space prediction: product id
For test example 154 out of label space prediction: product id
For test example 173 out of label space prediction: check-out time
For test example 221 out of label space prediction: height
For test example 229 out of label space prediction: calories
For test example 230 out of label space prediction: fat
For test example 231 out of label space prediction: protein
For test example 232 out of label space prediction: carbohydrates
For test exa

In [1093]:
predictions

['manufacturer',
 'availability',
 'priceRange',
 '-',
 '-',
 'headline',
 'organizer',
 'email',
 '-',
 'addressCountry',
 'postalCode',
 'director',
 'performer',
 'image',
 'ratingValue',
 'contentRating',
 'genre',
 'director',
 'nationality',
 'jobTitle',
 'birthDate',
 'gender',
 'jobTitle',
 'jobTitle',
 'telephone',
 'gender',
 'description',
 'inAlbum',
 'itemCondition',
 'author',
 'priceCurrency',
 'publisher',
 'itemCondition',
 'priceRange',
 'author',
 'description',
 'headline',
 'startDate',
 'location',
 'eventAttendanceMode',
 'validFrom',
 'eventStatus',
 'description',
 'description',
 'description',
 'description',
 'description',
 'description',
 'description',
 'location',
 'checkInTime',
 'inLanguage',
 'priceRange',
 'brand',
 'openingHours',
 'image',
 'email',
 'availability',
 'ratingValue',
 'bestRating',
 'review',
 'priceCurrency',
 '-',
 'duration',
 'contentRating',
 'productionCompany',
 'director',
 'duration',
 'releaseDate',
 'contentRating',
 'perf

In [185]:
def map_cpa_to_labels(preds, test, text_to_label):
    # Map predictions to label space
    predictions = []
    i=0
    num = 0
    for j, table_preds in enumerate(preds):
        # How many columns does the table have? : To control cases when less/more classes are returned
        table_number = len(test[j][2])
        
        if "Class:" in table_preds:
            table_preds = table_preds.split("Class:")[1]
        
        #Break predictions into either \n or ,
        if ":" in table_preds or "-" in table_preds:
            if ":" in table_preds:
                separator = ":"
                start = 1
                end = table_number+1
            else:
                separator = "-"  
                start = 1
                end = table_number+1
        else:
            separator = ","
            start = 0
            end = table_number
            
        col_preds = table_preds.split(separator)[start:end]
        
        for pred in col_preds[:table_number-1]:
            i+=1
            
            # Remove break lines
            if "\n" in pred:
                pred = pred.split('\n')[0].strip()
            # Remove commas
            if "," in pred:
                pred = pred.split(",")[0].strip()
            # Remove paranthesis
            if '(' in pred:
                pred = pred.split("(")[0].strip()
            #Remove points
            if '.' in pred:
                pred = pred.split(".")[0].strip()

            #Remove punctuation
            pred = re.sub(r'[^\w\s]','',pred)
            # Lower-case prediction
            pred = pred.strip().lower()
            
            if pred in text_to_label:
                predictions.append(text_to_label[pred])
            else:
                fin = ""
                for la in text_to_label:
                    if la in pred:
                        fin = la
                        break
                if fin == "":
                    fin2 = ""
                    for la in text_to_label:
                        if pred in la:
                            fin2 = la
                            break
                    
                    if fin2=="":
                        # print(f"For test example {i} out of label space prediction: {pred}")
                        predictions.append('-')
                        num +=1
                    else:
                        predictions.append(text_to_label[fin2])
                else:
                    predictions.append(text_to_label[fin])
        
        # If more/less predictions for table
        if len(col_preds) < table_number-1:
            for m in range(0, table_number-1-len(col_preds)):
                predictions.append('-')
                num +=1
                i+=1
    return predictions, num

### Calculate Precision, Recall, Macro-F1 and Micro-F1

In [1103]:
def calculate_f1_scores(y_tests, y_preds, num_classes, types):

    y_tests = [types.index(y) for y in y_tests]
    y_preds = [types.index(y) for y in y_preds]
    
    
    cm = np.zeros(shape=(num_classes,num_classes))
    
    for i in range(len(y_tests)):
        cm[y_preds[i]][y_tests[i]] += 1
        
    report = {}
    
    for j in range(len(cm[0])):
        report[j] = {}
        report[j]['FN'] = 0
        report[j]['FP'] = 0
        report[j]['TP'] = cm[j][j]

        for i in range(len(cm)):
            if i != j:
                report[j]['FN'] += cm[i][j]
        for k in range(len(cm[0])):
            if k != j:
                report[j]['FP'] += cm[j][k]

        precision = report[j]['TP'] / (report[j]['TP'] + report[j]['FP'])
        recall = report[j]['TP'] / (report[j]['TP'] + report[j]['FN'])
        f1 = 2*precision*recall / (precision + recall)
        
        if np.isnan(f1):
            f1 = 0
        if np.isnan(precision):
            f1 = 0
        if np.isnan(recall):
            f1 = 0

        report[j]['p'] =  precision
        report[j]['r'] =  recall
        report[j]['f1'] = f1
    
    all_fn = 0
    all_tp = 0
    all_fp = 0

    for r in report:
        if r != num_classes-1:
            all_fn += report[r]['FN']
            all_tp += report[r]['TP']
            all_fp += report[r]['FP']
        
    class_f1s = [ report[class_]['f1'] for class_ in report]
    class_p = [ 0 if np.isnan(report[class_]['p']) else report[class_]['p'] for class_ in report]
    class_r = [ 0 if np.isnan(report[class_]['r']) else report[class_]['r'] for class_ in report]
    macro_f1 = sum(class_f1s[:-1]) / (num_classes-1)
    
    p =  sum(class_p[:-1]) / (num_classes-1)
    r =  sum(class_r[:-1]) / (num_classes-1)
    micro_f1 = all_tp / ( all_tp + (1/2 * (all_fp + all_fn) )) 
    
    per_class_eval = {}
    for index, t in enumerate(types[:-1]):
        per_class_eval[t] = {"Precision":class_p[index], "Recall": class_r[index], "F1": class_f1s[index]}
    
    evaluation = {
        "Micro-F1": micro_f1,
        "Macro-F1": macro_f1,
        "Precision": p,
        "Recall": r
    }
    
    return [ evaluation, per_class_eval]


In [1104]:
types = list(set(labels))
types = types + ["-"] if "-" in predictions else types
evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)

  precision = report[j]['TP'] / (report[j]['TP'] + report[j]['FP'])
  f1 = 2*precision*recall / (precision + recall)
  recall = report[j]['TP'] / (report[j]['TP'] + report[j]['FN'])


In [1105]:
evaluation

{'Micro-F1': 0.6209016393442623,
 'Macro-F1': 0.570189806116365,
 'Precision': 0.606314291621309,
 'Recall': 0.57524784885896}

In [1109]:
messages

[SystemMessage(content='You are a great Table Annotation Specialist and your task is to classify the relationship between the first column and the rest of the columns of a given table with ONLY ONE of the following labels that are separated with comma: cook time, event status, recipe instructions, employment type, currencies accepted, number of tracks, valid from, check-in time, start date, headline, best rating, episode number, worst rating, date published, unit text, manufacturer, color, part of series, genre, availability, rating value, death date, book edition, opens, family name, product ID, item condition, in language, closes, material, checkout time, cooking method, event attendance mode, region of address, isbn, longitude, review, suitable for diet, number of pages, in album, price, weight, location, given name, address, recipe category, nationality, book format, recipe ingredient, brand, email, price currency, available delivery method, date posted, price range, by artist, cop

In [1107]:
per_class_eval

{'cookTime': {'Precision': 0, 'Recall': 0.0, 'F1': 0},
 'eventStatus': {'Precision': 0.5714285714285714,
  'Recall': 0.8,
  'F1': 0.6666666666666666},
 'recipeInstructions': {'Precision': 0.16666666666666666,
  'Recall': 0.25,
  'F1': 0.2},
 'employmentType': {'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0},
 'currenciesAccepted': {'Precision': 0, 'Recall': 0.0, 'F1': 0},
 'numTracks': {'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0},
 'validFrom': {'Precision': 0.0, 'Recall': 0.0, 'F1': 0},
 'checkInTime': {'Precision': 0.3333333333333333, 'Recall': 0.5, 'F1': 0.4},
 'startDate': {'Precision': 0.5833333333333334,
  'Recall': 0.7,
  'F1': 0.6363636363636365},
 'headline': {'Precision': 0.0, 'Recall': 0.0, 'F1': 0},
 'bestRating': {'Precision': 0.625,
  'Recall': 0.5555555555555556,
  'F1': 0.5882352941176471},
 'episodeNumber': {'Precision': 0.4, 'Recall': 1.0, 'F1': 0.5714285714285715},
 'worstRating': {'Precision': 0.0, 'Recall': 0.0, 'F1': 0},
 'datePublished': {'Precision': 0.5, 'Recall': 

## Error Analysis

In [1108]:
errors = 0
for i in range(len(predictions)):
    if predictions[i] != labels[i]:
        errors += 1
        print(f"Predicted as {predictions[i]} when it was {labels[i]}")
errors

Predicted as manufacturer when it was publisher
Predicted as priceRange when it was price
Predicted as - when it was eventStatus
Predicted as - when it was eventAttendanceMode
Predicted as headline when it was organizer
Predicted as organizer when it was telephone
Predicted as - when it was faxNumber
Predicted as director when it was actor
Predicted as performer when it was director
Predicted as ratingValue when it was actor
Predicted as inAlbum when it was category
Predicted as itemCondition when it was availability
Predicted as priceRange when it was price
Predicted as description when it was headline
Predicted as headline when it was text
Predicted as startDate when it was endDate
Predicted as location when it was address
Predicted as validFrom when it was startDate
Predicted as description when it was longitude
Predicted as description when it was latitude
Predicted as description when it was ratingValue
Predicted as description when it was worstRating
Predicted as description when

206

### Re-load previous preds files

In [1110]:
with open(f'Predictions/{model_name}/table/{nr}-shot/chat-table-{prompt_name}-{nr}-shot.pkl', "rb") as f:
    preds = pickle.load(f)

In [158]:
with open(f'Predictions/{model_name}/table/zero-shot/chat-table-r2-zero-shot.pkl', "rb") as f:
    preds = pickle.load(f)

In [1112]:
preds

['Column 2: manufacturer\nColumn 3: availability\nColumn 4: price',
 'Column 2: event title\nColumn 3: event details',
 'Column 2: headline\nColumn 3: organizer\nColumn 4: email',
 'Column 2: contact number\nColumn 3: address country\nColumn 4: postal code',
 'Column 2: director\nColumn 3: performer\nColumn 4: image',
 'Column 2: rating value\nColumn 3: content rating\nColumn 4: genre\nColumn 5: director',
 'Column 2: nationality\nColumn 3: job title',
 'Column 2: birth date\nColumn 3: gender\nColumn 4: job title',
 'Column 2: job title\nColumn 3: telephone\nColumn 4: gender',
 'Column 2: description\nColumn 3: in album\nColumn 4: item condition',
 'Column 2: author\nColumn 3: price currency\nColumn 4: publisher\nColumn 5: item condition\nColumn 6: price',
 'Column 2: author\nColumn 3: description\nColumn 4: headline',
 'Column 2: start date\nColumn 3: location\nColumn 4: event attendance mode\nColumn 5: valid from\nColumn 6: event status',
 'Column 2: description, Column 3: descriptio