In [1]:
import os
import re
import pickle
import numpy as np
import pandas as pd
from dotenv import dotenv_values
from langchain import PromptTemplate, LLMChain, OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage, AIMessage

In [2]:
config = dotenv_values('../.env')
os.environ['OPENAI_API_KEY'] = config["OPENAI_API_KEY"]
OPENAI_API_KEY = config["OPENAI_API_KEY"]

In [4]:
labels_to_text = {
    "datePublished": "date published",
    "isbn": "isbn",
    "numberOfPages": "number of pages",
    "worstRating": "worst rating",
    "priceCurrency": "price currency",
    "publisher": "publisher",
    "author": "author",
    "bookFormat": "book format",
    "inLanguage": "in language",
    "ratingValue": "rating value",
    "description": "description",
    "price": "price",
    "image": "image",
    "url": "url",
    "availability": "availability",
    "genre": "genre",
    "bestRating": "best rating",
    "itemCondition": "item condition",
    "review": "review",
    "startDate": "start date",
    "endDate": "end date",
    "location": "location",
    "validFrom": "valid from",
    "eventStatus": "event status",
    "eventAttendanceMode": "event attendance mode",
    "organizer": "organizer",
    "category": "category",
    "validThrough": "valid through",
    "telephone": "telephone",
    "duration": "duration",
    "email": "email",
    "streetAddress": "street address",
    "addressRegion": "region of address",
    "addressLocality": "locality of address",
    "priceRange": "price range",
    "postalCode": "postal code",
    "addressCountry": "address country",
    "faxNumber": "fax number",
    "dateCreated": "date created",
    "director": "director",
    "contentRating": "content rating",
    "actor": "actor",
    "inAlbum": "in album",
    "byArtist": "by artist",
    "jobTitle": "job title",
    "birthDate": "birth date",
    "gender": "gender",
    "nationality": "nationality",
    "weight": "weight",
    "releaseDate": "release date",
    "copyrightYear": "copyright year",
    "bookEdition": "book edition",
    "headline": "headline",
    "text": "text",
    "address": "address",
    "performer": "performer",
    "longitude": "longitude",
    "latitude": "latitude",
    "currenciesAccepted": "currencies accepted",
    "checkoutTime": "checkout time",
    "checkInTime": "check-in time",
    "amenityFeature": "amenity feature",
    "paymentAccepted": "payment accepted",
    "availableLanguage": "available language",
    "brand": "brand",
    "openingHours": "opening hours",
    "datePosted": "date posted",
    "employmentType": "employment type",
    "hiringOrganization": "hiring organization",
    "opens": "opens",
    "dayOfWeek": "day of week",
    "closes": "closes",
    "productionCompany": "production company",
    "countryOfOrigin": "country of origin",
    "numTracks": "number of tracks",
    "track": "track",
    "givenName": "given name",
    "familyName": "family name",
    "birthPlace": "birth place",
    "honorificSuffix": "honorific suffix",
    "alumniOf": "alumni of",
    "deathDate": "death date",
    "measurements": "measurements",
    "unitCode": "unit code",
    "productID": "product ID",
    "unitText": "unit text",
    "availableDeliveryMethod": "available delivery method",
    "model": "model",
    "manufacturer": "manufacturer",
    "color": "color",
    "gtin": "gtin",
    "material": "material",
    "servingSize": "serving size",
    "recipeInstructions": "recipe instructions",
    "recipeIngredient": "recipe ingredient",
    "cookTime": "cook time",
    "prepTime": "prep time",
    "totalTime": "total time",
    "nutrition": "nutrition",
    "recipeCategory": "recipe category",
    "recipeCuisine": "recipe cuisine",
    "cookingMethod": "cooking method",
    "suitableForDiet": "suitable for diet",
    "servesCuisine": "serves cuisine",
    "awayTeam": "away team",
    "homeTeam": "home team",
    "partOfSeries": "part of series",
    "episodeNumber": "episode number"
}

In [4]:
len(labels_to_text)

108

In [5]:
text_to_label = {
    "date published": "datePublished",
    "isbn": "isbn",
    "number of pages": "numberOfPages",
    "worst rating": "worstRating",
    "price currency": "priceCurrency",
    "publisher": "publisher",
    "author": "author",
    "book format": "bookFormat",
    "in language": "inLanguage",
    "rating value": "ratingValue",
    "description": "description",
    "price": "priceRange",
    "image": "image",
    "url": "url",
    "availability": "availability",
    "genre": "genre",
    "best rating": "bestRating",
    "item condition": "itemCondition",
    "review": "review",
    "start date": "startDate",
    "end date": "endDate",
    "location": "location",
    "valid from": "validFrom",
    "event status": "eventStatus",
    "event attendance mode": "eventAttendanceMode",
    "organizer": "organizer",
    "category": "category",
    "valid through": "validThrough",
    "telephone": "telephone",
    "duration": "duration",
    "email": "email",
    "street address": "streetAddress",
    "region of address": "addressRegion",
    "locality of address": "addressLocality",
    "price range": "priceRange",
    "postal code": "postalCode",
    "address country": "addressCountry",
    "fax number": "faxNumber",
    "date created": "dateCreated",
    "director": "director",
    "content rating": "contentRating",
    "actor": "actor",
    "in album": "inAlbum",
    "by artist": "byArtist",
    "job title": "jobTitle",
    "birth date": "birthDate",
    "gender": "gender",
    "nationality": "nationality",
    "weight": "weight",
    "release date": "releaseDate",
    "copyright year": "copyrightYear",
    "book edition": "bookEdition",
    "headline": "headline",
    "text": "text",
    "address": "address",
    "performer": "performer",
    "longitude": "longitude",
    "latitude": "latitude",
    "currencies accepted": "currenciesAccepted",
    "checkout time": "checkoutTime",
    "check-in time": "checkInTime",
    "amenity feature": "amenityFeature",
    "payment accepted": "paymentAccepted",
    "available language": "availableLanguage",
    "brand": "brand",
    "opening hours": "openingHours",
    "date posted": "datePosted",
    "employment type": "employmentType",
    "hiring organization": "hiringOrganization",
    "opens": "opens",
    "day of week": "dayOfWeek",
    "closes": "closes",
    "production company": "productionCompany",
    "country of origin": "countryOfOrigin",
    "number of tracks": "numTracks",
    "track": "track",
    "given name": "givenName",
    "family name": "familyName",
    "birth place": "birthPlace",
    "honorific suffix": "honorificSuffix",
    "alumni of": "alumniOf",
    "death date": "deathDate",
    "measurements": "measurements",
    "unit code": "unitCode",
    "product ID": "productID",
    "unit text": "unitText",
    "available delivery method": "availableDeliveryMethod",
    "model": "model",
    "manufacturer": "manufacturer",
    "color": "color",
    "gtin": "gtin",
    "material": "material",
    "serving size": "servingSize",
    "recipe instructions": "recipeInstructions",
    "recipe ingredient": "recipeIngredient",
    "cook time": "cookTime",
    "prep time": "prepTime",
    "total time": "totalTime",
    "nutrition": "nutrition",
    "recipe category": "recipeCategory",
    "recipe cuisine": "recipeCuisine",
    "cooking method": "cookingMethod",
    "suitable for diet": "suitableForDiet",
    "serves cuisine": "servesCuisine",
    "away team": "awayTeam",
    "home team": "homeTeam",
    "part of series": "partOfSeries",
    "episode number": "episodeNumber"
}

## Load test (and training) set

In [6]:
with open('sotabv2-cpa-train-column.pkl', "rb") as f:
    train = pickle.load(f)
with open('sotabv2-cpa-sample-test-column.pkl', "rb") as f:
    test = pickle.load(f) 
examples = [example[2] for example in test ]     
labels = [example[3] for example in test ]

train_examples = [ example[2] for example in train ]
train_labels = [ labels_to_text[example[3]] for example in train ]

In [7]:
len(train)

109994

In [8]:
len(test)

509

In [9]:
labels_joined = ", ".join([labels_to_text[label] for label in list(set(labels))])   
print(labels_joined)

rating value, model, worst rating, unit code, location, recipe instructions, best rating, total time, measurements, payment accepted, price range, alumni of, recipe ingredient, valid from, cooking method, release date, serving size, recipe category, employment type, manufacturer, telephone, production company, copyright year, end date, available language, family name, checkout time, death date, honorific suffix, actor, price, color, serves cuisine, headline, weight, url, prep time, in album, postal code, part of series, gender, book format, performer, given name, by artist, date created, review, author, isbn, check-in time, number of tracks, fax number, availability, address, available delivery method, description, event attendance mode, nutrition, episode number, away team, unit text, gtin, closes, longitude, home team, amenity feature, date published, date posted, region of address, birth place, address country, day of week, nationality, hiring organization, recipe cuisine, start dat

In [10]:
model_name = 'gpt-3.5-turbo-1106'
chat = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model=model_name, max_tokens=4)   #max_tokens=4

## Choose setup: zero-shot, one-shot or five-shot

CPA COLUMN


ZERO-SHOT

In [None]:
#role
nr="zero"
prompt_name = "r"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))

    messages.append(HumanMessage(content=f"Classify this relation: {example}"))

    print(messages)
    res = chat(messages)
    preds.append(res.content)

In [37]:
#role + instructions 
nr="zero"
prompt_name = "r+i"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between these two columns. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))

    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [51]:
#role + instructions + step by step
nr="zero"
prompt_name = "r+i+s_b_s"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between these two columns. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))

    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [68]:
#role + instructions + motivation
nr="zero"
prompt_name = "r+i+m"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between these two columns. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))

    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [86]:
#role + instructions (motivation as instruction)
nr="zero"
prompt_name = "r+i(m)"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between these two columns. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation! 6. Your answer is very important. Take your time and think well before answering!"))
 
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#role + instructions + CONTEXT
nr="zero"
prompt_name = "r+i+c"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between these two columns. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to annotate the relation between two given columns."))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

ONE-SHOT

In [150]:
#role
import random

nr="one"
prompt_name = "r"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))

    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [262]:
#role
import random

nr="one"
prompt_name = "r2"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between Column 1 and Column 2 with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))

    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#role + instructions
import random 
 
nr="one"
prompt_name = "r+i"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between these two columns. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))

    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#role + instructions  P2
import random 
 
nr="one"
prompt_name = "r+i2"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between Column 1 and Column 2 with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between Column 2 with Column 1. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))

    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [189]:
#role + instructions + step by step
import random

nr="one"
prompt_name = "r+i+s_b_s"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between these two columns. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))

    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo-1106 in organization org-JnRe4IF9kM1kFkJzhKlurHyV on tokens per min (TPM): Limit 60000, Used 59586, Requested 669. Please try again in 255ms. Visit https://platform.openai.com/account/rate-limits to learn more..


In [None]:
#role + instructions + motivation
import random

nr="one"
prompt_name = "r+i+m"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between these two columns. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))

    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#role + instructions + CONTEXT
import random

nr="one"
prompt_name = "r+i+c"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between these two columns. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to annotate the relation between two given columns."))
    
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [10]:
#role + instructions + CONTEXT example
import random

nr="one"
prompt_name = "r2+i2+c.example"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between Column 1 and Column 2 with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between Column 2 with Column 1. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, here is an example how you could solve a CPA task: 'Classify the relationship between these two columns: Columm1: Dog, Cat, Dog.  Column2: lis, moli, brauni.'"
                                  "First we check Columm1: Dog, Cat, Dog."   
                                  "Now we check Column2: lis, moli, brauni. Analyze Column 2 in relation to Column 1. Predict the relation between Column 2 and Column 1"
                                  "Answer: Column 2: pet names"))
    
    index = random.randint(0, len(train_examples)-1)
    messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
#role + instructions + CONTEXT test
import random

nr="one"
prompt_name = "r2+i2+c.test"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between Column 1 and Column 2 with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between Column 2 with Column 1. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))
  
    messages.append(SystemMessage(content=f"CONTEXT: Classify this relation: {train_examples[index]}"))
    messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

FIVE-SHOT

In [23]:
#role
import random

nr= "five"
prompt_name = "r2"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between Column 1 and Column 2 with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))

    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [41]:
#role + instructions
import random 
 
nr= "five"
prompt_name = "r2+i2"

preds = []
for example in examples:
    messages = []

    #messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between two columns with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    #messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between these two columns. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between Column 1 and Column 2 with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between Column 2 with Column 1. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))
    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).


In [55]:
#role + instructions + step by step
import random

nr= "five"
prompt_name = "r2+i2+s_b_s"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between Column 1 and Column 2 with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Let's think step by step. Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between Column 2 with Column 1. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))
    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [75]:
#role + instructions + motivation
import random

nr= "five"
prompt_name = "r2+i2+m"

preds = []
for example in examples:
    messages = []

    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between Column 1 and Column 2 with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between Column 2 with Column 1. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))
    messages.append(SystemMessage(content="Your answer is very important. Take your time and think well before answering!"))

    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).


In [92]:
#role + instructions + CONTEXT
import random

nr= "five"
prompt_name = "r2+i2+c"

preds = []
for example in examples:
    messages = []
    
    messages.append(SystemMessage(content=f"You are a great Table Annotation Specialist and your task is to classify the relationship between Column 1 and Column 2 with ONLY ONE of the following labels that are separated with comma: {labels_joined}."))
    messages.append(SystemMessage(content="Your instructions are: 1. Review the provided column values. 2. Carefully examine the values of the two columns. 3. Select a single label that best represents the relationship between Column 2 with Column 1. 4. Answer with your final selected label. 5. Ensure that you answer with ONLY ONE label from the provided label-set for each relation!"))
    messages.append(SystemMessage(content=f"CONTEXT: Column Property Annotation is a sub-task of Table Annotation and refers to predicting the semantic relation between two or more columns. You have the same task, you are required to annotate the relation between two given columns."))
    
    for i in range(0,5):
        index = random.randint(0, len(train_examples)-1)
        messages.append(HumanMessage(content=f"Classify this relation: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_labels[index]}"))
    
    messages.append(HumanMessage(content=f"Classify this relation: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [95]:
preds

['description',
 'availability',
 'It seems that the',
 'event status',
 'event status',
 'in album',
 'telephone',
 'email',
 'telephone',
 'country of origin',
 'product ID',
 'actor',
 'director',
 'image',
 'actor',
 'content rating',
 'genre',
 'director',
 'nationality',
 'job title',
 'birth date',
 'gender',
 'The relationship between Column',
 'job title',
 'telephone',
 'gender',
 'category',
 'product ID',
 'item condition',
 'author',
 'price',
 'publisher',
 'availability',
 'number of pages',
 'by artist',
 'in language',
 'author',
 'date posted',
 'address',
 'event status',
 'date created',
 'in language',
 'latitude',
 'latitude',
 'rating value',
 'rating value',
 'rating value',
 'availability',
 'availability',
 'location',
 'check-in time',
 'in language',
 'price range',
 'location',
 'opening hours',
 'image',
 'email',
 'opening hours',
 'rating value',
 'rating value',
 'rating value',
 'alumni of',
 'job title',
 'duration',
 'content rating',
 'production co

In [96]:
file_name=f'Predictions/{model_name}/column/{nr}-shot/chat-column-{prompt_name}-{nr}-shot.pkl'
f = open(file_name,'wb')
pickle.dump(preds,f)
f.close()

## Evaluation

In [98]:
predictions = []
for i, pred in enumerate(preds):
    from_sent = re.findall('"([^"]*)"',pred)
    if len(from_sent) == 0:
        if ":" in pred:
            pred = pred.split(':')[1]
        if "." in pred:
            pred = pred.split('.')[0]
        pred = pred.strip()
        
        if pred in text_to_label:
            predictions.append(text_to_label[pred])
        else:
            if any(label in pred for label in text_to_label):
                for label in text_to_label:
                    if label in pred:
                        predictions.append(text_to_label[label])
                        break
            else:
                print(f"For test example {i} out of label space prediction: {pred}")
                predictions.append('-')
    else:
        if from_sent[0] in text_to_label:
            predictions.append(text_to_label[from_sent[0]])
        else:
            print(f"For test example {i} out of label space prediction: {pred}")
            predictions.append('-')

For test example 2 out of label space prediction: It seems that the
For test example 22 out of label space prediction: The relationship between Column
For test example 72 out of label space prediction: This relation can be
For test example 172 out of label space prediction: This relation is best
For test example 192 out of label space prediction: This relation is best
For test example 220 out of label space prediction: height
For test example 262 out of label space prediction: album
For test example 288 out of label space prediction: It seems that the
For test example 341 out of label space prediction: It seems that the
For test example 386 out of label space prediction: page number
For test example 428 out of label space prediction: album
For test example 431 out of label space prediction: It seems that the
For test example 433 out of label space prediction: album
For test example 439 out of label space prediction: album
For test example 475 out of label space prediction: delivery met

In [113]:
predictions

['description',
 'availability',
 '-',
 'eventStatus',
 'eventStatus',
 'inAlbum',
 'telephone',
 'email',
 'telephone',
 'countryOfOrigin',
 'productID',
 'actor',
 'director',
 'image',
 'actor',
 'contentRating',
 'genre',
 'director',
 'nationality',
 'jobTitle',
 'birthDate',
 'gender',
 '-',
 'jobTitle',
 'telephone',
 'gender',
 'category',
 'productID',
 'itemCondition',
 'author',
 'priceRange',
 'publisher',
 'availability',
 'numberOfPages',
 'byArtist',
 'inLanguage',
 'author',
 'datePosted',
 'address',
 'eventStatus',
 'dateCreated',
 'inLanguage',
 'latitude',
 'latitude',
 'ratingValue',
 'ratingValue',
 'ratingValue',
 'availability',
 'availability',
 'location',
 'checkInTime',
 'inLanguage',
 'priceRange',
 'location',
 'openingHours',
 'image',
 'email',
 'openingHours',
 'ratingValue',
 'ratingValue',
 'ratingValue',
 'alumniOf',
 'jobTitle',
 'duration',
 'contentRating',
 'productionCompany',
 'alumniOf',
 'duration',
 'releaseDate',
 'contentRating',
 'alumniO

### Calculate Precision, Recall, Macro-F1 and Micro-F1

In [101]:
def calculate_f1_scores(y_tests, y_preds, num_classes, types):

    y_tests = [types.index(y) for y in y_tests]
    y_preds = [types.index(y) for y in y_preds]
    
  
    cm = np.zeros(shape=(num_classes,num_classes))
    
    for i in range(len(y_tests)):
        cm[y_preds[i]][y_tests[i]] += 1
        
    report = {}
    
    for j in range(len(cm[0])):
        report[j] = {}
        report[j]['FN'] = 0
        report[j]['FP'] = 0
        report[j]['TP'] = cm[j][j]

        for i in range(len(cm)):
            if i != j:
                report[j]['FN'] += cm[i][j]
        for k in range(len(cm[0])):
            if k != j:
                report[j]['FP'] += cm[j][k]

        precision = report[j]['TP'] / (report[j]['TP'] + report[j]['FP'])
        recall = report[j]['TP'] / (report[j]['TP'] + report[j]['FN'])
        f1 = 2*precision*recall / (precision + recall)
        
        if np.isnan(f1):
            f1 = 0
        if np.isnan(precision):
            f1 = 0
        if np.isnan(recall):
            f1 = 0

        report[j]['p'] =  precision
        report[j]['r'] =  recall
        report[j]['f1'] = f1
    
    all_fn = 0
    all_tp = 0
    all_fp = 0

    for r in report:
        if r != num_classes-1:
            all_fn += report[r]['FN']
            all_tp += report[r]['TP']
            all_fp += report[r]['FP']
        
    class_f1s = [ report[class_]['f1'] for class_ in report]
    class_p = [ 0 if np.isnan(report[class_]['p']) else report[class_]['p'] for class_ in report]
    class_r = [ 0 if np.isnan(report[class_]['r']) else report[class_]['r'] for class_ in report]
    macro_f1 = sum(class_f1s[:-1]) / (num_classes-1)
    
    p =  sum(class_p[:-1]) / (num_classes-1)
    r =  sum(class_r[:-1]) / (num_classes-1)
    micro_f1 = all_tp / ( all_tp + (1/2 * (all_fp + all_fn) )) 
    
    per_class_eval = {}
    for index, t in enumerate(types[:-1]):
        per_class_eval[t] = {"Precision":class_p[index], "Recall": class_r[index], "F1": class_f1s[index]}
    
    evaluation = {
        "Micro-F1": micro_f1,
        "Macro-F1": macro_f1,
        "Precision": p,
        "Recall": r
    }
    
    return [ evaluation, per_class_eval]


In [102]:
types = list(set(labels))
types = types + ["-"] if "-" in predictions else types
evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types), types)

  precision = report[j]['TP'] / (report[j]['TP'] + report[j]['FP'])
  f1 = 2*precision*recall / (precision + recall)
  recall = report[j]['TP'] / (report[j]['TP'] + report[j]['FN'])


In [105]:
evaluation

{'Micro-F1': 0.5583250249252243,
 'Macro-F1': 0.493482587069188,
 'Precision': 0.5581566128134755,
 'Recall': 0.5307669953503287}

In [106]:
messages

[SystemMessage(content='You are a great Table Annotation Specialist and your task is to classify the relationship between Column 1 and Column 2 with ONLY ONE of the following labels that are separated with comma: director, price range, nutrition, serves cuisine, alumni of, manufacturer, number of pages, description, day of week, address, category, by artist, honorific suffix, recipe category, gender, available language, valid from, date posted, end date, longitude, recipe cuisine, checkout time, gtin, currencies accepted, country of origin, opening hours, family name, address country, job title, price, genre, given name, headline, content rating, street address, suitable for diet, nationality, author, payment accepted, copyright year, review, image, location, color, email, check-in time, closes, amenity feature, event status, product ID, date created, date published, worst rating, total time, cooking method, valid through, brand, release date, employment type, text, start date, price c

In [107]:
per_class_eval

{'director': {'Precision': 1.0,
  'Recall': 0.7272727272727273,
  'F1': 0.8421052631578948},
 'priceRange': {'Precision': 0.5, 'Recall': 1.0, 'F1': 0.6666666666666666},
 'nutrition': {'Precision': 1.0, 'Recall': 0.3333333333333333, 'F1': 0.5},
 'servesCuisine': {'Precision': 0.5, 'Recall': 1.0, 'F1': 0.6666666666666666},
 'alumniOf': {'Precision': 0.18181818181818182,
  'Recall': 1.0,
  'F1': 0.3076923076923077},
 'manufacturer': {'Precision': 0.2727272727272727,
  'Recall': 1.0,
  'F1': 0.42857142857142855},
 'numberOfPages': {'Precision': 0.5555555555555556,
  'Recall': 1.0,
  'F1': 0.7142857142857143},
 'description': {'Precision': 0.75, 'Recall': 0.5, 'F1': 0.6},
 'dayOfWeek': {'Precision': 1.0, 'Recall': 0.5, 'F1': 0.6666666666666666},
 'address': {'Precision': 0.14285714285714285,
  'Recall': 0.4,
  'F1': 0.21052631578947364},
 'category': {'Precision': 0.2857142857142857,
  'Recall': 0.2857142857142857,
  'F1': 0.2857142857142857},
 'byArtist': {'Precision': 0.5, 'Recall': 0.166

## Error Analysis

In [109]:
errors = 0
for i in range(len(predictions)):
    if predictions[i] != labels[i]:
        errors += 1
        print(f"Predicted as {predictions[i]} when it was {labels[i]}")
errors

Predicted as description when it was publisher
Predicted as - when it was price
Predicted as eventStatus when it was eventAttendanceMode
Predicted as inAlbum when it was organizer
Predicted as telephone when it was faxNumber
Predicted as countryOfOrigin when it was addressCountry
Predicted as productID when it was postalCode
Predicted as - when it was jobTitle
Predicted as category when it was description
Predicted as productID when it was category
Predicted as priceRange when it was priceCurrency
Predicted as numberOfPages when it was price
Predicted as byArtist when it was author
Predicted as inLanguage when it was headline
Predicted as author when it was text
Predicted as datePosted when it was endDate
Predicted as eventStatus when it was eventAttendanceMode
Predicted as dateCreated when it was startDate
Predicted as inLanguage when it was eventStatus
Predicted as latitude when it was longitude
Predicted as ratingValue when it was worstRating
Predicted as ratingValue when it was bes

229

### Re-load previous preds files

In [110]:
with open(f'Predictions/{model_name}/column/{nr}-shot/chat-column-{prompt_name}-{nr}-shot.pkl', "rb") as f:
    preds = pickle.load(f)

In [111]:
preds

['description',
 'availability',
 'It seems that the',
 'event status',
 'event status',
 'in album',
 'telephone',
 'email',
 'telephone',
 'country of origin',
 'product ID',
 'actor',
 'director',
 'image',
 'actor',
 'content rating',
 'genre',
 'director',
 'nationality',
 'job title',
 'birth date',
 'gender',
 'The relationship between Column',
 'job title',
 'telephone',
 'gender',
 'category',
 'product ID',
 'item condition',
 'author',
 'price',
 'publisher',
 'availability',
 'number of pages',
 'by artist',
 'in language',
 'author',
 'date posted',
 'address',
 'event status',
 'date created',
 'in language',
 'latitude',
 'latitude',
 'rating value',
 'rating value',
 'rating value',
 'availability',
 'availability',
 'location',
 'check-in time',
 'in language',
 'price range',
 'location',
 'opening hours',
 'image',
 'email',
 'opening hours',
 'rating value',
 'rating value',
 'rating value',
 'alumni of',
 'job title',
 'duration',
 'content rating',
 'production co