In [1]:
import ast
import time

import datasets
import numpy as np
from tqdm import tqdm_notebook

from utils.eval_utils import micro_precision, micro_recall
from utils.openai_utils import system_prompt_1, system_prompt_2, get_completion_first_query, get_completion_second_query
from utils.index_utils import load_prop_mapping, load_index, top5_relations

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Which SynthIE configuration to evaluate on.
# Alternatives: 'synthie_code', 'rebel', 'synthie_text_pc', 'synthie_code_pc', 'rebel_pc'
dataset_name = 'synthie_text'
# Which split to load. Alternatives: "train", "test_small"
split = "test"

# Load the chosen configuration/split of the SynthIE dataset.
dataset = datasets.load_dataset(
    path="martinjosifoski/SynthIE",
    name=dataset_name,
    split=split,
)

In [3]:
def transform_targets(sample):
    """Render a sample's gold triplets as ``relation(head, tail)`` strings.

    Parameters
    ----------
    sample : iterable of mapping
        Each item must provide 'subject', 'predicate' and 'object' entries.
        Every entry is a string containing a Python dict literal that has a
        'surfaceform' key.

    Returns
    -------
    list of str
        One ``"relation(head, tail)"`` string per triplet, in input order.
        Whitespace runs in the relation surface form are replaced by a single
        underscore; underscores in the head and tail surface forms are
        replaced by spaces.

    Raises
    ------
    ValueError, SyntaxError
        If a field is not a valid Python literal.
    KeyError
        If a parsed field has no 'surfaceform' key.
    """
    def surfaceform(field):
        # The fields come from a downloaded dataset, so they are parsed with
        # ast.literal_eval (literals only) rather than eval(), which would
        # execute arbitrary expressions embedded in the data.
        return ast.literal_eval(field)['surfaceform']

    targets = []
    for t in sample:
        rel = "_".join(surfaceform(t['predicate']).split())
        head = " ".join(surfaceform(t['subject']).split("_"))
        tail = " ".join(surfaceform(t['object']).split("_"))
        targets.append(f"{rel}({head}, {tail})")

    return targets

In [4]:
# Sanity-check the target formatting on the first sample.
# Index the row first and then the field: `dataset['triplets'][0]` would read
# the entire 'triplets' column just to pick out one element.
transform_targets(dataset[0]['triplets'])

["record_label(Pickin' Up the Pieces (Fitz and the Tantrums album), Dangerbird Records)",
 "place_of_publication(Pickin' Up the Pieces (Fitz and the Tantrums album), United States)",
 "genre(Pickin' Up the Pieces (Fitz and the Tantrums album), Neo soul)",
 'founded_by(Dangerbird Records, Jeff Castelaz)']

In [5]:
# Number of leading dataset rows to run through the two-step extraction.
N_SAMPLES = 20

results = []  # per-sample output of the second (refinement) query
targets = []  # per-sample gold triplets formatted by transform_targets

faiss_index = load_index()
prop_names = load_prop_mapping()

# Cap at the dataset size so a short split does not raise IndexError.
for i in range(min(N_SAMPLES, len(dataset))):

    # Fetch the row once; `dataset['triplets'][i]` would re-read the whole
    # 'triplets' column on every iteration.
    sample = dataset[i]
    text = sample['text']

    # Step 1: first LLM query on the raw text.
    llm_triplets = get_completion_first_query(system_prompt=system_prompt_1, text=text)
    # Look up similar relations for the first-pass output via the FAISS index.
    similar_relations = top5_relations(llm_output=llm_triplets, faiss_index=faiss_index, prop_names=prop_names)
    triplets = "\n".join(llm_triplets)

    # Step 2: second LLM query, given the first-pass triplets and the similar relations.
    res = get_completion_second_query(text=text, triplets=triplets, similar_relations=similar_relations, system_prompt=system_prompt_2)
    target = transform_targets(sample['triplets'])

    print("TEXT: " + text)
    print("RESULTS: " + "\n".join(res), end='\n')
    print('TARGETS: ' + "\n".join(target), end='\n\n')

    results.append(res)
    targets.append(target)

    # Pause between samples — presumably to stay under the API rate limit; TODO confirm.
    time.sleep(1)

micro_precision(results, targets)

TEXT: Pickin' Up the Pieces (Fitz and the Tantrums album) was published in the United States by Dangerbird Records, a label founded by Jeff Castelaz, and is of the Neo soul genre.
RESULTS: published_in(Pickin' Up the Pieces, United States)
published_by(Pickin' Up the Pieces, Dangerbird Records)
founded_by(Dangerbird Records, Jeff Castelaz)
genre(Pickin' Up the Pieces, Neo soul)
TARGETS: record_label(Pickin' Up the Pieces (Fitz and the Tantrums album), Dangerbird Records)
place_of_publication(Pickin' Up the Pieces (Fitz and the Tantrums album), United States)
genre(Pickin' Up the Pieces (Fitz and the Tantrums album), Neo soul)
founded_by(Dangerbird Records, Jeff Castelaz)

TEXT: Primarily Jazz was recorded at Fullerton College.
RESULTS: recorded_at(Primarily Jazz, Fullerton College)
TARGETS: recording_location(Primarily Jazz, Fullerton College)

TEXT: Njombe Region is an instance of a region in Tanzania, with its capital being Njombe Mjini, which is the capital of Njombe Urban District.

0.2465753424657534

In [8]:
# Score the collected predictions (`results`) against the gold triplets
# (`targets`) with the project's micro_precision helper from utils.eval_utils.
# NOTE(review): presumably micro-averaged precision over all samples — the
# helper is not shown here; confirm.
micro_precision(results, targets)

0.2465753424657534

In [9]:
# Score the collected predictions (`results`) against the gold triplets
# (`targets`) with the project's micro_recall helper from utils.eval_utils.
# NOTE(review): presumably micro-averaged recall over all samples — the
# helper is not shown here; confirm.
micro_recall(results, targets)

0.24324324324324326