In [1]:
import datasets
import json
import numpy as np
import pandas as pd
import random
import re
import time

from ast import literal_eval
from pathlib import Path
from SPARQLWrapper import SPARQLWrapper, RDF, JSON

from amrlib.evaluate.smatch_enhanced import compute_scores, compute_smatch

from transformers import OpenAIGPTTokenizer

import os
import openai

In [None]:
# Notes: gpt4-1106 improves on the task of hallucination detection vs gpt4...

In [2]:
# Run-level switches.
# make_call gates the evaluation loop that sends prompts to the OpenAI API.
# write_data is not referenced in the cells visible here -- NOTE(review):
# presumably it gates saving the results; confirm against the later cells.
make_call = write_data = True

# JSON file with the prompts and gold annotations that the notebook loads.
data_path = Path('../data/gpt4-prompts-joint-amr-sparql-qald9-no-oracle_2023-10.json')


In [3]:
# Shared SPARQL client pointed at the public DBpedia endpoint; the
# well-formedness check defined further down sends its queries through it.
sparql = SPARQLWrapper(endpoint="http://dbpedia.org/sparql")

# Show which response format the client is configured with (the recorded
# output of this cell is `xml`).
print(sparql.returnFormat)

xml


In [10]:
# OpenAI account settings: the organization id is set inline, the API key is
# read from a local file so that it never appears in the notebook itself.
openai.organization = "your-org"

# Surrounding whitespace (e.g. a trailing newline) is stripped from the key.
openai.api_key = Path('your-api-2023-key').read_text().strip()

In [5]:
# Load the prompt/gold-annotation records.
# The encoding is pinned to UTF-8 (the JSON interchange encoding): the
# questions include non-ASCII text (German, Russian), and without an explicit
# encoding open() falls back to the platform locale, which can fail or garble
# the text on systems whose default is not UTF-8.
with open(data_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Number of loaded records (displayed as the cell output).
len(data)

49

In [6]:
def executability_sparql(query):
    """Send ``query`` through the module-level ``sparql`` client and print a verdict.

    Prints "SPARQL well-formed" when the query could be sent and its response
    converted, followed by "SPARQL RESULT" when the XML response is not an
    empty ``<results>`` element. Prints "MALFORMED QUERY" when sending or
    converting raises an exception.

    Note that *any* ``Exception`` is reported as a malformed query, so an
    unreachable endpoint is indistinguishable from a syntax error here.

    Args:
        query: SPARQL query string to execute.

    Returns:
        None; the verdict is only printed.
    """
    # A <results> element with nothing but whitespace inside, i.e. no rows.
    empty_results = r'<results distinct="false" ordered="true">\s*</results>'

    try:
        sparql.setQuery(query)
        # toxml() is called on the converted response, so this relies on the
        # client returning an XML document object.
        response_xml = sparql.query().convert().toxml()
    except Exception:
        # Deliberately not a bare `except:` -- that would also swallow
        # KeyboardInterrupt/SystemExit, making a hanging endpoint call
        # impossible to interrupt and mislabelling the interrupt as a
        # malformed query.
        print("MALFORMED QUERY")
        return

    print("SPARQL well-formed")
    if not re.search(empty_results, response_xml):
        print("SPARQL RESULT")

In [11]:
def send_prompt(MESSAGES: list) -> str:
    """Submit a chat conversation to the OpenAI API and return the reply text.

    Args:
        MESSAGES: the chat messages, forwarded unchanged as the ``messages``
            argument of ``openai.ChatCompletion.create``.

    Returns:
        The ``content`` of the message of the first returned choice.
    """
    # Fixed generation settings for every request; the model name was
    # previously switched from "gpt-3.5-turbo" to the GPT-4 preview below.
    request_options = {
        "model": "gpt-4-1106-preview",
        "messages": MESSAGES,
        "temperature": 1.0,
        "max_tokens": 512,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }

    response = openai.ChatCompletion.create(**request_options)
    first_choice = response.choices[0]
    return first_choice.message['content']

In [12]:
completed_data = []

# Evaluate a random subset of at most 10 examples. min() keeps random.sample
# from raising ValueError when the dataset holds fewer than 10 entries.
# NOTE(review): no random seed is set, so every run picks different examples.
random_indices = random.sample(range(len(data)), min(10, len(data)))

# Give up on an example after this many failed API attempts instead of
# retrying forever (a permanent failure such as an invalid key never recovers).
MAX_ATTEMPTS = 5


def _uses_relation(sparql_text, relation):
    """Return True when ``relation`` occurs in ``sparql_text`` as a standalone name.

    A plain substring test would also fire on a variable such as ``?owner`` or
    on a longer name such as ``ownerOf``; here the match must not be preceded
    by a word character, ``?``, ``$`` or ``-`` and must not be followed by a
    word character or ``-``.
    """
    pattern = r'(?<![\w?$-])' + re.escape(relation) + r'(?![\w-])'
    return re.search(pattern, sparql_text) is not None


if make_call:

    for i in random_indices:

        # Same dict object as data[i]: the updates below remain visible in `data`.
        example = data[i]

        # Only examples that have an excluded relation are evaluated.
        if not example['rel_excluded']:
            continue

        thisPrompt = example['messages']

        # Bounded retry with a doubling wait (2, 4, 8, ... seconds).
        delay = 2
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                completion = send_prompt(thisPrompt)
                break
            except Exception as err:
                print(f'Attempt {attempt}/{MAX_ATTEMPTS} failed: {err!r}')
                if attempt < MAX_ATTEMPTS:
                    print(f'Waiting {delay} secs')
                    print()
                    time.sleep(delay)
                    delay *= 2
        else:
            # Every attempt failed: report it and move on to the next example.
            print(f"Skipping after {MAX_ATTEMPTS} failed attempts: {example['question']}")
            print()
            continue

        print('*****')
        print(f"Question: {example['question']}")
        print(f"Gold AMR: {example['gold_amr']}")
        print()
        print(f"Gold SPARQL: {example['gold_sparql']}")
        print()
        # rel_excluded is known to be non-empty here (guard above).
        print(f"Rel excluded: {example['rel_excluded']}")
        print()
        print(completion)
        print()

        try:
            # The reply is expected to be a Python-literal list of dicts with
            # at least the keys 'type' and 'prediction'; literal_eval only
            # evaluates literals, never arbitrary code.
            literal_completion = literal_eval(completion)
            example['completion'] = literal_completion

            for item in literal_completion:
                if item['type']=='AMR':
                    precision, recall, f_score = compute_smatch([item['prediction']], [example['gold_amr']])
                    print('Smatch -> P: %.3f,  R: %.3f,  F: %.3f' % (precision, recall, f_score))
                    print()

                else:
                    # Every non-AMR item is treated as a SPARQL prediction.
                    # rel_excluded is handled as a single relation name (it is
                    # printed as a bare string in the recorded output).
                    if _uses_relation(item['prediction'], example['rel_excluded']):
                        print(f"HALLUCINATION! {example['rel_excluded']} in SPARQL")
                    else:
                        print("Yeah, no hallucination!")

                    # check well-formedness
                    executability_sparql(item['prediction'])

        except Exception as e:
            # Keep the raw reply and flag the example, and say what went wrong
            # instead of hiding the cause.
            example['completion'] = completion
            print('**Error**')
            print(f'{type(e).__name__}: {e}')
            print()
            example['note'] = 'error'

        completed_data.append(example)

        # Short pause between examples to space out the API requests.
        time.sleep(2)



*****
Question: wem gehören die universal studios
Gold AMR: (o / own-01 :ARG0 (a / amr-unknown) :ARG1 (c / company :name (n / name :op1 "Universal" :op2 "Studios")))

Gold SPARQL: PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> SELECT DISTINCT ?uri WHERE { res:Universal_Studios dbo:owner ?uri }

Rel excluded: owner

[{'rank': 1, 'prediction': 'PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> SELECT ?owner WHERE { res:Universal_Studios dbo:owner ?owner }', 'type': 'SPARQL', 'confidence': 0.62, 'relations': ['owner'], 'verification': [True]}, {'rank': 2, 'prediction': '(o / own-01 :ARG0 (p / person :ARG0-of (h / have-org-role-91 :ARG1 (c / company :name (n / name :op1 "Universal" :op2 "Studios")))) :ARG1 c)', 'type': 'AMR', 'confidence': 0.38, 'relations': [], 'verification': []}]

HALLUCINATION! owner in SPARQL
SPARQL well-formed
Smatch -> P: 0.692,  R: 0.900,  F: 0.783

*****
Question: какие официальные языки на