# QALD Data Question and Query Translation
- Method 0: Answer the QALD-9 questions directly. Instruct ChatGPT to answer the QALD-9 questions directly without querying DBpedia
- Method 1: Translate QALD-9 questions directly. Instruct ChatGPT to translate QALD-9 questions directly, and then query DBpedia using the translated queries.
- Method 2: 1-shot learning from a pair of train question and query. Using the embeddings of the test and train questions to find the most similar train question to the test question. Prompt ChatGPT with the pair of matched train question and query. Instruct ChatGPT to translate a test question to a SPARQL query over DBpedia.
- Method 3: 1-shot learning from a matched pair of train question and query, plus the chain-of-thought of the train query. Same as Method 2, except that the prompt also includes the chain-of-thought of the train query in addition to the matched question/query pair.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json, re, os, nltk

## Pre-processing QALD Train Data

In [None]:
with open('../data/QALD/9/data/qald-9-train-multilingual.json', 'r') as file:
    train_json = json.load(file)

In [None]:
train_all = pd.DataFrame(train_json['questions'])
train_all

In [None]:
train_all.columns

In [None]:
train_all.question[39]

### Flatten the QALD train data

In [None]:
# Collect, for every train row, the English question string and its keywords.
questions = []
keywords = []
for idx, row in train_all.iterrows():
    question = row['question']
    for q in question:
        try:
            if q['language'] != 'en':
                continue
            # Read both fields before appending anything, so that a malformed
            # entry can never leave the two lists with different lengths.
            text, kws = q['string'], q['keywords']
        except (KeyError, TypeError):
            # Malformed language entry: show it and keep scanning the rest.
            print(q)
            continue
        questions.append(text)
        keywords.append(kws)
        break

In [None]:
len(questions), len(keywords)

In [None]:
# Pull the SPARQL text out of each row's query record.
queries = train_all['query'].map(lambda entry: entry['sparql'])

In [None]:
len(queries)

In [None]:
# 'head' of the first answer record of every row.
heads = train_all['answers'].apply(lambda answers: answers[0]['head'])

In [None]:
# 'results' of the first answer record of every row.
results = train_all['answers'].apply(lambda answers: answers[0]['results'])

In [None]:
train_all['question_text'] = questions

In [None]:
train_all['sparql_query'] = queries

In [None]:
train_all['question_keywords'] = keywords

In [None]:
train_all['answer_head'] = heads

In [None]:
train_all['answer_results'] = results

In [None]:
#train_all[['id', 'answertype', 'aggregation', 'onlydbo', 'hybrid', 'question_text', 
#           'question_keywords', 'sparql_query', 'answer_head', 'answer_results', 'question', 
#           'query', 'answers']].to_csv('../data/QALD/9/data/qald-9-train.csv', index=None)

## Pre-processing QALD Test Data

In [None]:
with open('../data/QALD/9/data/qald-9-test-multilingual.json', 'r') as file:
    test_json = json.load(file)

In [None]:
test_all = pd.DataFrame(test_json['questions'])
test_all

In [None]:
test_all.columns

In [None]:
test_all.question[0]

### Flatten the QALD test data

In [None]:
# Collect, for every test row, the English question string and its keywords.
questions = []
keywords = []
for idx, row in test_all.iterrows():
    question = row['question']
    for q in question:
        try:
            if q['language'] != 'en':
                continue
            # Read both fields before appending anything, so that a malformed
            # entry can never leave the two lists with different lengths.
            text, kws = q['string'], q['keywords']
        except (KeyError, TypeError):
            # Malformed language entry: show it and keep scanning the rest.
            print(q)
            continue
        questions.append(text)
        keywords.append(kws)
        break

In [None]:
len(questions), len(keywords)

In [None]:
# Pull the SPARQL text out of each row's query record.
queries = test_all['query'].map(lambda entry: entry['sparql'])

In [None]:
len(queries)

In [None]:
# 'head' of the first answer record of every row.
heads = test_all['answers'].apply(lambda answers: answers[0]['head'])

In [None]:
# 'results' of the first answer record of every row.
results = test_all['answers'].apply(lambda answers: answers[0]['results'])

In [None]:
test_all['question_text'] = questions

In [None]:
test_all['sparql_query'] = queries

In [None]:
test_all['question_keywords'] = keywords

In [None]:
test_all['answer_head'] = heads

In [None]:
test_all['answer_results'] = results

In [None]:
#test_all[['id', 'answertype', 'aggregation', 'onlydbo', 'hybrid', 'question_text', 
#           'question_keywords', 'sparql_query', 'answer_head', 'answer_results', 'question', 
#           'query', 'answers']].to_csv('../data/QALD/9/data/qald-9-test.csv', index=None)

## Load the train and test data

In [None]:
train = pd.read_csv('../data/QALD/9/data/qald-9-train.csv')
train.head()

In [None]:
test = pd.read_csv('../data/QALD/9/data/qald-9-test.csv')
test.head()

In [None]:
with open('../data/QALD/9/data/qald-9-test-multilingual.json', 'r') as file:
    test_json = json.load(file)

In [None]:
test_json_df = pd.DataFrame(test_json['questions'])

## Ask Test Questions through GPT-3

In [None]:
import os
openai_key_path = "OPENAI_KEY_PATH"
with open(openai_key_path, 'r') as f:
    # readline() keeps the trailing newline; strip it so the key is usable
    # as-is when it is sent to the API.
    openai_key = f.readline().strip()
# My OpenAI Key
os.environ['OPENAI_API_KEY'] = openai_key

In [None]:
import os
import openai

In [None]:
from tqdm import tqdm

In [None]:
# Ask every test question directly through the Completion endpoint and keep
# the raw response objects for the evaluation cells.
gpt_answers = []
for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    
    question_text = row['question_text']
    keywords = row['question_keywords']
    
    # Resource answers are requested as DBpedia resources; any other answer
    # type is passed to the prompt verbatim.
    answertype = row['answertype']
    answertype_text = ""
    if answertype == 'resource':
        answertype_text = 'resource in DBpedia-2016-04'
    else:
        answertype_text = answertype
    
    answeragg = row['aggregation']
    answeragg_text = "The answers do not need aggregation"
    if answeragg:
        answeragg_text = "The answers need aggregation"
        
    prompt_template = "Use DBpedia-2016-04 knowledge base. \
        Answer the question. No comments. List answers only. \n \
        The keywords in the question are \"{}\". \n\
        Output the answers as {}. \n\
        {}. \n\
        QUESTION: {} \n \
        ANSWER: \n"
    
    prompt = prompt_template.format(keywords, answertype_text, answeragg_text, 
                                    question_text)
    
    
    response = openai.Completion.create(
        # The request previously named no model at all (both candidates were
        # commented out).
        # NOTE(review): text-davinci-003 is taken from those commented-out
        # candidates; confirm it is the model the stored answers came from.
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.1,
        max_tokens=3500,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    
    gpt_answers.append(response)

In [None]:
len(gpt_answers)

In [None]:
import pickle

In [None]:
#with open('../data/QALD/9/data/qald-test-qpt3-answers-question-keywords.pk', 'wb') as f:
    #pickle.dump(gpt_answers, f)

### Evaluate the GPT_Answers on Test 

In [None]:
with open('../data/QALD/9/data/qald-test-qpt3-answers-question-keywords.pk', 'rb') as f:
    gpt_answers = pickle.load(f)

In [None]:
len(gpt_answers)

In [None]:
# Extract the answer terms from the GPT-3 answer text.
# The loop reads the 'gpt_answers_text' column, which no earlier cell creates;
# when it is missing, build it from the raw Completion responses.
if 'gpt_answers_text' not in test.columns:
    test['gpt_answers_text'] = [resp['choices'][0]['text'] for resp in gpt_answers]

gpt_answer_terms = []
for idx, row in test.iterrows():
    answers_text = row['gpt_answers_text']
    # Drop the resource/ontology prefixes, lowercase, one term per line.
    terms = answers_text.replace('http://dbpedia.org/resource/', '').replace('dbo:', '').strip().lower().split('\n')
    gpt_answer_terms.append([t.strip() for t in terms])

In [None]:
len(gpt_answer_terms)

In [None]:
import urllib.parse

# Percent-decode every predicted term, keeping the per-question grouping.
gpt_answer_terms_parsed = [
    [urllib.parse.unquote(term) for term in terms]
    for terms in gpt_answer_terms
]

In [None]:
gpt_answer_terms_parsed

In [None]:
# Build the list of normalised gold answer terms for every test question.
answer_terms = []
count = 0  # incremented once per question that falls back to a boolean answer
for idx, row in test_json_df.iterrows():
    try:
        bindings = row['answers'][0]['results']['bindings']
        # Every variable of every binding contributes one answer value.
        answer_list = [item[k]['value'] for item in bindings for k in item]
    except (KeyError, IndexError, TypeError):
        # No result bindings on this answer: use its 'boolean' field instead.
        answer_terms.append([str(row['answers'][0]['boolean']).lower()])
        count += 1
    else:
        answer_terms.append([
            ans.replace('http://dbpedia.org/resource/', '').replace('dbo:', '').strip().lower()
            for ans in answer_list
        ])

In [None]:
len(answer_terms)

In [None]:
# Tally predicted terms, gold terms and matched predictions over all questions.
predicted = 0
gold = 0
predicted_correct = 0
some_matched = {}
for idx, pred_terms in enumerate(gpt_answer_terms_parsed):
    gold_terms = answer_terms[idx]

    predicted += len(pred_terms)
    gold += len(gold_terms)

    # A non-empty predicted term counts once if it is a substring of, or
    # contains, at least one gold term.
    hits = sum(
        1
        for pterm in pred_terms
        if pterm and any((pterm in gterm) or (gterm in pterm) for gterm in gold_terms)
    )
    predicted_correct += hits
    some_matched[idx] = hits

In [None]:
precision = predicted_correct / predicted
precision

In [None]:
recall = predicted_correct/gold
recall

In [None]:
# Harmonic mean of precision and recall; defined as 0 when both are 0 or
# either is 0, instead of dividing by zero.
f1 = 2 * precision * recall / (precision + recall) if (precision and recall) else 0.0
f1

## Use ChatGPT to answer and translate the test questions 
- Use ChatGPT to answer the questions directly
- Use ChatGPT to translate user questions to queries
- Use ChatGPT to translate user questions to queries by few-shot learning
- Use ChatGPT to translate user questions to queries by few-shot learning and chain of thought

### Use ChatGPT to answer the questions directly on DBpedia-03132023

In [None]:
test.columns

In [None]:
# Prompt template for asking ChatGPT to answer a question directly.
# NOTE(review): the answering loop further down assigns its own `msg`
# template before formatting, so this definition appears to be unused;
# confirm and remove, or make the loop use it.
msg = """
Answer the question. Output the answer only. No comments in the output. 
The keywords in the question are "{}". 
Output the answers as "{}". 
If the question cannot be answered using the DBpedia-2016-04 knowledge base, 
output "None".

QUESTION: {}

ANSWER:
"""

In [None]:
from tqdm import tqdm

In [None]:
# Ask ChatGPT every test question directly and keep the answer text.
chatgpt_answers = []
count = 0
for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1
    question_text = row['question_text']
    keywords = row['question_keywords']

    # Resource answers are requested as URIs; other types pass through as-is.
    answertype = row['answertype']
    answertype_text = 'DBpedia Resource URI(s)' if answertype == 'resource' else answertype

    # Computed for every row, but not one of the values formatted into the prompt.
    answeragg = row['aggregation']
    answeragg_text = ("The answers need aggregration" if answeragg
                      else "The answers do not need aggregation")

    msg = """
        Answer the question. No comments. List answers only.  
        The keywords in the question are {}. 
        Output the answers as {}. 

        QUESTION: {}

        ANSWER: 
    """.format(keywords, answertype_text, question_text)

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant using \
               DBpedia to answer questions."},
        {"role": "user", "content": msg}
    ]
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=chat_messages)

    chatgpt_answers.append(response['choices'][0]['message']['content'])

In [None]:
test['chatgpt_answers_text_DBpedia_2023_03'] = chatgpt_answers

#### Evaluate the results of using ChatGPT to answer the questions directly on DBpedia-03132023

In [None]:
# Split each ChatGPT answer text into lowercase terms, one per line, after
# removing the DBpedia resource/ontology prefixes.
chatgpt_answer_terms = []
for answers_text in test['chatgpt_answers_text_DBpedia_2023_03']:
    cleaned = answers_text
    for prefix in ('https://dbpedia.org/resource/', 'http://dbpedia.org/resource/', 'dbo:'):
        cleaned = cleaned.replace(prefix, '')
    chatgpt_answer_terms.append([t.strip() for t in cleaned.strip().lower().split('\n')])

In [None]:
chatgpt_answers

In [None]:
len(chatgpt_answer_terms)

In [None]:
import urllib.parse

# Percent-decode every predicted term, keeping the per-question grouping.
chatgpt_answer_terms_parsed = [
    [urllib.parse.unquote(term) for term in terms]
    for terms in chatgpt_answer_terms
]

In [None]:
chatgpt_answer_terms_parsed

In [None]:
test.columns

In [None]:
# Build the list of normalised gold answer terms for every test question.
answer_terms = []
count = 0  # incremented once per question that falls back to a boolean answer
for idx, row in test_json_df.iterrows():
    try:
        bindings = row['answers'][0]['results']['bindings']
        # Every variable of every binding contributes one answer value.
        answer_list = [item[k]['value'] for item in bindings for k in item]
    except (KeyError, IndexError, TypeError):
        # No result bindings on this answer: use its 'boolean' field instead.
        answer_terms.append([str(row['answers'][0]['boolean']).lower()])
        count += 1
    else:
        answer_terms.append([
            ans.replace('http://dbpedia.org/resource/', '').replace('dbo:', '').strip().lower()
            for ans in answer_list
        ])

In [None]:
len(answer_terms)

In [None]:
answer_terms

In [None]:
# Count predicted terms, gold terms and matched predictions across all
# questions; precision and recall are derived from these totals.
predicted = 0
gold = 0
predicted_correct = 0
some_matched = {}
pre_gold_lengths = []
for idx, pred_terms in enumerate(chatgpt_answer_terms_parsed):
    gold_terms = answer_terms[idx]

    predicted += len(pred_terms)
    gold += len(gold_terms)
    pre_gold_lengths.append((idx, len(pred_terms), len(gold_terms)))

    # Compare with underscores turned into spaces on both sides.
    gold_norm = [g.replace("_", " ") for g in gold_terms]
    hits = 0
    for pterm in pred_terms:
        if not pterm:  # skip an empty string
            continue
        p_norm = pterm.replace("_", " ")
        # A predicted term is counted at most once, however many gold terms it matches.
        if any((p_norm in g) or (g in p_norm) for g in gold_norm):
            hits += 1

    predicted_correct += hits
    some_matched[idx] = hits > 0

In [None]:
pre_gold_lengths

In [None]:
precision = predicted_correct / predicted
precision

In [None]:
recall = predicted_correct/gold
recall

In [None]:
# Harmonic mean of precision and recall; defined as 0 when both are 0 or
# either is 0, instead of dividing by zero.
f1 = 2 * precision * recall / (precision + recall) if (precision and recall) else 0.0
f1

In [None]:
# Adjusted scores: fixed counts are subtracted from the totals before
# precision, recall and F1 are recomputed.
# NOTE(review): 240 and 1714 are hard-coded and their origin is not shown in
# this notebook; presumably counts belonging to questions excluded from
# scoring. Confirm and derive them from the data.
adj_precision = (predicted_correct-240) / (predicted-240)
adj_recall = (predicted_correct-240)/ (gold - 1714)
adj_f1 = 2 / (1/adj_precision +  1/adj_recall)
print('adj_precision:{},\nadj_recall:{},\nadj_f1:{}'.format(adj_precision, adj_recall, adj_f1))
predicted_correct, predicted, gold

### Use ChatGPT to translate user questions to queries

In [None]:
# Run every gold SPARQL query of the test set against the public DBpedia
# endpoint (state as of March 2023) and keep the converted JSON result.

from SPARQLWrapper import SPARQLWrapper, JSON

# endpoint client, returning JSON
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
sparql.setReturnFormat(JSON)

gold_query_results = []
count = 0
for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1
    gold_query = row['sparql_query']
    sparql.setQuery(gold_query)

    try:
        ret = sparql.queryAndConvert()
    except Exception as e:
        # Keep the row aligned with the test set by storing a marker.
        gold_query_results.append('ERROR')
        print(e)
    else:
        gold_query_results.append(ret)

In [None]:
gold_query_results

In [None]:
test['gold_query_results_03132023'] = gold_query_results

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

In [None]:
# Ask ChatGPT to translate each test question straight into a SPARQL query.

chatgpt_queries = []
count = 0
for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1
    question_text = row['question_text']
    keywords = row['question_keywords']

    # Resource answers are requested as URIs; other types pass through as-is.
    answertype = row['answertype']
    answertype_text = 'DBpedia Resource URI(s)' if answertype == 'resource' else answertype

    # Computed for every row, but not one of the values formatted into the prompt.
    answeragg = row['aggregation']
    answeragg_text = ("The answers need aggregration" if answeragg
                      else "The answers do not need aggregation")

    msg = """
        Translate the following question to SPARQL query on the 
        DBpedia knowledge base. The output query should include 
        all necessary prefixes for querying the current DBpedia endpoint. 
        No comments. Output SPARQL query only.
        The query should return answers as {}.
        
        QUESTION: {} 
        
        QUERY: 
    """.format(answertype_text, question_text)

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant focusing on \
             DBpedia. You will translate user questions to SPARQL queries on \
             the current DBpedia knowledge base."},
        {"role": "user", "content": msg}
    ]
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=chat_messages)

    chatgpt_queries.append(response['choices'][0]['message']['content'])

In [None]:
chatgpt_queries

In [None]:
chatgpt_queries_text = [" ".join(item.split('\n')).strip() for item in chatgpt_queries]

In [None]:
test['chatgpt_query_DBpedia_2023_03'] = chatgpt_queries_text

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

In [None]:
test.columns

#### Retrieve chatgpt_query_results_DBpedia_2023_03

In [None]:
# NOTE(review): this reloads the base test CSV, replacing the in-memory frame.
# The next loop reads the 'chatgpt_query_DBpedia_2023_03' column, so that
# column must already be stored in this file. Confirm the path is the
# intended one.
test = pd.read_csv('../data/QALD/9/data/qald-9-test.csv')
test.columns

In [None]:
# Run every ChatGPT-generated query against the public DBpedia endpoint
# (state as of March 2023) and keep either the converted JSON result or the
# exception the query raised.

from SPARQLWrapper import SPARQLWrapper, JSON
from tqdm import tqdm
import ast

# endpoint client, returning JSON
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
sparql.setReturnFormat(JSON)

chatgpt_query_results = []
count = 0
for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1
    chatgpt_query = row['chatgpt_query_DBpedia_2023_03']
    sparql.setQuery(chatgpt_query)

    try:
        ret = sparql.queryAndConvert()
    except Exception as e:
        # The exception object itself is stored, keeping rows aligned.
        chatgpt_query_results.append(e)
        print(e)
    else:
        chatgpt_query_results.append(ret)

In [None]:
len(chatgpt_query_results)

In [None]:
test['chatgpt_query_results_DBpedia_2023_03'] = chatgpt_query_results

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

In [None]:
test.columns

### Use ChatGPT to translate user questions to queries by few-shot learning

In [None]:
train = pd.read_csv('../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv')
train.head()

In [None]:
train.shape

In [None]:
test = pd.read_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv')
test.columns

#### Embed Train Questions

In [None]:
# imports

import tiktoken

from openai.embeddings_utils import get_embedding

In [None]:
# embedding model parameters
embedding_model = "text-embedding-ada-002"
embedding_encoding = "cl100k_base"  # this the encoding for text-embedding-ada-002
max_tokens = 8000  # the maximum for text-embedding-ada-002 is 8191

In [None]:
# Ensure you have your API key set in your environment per the README: https://github.com/openai/openai-python#usage

# This may take a few minutes
# One embedding request per train question, in row order.
train_embeddings = []
for question_text in tqdm(train['question_text'], total=train.shape[0]):
    embedding = get_embedding(question_text, engine=embedding_model)
    train_embeddings.append(embedding)

In [None]:
train['train_question_embedding'] = train_embeddings

In [None]:
train.iloc[9].train_question_embedding

In [None]:
#train.to_csv('../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv', index=None)

In [None]:
# Load the train questions together with their stored embeddings.
import ast

import pandas as pd
import numpy as np

# The embedding column is written to the "-with-embeddings-cot" file by the
# save cells of this notebook; the base train CSV is exported without it.
datafile_path = "../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv"

train = pd.read_csv(datafile_path)
# The CSV stores each embedding as the text of a Python list. literal_eval
# parses that text without executing arbitrary code from the file.
train["train_question_embedding"] = train.train_question_embedding.apply(ast.literal_eval).apply(np.array)

#### Embed test question and search similar train questions

In [None]:
from openai.embeddings_utils import get_embedding, cosine_similarity

In [None]:
def search_question_query(df, input_question, n=3, engine="text-embedding-ada-002"):
    """Find the train questions most similar to ``input_question``.

    Args:
        df: DataFrame with the columns ``train_question_embedding``,
            ``question_text`` and ``sparql_query``. A ``similarity`` column
            is added to (or overwritten in) this frame in place.
        input_question: question text to embed and compare.
        n: number of best matches to return.
        engine: embedding model name passed to ``get_embedding``.

    Returns:
        A tuple ``(input_question_embedding, questions, queries)`` where
        ``questions`` and ``queries`` are lists holding the ``question_text``
        and ``sparql_query`` of the ``n`` rows with the highest similarity,
        best match first.
    """
    input_question_embedding = get_embedding(input_question, engine=engine)

    df["similarity"] = df.train_question_embedding.apply(
        lambda x: cosine_similarity(x, input_question_embedding)
    )

    results = (
        df.sort_values("similarity", ascending=False)
        .head(n)[['question_text', 'sparql_query']]
    )

    return (
        input_question_embedding,
        list(results['question_text'].values),
        list(results['sparql_query'].values),
    )

In [None]:
# One embedding request per test question, in row order.
test_question_embeddings = []
for test_question in tqdm(test['question_text'], total=test.shape[0]):
    question_embedding = get_embedding(test_question, engine="text-embedding-ada-002")
    test_question_embeddings.append(question_embedding)

In [None]:
test['test_question_embedding'] = test_question_embeddings

In [None]:
#test = pd.read_csv('../data/QALD/9/data/qald-9-test.csv')
#test["test_question_embedding"] = test.test_question_embedding.apply(eval).apply(np.array)

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

In [None]:
train = pd.read_csv('../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv')

In [None]:
import ast

# The CSV stores each embedding as the text of a Python list. literal_eval
# parses that text without executing arbitrary code from the file.
train["train_question_embedding"] = train.train_question_embedding.apply(ast.literal_eval).apply(np.array)

In [None]:
#train.to_csv('../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv', index=None)

In [None]:
from openai.embeddings_utils import get_embedding, cosine_similarity

# For every test question, find the single most similar train question and
# remember that question and its SPARQL query.
train_matched_questions = []
train_matched_queries = []
for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    test_question_embedding = row['test_question_embedding']

    # The embeddings live in the `train` frame; `train_embeddings` is the plain
    # list built while embedding and has no columns to select from.
    train_embeddings_similarities = train.train_question_embedding.apply(
        lambda x: cosine_similarity(x, test_question_embedding)
    )

    # idxmax() returns an index label, so select by label rather than position.
    max_idx = train_embeddings_similarities.idxmax()
    results = train.loc[max_idx, ['question_text', 'sparql_query']]

    train_matched_questions.append(results['question_text'])
    train_matched_queries.append(results['sparql_query'])

#### Few-shot learning on matched train question only

In [None]:
from tqdm import tqdm

# Translate every test question with a 1-shot prompt built from the most
# similar train question and its query.
# The previous 5-row debugging cut-off is removed: the result list is later
# assigned as a column of `test` and therefore needs one entry per row.
chatgpt_train_fewshot_query = []

count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    
    count += 1
    
    test_question = row['question_text']

    test_question_embedding = row['test_question_embedding']
    
    train_question_idx = train.index[train['question_text'] == test_question].tolist()
    
    if len(train_question_idx) > 0: # find a matched train question, skip to generate new query
        # The matches are index labels, so look the row up by label.
        chatgpt_train_fewshot_query.append(train.loc[train_question_idx[0], 'sparql_query'])
    
    else:

        train_embeddings_similarities = train.train_question_embedding.\
        apply(lambda x: cosine_similarity(x,test_question_embedding))

        # Index labels of the five most similar train questions, best first.
        top5_idx = train_embeddings_similarities.sort_values(ascending=False)[:5].index

        top5_train_questions = list(train.loc[top5_idx].question_text.values)
        top5_train_queries = list(train.loc[top5_idx].sparql_query.values)

        # Only the best match is shown to the model as the worked example.
        sys_question_query = """
            Learn the following example question and corresponding query.
            
            Question: {} 
            Query: {}
        """
        sys_question_query = sys_question_query.format(top5_train_questions[0], 
                                                  top5_train_queries[0])
    
        msg = """
           Only use the terms defined in the DBpedia ontology. 
           Translate the following question to SPARQL query. 
           Output query only. No comments. 
           Output syntactically correct query only. 
           Some common prefixes can be used in the query:
            PREFIX dbo: <http://dbpedia.org/ontology/>
            PREFIX dbr: <http://dbpedia.org/resource/>
            PREFIX dbc: <http://dbpedia.org/resource/Category:>
            PREFIX foaf: <http://xmlns.com/foaf/0.1/>
            PREFIX dc: <http://purl.org/dc/elements/1.1/>
            PREFIX dct: <http://purl.org/dc/terms/>
        
           Question:{}
           Query:
        """
        msg = msg.format(test_question)

        response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "you are a helpful assistant focusing on DBpedia. \
            You will translate a user question to a SPARQL on the DBpedia knowledge base."},
            {"role": "system", "content": sys_question_query},
            {"role": "user", "content": msg}
            ]
        )


        # Store the generated query as a single line.
        chatgpt_train_fewshot_query.append(response['choices'][0]['message']['content'].\
                                   strip().replace('\n', ' ' ))

In [None]:
chatgpt_train_fewshot_query

In [None]:
test.columns

In [None]:
len(chatgpt_train_fewshot_query)

In [None]:
chatgpt_train_fewshot_query

In [None]:
test['chatgpt_train_1fewshot_query'] = chatgpt_train_fewshot_query

#### Save train and test embedding arrays to disk

In [None]:
#train[['id', 'answertype', 'aggregation', 'onlydbo', 'hybrid', 'question_text',
#       'question_keywords', 'sparql_query', 'train_question_embedding']].\
#to_csv('../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv', index=None)

In [None]:
#test[['id', 'answertype', 'aggregation', 'onlydbo', 'hybrid', 'question_text',
#       'question_keywords', 'sparql_query', 'test_question_embedding']].\
#       to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

#### Retrieve chatgpt_train_fewshot_query results

In [None]:
# Retrieve the results of the 1-shot queries ('chatgpt_train_1fewshot_query')
# by running each of them against the DBpedia endpoint (March 2023).

from SPARQLWrapper import SPARQLWrapper, JSON
from tqdm import tqdm
import ast

# set up the SPARQL endpoint URL
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

sparql.setReturnFormat(JSON)

chatgpt_train_fewshot_query_results = []
count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1
    # Use a separate name for the current row's query, so that the list
    # `chatgpt_train_fewshot_query` built by the generation cell is not
    # overwritten with a single string.
    fewshot_query = row['chatgpt_train_1fewshot_query']
    
    sparql.setQuery(fewshot_query)
    
    try:
        ret = sparql.queryAndConvert()

        chatgpt_train_fewshot_query_results.append(ret)
    except Exception as e:
        # Keep one entry per row: store the exception in place of a result.
        chatgpt_train_fewshot_query_results.append(e)
        print(e)

In [None]:
test.iloc[125].sparql_query

In [None]:
chatgpt_train_fewshot_query_results

In [None]:
test['chatgpt_train_1fewshot_query_results'] = chatgpt_train_fewshot_query_results

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

In [None]:
test.columns

### Use ChatGPT to translate user questions to queries by few-shot learning WITH MASKED ENTITIES

#### First, mask entities in train and test questions

In [None]:
from tqdm import tqdm

# Ask ChatGPT to replace the named entities of each test question with masks.
masked_results = []

count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):

    # Only rows whose index lies strictly between -1 and 200 are processed.
    if -1 < idx < 200:
        t_question = row['question_text']
        t_query = row['sparql_query']
        question_keywords = row['question_keywords']

        msg = """
            Identify any named entities in the question and replace 
            the discovered named entities with [MASK1], [MASK2], etc.
            One mask for each named entity only.
            Merge consecutive masks into one mask.
            No comments. Output masked question only.
           
            Question:{}
            Masked Question:
        """.format(t_question)

        chat_messages = [
                {"role": "system", "content": "you are a helpful assistant focusing on\
                identifying named entities and replacing them with masks."},
                {"role": "user", "content": msg}
                ]
        response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=chat_messages)

        # The raw reply is stored without stripping or joining lines.
        masked_results.append(response['choices'][0]['message']['content'])

In [None]:
masked_results

In [None]:
#with open('../data/QALD/9/data/test_masked_questions.txt', 'w') as file:
#    for item in masked_results:
#        file.write(item + '\n')

In [None]:
# Read the stored masked test questions, one per line, whitespace-trimmed.
masked_cleaned = []
with open('../data/QALD/9/data/test_masked_questions.txt', 'r') as file:
    masked_cleaned.extend(line.strip() for line in file.readlines())
masked_cleaned

In [None]:
len(masked_cleaned)

In [None]:
test['masked_question'] = masked_cleaned

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

In [None]:
from tqdm import tqdm

# Ask ChatGPT to replace the named entities of train SPARQL queries with masks.
masked_results = []

count = 0

for idx, row in tqdm(train.iterrows(), total=train.shape[0]):

    # Only rows whose index lies strictly between 381 and 383 are processed.
    if 381 < idx < 383:
        t_question = row['question_text']
        t_query = row['sparql_query']
        question_keywords = row['question_keywords']

        msg = """
            Identify any named entities in the SPARQL query and replace 
            the identified named entities with [MASK1], [MASK2], etc.
            One mask for each named entity only.
            Merge consecutive masks into one mask.
            Output masked query only. No comments. 
           
            Query:{}
            Masked Query:
        """.format(t_query)

        chat_messages = [
                {"role": "system", "content": "you are a helpful assistant focusing on\
                identifying named entities and replacing them with masks."},
                {"role": "user", "content": msg}
                ]
        response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=chat_messages)

        # The raw reply is stored without stripping or joining lines.
        masked_results.append(response['choices'][0]['message']['content'])

In [None]:
masked_results

In [None]:
len(masked_results)

In [None]:
# Append the masked results of this run to those accumulated by earlier runs.
try:
    masked_20
except NameError:
    # First run in this session: nothing has been accumulated yet.
    masked_20 = []
masked_20.extend(masked_results)
masked_20

In [None]:
len(masked_20)

In [None]:
#with open('../data/QALD/9/data/test_masked_queries.txt', 'w') as file:
#    for item in masked_20:
#        file.write(item + '\n')

In [None]:
# Read the stored masked train queries, one per non-blank line.
masked_cleaned = []
with open('../data/QALD/9/data/train_masked_queries.txt', 'r') as file:
    for line in file:
        stripped = line.strip()
        # Lines read from a file keep their newline, so a blank line only
        # looks empty after stripping; test the stripped text.
        if stripped:
            masked_cleaned.append(stripped)
masked_cleaned

In [None]:
len(masked_cleaned)

In [None]:
train['masked_query'] = masked_cleaned

In [None]:
#train.to_csv('../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv', index=None)

### Get Chain of Thought for train and test masked queries

In [None]:
from tqdm import tqdm

# Ask ChatGPT for a step-by-step explanation of masked train queries.
masked_cot = []

count = 0

for idx, row in tqdm(train.iterrows(), total=train.shape[0]):

    count += 1

    # Only rows whose index lies strictly between 381 and 383 are processed.
    if 381 < idx < 383:
        m_query = row['masked_query']

        msg = """
               Briefly explain the following query in logical steps as a chain of thought. 
               Explain in natural language. 
               Forget what you have about the query before. 
               Assume you are trying to construct the query again.
               Treat the mask variables, [MASK1], [MASK2] as real entities.
               No comments. Output the steps only. 
               Do not include the original query in the explanation. 

               QUERY:{}
               THOUGHT:
        """.format(m_query)

        chat_messages = [
                {"role": "system", "content": "you are a helpful assistant focusing on DBpedia. \
                 You will explain a masked SPARQL query in logical steps to help reconstruct the query. "},
                {"role": "user", "content": msg}
                ]
        response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=chat_messages)

        # Store the explanation as a single line.
        explanation = response['choices'][0]['message']['content']
        masked_cot.append(explanation.strip().replace('\n', ' ' ))

In [None]:
len(masked_cot)

In [None]:
# NOTE(review): `masked_cot_fix` is not defined anywhere in the visible part
# of this notebook (the generation cell fills `masked_cot`); presumably it
# was built by hand in an earlier session. Confirm where it comes from.
train['masked_cot']=masked_cot_fix

In [None]:
#train.to_csv('../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv', index=None)

In [None]:
from tqdm import tqdm

# Ask ChatGPT for a natural-language chain of thought explaining each masked
# test query; replies are collected (newlines flattened to spaces) in masked_cot.
masked_cot = []

count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1

    # Only rows whose index label lies strictly between 99 and 200 are processed;
    # the bounds look like a manual resume window edited between runs.
    if 99 < idx < 200:
        m_query = row['masked_query']

        msg = """
               Briefly explain the following query in logical steps as a chain of thought. 
               Explain in natural language. 
               Forget what you have about the query before. 
               Assume you are trying to construct the query again.
               Treat the mask variables, [MASK1], [MASK2] as real entities.
               No comments. Output the steps only. 
               Do not include the original query in the explanation. 

               QUERY:{}
               THOUGHT:
        """.format(m_query)

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "you are a helpful assistant focusing on DBpedia. \
                 You will explain a masked SPARQL query in logical steps to help reconstruct the query. "},
                {"role": "user", "content": msg},
            ],
        )

        reply = response['choices'][0]['message']['content']
        masked_cot.append(reply.strip().replace('\n', ' '))

In [None]:
masked_cot

In [None]:
# NOTE(review): `masked_cot_50` is not built in the visible cells (the loop above fills
# `masked_cot`); presumably it was assembled by hand from partial runs — confirm before re-running.
test['masked_cot']=masked_cot_50

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

### Embed Train Masked Questions

In [None]:
# imports

import tiktoken

from openai.embeddings_utils import get_embedding

In [None]:
# embedding model parameters
embedding_model = "text-embedding-ada-002"
embedding_encoding = "cl100k_base"  # this is the encoding for text-embedding-ada-002
max_tokens = 8000  # the maximum for text-embedding-ada-002 is 8191

In [None]:
# Ensure you have your API key set in your environment per the README: https://github.com/openai/openai-python#usage

# Embed every masked train question with `embedding_model`, one get_embedding
# call per row, keeping the row order. This may take a few minutes.
train_masked_embeddings = [
    get_embedding(masked_text, engine=embedding_model)
    for masked_text in tqdm(train['masked_question'], total=train.shape[0])
]

In [None]:
train['train_masked_question_embedding'] = train_masked_embeddings

In [None]:
#train.to_csv('../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv', index=None)

### Embed test question and search similar train questions

In [None]:
from openai.embeddings_utils import get_embedding, cosine_similarity

In [None]:
# search through the train for matched question
def search_question(df, input_question, n=3, pprint=True, masked=False):
    """Return the n train questions most similar to input_question.

    The input question is embedded with text-embedding-ada-002 and compared by
    cosine similarity against a stored embedding column of df.

    Args:
        df: DataFrame holding the stored embedding column plus 'question_text'
            and 'masked_question'. A 'similarity' column is written to it in place.
        input_question: text of the question to look up.
        n: number of top rows to return.
        pprint: accepted but currently unused.
        masked: compare against 'train_masked_question_embedding' when True,
            otherwise against 'train_question_embedding'.

    Returns:
        DataFrame with columns 'question_text' and 'masked_question' for the
        n rows with the highest similarity, best first.
    """
    query_vector = get_embedding(input_question, engine="text-embedding-ada-002")

    if masked:
        stored_vectors = df.train_masked_question_embedding
    else:
        stored_vectors = df.train_question_embedding
    df["similarity"] = stored_vectors.apply(lambda vec: cosine_similarity(vec, query_vector))

    ranked = df.sort_values("similarity", ascending=False)
    return ranked.head(n)[['question_text', 'masked_question']]


In [None]:
def search_question_query(df, input_question, n=3, masked=False):
    """Find the n train rows most similar to input_question and return their texts.

    The input question is embedded with text-embedding-ada-002 and compared by
    cosine similarity against a stored embedding column of df. A 'similarity'
    column is written to df in place.

    Args:
        df: DataFrame holding the stored embedding column plus 'question_text',
            'sparql_query', 'masked_question' and 'masked_query'.
        input_question: text of the question to look up.
        n: number of top rows to return.
        masked: compare against 'train_masked_question_embedding' when True,
            otherwise against 'train_question_embedding'.

    Returns:
        A 5-tuple: the embedding of input_question, followed by four lists
        (question texts, SPARQL queries, masked questions, masked queries)
        taken from the n most similar rows, best first.
    """
    input_question_embedding = get_embedding(input_question, engine="text-embedding-ada-002")

    stored_vectors = (
        df.train_masked_question_embedding if masked else df.train_question_embedding
    )
    df["similarity"] = stored_vectors.apply(
        lambda vec: cosine_similarity(vec, input_question_embedding)
    )

    wanted = ['question_text', 'sparql_query', 'masked_question', 'masked_query']
    top_rows = df.sort_values("similarity", ascending=False).head(n)[wanted]

    return (input_question_embedding, *(list(top_rows[name].values) for name in wanted))

In [None]:
# Embed the masked text of every test question, one get_embedding call per row,
# keeping the row order.
test_question_embeddings = [
    get_embedding(masked_text, engine="text-embedding-ada-002")
    for masked_text in tqdm(test['masked_question'], total=test.shape[0])
]

In [None]:
test['test_question_embedding'] = test_question_embeddings

In [None]:
#test = pd.read_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv')
#test["test_question_embedding"] = test.test_question_embedding.apply(eval).apply(np.array)

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

In [None]:
#train = pd.read_csv('../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv')

In [None]:
#train["train_question_embedding"] = train.train_question_embedding.apply(eval).apply(np.array)

In [None]:
#train.to_csv('../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv', index=None)

In [None]:
from openai.embeddings_utils import get_embedding, cosine_similarity

# For every test question, pick the single train question whose embedding has the
# highest cosine similarity and record that train question and its query.
# NOTE(review): `train_embeddings` (columns question_embedding / question / query) is
# not defined in the visible cells — confirm it is loaded earlier in the notebook.
train_matched_questions = []
train_matched_queries = []
for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    test_question_embedding = row['test_question_embedding']

    train_embeddings_similarities = train_embeddings.question_embedding.apply(
        lambda x: cosine_similarity(x, test_question_embedding)
    )

    # idxmax() returns an index *label*, so the row is fetched with .loc;
    # .iloc would only pick the right row for a default 0..n-1 index.
    max_idx = train_embeddings_similarities.idxmax()

    results = train_embeddings.loc[max_idx, ['question', 'query']]

    matched_question = results['question']
    matched_query = results['query']

    train_matched_questions.append(matched_question)
    train_matched_queries.append(matched_query)

### Few-shot learning on matched train question and chain of thought

In [None]:
from tqdm import tqdm

# 1-shot prompting with chain of thought: show ChatGPT the most similar train question
# (by masked-question embedding) with its logical steps and SPARQL query, then ask for a
# query for the test question. A test question that also appears verbatim in train reuses
# the train query without calling the model.
chatgpt_train_fewshot_query = []

count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):

    # Window over the test index labels handled in this run.
    if (idx > -1) and (idx < 200):

        test_question = row['question_text']

        test_masked_question = row['masked_question']

        # Describe the expected answer type for THIS row (same mapping as the
        # chain-of-thought query cell). Computing it here keeps the prompt from
        # reusing a value left behind by whichever cell ran last.
        answertype = row['answertype']
        if answertype == 'resource':
            answertype_text = 'DBpedia Resource URI(s)'
        else:
            answertype_text = answertype

        # Index labels of train rows with exactly the same question text.
        train_question_idx = train.index[train['question_text'] == test_question].tolist()

        if len(train_question_idx) > 0: # find a matched train question, skip to generate new query
            # These are labels taken from train.index, so look the row up with .loc.
            chatgpt_train_fewshot_query.append(train.loc[train_question_idx[0], 'sparql_query'])

        else:

            test_masked_question_embedding = get_embedding(
                test_masked_question,
                engine="text-embedding-ada-002"
            )

            train_masked_embeddings_similarities = train.train_masked_question_embedding.\
            apply(lambda x: cosine_similarity(x,test_masked_question_embedding))

            # Labels of the five most similar train rows, best first; only the best
            # one is used in the prompt below.
            top5_idx = train_masked_embeddings_similarities.sort_values(ascending=False)[:5].index
            top5_rows = train.loc[top5_idx]

            top5_train_questions = list(top5_rows.question_text.values)
            top5_train_queries = list(top5_rows.sparql_query.values)
            top5_cots = list(top5_rows.train_cot.values)

            msg = """
               Study the following example question, logical steps, 
               and SPARQL query provided:
           
               Question:{}
               Logical steps:{}
               SPARQL query: {}
           
               Now, using the learned pattern and logical steps, translate 
               the new question below into a SPARQL query.
               Write a syntactically corect SPARQL query only.
               The query should return answers as {}.
               Include all required prefixes in the query. 
               No comments. Output query only.
               
               New question: {}
               Query: 
            """
            msg = msg.format(top5_train_questions[0], top5_cots[0], \
                             top5_train_queries[0], answertype_text, test_question)

            response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role":"system", "content":"Reset your memory and start with a \
                 clean slate. Disregard any previous information or context from \
                 our conversation. "},
                {"role": "system", "content": "Now you are a helpful assistant focusing on DBpedia. \
                You translate a user question to a SPARQL query to answer the question \
                using the DBpedia knowledge base."},
                {"role": "user", "content": msg}
                ]
            )

            # Store the reply as a single line.
            chatgpt_train_fewshot_query.append(response['choices'][0]['message']['content'].\
                                       strip().replace('\n', ' ' ))

In [None]:
chatgpt_train_fewshot_query

In [None]:
test['chatgpt_nomasked_train_cot_fewshot_query'] = chatgpt_train_fewshot_query

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

### Retrieve chatgpt_train_cot_fewshot_query results

In [None]:
# Run the generated queries in column chatgpt_nomasked_train_cot_fewshot_query
# against the DBpedia endpoint.
# Original note: query the DBpedia endpoint in March, 2023

from SPARQLWrapper import SPARQLWrapper, JSON
from tqdm import tqdm
import ast

# set up the SPARQL endpoint URL
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

sparql.setReturnFormat(JSON)

# One entry per test row: the converted JSON result, or the exception that was raised.
chatgpt_train_cot_fewshot_query_results = []
count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1

    # Use a dedicated name: assigning to `chatgpt_train_fewshot_query` here would
    # overwrite the list of generated queries with a single string.
    generated_query = row['chatgpt_nomasked_train_cot_fewshot_query']

    sparql.setQuery(generated_query)

    try:
        ret = sparql.queryAndConvert()
    except Exception as e:
        # Store the exception in place of a result so the list keeps one entry per row.
        chatgpt_train_cot_fewshot_query_results.append(e)
        print(e)
    else:
        chatgpt_train_cot_fewshot_query_results.append(ret)

In [None]:
test['chatgpt_nomasked_train_cot_fewshot_query_results'] = chatgpt_train_cot_fewshot_query_results

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

### Few-shot learning on matched train question only

In [None]:
test.columns

In [None]:
train.columns

In [None]:
from tqdm import tqdm

# 1-shot prompting without chain of thought: show ChatGPT the most similar train question
# (by masked-question embedding) with its SPARQL query, then ask for a query for the test
# question. A test question that also appears verbatim in train reuses the train query
# without calling the model.
chatgpt_train_fewshot_query = []

count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):

    # Window over the test index labels handled in this run.
    if (idx > -1) and (idx < 200):

        test_question = row['question_text']

        test_masked_question = row['masked_question']

        # Describe the expected answer type for THIS row (same mapping as the
        # chain-of-thought query cell). Computing it here keeps the prompt from
        # reusing a value left behind by whichever cell ran last.
        answertype = row['answertype']
        if answertype == 'resource':
            answertype_text = 'DBpedia Resource URI(s)'
        else:
            answertype_text = answertype

        # Index labels of train rows with exactly the same question text.
        train_question_idx = train.index[train['question_text'] == test_question].tolist()

        if len(train_question_idx) > 0: # find a matched train question, skip to generate new query
            # These are labels taken from train.index, so look the row up with .loc.
            chatgpt_train_fewshot_query.append(train.loc[train_question_idx[0], 'sparql_query'])

        else:

            test_masked_question_embedding = get_embedding(
                test_masked_question,
                engine="text-embedding-ada-002"
            )

            train_masked_embeddings_similarities = train.train_masked_question_embedding.\
            apply(lambda x: cosine_similarity(x,test_masked_question_embedding))

            # Labels of the five most similar train rows, best first; only the best
            # one is used in the prompt below.
            top5_idx = train_masked_embeddings_similarities.sort_values(ascending=False)[:5].index
            top5_rows = train.loc[top5_idx]

            top5_train_questions = list(top5_rows.question_text.values)
            top5_train_queries = list(top5_rows.sparql_query.values)

            msg = """
               Study the following example question and SPARQL query provided:
           
               Question:{}
               SPARQL query: {}
           
               Now, using the learned pattern to translate 
               the new question below into a SPARQL query.
               Write a syntactically corect SPARQL query only.
               The query should return answers as {}.
               Include all required prefixes in the query. 
               No comments. Output query only.
               
               New question: {}
               Query: 
            """
            msg = msg.format(top5_train_questions[0], \
                             top5_train_queries[0], answertype_text, test_question)

            response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role":"system", "content":"Reset your memory and start with a \
                 clean slate. Disregard any previous information or context from \
                 our conversation. "},
                {"role": "system", "content": "Now you are a helpful assistant focusing on DBpedia. \
                You translate a user question to a SPARQL query to answer the question \
                using the DBpedia knowledge base."},
                {"role": "user", "content": msg}
                ]
            )

            # Store the reply as a single line.
            chatgpt_train_fewshot_query.append(response['choices'][0]['message']['content'].\
                                       strip().replace('\n', ' ' ))

In [None]:
chatgpt_train_fewshot_query

In [None]:
len(chatgpt_train_fewshot_query)

In [None]:
test['chatgpt_nomasked_train_only_fewshot_query'] = chatgpt_train_fewshot_query

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

### Retrieve chatgpt_train_only_fewshot_query results

In [None]:
# Run the generated queries in column chatgpt_nomasked_train_only_fewshot_query
# against the DBpedia endpoint.
# Original note: query the DBpedia endpoint in March, 2023

from SPARQLWrapper import SPARQLWrapper, JSON
from tqdm import tqdm
import ast

# set up the SPARQL endpoint URL
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

sparql.setReturnFormat(JSON)

# One entry per test row: the converted JSON result, or the exception that was raised.
chatgpt_train_cot_fewshot_query_results = []
count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1

    # Use a dedicated name: assigning to `chatgpt_train_fewshot_query` here would
    # overwrite the list of generated queries with a single string.
    generated_query = row['chatgpt_nomasked_train_only_fewshot_query']

    sparql.setQuery(generated_query)

    try:
        ret = sparql.queryAndConvert()
    except Exception as e:
        # Store the exception in place of a result so the list keeps one entry per row.
        chatgpt_train_cot_fewshot_query_results.append(e)
        print(e)
    else:
        chatgpt_train_cot_fewshot_query_results.append(ret)

In [None]:
test['chatgpt_nomasked_train_only_fewshot_query_results'] = chatgpt_train_cot_fewshot_query_results

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

### 3-shot learning on matched train questions only

In [None]:
from tqdm import tqdm

# 3-shot prompting without chain of thought: show ChatGPT the three most similar train
# questions (by masked-question embedding) with their SPARQL queries, then ask for a query
# for the test question. A test question that also appears verbatim in train reuses the
# train query without calling the model.
chatgpt_train_fewshot_query = []

count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):

    # Window over the test index labels handled in this run (resumes after label 51).
    if (idx > 51) and (idx < 200):

        test_question = row['question_text']

        test_masked_question = row['masked_question']

        # Describe the expected answer type for THIS row (same mapping as the
        # chain-of-thought query cell). Computing it here keeps the prompt from
        # reusing a value left behind by whichever cell ran last.
        answertype = row['answertype']
        if answertype == 'resource':
            answertype_text = 'DBpedia Resource URI(s)'
        else:
            answertype_text = answertype

        # Index labels of train rows with exactly the same question text.
        train_question_idx = train.index[train['question_text'] == test_question].tolist()

        if len(train_question_idx) > 0: # find a matched train question, skip to generate new query
            # These are labels taken from train.index, so look the row up with .loc.
            chatgpt_train_fewshot_query.append(train.loc[train_question_idx[0], 'sparql_query'])

        else:

            test_masked_question_embedding = get_embedding(
                test_masked_question,
                engine="text-embedding-ada-002"
            )

            train_masked_embeddings_similarities = train.train_masked_question_embedding.\
            apply(lambda x: cosine_similarity(x,test_masked_question_embedding))

            # Labels of the five most similar train rows, best first; the first
            # three are used in the prompt below.
            top5_idx = train_masked_embeddings_similarities.sort_values(ascending=False)[:5].index
            top5_rows = train.loc[top5_idx]

            top5_train_questions = list(top5_rows.question_text.values)
            top5_train_queries = list(top5_rows.sparql_query.values)

            msg = """
               Study the following example questions and SPARQL queries provided:
           
               Question:{}
               SPARQL query: {}
               
               Question:{}
               SPARQL query: {}
               
               Question:{}
               SPARQL query: {}
           
               Now, using the learned patterns to translate 
               the new question below into a SPARQL query.
               Write a syntactically corect SPARQL query only.
               The query should return answers as {}.
               Include all required prefixes in the query. 
               No comments. Output query only.
               
               New question: {}
               Query: 
            """
            msg = msg.format(top5_train_questions[0], top5_train_queries[0], \
                             top5_train_questions[1], top5_train_queries[1], \
                             top5_train_questions[2], top5_train_queries[2], \
                             answertype_text, test_question)

            response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role":"system", "content":"Reset your memory and start with a \
                 clean slate. Disregard any previous information or context from \
                 our conversation. "},
                {"role": "system", "content": "Now you are a helpful assistant focusing on DBpedia. \
                You translate a user question to a SPARQL query to answer the question \
                using the DBpedia knowledge base."},
                {"role": "user", "content": msg}
                ]
            )

            # Store the reply as a single line.
            chatgpt_train_fewshot_query.append(response['choices'][0]['message']['content'].\
                                       strip().replace('\n', ' ' ))

In [None]:
chatgpt_train_fewshot_query

In [None]:
#test['chatgpt_nomasked_train_only_3fewshot_query'] = chatgpt_train_fewshot_query
# NOTE(review): `temp_query_52` is not built in the visible cells; presumably it joins an
# earlier partial run with the resumed run above (index labels > 51) — confirm before re-running.
test['chatgpt_nomasked_train_only_3fewshot_query'] = temp_query_52

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

### Retrieve chatgpt_train_only_3fewshot_query results

In [None]:
# Run the generated queries in column chatgpt_nomasked_train_only_3fewshot_query
# against the DBpedia endpoint.
# Original note: query the DBpedia endpoint in March, 2023

from SPARQLWrapper import SPARQLWrapper, JSON
from tqdm import tqdm
import ast

# set up the SPARQL endpoint URL
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

sparql.setReturnFormat(JSON)

# One entry per test row: the converted JSON result, or the exception that was raised.
chatgpt_train_cot_fewshot_query_results = []
count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1

    # Use a dedicated name: assigning to `chatgpt_train_fewshot_query` here would
    # overwrite the list of generated queries with a single string.
    generated_query = row['chatgpt_nomasked_train_only_3fewshot_query']

    sparql.setQuery(generated_query)

    try:
        ret = sparql.queryAndConvert()
    except Exception as e:
        # Store the exception in place of a result so the list keeps one entry per row.
        chatgpt_train_cot_fewshot_query_results.append(e)
        print(e)
    else:
        chatgpt_train_cot_fewshot_query_results.append(ret)

In [None]:
test['chatgpt_nomasked_train_only_3fewshot_query_results'] = chatgpt_train_cot_fewshot_query_results

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

### Evaluate the chatgpt_train_only_3fewshot_query_results

In [None]:
import json, ast

In [None]:
test.shape

In [None]:
# retrieve query results
import ast

# For each test row, the gold answers as a list of normalised terms
# (DBpedia resource prefix and 'dbo:' removed, stripped, lower-cased).
query_results_terms = []
count = 0  # number of rows handled as boolean results
for idx, row in test.iterrows():
    raw_result = row['gold_query_results_DBpedia_2023_03']
    # Parse the stored literal once and reuse it for both result shapes.
    parsed = ast.literal_eval(raw_result)

    try:
        bindings = parsed['results']['bindings']
    except KeyError:
        # No result bindings: read the 'boolean' field instead.
        print(raw_result)
        ex_ans = parsed['boolean']
        if ex_ans:
            query_results_terms.append([str(ex_ans).lower()])
        else:
            # A false answer is recorded as an empty term list.
            query_results_terms.append([])
        count += 1
        continue

    # Flatten every bound variable of every binding into one list of terms.
    terms = [
        binding[var]['value']
        .replace('http://dbpedia.org/resource/', '')
        .replace('dbo:', '')
        .strip()
        .lower()
        for binding in bindings
        for var in binding
    ]
    query_results_terms.append(terms)

In [None]:
len(query_results_terms)

In [None]:
query_results_terms

In [None]:
test.columns

In [None]:
# retrieve chatgpt train 3fewshot query results
import ast

# For each test row, the predicted answers as a list of normalised terms
# (DBpedia resource prefix and 'dbo:' removed, stripped, lower-cased).
chatgpt_query_results_terms = []
count = 0  # number of rows handled as boolean results
for idx, row in test.iterrows():
    result = row['chatgpt_nomasked_train_only_3fewshot_query_results']
    try:
        bindings = result['results']['bindings']

        # Flatten every bound variable of every binding into one list of terms.
        terms = [
            binding[var]['value']
            .replace('http://dbpedia.org/resource/', '')
            .replace('dbo:', '')
            .strip()
            .lower()
            for binding in bindings
            for var in binding
        ]
        chatgpt_query_results_terms.append(terms)
    except (TypeError, SyntaxError):
        # The stored value cannot be indexed like a result dict (for example the
        # exception object saved for a failed query): record an error marker.
        # NOTE(review): a result reloaded from CSV as a string also lands here —
        # parse it with ast.literal_eval first if the column was read back from disk.
        chatgpt_query_results_terms.append(['ERROR ERROR ERROR'])
    except KeyError:
        # No result bindings: read the 'boolean' field instead. Only KeyError is
        # handled so unrelated failures are not silently treated as boolean results.
        print(result)
        ex_ans = result['boolean']
        if ex_ans:
            chatgpt_query_results_terms.append([str(ex_ans).lower()])
        else:
            # A false answer is recorded as an empty term list.
            chatgpt_query_results_terms.append([])
        count += 1

In [None]:
len(chatgpt_query_results_terms)

In [None]:
chatgpt_query_results_terms

In [None]:
# Evaluate the precision and recall based on the total numbers of
# gold answers and predicted answers
predicted = 0          # total number of predicted terms over all questions
gold = 0               # total number of gold terms over all questions
predicted_correct = 0  # number of predicted terms that match at least one gold term
some_matched = {}      # question position -> True if any predicted term matched
pre_gold_lengths = []  # (position, #predicted terms, #gold terms) per question
for idx, pred_terms in enumerate(chatgpt_query_results_terms):

    gold_terms = query_results_terms[idx]

    predicted += len(pred_terms)
    gold += len(gold_terms)

    pre_gold_lengths.append((idx, len(pred_terms), len(gold_terms)))

    if pred_terms and gold_terms:
        # Normalise once per question; empty strings are skipped on both sides.
        comparable_gold = [g.replace("_", " ") for g in gold_terms if g]

        matched_terms = 0
        for pterm in pred_terms:
            if not pterm:  # skip an empty string
                continue
            pterm = pterm.replace("_", " ")
            # Lenient match: one term contained in the other. Each predicted term is
            # counted at most once, so predicted_correct can never exceed predicted
            # (counting every matching gold term could push precision above 1).
            if any((pterm in gterm) or (gterm in pterm) for gterm in comparable_gold):
                matched_terms += 1

        predicted_correct += matched_terms
        some_matched[idx] = matched_terms > 0

In [None]:
pre_gold_lengths

In [None]:
some_matched[0]

In [None]:
precision = predicted_correct / (predicted)
precision

In [None]:
recall = predicted_correct/gold
recall

In [None]:
# Harmonic mean of precision and recall; reported as 0.0 when either one is 0,
# where the reciprocal form would divide by zero.
f1 = 2 / (1/precision + 1/recall) if precision and recall else 0.0
f1

In [None]:
# Adjusted precision/recall/F1: the same ratios after subtracting fixed offsets from the counts.
# NOTE(review): 240 and 1714 are hard-coded; presumably they remove one outlier question
# (240 predicted-and-correct answers, 1714 gold answers) — confirm, and recompute them
# whenever the predictions or the matching rule change.
adj_precision = (predicted_correct-240) / (predicted - 240)
adj_recall = (predicted_correct-240) / (gold - 1714)
adj_f1 = 2 / (1/adj_precision +  1/adj_recall)
print('adj_precision:{},\nadj_recall:{},\nadj_f1:{}'.format(adj_precision, adj_recall, adj_f1))
predicted_correct, predicted, gold

In [None]:
# Evaluate the precision and recall based on the total numbers of test questions
# A question counts as correct when some non-empty predicted term equals some
# non-empty gold term, or when both term lists are empty.
count = 0
predicted_correct = 0
some_matched = {}
for idx, pred_terms in enumerate(chatgpt_query_results_terms):
    gold_terms = query_results_terms[idx]
    count += 1

    if not pred_terms and not gold_terms:
        predicted_correct += 1
        some_matched[idx] = True
    elif pred_terms and gold_terms:
        nonempty_gold = {gterm for gterm in gold_terms if gterm}
        hit = any(pterm in nonempty_gold for pterm in pred_terms if pterm)
        if hit:
            predicted_correct += 1
        some_matched[idx] = hit
    # When exactly one side is empty, nothing is recorded in some_matched.

In [None]:
# Number of questions flagged as matched in some_matched.
total = sum(1 for matched in some_matched.values() if matched)
total

In [None]:
count

In [None]:
predicted_correct

In [None]:
precision = predicted_correct / count
precision

In [None]:
recall = predicted_correct/count
recall

In [None]:
# Harmonic mean of precision and recall; reported as 0.0 when either one is 0,
# where the reciprocal form would divide by zero.
f1 = 2 / (1/precision + 1/recall) if precision and recall else 0.0
f1

### Explain test query in chain of thought without word limit

In [None]:
from tqdm import tqdm

# Ask ChatGPT for a natural-language chain of thought explaining each test
# query; replies are collected (newlines flattened to spaces) in cot.
cot = []

count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1

    test_query = row['sparql_query']

    # Only rows whose index label lies strictly between 123 and 200 are processed;
    # the bounds look like a manual resume window edited between runs.
    if 123 < idx < 200:
        msg = """
               Briefly explain the following query in logical steps as a chain of thought. 
               Explain in natural language. 
               Forget what you have about the query before. 
               Assume you are trying to construct the query again. 
               No comments. Output the steps only. 
               Do not include the original query in the explanation. 

               QUERY:{}
               THOUGHT:
        """.format(test_query)

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "you are a helpful assistant focusing on DBpedia."},
                {"role": "user", "content": msg},
            ],
        )

        reply = response['choices'][0]['message']['content']
        cot.append(reply.strip().replace('\n', ' '))

In [None]:
cot

In [None]:
# NOTE(review): `temp_cot_124` is not built in the visible cells (the loop above fills `cot`
# for index labels > 123); presumably it joins earlier partial runs — confirm before re-running.
test['cot_noWordLimit']=temp_cot_124

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

### Generate test queries with ChatGPT based on the chain of thought

In [None]:
from tqdm import tqdm

# Ask ChatGPT to turn each test question plus its chain of thought into a
# SPARQL query; replies are collected (newlines flattened to spaces).
chatgpt_cot_query = []

count = 0

for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1

    test_question = row['question_text']
    cot = row['cot_noWordLimit']

    # Phrase the expected answer type for the prompt: 'resource' is spelled out,
    # any other type is passed through unchanged.
    answertype = row['answertype']
    answertype_text = 'DBpedia Resource URI(s)' if answertype == 'resource' else answertype

    # Only rows whose index label lies strictly between 120 and 200 are processed;
    # the bounds look like a manual resume window edited between runs.
    if 120 < idx < 200:
        msg = """
               Translate the question to SPARQL query on DBpedia. 
               Forget what you have about the question and query before. 
               Follow the steps in the chain of thought to construct the query. 
               Output query only. No comments. 
               Include all required prefixes in the query. 
               Return the answers as {}.

               QUESTION: {}

               THOUGHT: {}

               QUERY: 
        """.format(answertype_text, test_question, cot)

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "you are a helpful assistant focusing on DBpedia. \
                You will translate a user question to a SPARQL query on the DBpedia knowledge base."},
                {"role": "user", "content": msg},
            ],
        )

        reply = response['choices'][0]['message']['content']
        chatgpt_cot_query.append(reply.strip().replace('\n', ' '))

In [None]:
# NOTE(review): `temp_cot_query_121` is not built in the visible cells (the loop above fills
# `chatgpt_cot_query` for index labels > 120); presumably it joins earlier partial runs — confirm.
test['chatgpt_cot_noWordLimit_query'] = temp_cot_query_121

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

### Retrieve chatgpt_cot_noWordLimit_query results

In [None]:
# Retrieve the chatgpt_cot_query_results

from SPARQLWrapper import SPARQLWrapper, JSON
from tqdm import tqdm
import ast

# set up the SPARQL endpoint URL
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

sparql.setReturnFormat(JSON)

# One entry per test row: the converted JSON result, or the exception that was raised.
chatgpt_cot_query_results = []
count = 0
for idx, row in tqdm(test.iterrows(), total=test.shape[0]):
    count += 1

    gpt_query = row['chatgpt_cot_noWordLimit_query']
    sparql.setQuery(gpt_query)

    try:
        ret = sparql.queryAndConvert()
    except Exception as e:
        # Store the exception in place of a result so the list keeps one entry per row.
        chatgpt_cot_query_results.append(e)
        print(e)
    else:
        chatgpt_cot_query_results.append(ret)

In [None]:
test['chatgpt_cot_noWordLimit_query_results'] = chatgpt_cot_query_results

In [None]:
#test.to_csv('../data/QALD/9/data/qald-9-test-with-embeddings-cot.csv', index=None)

### Evaluate the chatgpt_cot_noWordLimit_query_results

In [None]:
# retrieve query results
import ast

# For each test row, the gold answers as a list of normalised terms
# (DBpedia resource prefix and 'dbo:' removed, stripped, lower-cased).
query_results_terms = []
count = 0  # number of rows handled as boolean results
for idx, row in test.iterrows():
    raw_result = row['gold_query_results_DBpedia_2023_03']
    # Parse the stored literal once and reuse it for both result shapes.
    parsed = ast.literal_eval(raw_result)

    try:
        bindings = parsed['results']['bindings']
    except KeyError:
        # No result bindings: read the 'boolean' field instead.
        print(raw_result)
        ex_ans = parsed['boolean']
        if ex_ans:
            query_results_terms.append([str(ex_ans).lower()])
        else:
            # A false answer is recorded as an empty term list.
            query_results_terms.append([])
        count += 1
        continue

    # Flatten every bound variable of every binding into one list of terms.
    terms = [
        binding[var]['value']
        .replace('http://dbpedia.org/resource/', '')
        .replace('dbo:', '')
        .strip()
        .lower()
        for binding in bindings
        for var in binding
    ]
    query_results_terms.append(terms)

In [None]:
len(query_results_terms)

In [None]:
# retrieve gpt query results
import ast

# For each test row, the predicted answers as a list of normalised terms
# (DBpedia resource prefix and 'dbo:' removed, stripped, lower-cased).
chatgpt_cot_query_results_terms = []
count = 0  # number of rows handled as boolean results
for idx, row in test.iterrows():
    result = row['chatgpt_cot_noWordLimit_query_results']
    try:
        bindings = result['results']['bindings']

        # Flatten every bound variable of every binding into one list of terms.
        terms = [
            binding[var]['value']
            .replace('http://dbpedia.org/resource/', '')
            .replace('dbo:', '')
            .strip()
            .lower()
            for binding in bindings
            for var in binding
        ]
        chatgpt_cot_query_results_terms.append(terms)
    except (SyntaxError, TypeError):
        # The stored value cannot be indexed like a result dict (for example the
        # exception object saved for a failed query): record an error marker.
        # NOTE(review): a result reloaded from CSV as a string also lands here —
        # parse it with ast.literal_eval first if the column was read back from disk.
        chatgpt_cot_query_results_terms.append(['ERROR ERROR ERROR'])
    except KeyError:
        # No result bindings: read the 'boolean' field instead. Only KeyError is
        # handled so unrelated failures are not silently treated as boolean results.
        print(result)
        ex_ans = result['boolean']
        if ex_ans:
            chatgpt_cot_query_results_terms.append([str(ex_ans).lower()])
        else:
            # A false answer is recorded as an empty term list.
            chatgpt_cot_query_results_terms.append([])
        count += 1

In [None]:
# Number of predicted answer lists collected (display only).
len(chatgpt_cot_query_results_terms)

In [None]:
# Answer-level evaluation: tally predicted answers, gold answers and exact
# matches, pooled over all test questions.
predicted = 0
gold = 0
predicted_correct = 0
some_matched = {}      # question index -> True if at least one answer matched
pred_gold_lengths = []  # (question index, #predicted, #gold)

for idx, pred_terms in enumerate(chatgpt_cot_query_results_terms):
    gold_terms = query_results_terms[idx]

    n_pred = len(pred_terms)
    n_gold = len(gold_terms)
    predicted += n_pred
    gold += n_gold
    pred_gold_lengths.append((idx, n_pred, n_gold))

    # Matches are only looked for when both sides have answers.
    if n_pred == 0 or n_gold == 0:
        continue

    # A non-empty predicted term scores once if it equals some gold term
    # (exact string equality; empty strings never match).
    hits = sum(
        1 for pterm in pred_terms
        if len(pterm) > 0 and pterm in gold_terms
    )
    predicted_correct += hits
    some_matched[idx] = hits > 0

In [None]:
# Per-question (index, number of predicted answers, number of gold answers).
pred_gold_lengths

In [None]:
# Column totals of pred_gold_lengths: all predicted answers and all gold
# answers.
pres = sum(n_pred for _, n_pred, _ in pred_gold_lengths)
gols = sum(n_gold for _, _, n_gold in pred_gold_lengths)
pres, gols

In [None]:
# Answer-level precision: matched answers over all predicted answers.
# Reported as 0.0 when nothing was predicted, instead of dividing by zero.
precision = predicted_correct / predicted if predicted else 0.0
precision

In [None]:
# Answer-level recall: matched answers over all gold answers.
# Reported as 0.0 when there are no gold answers, instead of dividing by zero.
recall = predicted_correct / gold if gold else 0.0
recall

In [None]:
# Harmonic mean of precision and recall. If either is zero the reciprocal
# form would divide by zero, so F1 is reported as 0.0 in that case.
f1 = 2 / (1/precision + 1/recall) if precision and recall else 0.0
f1

In [None]:
# "Adjusted" answer-level scores: the raw totals are reduced by fixed amounts
# before the ratios are taken.
# NOTE(review): the literals 240, 10000, 1, 44 and 1714 are hand-entered; this
# notebook section does not show where they come from — confirm before reuse.
adj_correct = predicted_correct - 240
adj_precision = adj_correct / (predicted - 10000 -1)
adj_recall = adj_correct / (gold - 44 - 1714)
adj_f1 = 2 / (1/adj_precision +  1/adj_recall)
print('adj_precision:{},\nadj_recall:{},\nadj_f1:{}'.format(adj_precision, adj_recall, adj_f1))
predicted_correct, predicted, gold

In [None]:
# Question-level evaluation: a question counts as correct when any predicted
# term and any gold term contain one another (after turning '_' into ' '),
# or when both the prediction and the gold answer are empty.
count = 0
predicted_correct = 0
some_matched = {}
for idx, pred_terms in enumerate(chatgpt_cot_query_results_terms):
    gold_terms = query_results_terms[idx]

    count += 1

    if len(pred_terms) == 0 and len(gold_terms) == 0:
        # Nothing expected and nothing returned: counted as a correct question.
        predicted_correct += 1
        some_matched[idx] = True
    elif len(pred_terms) > 0 and len(gold_terms) > 0:
        # Empty strings are ignored on both sides.
        pred_clean = [p.replace("_", " ") for p in pred_terms if len(p) > 0]
        gold_clean = [g.replace("_", " ") for g in gold_terms if len(g) > 0]

        # Lenient substring match in either direction; one hit is enough.
        question_hit = any(
            (p in g) or (g in p)
            for p in pred_clean
            for g in gold_clean
        )
        if question_hit:
            predicted_correct += 1
        some_matched[idx] = question_hit

In [None]:
# How many questions are flagged as matched in some_matched.
total = sum(1 for matched in some_matched.values() if matched)
total

In [None]:
# Current value of the notebook-level counter `count` (display only); in
# notebook order it was last set by the question-level evaluation loop, where
# it is incremented once per question.
count

In [None]:
# Current value of `predicted_correct` (display only); in notebook order it
# is the number of questions counted as correct by the question-level loop.
predicted_correct

In [None]:
# Question-level precision: questions counted as correct over all questions.
# Reported as 0.0 when no question was evaluated, instead of dividing by zero.
precision = predicted_correct / count if count else 0.0
precision

In [None]:
# Question-level recall: computed from the same ratio as the question-level
# precision (correct questions over all questions).
# Reported as 0.0 when no question was evaluated, instead of dividing by zero.
recall = predicted_correct / count if count else 0.0
recall

In [None]:
# Harmonic mean of precision and recall. If either is zero the reciprocal
# form would divide by zero, so F1 is reported as 0.0 in that case.
f1 = 2 / (1/precision + 1/recall) if precision and recall else 0.0
f1

### Explain train query in chain of thought and few-shot learning

In [None]:
# Show the columns available on the train frame (display only).
train.columns

In [None]:
from tqdm import tqdm

# Ask ChatGPT to explain every train SPARQL query as a chain of thought.
# `train_cot` ends up with one explanation per row of `train`, in row order.
#
# The cell is resumable: explanations collected by an earlier, interrupted run
# are kept and only the remaining rows are sent to the API. This replaces the
# hard-coded `idx > 290` restart, which left `train_cot` shorter than `train`.
# To regenerate everything from scratch, set `train_cot = []` before running.
if 'train_cot' not in globals():
    train_cot = []

already_explained = len(train_cot)  # rows covered by a previous run

count = 0

for pos, (idx, row) in enumerate(tqdm(train.iterrows(), total=train.shape[0])):
    
    #if count > 4:
    #    break
    count += 1
    
    # Resume by row position (not index label) so it matches the list length.
    if pos >= already_explained:
        train_query = row['sparql_query']


        msg = """
               Briefly explain the following query in logical steps as a chain of thought. 
               Explain in natural language. 
               Forget what you have about the query before. 
               Assume you are trying to construct the query again. 
               No comments. Output the steps only. 
               Do not include the original query in the explanation. 

               QUERY:{}
               THOUGHT:
        """
        msg = msg.format(train_query)

        response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
                {"role": "system", "content": "you are a helpful assistant focusing on DBpedia. \
                 You will explain SPARQL query in logical steps to help reconstruct the query. "},
                {"role": "user", "content": msg}
                ]
        )

        # Keep the explanation on a single line.
        explanation = response['choices'][0]['message']['content']
        train_cot.append(explanation.strip().replace('\n', ' '))

In [None]:
# Store the explanations as a new column; pandas requires `train_cot` to hold
# exactly one entry per row of `train`, otherwise this assignment raises.
train['train_cot']=train_cot

In [None]:
#train.to_csv('../data/QALD/9/data/qald-9-train-with-embeddings-cot.csv', index=None)