# SPICE Predictions

In [1]:
import json
import os
import pathlib
import time
from typing import Dict, List, Tuple, Callable

import numpy as np
import pandas as pd
from dotenv import load_dotenv

from Models import *
from Prompts import *
from Requests import *
from Entity_Resolver import *
from Database_Query import *
from Dataset_stats import *
from Import import get_file_paths_pathlib, get_file_paths
from Export import append_to_dataframe_and_export_pathlib

load_dotenv()

True

## Check Setup

### Import Dataset

In [24]:
# One pre-processed SPICE test conversation, used for the setup checks in this notebook
input_folder_path = pathlib.PurePath('./SPICE_dataset_pp/test/QA_0')
file_name = 'QA_90'

# Load the conversation CSV into a dataframe
spice_df = pd.read_csv(input_folder_path.joinpath(f"{file_name}.csv"))

spice_df.head()

Unnamed: 0,turnID,question_type_id,question_type,description,speaker,entities_in_utterance,relations,type_list,utterance,all_response_entities,sparql_query
0,test#QA_0#QA_90#0,1.0,Simple Question (Direct),Simple Question,USER,{'Q3035075': 'Dominique Colas'},{'P69': 'educated at'},{'Q41176': 'building'},What is the building where Dominique Colas was...,{},
1,test#QA_0#QA_90#0,,,,SYSTEM,{'Q3268957': 'Lycée Thiers'},{},{},Lycée Thiers,{'Q3268957': 'Lycée Thiers'},SELECT ?x WHERE { wd:Q3035075 wdt:P69 ?x . ?x ...
2,test#QA_0#QA_90#1,2.0,Simple Question (Coreferenced),Simple Question|Single Entity|Indirect,USER,{'Q3035075': 'Dominique Colas'},{'P184': 'doctoral advisor'},{'Q502895': 'common name'},Who has that one as doctor advisor ?,{},
3,test#QA_0#QA_90#1,,,,SYSTEM,{'Q15943337': 'Alexandra Goujon'},{},{},Alexandra Goujon,{'Q15943337': 'Alexandra Goujon'},SELECT ?x WHERE { ?x wdt:P184 wd:Q3035075 . ?x...
4,test#QA_0#QA_90#2,5.0,Verification (Boolean) (All),"Verification|3 entities, 2 direct, 2(direct) a...",USER,"{'Q15943337': 'Alexandra Goujon', 'Q80721': 'C...",{'P27': 'country of citizenship'},{'Q15617994': 'designation for an administrati...,Is that person a citizen of Camerota and Prato...,{},


### Call LLM APIs

#### Local Server (Vicuna/LLaMA)

In [None]:
def convert_question_to_sparql_local_server(model: ModelType, dataframe: pd.DataFrame, question_index: int, print_prompt_template: bool = False) -> Tuple[str, float]:
    ''' Builds the few-shot chat-history prompt for the row at question_index and sends it to the local server.

    The prompt is printed first when print_prompt_template is True. The value returned by
    send_to_local_server_chat is passed through unchanged; this notebook reads it as (output, execution time). '''
    few_shot_prompt = get_few_shot_chat_history_prompt(dataframe, question_index)

    if print_prompt_template:
        print(f"Prompt template: {few_shot_prompt}")

    server_response = send_to_local_server_chat(few_shot_prompt, model.value, max_tokens=128)
    return server_response
     
model = ModelType.VICUNA
question_index = 6
# The row following the question holds the reference SPARQL query
sparql_query = spice_df['sparql_query'].iloc[question_index + 1]
result = convert_question_to_sparql_local_server(model, spice_df, question_index, print_prompt_template=True)
print(f"""
Output: {result[0]}
Execution time: {result[1]}
""")
print(f"Expected output: {sparql_query}")

#### OpenAI API

In [20]:
def convert_question_to_sparql_openai(model: ModelType, dataframe: pd.DataFrame, question_index: int, print_prompt_template: bool = False) -> Tuple[str, float]:
    ''' Builds the few-shot chat-history prompt for the row at question_index and sends it to the OpenAI chat API.

    The prompt generator is called with True as its third argument (unlike the local-server variant).
    The prompt is printed first when print_prompt_template is True. The value returned by
    send_to_openai_chat is passed through unchanged; this notebook reads it as (output, execution time). '''
    few_shot_prompt = get_few_shot_chat_history_prompt(dataframe, question_index, True)

    if print_prompt_template:
        print(f"Prompt template: {few_shot_prompt}")

    api_response = send_to_openai_chat(few_shot_prompt, model.value, max_tokens=128)
    return api_response
    
model = ModelType.GPT3
question_index = 0
# The row following the question holds the reference SPARQL query.
# Index with question_index: the name `index` is never defined in this notebook, so using it
# either raises NameError on a fresh kernel or silently reads a stale variable.
sparql_query = spice_df.iloc[question_index + 1]['sparql_query']
result = convert_question_to_sparql_openai(model, spice_df, question_index, True)
print(f"""
Output: {result[0]}
Execution time: {result[1]}
""")
print(f"Expected output: {sparql_query}")

Prompt template: [{'role': 'system', 'content': "Generate a SPARQL query that answers the given 'Input question:'. Use 'Entities:', 'Relations:' and 'Types:' specified in the prompt to generate the query. The SPARQL query should be compatible with the Wikidata knowledge graph. Prefixes like 'wdt' and 'wd' have already been defined. No language tag is required. Use '?x' as variable name in the SPARQL query. Remember to provide only a SPARQL query in the response without any notes, comments, or explanations."}, {'role': 'user', 'content': "Input question: Is New York City the place of death of Cirilo Villaverde ?\nEntities: {'Q727043': 'Cirilo Villaverde', 'Q60': 'New York City'}\nRelations: {'P20': 'place of death'}\nTypes: {'Q56061': 'administrative territorial entity'}"}, {'role': 'assistant', 'content': 'SPARQL query: ASK { wd:Q727043 wdt:P20 wd:Q60 .  }'}, {'role': 'user', 'content': "Input question: How many works of art express Michael Jordan or pain ?\nEntities: {'Q41421': 'Micha

## Automatic Predictions According to Question Category Distribution

In [8]:
# Full test dataset
distribution_path = pathlib.PurePath("../SPICE_dataset/test")
# Subset of the test dataset that predictions are created from
input_path = pathlib.PurePath('./SPICE_dataset_pp/test/')
# Output folder that receives the predictions
output_path = pathlib.PurePath('./Results/csv/Predictions/Test')

# Model used for the predictions
model = ModelType.LORA

# Prompt type label: 'zero-shot-chat-history' or 'few-shot-chat-history'
prompt_type = 'zero-shot-chat-history'

# Prompt generator matching the prompt type.
# Note: use system prompt true for OpenAI and false for local server (for the local server,
# the system message is added in the FastChat library code: fastchat/conversation.py)
def prompt_generator(df, x):
    return get_zero_shot_chat_history_prompt(df, x, False)

# Size of the subset of the test set
sample_size = 1500

### Helper Functions

In [3]:
def extract_folder_file_name(input_file: pathlib.Path) -> Tuple[str, str]:
    ''' Extracts the last folder name and the file name without its extension from a given path.

    Returns a (folder_name, file_stem) tuple, e.g. ('QA_0', 'QA_90') for
    './SPICE_dataset_pp/test/QA_0/QA_90.csv'. The folder name is '' for a bare file name. '''
    # parent.name and stem are already strings, so no formatting is needed
    return input_file.parent.name, input_file.stem

# file_path = pathlib.PurePath('./SPICE_dataset_pp/test/QA_0/QA_90.csv')
# print(extract_folder_file_name(file_path))

In [4]:
def get_predicted_files(path: pathlib.Path, model: ModelType, prompt_type: str):
    ''' Collects the file paths found under path whose file name belongs to the given model and prompt type '''
    wanted_ending = f"{prompt_type}.csv"
    matching_paths = []

    for candidate in get_file_paths_pathlib(path):
        last_part = candidate.parts[-1]
        # Keep only files whose name contains both the model name and the prompt type
        if model.value in last_part and wanted_ending in last_part:
            matching_paths.append(candidate)

    return matching_paths

# test_path = pathlib.PurePath('./Results/csv/Predictions/Test')
# test_model = ModelType.LORA
# test_prompt_type = 'zero-shot-chat-history'
# print(len(get_predicted_files(test_path, test_model, test_prompt_type)))

In [5]:
def has_prediction_been_made(input_file: pathlib.Path, predicted_files: List[pathlib.Path], turnId: str) -> bool:
    ''' Tells whether a prediction file matching input_file already lists turnId in its turnID column '''
    folder_name, file_stem = extract_folder_file_name(input_file)

    for candidate in predicted_files:
        # Skip prediction files that belong to another folder or another input file
        if folder_name not in candidate.parts:
            continue
        if file_stem not in candidate.parts[-1]:
            continue

        # The file matches: look the turn up in its turnID column
        recorded_turn_ids = pd.read_csv(candidate)['turnID'].values
        if turnId in recorded_turn_ids:
            return True

    return False

# test_input_file = pathlib.PurePath('./SPICE_dataset_pp/test/QA_0/QA_90.csv')
# test_predicted_files = get_predicted_files(pathlib.PurePath('./Results/csv/Predictions/Test'), ModelType.GPT3, 'zero-shot-chat-history')
# test_turnId = 'test#QA_0#QA_90#0'
# print(has_prediction_been_made(test_input_file, test_predicted_files, test_turnId))

In [6]:
def create_prediction_for_subcategory(subcategory: str, missing_predictions: int, model: ModelType, input_path: pathlib.Path, output_path: pathlib.Path, prompt_generator: Callable[[pd.DataFrame, int], object], prompt_type: str):
    ''' Creates missing predictions for a given question subcategory and exports them to the output folder.

    The input files are visited in the order returned by get_file_paths_pathlib, and the search stops
    as soon as missing_predictions predictions have been created. A row matches when its question_type,
    extended by " [description]" if a description is present, equals subcategory. Rows for which
    has_prediction_been_made reports an existing prediction are skipped.

    prompt_generator is called as prompt_generator(dataframe, row_index) and its result is sent to the model.

    Raises ValueError if model is not one of LLAMA, VICUNA, LORA or GPT3. '''
    # Change the max tokens if needed
    max_tokens = 128
    predicted_files = get_predicted_files(output_path, model, prompt_type)
    input_files = get_file_paths_pathlib(input_path)

    for input_file in input_files:
        if missing_predictions <= 0:
            break

        dataframe = pd.read_csv(input_file)
        # Folder and file name depend only on the input file, so resolve them once per file
        folder_name, file_name = extract_folder_file_name(input_file)
        export_path = pathlib.Path(output_path.joinpath(folder_name))

        for conv_index, row in dataframe.iterrows():
            question_type = row['question_type']
            question_description = row['description']
            turnId = row['turnID']

            # Rows without a usable question type (missing values are not strings) are skipped
            if not isinstance(question_type, str) or question_type == '':
                continue

            if isinstance(question_description, str) and question_description != '':
                question_type += f" [{question_description}]"

            # Check if this conversational turn is of concern
            if question_type != subcategory:
                continue

            # Check if this prediction has already been made
            if has_prediction_been_made(input_file, predicted_files, turnId):
                continue

            # Create the prediction
            print(f"Creating prediction for {turnId}")
            prompt_template = prompt_generator(dataframe, conv_index)
            start_time = time.time()

            if model in (ModelType.LLAMA, ModelType.VICUNA, ModelType.LORA):
                response = send_to_local_server_chat(prompt_template, model.value, max_tokens=max_tokens)
            elif model is ModelType.GPT3:
                response = send_to_openai_chat(prompt_template, model.value, max_tokens=max_tokens)
            else:
                # Without this branch `response` would be unbound and fail with a confusing NameError
                raise ValueError(f"Unsupported model for predictions: {model}")

            prediction, execution_time = response[0], response[1]
            append_to_dataframe_and_export_pathlib(dataframe, conv_index, prediction, execution_time, model, prompt_type, export_path, file_name)

            print(f"Finished prediction for {turnId} in {time.time() - start_time}s")
            missing_predictions -= 1

            # Export desired response (the row after the question), with an empty prediction
            if conv_index + 1 < len(dataframe):
                append_to_dataframe_and_export_pathlib(dataframe, conv_index + 1, "", 0, model, prompt_type, export_path, file_name)

            if missing_predictions <= 0:
                break

    if missing_predictions > 0:
        print(f"Could not find enough samples for {subcategory} ({missing_predictions} missing)")

# create_prediction_for_subcategory('Logical Reasoning (All) [Logical|Difference|Single_Relation]', 1, model, input_path, output_path, prompt_generator, prompt_type)

### Check Required and Existing Predictions for each Question Sub-Category

In [12]:
# Predictions required per subcategory for the chosen sample size
required_samples = count_required_predictions_per_subcategory(sample_size, distribution_path)
print(f"Required samples: {required_samples}")
print("Amount of required samples: ", sum(required_samples.values()))

# Predictions still missing per subcategory for the chosen model and prompt type
missing_samples = count_missing_predictions_per_subcategory(sample_size, distribution_path, output_path, model, prompt_type)
print(f"Missing samples: {missing_samples}")
# Count the subcategories whose missing count is non-zero
print("Amount of categories with missing samples: ", sum(1 for count in missing_samples.values() if count != 0))
print("Amount of missing samples: ", sum(missing_samples.values()))

Required samples: {'Logical Reasoning (All) [Logical|Difference|Single_Relation|Incomplete]': 0, 'Logical Reasoning (All) [Logical|Difference|Single_Relation]': 1, 'Quantitative Reasoning (All) [Quantitative|Min/Max|Single entity type]': 2, 'Logical Reasoning (All) [Logical|Intersection|Single_Relation|Incomplete]': 2, 'Comparative Reasoning (Count) (All) [Comparative|Count over More/Less|Single entity type|Indirect]': 2, 'Logical Reasoning (All) [Logical|Difference|Multiple_Relation]': 2, 'Comparative Reasoning (All) [Comparative|More/Less|Single entity type|Indirect]': 2, 'Clarification [Comparative|More/Less|Single entity type|Indirect]': 3, 'Clarification [Comparative|Count over More/Less|Single entity type|Indirect]': 3, 'Comparative Reasoning (Count) (All) [Comparative|Count over More/Less|Mult. entity type|Indirect]': 3, 'Comparative Reasoning (All) [Comparative|More/Less|Mult. entity type|Indirect]': 3, 'Clarification [Comparative|More/Less|Mult. entity type|Indirect]': 3, 'Cla

### Create All Missing Predictions

In [None]:
def create_predictions_for_distribution(sample_size: int, distribution_path: pathlib.Path, model: ModelType, input_path: pathlib.Path, output_path: pathlib.Path, prompt_generator: Callable[[pd.DataFrame, int], object], prompt_type: str):
    ''' Creates all predictions that are still missing, one question subcategory at a time.

    The missing amount per subcategory comes from count_missing_predictions_per_subcategory.
    Subcategories with nothing missing are skipped, so no files are listed for them. '''
    missing_samples = count_missing_predictions_per_subcategory(sample_size, distribution_path, output_path, model, prompt_type)

    for subcategory, missing_predictions in missing_samples.items():
        # Nothing to do: avoids listing prediction and input files for a no-op call
        if missing_predictions <= 0:
            continue

        print(f"Creating {missing_predictions} predictions for {subcategory}")

        create_prediction_for_subcategory(subcategory, missing_predictions, model, input_path, output_path, prompt_generator, prompt_type)

create_predictions_for_distribution(sample_size, distribution_path, model, input_path, output_path, prompt_generator, prompt_type)