### Iterate over all generated text files in a folder

In [55]:
import os

def get_files_in_folder(folder_path):
    """
    List the regular files that sit directly inside a folder.

    Sub-directories are skipped and the search is not recursive. Errors are
    reported with ``print`` rather than raised, in which case an empty list
    is returned.

    Args:
        folder_path (str): Path to the folder.

    Returns:
        list: File names (not full paths), in the order ``os.listdir``
            yields them.
    """
    try:
        return [
            name
            for name in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, name))
        ]
    except FileNotFoundError:
        print(f"The folder '{folder_path}' does not exist.")
    except Exception as e:
        print(f"An error occurred: {e}")

    # Reached only after one of the handlers above has reported a problem.
    return []

# Example usage: list the generated-text folder and preview the first five names.
folder_path = "results/generated_text/llama3.2_3b_instruct_lora"
file_list = get_files_in_folder(folder_path)
print("Files in folder:", file_list[:5])


Files in folder: ['AQ2094.txt', 'AQ0318.txt', 'AQ1748.txt', 'AQ1953.txt', 'AQ0495.txt']


### Use `re` to clean the generated text and extract only the target SPARQL query

In [None]:
# import re

# textfile = 'results/generated_text/llama3.2_3b_instruct_lora/AQ0002.txt'

# # Load the content of the text file
# with open(textfile, 'r') as file:
#     text = file.read()

# # Regular expression to extract text between "Generated SPARQL:" and the second occurrence of "SELECT"
# match = re.search(r"Generated SPARQL:(.*?SELECT.*?)SELECT", text, re.DOTALL)

# # Extract and clean the matched text
# if match:
#     extracted_text = match.group(1).strip()  # Extract up to but not including the second "SELECT"
#     print("Extracted Text:")
#     print(extracted_text)
# else:
#     print("No match found.")

### Iterate over all files to extract the SPARQL queries, and save them in a new folder

In [56]:
import re
import os


# Extract the SPARQL query from every generated-text file and write it, under
# the same file name, into `sparql_folder`. Files in which the pattern is not
# found are collected in `failed_files`.
folder_path = "results/generated_text/llama3.2_3b_instruct_lora"
file_list = get_files_in_folder(folder_path)

failed_files = []
sparql_folder = "results/clean-sparql/llama3.2_3b_instruct_lora"
# open(..., 'w') does not create missing parent directories, so make sure the
# output folder exists before the first write.
os.makedirs(sparql_folder, exist_ok=True)

# Captures the text after "Generated SPARQL:" up to, but not including, the
# second occurrence of "SELECT". Compiled once instead of on every iteration.
# NOTE(review): a file with fewer than two "SELECT" occurrences after the
# marker never matches and ends up in `failed_files` - confirm this is intended.
sparql_pattern = re.compile(r"Generated SPARQL:(.*?SELECT.*?)SELECT", re.DOTALL)

# Loop through each file in the folder
for file in file_list:
    file_path = os.path.join(folder_path, file)
    with open(file_path, 'r') as f:
        text = f.read()

    # The input file is closed at this point; only `text` is needed below.
    match = sparql_pattern.search(text)
    if not match:
        print(f"No match found in file: {file}\n")
        failed_files.append(file)
        continue

    extracted_text = match.group(1).strip()
    print(f"File: {file}\nExtracted Text:\n{extracted_text}\n")
    # Save the extracted text to a new file with the same name.
    new_file_path = os.path.join(sparql_folder, file)
    with open(new_file_path, 'w') as new_file:
        new_file.write(extracted_text)

No match found in file: AQ2094.txt

File: AQ0318.txt
Extracted Text:
SELECT?model?model_lbl
     WHERE {
      ?dataset       a                orkgc:Dataset;
                      rdfs:label      ?dataset_lbl.
         FILTER (str(?dataset_lbl) = "ImageNet ReaL")
      ?benchmark      orkgp:HAS_DATASET      ?dataset;
                      orkgp:HAS_EVALUATION   ?eval.
        ?eval           orkgp:HAS_VALUE        ?value;
                         rdfs:label              ?value_lbl.
      ?cont         orkgp:HAS_BENCHMARK     ?benchmark.
        ?cont        rdfs:label               ?cont_lbl.
      ?model      orkgp:HAS_MODEL          ?cont;
                   orkgp:P31               ?model.
        ?model      rdfs:label               ?model_lbl.
       } CONTAIN?eval
      UNION

No match found in file: AQ1748.txt

No match found in file: AQ1953.txt

File: AQ0495.txt
Extracted Text:
SELECT?paper?paper_lbl
     WHERE {
      ?dataset       a                orkgc:Dataset;
             

### BLEU

In [None]:
# `rouge` is the package that provides `from rouge import Rouge` used in this
# notebook; `rouge-score` installs a different module (`rouge_score`).
%pip install nltk scikit-learn rouge-score rouge


In [57]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge import Rouge

def calculate_bleu(reference, hypothesis):
    """
    Compute the sentence-level BLEU score of a hypothesis against one reference.

    Both strings are tokenized on whitespace, and NLTK's ``method1``
    smoothing function is passed to ``sentence_bleu``.

    Args:
        reference (str): Ground-truth text.
        hypothesis (str): Generated text to score.

    Returns:
        The value returned by ``sentence_bleu``.
    """
    smoothing = SmoothingFunction().method1
    reference_tokens = reference.split()
    hypothesis_tokens = hypothesis.split()
    # sentence_bleu expects a list of references, hence the one-element list.
    return sentence_bleu(
        [reference_tokens],
        hypothesis_tokens,
        smoothing_function=smoothing,
    )

def calculate_rouge(reference, hypothesis):
    """
    Score a hypothesis against a reference with ROUGE.

    Args:
        reference (str): Ground-truth text.
        hypothesis (str): Generated text to score.

    Returns:
        The result of ``Rouge.get_scores(hypothesis, reference, avg=True)``.
        The evaluation loop in this notebook reads the 'f' entry of its
        'rouge-1', 'rouge-2' and 'rouge-l' items.
    """
    # Note the argument order: get_scores takes the hypothesis first.
    return Rouge().get_scores(hypothesis, reference, avg=True)


In [None]:
# # read the test_questions.csv file to get the ground truth sparql queries
# import pandas as pd

# # Load the CSV file
# csv_file = 'xueli_data/test_questions.csv'
# df = pd.read_csv(csv_file)
# # print(df.head())

# # given an id, get the corresponding question and sparql query
# id = 'AQ1475'
# question = df[df['id'] == id]['question'].values[0]
# sparql = df[df['id'] == id]['query'].values[0]
# # print(f"Question: {question}\nSPARQL: \n{sparql}")

In [58]:
import pandas as pd


# Score every cleaned SPARQL file against its ground-truth query and report
# the average BLEU and ROUGE-1/2/L F-scores.


def _mean(values):
    """Return the arithmetic mean of values, or NaN when values is empty."""
    return sum(values) / len(values) if values else float("nan")


# Load the CSV file with the test questions; the 'id' and 'query' columns are
# used below.
csv_file = 'xueli_data/test_questions.csv'
df = pd.read_csv(csv_file)

bleu_score_list = []
rouge_1_score_list = []
rouge_2_score_list = []
rouge_l_score_list = []
# Ids of cleaned files that have no matching row in the CSV.
missing_ids = []

# Loop through each file in the folder
sparql_folder = "results/clean-sparql/llama3.2_3b_instruct_lora"
sparql_list = get_files_in_folder(sparql_folder)

# Attention: not every question in test_questions.csv has a file in the
# clean-sparql folder, so the averages only cover the files found there.
for file in sparql_list:
    # Load the content of the text file
    with open(os.path.join(sparql_folder, file), 'r') as f:
        generated_sparql = f.read()
    # The file name without its extension is the question id (e.g. 'AQ0318').
    question_id = file.split('.')[0]

    # Get the ground truth SPARQL query; skip the file instead of failing with
    # an IndexError when its id is not in the CSV.
    matching_queries = df.loc[df['id'] == question_id, 'query'].values
    if len(matching_queries) == 0:
        print(f"No ground truth found for id: {question_id}")
        missing_ids.append(question_id)
        continue
    sparql = matching_queries[0]

    # Calculate BLEU and ROUGE scores
    bleu_score = calculate_bleu(sparql, generated_sparql)
    rouge_scores = calculate_rouge(sparql, generated_sparql)
    rouge_1 = rouge_scores['rouge-1']['f']
    rouge_2 = rouge_scores['rouge-2']['f']
    rouge_l = rouge_scores['rouge-l']['f']

    # Append the scores to the lists
    bleu_score_list.append(bleu_score)
    rouge_1_score_list.append(rouge_1)
    rouge_2_score_list.append(rouge_2)
    rouge_l_score_list.append(rouge_l)

# Calculate the average scores; _mean yields NaN rather than raising
# ZeroDivisionError when no file was scored.
avg_bleu_score = _mean(bleu_score_list)
avg_rouge_1_score = _mean(rouge_1_score_list)
avg_rouge_2_score = _mean(rouge_2_score_list)
avg_rouge_l_score = _mean(rouge_l_score_list)
print(f"Average BLEU Score: {avg_bleu_score:.2f}")
print(f"Average ROUGE-1 Score: {avg_rouge_1_score:.2f}")
print(f"Average ROUGE-2 Score: {avg_rouge_2_score:.2f}")
print(f"Average ROUGE-L Score: {avg_rouge_l_score:.2f}")

Average BLEU Score: 0.48
Average ROUGE-1 Score: 0.73
Average ROUGE-2 Score: 0.63
Average ROUGE-L Score: 0.65
