In [10]:
import os
import re 
from rdflib import Graph

g = Graph()
g.parse("xueli_data/ORKG RDF dump 14.02.2023.nt", format="nt")
print(len(g))

1133217


In [9]:
import signal

# Define a timeout handler
class TimeoutException(Exception):
    pass


def timeout_handler(signum, frame):
    raise TimeoutException("SPARQL query timed out after 3 minutes.")


# Set the signal for the alarm
signal.signal(signal.SIGALRM, timeout_handler)


# Run the sparql query and get the results
def get_files_in_folder(folder_path):
    """
    Get a list of all files in a folder.

    Args:
        folder_path (str): Path to the folder.

    Returns:
        list: List of file names.
    """
    files = []
    try:
        for entry in os.listdir(folder_path):
            entry_path = os.path.join(folder_path, entry)
            if os.path.isfile(entry_path):
                files.append(entry)
    except FileNotFoundError:
        print(f"The folder '{folder_path}' does not exist.")
    except Exception as e:
        print(f"An error occurred: {e}")
    
    return files


# Loop through each file in the clean_sparql folder
sparql_folder = "results/clean-sparql/llama3.2_3b_lora_terminal"
sparql_list = get_files_in_folder(sparql_folder)
sparql_result_orkg_folder = "results/sparql-result-on-orkg/llama3.2_3b_lora_terminal"
os.makedirs(sparql_result_orkg_folder, exist_ok=True)

prefixes = """
            PREFIX orkgr: <http://orkg.org/orkg/resource/>
            PREFIX orkgc: <http://orkg.org/orkg/class/>
            PREFIX orkgp: <http://orkg.org/orkg/predicate/>
"""

# Attension: not all the file in the test_questions.csv are in the clean_sparql folder
i = 0
failed_files = []
errors = []
for file in sparql_list:
    i += 1
    print(f"Processing file {i}/{len(sparql_list)}")
    # Load the content of the text file
    with open(os.path.join(sparql_folder, file), 'r') as f:
        # remove the ```sparql and ``` from the generated sparql
        generated_sparql = f.read().replace('```sparql', ' ').replace('```', ' ').strip()
        # remove the prefix namespace if it exists
        generated_sparql = re.sub(r'PREFIX.*\n', '', generated_sparql)
        question_id = file.split('.')[0]
        try:
            print(f"Starting for question {question_id}:")
            # Start the timeout timer (3 minutes)
            signal.alarm(180)
            results = g.query(f"{prefixes}\n{generated_sparql}")
            # Cancel the timeout alarm
            signal.alarm(0)
            with open(f"{sparql_result_orkg_folder}/{question_id}.txt", 'w') as f:
                for r in results:
                    f.write(str(r))
                print(f"Done for question {question_id}\n")
        except TimeoutException as e:
            print(f"Timeout: SPARQL query for question {question_id} took longer than 3 minutes and was aborted.")
            failed_files.append(question_id)
            errors.append(e)

        except Exception as e:
            failed_files.append(question_id)
            errors.append(e)
            print(f"Failed to run the sparql query for question {question_id}: \n{e}\n")
            continue
        finally:
            # Ensure the alarm is always disabled after execution
            signal.alarm(0)

Processing file 1/513
Starting for question AQ2094:
Done for question AQ2094

Processing file 2/513
Starting for question AQ0318:
Done for question AQ0318

Processing file 3/513
Starting for question AQ1748:
Done for question AQ1748

Processing file 4/513
Starting for question AQ1953:
Done for question AQ1953

Processing file 5/513
Starting for question AQ0495:
Done for question AQ0495

Processing file 6/513
Starting for question AQ0278:
Done for question AQ0278

Processing file 7/513
Starting for question AQ2309:
Done for question AQ2309

Processing file 8/513
Starting for question AQ0522:
Done for question AQ0522

Processing file 9/513
Starting for question AQ0536:
Done for question AQ0536

Processing file 10/513
Starting for question AQ2321:
Done for question AQ2321

Processing file 11/513
Starting for question AQ0250:
Done for question AQ0250

Processing file 12/513
Starting for question AQ0287:
Done for question AQ0287

Processing file 13/513
Starting for question AQ1827:
Done for

KeyboardInterrupt: 

In [None]:
n = len(failed_files)
with open(f"{sparql_result_orkg_folder}/failed_files.txt", 'w') as f:
    for i in range(n):
        f.write(f"{failed_files[i]}: {errors[i]}\n")