# Setup and Config

In [1]:
def load_file_to_string(file_path, encoding="utf-8"):
  """Loads the content of a text file into a string.

  The encoding is passed to ``open`` explicitly so the result does not depend
  on the locale default of the machine running the notebook.

  Args:
    file_path: The path to the file.
    encoding: Text encoding used to decode the file. Defaults to UTF-8.

  Returns:
    The content of the file as a string.

  Raises:
    OSError: If the file cannot be opened.
    UnicodeDecodeError: If the file content is not valid in ``encoding``.
  """
  with open(file_path, 'r', encoding=encoding) as file:
    return file.read()

In [None]:
# Path of the axiom file whose text is used as the schema
file_path = 'Axiomatization/Complex/Axioms.txt'
# Read the file once; 'file_string' and 'schema' refer to the same loaded text
file_string = schema = load_file_to_string(file_path)

In [3]:
# Competency questions (CQs): each one is turned into a separate prompt
CQs = [
    "What are all the hazard events, their name, start date, end date?",
    "What are all the places, their names?",
]

In [4]:
# Define a helper that fills {placeholder} markers in a prompt template
def fill_prompt_template(template_text, values_dict):
    """Replaces every ``{key}`` placeholder in a template with its value.

    All placeholders are substituted in a single pass over the template, so
    text that arrives through one value (for example a question that happens
    to contain ``{Insert_schema_here}``) is never expanded a second time.

    Args:
        template_text: The template string containing ``{key}`` markers.
        values_dict: Mapping from placeholder name (without braces) to the
            value to insert. Values are converted with ``str``.

    Returns:
        The template with every known placeholder replaced. Placeholders
        without an entry in ``values_dict`` are left untouched.
    """
    import re  # local import: this cell must not rely on another cell's imports

    replacements = {}
    for key, value in values_dict.items():
        # First entry wins if two keys render to the same placeholder text
        replacements.setdefault(f"{{{key}}}", str(value))
    if not replacements:
        return template_text

    # Longest placeholders first so a marker that is a prefix of another
    # cannot shadow it in the alternation.
    ordered = sorted(replacements, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(marker) for marker in ordered))
    return pattern.sub(lambda match: replacements[match.group(0)], template_text)

In [1]:
# Sampling temperature passed to the LLM inference calls in this notebook
temperature = 0.8

In [5]:
# System message for the language model: sets its role as a SPARQL query
# generator and lists the output rules it has to follow
initial_system_message = """
You are an expert in knowledge graphs and SPARQL query generation. Your task is to generate SPARQL queries based on provided competency questions and a given schema.

Guidelines:
Use the schema provided in the context block to determine appropriate classes, properties, and relationships.
 - Ensure queries follow SPARQL syntax and use prefixes correctly.
 - Generate queries that efficiently retrieve relevant data while optimizing performance.
 - If multiple valid queries exist, choose the most concise and efficient one.
 - Preserve the intent of the competency question while ensuring syntactic correctness.
 - Give only the SPARQL query and nothing else
"""

In [6]:
# User-prompt template for the language model.
# It contains two placeholders, {Insert_CQ_here} and {Insert_schema_here},
# written as literal text (this is a plain string, not an f-string) so that
# they can be substituted later.
template_prompt = """
Task:
Write a SPARQL query that answers the following competency question:
{Insert_CQ_here}

Requirements:
- Use the schema to determine correct URIs and relationships.
- Ensure the query retrieves the necessary information efficiently.
- Provide the full SPARQL query without placeholders.

Context:
Below is the schema of a knowledge graph:
{Insert_schema_here}
"""

# Mistral

In [7]:
# Read the Mistral API key from the MISTRAL_API_KEY environment variable so the
# secret is never stored in the notebook; falls back to an empty string.
import os

mistral_api_key = os.environ.get("MISTRAL_API_KEY", "")

In [8]:
!pip install mistralai



In [9]:
# Import necessary libraries for Mistral
import os
from mistralai import Mistral

In [10]:
# Create the Mistral API client, configured with the key held in mistral_api_key
mistral_client = Mistral(api_key=mistral_api_key)

In [11]:
# Define a function to perform single-turn inference with Mistral
def inference_with_mistral(prompt, model="mistral-large-2411"):
  """Sends one prompt to a Mistral chat model and returns its answer.

  The request consists of the notebook-level ``initial_system_message`` as the
  system turn followed by ``prompt`` as the user turn, and is sampled with the
  notebook-level ``temperature``.

  Args:
    prompt: The user prompt string.
    model: The name of the Mistral model to use.

  Returns:
    A tuple ``(content, raw_json)``: the message content of the first choice
    in the response, and the whole response object serialized with
    ``model_dump_json(indent=4)``.
  """
  messages = [
      {"role": "system", "content": initial_system_message},
      {"role": "user", "content": prompt},
  ]
  chat_response = mistral_client.chat.complete(
      model=model,
      messages=messages,
      temperature=temperature,
  )
  # Every call is an independent exchange: the reply is returned directly and
  # no conversation history is kept between calls.
  answer_text = chat_response.choices[0].message.content
  return answer_text, chat_response.model_dump_json(indent=4)

In [12]:
# Import necessary libraries for data processing
import pandas as pd
import time

# Send each competency question to Mistral and collect one result row per question
cq_mistral_results = []
for cq in CQs:
  print("*" * 10)
  # Fill the shared template with the current question and the loaded schema
  input_data = {"Insert_CQ_here": cq, "Insert_schema_here": schema}
  filled_prompt = fill_prompt_template(template_prompt, input_data)
  mistral_analysis_result, mistral_analysis_raw = inference_with_mistral(filled_prompt)
  time.sleep(1)  # short pause between consecutive API calls
  cq_mistral_results.append(
      (cq, filled_prompt, mistral_analysis_result, mistral_analysis_raw)
  )
  # Same output as three separate prints: question, separator, blank lines
  print(cq, "*" * 10, "\n\n\n", sep="\n")

# Tabulate the collected rows
df = pd.DataFrame(
    cq_mistral_results,
    columns=['CQ', 'Prompt', 'Mistral_Analysis_Result', 'Mistral_Analysis_Raw'],
)

# Write the table to an Excel file named after the last component of file_path
excel_file_path = file_path.split("/")[-1] + "_cq_mistral_results.xlsx"
df.to_excel(excel_file_path, index=False)

print("Excel file saved to: " + excel_file_path)

**********
What are all the hazard events, their name, start date, end date?
**********




**********
What are all the places, their names?
**********




Excel file saved to: cq_mistral_results.xlsx


# Gemini

In [13]:
# Read the Gemini API key from the GEMINI_API_KEY environment variable so the
# secret is never stored in the notebook; falls back to an empty string.
import os

gemini_api_key = os.environ.get("GEMINI_API_KEY", "")

In [14]:
# Import necessary libraries for Gemini
from google import genai
from google.genai import types

# Create the Gemini API client, configured with the key held in gemini_api_key
gemini_client = genai.Client(api_key=gemini_api_key)

In [15]:
# Define a function to perform single-shot inference with Gemini
def inference_with_gemini(prompt, model = "gemini-2.0-flash"):
  """Sends one prompt to a Gemini model and returns its answer.

  The notebook-level ``initial_system_message`` is supplied as the system
  instruction and the notebook-level ``temperature`` as the sampling
  temperature.

  Args:
    prompt: The prompt string.
    model: The name of the Gemini model to use.

  Returns:
    A tuple ``(text, raw_json)``: the ``text`` attribute of the response and
    the whole response object serialized with ``model_dump_json(indent=4)``.
  """
  generation_config = types.GenerateContentConfig(
      system_instruction=initial_system_message,
      temperature=temperature,
  )
  response = gemini_client.models.generate_content(
      model=model,
      config=generation_config,
      contents=prompt,
  )
  answer_text = response.text
  raw_json = response.model_dump_json(indent=4)
  return answer_text, raw_json

In [16]:
# Import necessary libraries for data processing
import pandas as pd
import time

# Send each competency question to Gemini and collect one result row per question
cq_gemini_results = []
for cq in CQs:
  print("*" * 10)
  # Fill the shared template with the current question and the loaded schema
  input_data = {"Insert_CQ_here": cq, "Insert_schema_here": schema}
  filled_prompt = fill_prompt_template(template_prompt, input_data)
  gemini_analysis_result, gemini_analysis_raw = inference_with_gemini(filled_prompt)
  time.sleep(1)  # short pause between consecutive API calls
  cq_gemini_results.append(
      (cq, filled_prompt, gemini_analysis_result, gemini_analysis_raw)
  )
  # Same output as three separate prints: question, separator, blank lines
  print(cq, "*" * 10, "\n\n\n", sep="\n")

# Tabulate the collected rows
df = pd.DataFrame(
    cq_gemini_results,
    columns=['CQ', 'Prompt', 'Gemini_Analysis_Result', 'Gemini_Analysis_Raw'],
)

# Write the table to an Excel file named after the last component of file_path
excel_file_path = file_path.split("/")[-1] + "_cq_Gemini_results.xlsx"
df.to_excel(excel_file_path, index=False)

print("Excel file saved to: " + excel_file_path)

**********
What are all the hazard events, their name, start date, end date?
**********




**********
What are all the places, their names?
**********




Excel file saved to: cq_Gemini_results.xlsx
