In [None]:
import json
import os

import google.generativeai as genai
import openai
import pandas as pd
import rdflib
from rdflib import Graph, URIRef, BNode, RDF
from SPARQLWrapper import SPARQLWrapper, JSON
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig

In [2]:
# Load the complete ERA ontology into an in-memory RDF graph.
org_ontology = Graph()
org_ontology.parse("F:/main_proj/era_ontology.ttl")   # parse our complete ontology

# Public SPARQL endpoint that the queries in this notebook are sent to.
endpoint_url ='https://virtuoso.ecdp.tech.ec.europa.eu/sparql'

# Credentials must not be committed with the notebook: take the Gemini key from
# the GOOGLE_API_KEY environment variable. When it is not set this falls back
# to the empty string that was hard-coded here before.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))

In [3]:
# Run a SPARQL query against an endpoint and hand back the decoded result.
def execute_sparql_query(endpoint_url, query):
    """Send ``query`` to the SPARQL endpoint at ``endpoint_url``.

    Args:
        endpoint_url: URL of the SPARQL endpoint.
        query: SPARQL query text.

    Returns:
        The converted query result, requested in the JSON return format.
    """
    client = SPARQLWrapper(endpoint_url)
    client.setReturnFormat(JSON)
    client.setQuery(query)
    return client.query().convert()

In [532]:
# Turn a SPARQL JSON result into a table for easier reading.
def json_result_to_dataframe(json_result):
    """Convert a SPARQL JSON result into a pandas DataFrame.

    Args:
        json_result: Mapping with the variable names under ``["head"]["vars"]``
            and one mapping per solution under ``["results"]["bindings"]``.

    Returns:
        A DataFrame with one column per variable (in ``vars`` order) and one
        row per binding. Each cell holds the binding's ``"value"`` entry; a
        variable that is absent from a row is left as a missing value.
    """
    column_names = json_result["head"]["vars"]

    records = []
    for binding in json_result["results"]["bindings"]:
        record = {}
        for variable, cell in binding.items():
            record[variable] = cell["value"]
        records.append(record)

    return pd.DataFrame(records, columns=column_names)


In [19]:
# Creating a reduced ontology from the original ontology
def create_reduced_ontology(output_dir="F:/main_proj"):
    """Cut the ontology down to the terms that occur in the RINF data graph.

    The SPARQL endpoint (module-level ``endpoint_url``) is asked for every
    distinct class and every distinct predicate used in the
    ``http://data.europa.eu/949/graph/rinf`` named graph. The module-level
    ``org_ontology`` graph is then filtered to:

    * every triple whose subject is one of those classes, and
    * every triple whose predicate is one of those predicates and whose
      subject is not a blank node.

    Three Turtle files are written into ``output_dir``:
    ``reduced_classes.ttl`` (class triples only), ``reduced_properties.ttl``
    (property triples only) and ``reduced_graph.ttl`` (both).

    Args:
        output_dir: Directory the Turtle files are written to. Defaults to the
            location that was previously hard-coded.

    Returns:
        The combined reduced graph (class triples plus property triples).
    """
    sparql_query_classes = "select distinct ?class FROM <http://data.europa.eu/949/graph/rinf> where { ?subj a ?class . }"
    sparql_query_properties = "select distinct ?pred FROM <http://data.europa.eu/949/graph/rinf> where { ?subj ?pred ?obj . }"

    def _bound_values(result, variable):
        # Collect the non-empty values bound to `variable` in a SPARQL JSON result.
        values = set()
        for row in result["results"]["bindings"]:
            value = row.get(variable, {}).get("value", "")
            if value:
                values.add(value)
        return values

    relevant_classes = _bound_values(
        execute_sparql_query(endpoint_url, sparql_query_classes), "class"
    )
    relevant_properties = _bound_values(
        execute_sparql_query(endpoint_url, sparql_query_properties), "pred"
    )

    reduced_graph = Graph()
    r_classes = Graph()
    r_prop = Graph()

    # Triples that describe a relevant class (the class is the subject).
    for class_uri in relevant_classes:
        for triple in org_ontology.triples((URIRef(class_uri), None, None)):
            r_classes.add(triple)
            reduced_graph.add(triple)

    # Triples that use a relevant predicate. A predicate pattern lookup
    # replaces the former full scan of the ontology for every predicate.
    for prop_uri in relevant_properties:
        for s, p, o in org_ontology.triples((None, URIRef(prop_uri), None)):
            if not isinstance(s, BNode):  # blank-node subjects are left out
                r_prop.add((s, p, o))
                reduced_graph.add((s, p, o))

    r_classes.serialize(destination=f"{output_dir}/reduced_classes.ttl", format="turtle")
    r_prop.serialize(destination=f"{output_dir}/reduced_properties.ttl", format="turtle")
    reduced_graph.serialize(destination=f"{output_dir}/reduced_graph.ttl", format="turtle")

    return reduced_graph

In [20]:
# build vocabulary of main classes, class instances and properties from the reduced graph.
def extract_ontology_structure(reduced_graph):
    """Collect the vocabulary used in ``reduced_graph``.

    Every triple is inspected once:

    * for an ``rdf:type`` triple whose object is not ``rdfs:Class``, the object
      is recorded as a class and the subject as an instance;
    * for every other triple the predicate is recorded as a property (this
      includes ``rdf:type`` itself when the object is ``rdfs:Class``).

    Args:
        reduced_graph: Iterable of ``(subject, predicate, object)`` triples.

    Returns:
        Dict with the keys ``"classes"``, ``"properties"`` and ``"instances"``,
        each a sorted list of strings. Sorting keeps the result, and any prompt
        built from it, identical from one run to the next; plain ``list(set)``
        does not guarantee that.
    """
    main_classes = set()
    properties = set()
    instances_of_class = set()

    for s, p, o in reduced_graph:
        # check if the object is a class of the main ontology and not a general rdfs:Class
        if p == RDF.type and o != rdflib.RDFS.Class:
            main_classes.add(str(o))
            instances_of_class.add(str(s))
        else:
            properties.add(str(p))

    return {
        "classes": sorted(main_classes),
        "properties": sorted(properties),
        "instances": sorted(instances_of_class),
    }

In [22]:
# Build the reduced ontology (this queries the SPARQL endpoint and writes the
# Turtle files), then derive the class / property / instance vocabulary from it.
reduced_graph = create_reduced_ontology()
ontology_information = extract_ontology_structure(reduced_graph)

In [15]:
def MyVocab_prompt(question, ontology_information):  # author's note: worked well with temperature 0.4
    """Build a vocabulary-based prompt asking an LLM for a SPARQL query.

    The prompt shows ``ontology_information`` (formatted with ``str()``), then
    a list of restrictions, then the question wrapped in triple backticks.
    Each restriction and the question sit on their own line; previously they
    were concatenated without any separator.

    Args:
        question: Natural-language question to translate into SPARQL.
        ontology_information: Vocabulary to show the model, e.g. the dict
            returned by ``extract_ontology_structure``.

    Returns:
        The prompt text.
    """
    restrictions = [
        "-Only use classes, properties and instances URIs defined in the ontology_information provided.",
        "-use full URI.",
        "-Use simple letters such as x for variables.",
        "-Always include the \"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\" in your answer.",
    ]

    prompt = (
        f"Consider the below ontology Information:\n"
        f"{ontology_information}\n"
        f"Based on the given ontology above, generate a SPARQL query for the question delimited by triple backticks taking into consideration the below restrictions:\n"
        + "\n".join(restrictions)
        + f"\nQuestion: ```{question}```"
    )
    return prompt

In [16]:
#prompt with reduced graph
def prompt_ntGraph(question,reduced_graph):
    """Build a prompt that embeds the reduced graph as Turtle.

    The prompt contains the Turtle serialization of ``reduced_graph``, then a
    list of restrictions, then the question in parentheses. Every restriction
    and the question sit on their own line; previously only some of them were
    followed by a newline, so several ran together.

    Args:
        question: Natural-language question to translate into SPARQL.
        reduced_graph: Object whose ``serialize(format='turtle')`` returns the
            graph text.

    Returns:
        The prompt text.
    """
    rg = reduced_graph.serialize(format='turtle')

    restrictions = [
        "-Only use classes, properties and instances defined in the RDF graph.",
        "-Do not use any modifiers unless it is instrcuted",
        "-Always Use namespaces prefix bindings.",
        "-Always include prefixes and namespaces used in the RDF in your response.",
        "-Always include the \"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\" in your answer.",
        "-Always include the \"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> \" in your answer.",
    ]

    prompt = (
        f"Given the following RDF graph serialized in Turtle format: \n{rg}\n"
        f"Generate a correct SPARQL query for the question delimited by Parentheses following the below restrictions:\n"
        + "\n".join(restrictions)
        + f"\nQuestion: ({question})"
    )
    return prompt

In [17]:
def structured_prompt(question, reduced_graph):
    """Build the multi-line prompt: graph first, question second, rules last.

    Args:
        question: Natural-language question to translate into SPARQL.
        reduced_graph: Object whose ``serialize(format='turtle')`` returns the
            graph text.

    Returns:
        The prompt text. Apart from the graph itself, each line keeps a
        four-space indent, and the text begins with a newline and ends with an
        indented empty line.
    """
    turtle_text = reduced_graph.serialize(format='turtle')
    pad = " " * 4

    pieces = [
        "",
        f"{pad}Consider the RDF graph:",
        f" {turtle_text}",
        f"{pad}Generate a SPARQL query for the given question: {question}.",
        f"{pad}Instructions to Follow:",
        f"{pad}-Always Use namespaces prefix bindings.",
        f"{pad}-Always include prefixes and namespaces used in the RDF in your response.",
        f"{pad}-Always include the \"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\" in your answer.",
        f"{pad}-Always include the \"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> \" in your answer.",
        pad,
    ]
    return "\n".join(pieces)

#- Do not use modifiers unless it is instructed.

In [10]:
# API to chatgpt 3.5 Turbo
def openai_api(prompt, model="gpt-3.5-turbo", temperature=0.4):
    """Send ``prompt`` as a single user message to the OpenAI chat API.

    The API key is read from the ``OPENAI_API_KEY`` environment variable
    instead of being written into the notebook. When the variable is not set
    the key is the empty string, as it was before.

    Args:
        prompt: Text sent as the content of the user message.
        model: Chat model name; defaults to the model used so far.
        temperature: Sampling temperature; defaults to the value used so far.

    Returns:
        The object returned by ``openai.chat.completions.create``.
    """
    openai.api_key = os.environ.get("OPENAI_API_KEY", "")

    return openai.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=[
            {"role": "user", "content": prompt}
        ],
    )

In [249]:
# API To Gemini
def gemini_api(prompt, model_name='gemini-pro', temperature=0.4, max_output_tokens=1000):
    """Send ``prompt`` to a Gemini model and return its response.

    Args:
        prompt: Text passed to ``generate_content``.
        model_name: Gemini model to use; defaults to the model used so far.
        temperature: Sampling temperature; defaults to the value used so far.
        max_output_tokens: Upper bound on generated tokens; defaults to the
            value used so far.

    Returns:
        The object returned by ``GenerativeModel.generate_content``. A single
        candidate is requested.
    """
    generation_config = genai.types.GenerationConfig(
        candidate_count=1,
        max_output_tokens=max_output_tokens,
        temperature=temperature,
    )
    model = genai.GenerativeModel(model_name)
    return model.generate_content(prompt, generation_config=generation_config)

In [247]:
# Natural-language question that the prompt-building cell turns into prompts.
nl_input = "what is the maximum temperture range for the track with label TRL1?"

In [248]:
# Natural-language questions about the railway data (operational points,
# lines, tracks, tunnels, countries).
# NOTE(review): "Please give me a list of distinct train detection systems."
# appears twice in this list - confirm whether the repetition is intended.
questions_list = ["Please give me a list of all operational points.",
"Please give me a list of distinct train detection systems.",
"Please give me a list of all national railway lines.",
"Please give me a list of all national railway lines, Limit to 100 results.",
"Please give me a list of all national railway lines, return the name of the resource only, Limit to 100 results.",
"Please give me all operational points names and then filter results to find all operational points in Bournemouth.",
"give me a list of all tunnel names.",
"give me the length of all section of lines, return section of line label and corresponding length.",
"give me the Unique OP ID for the operational point Siding_BBD9.",
"Please give me a list of distinct train detection systems.",
"what is the maximum temperture range for the track with label TRL1.",
"give me a list of all tracks that have maximum temperture range of 40.",
"in which country is the operational point Rakenduspunkt Kiisa is located?.",
"which country has the largest number of operational points?.",
"which country has the largest number of tunnels?.",
"Get me all information about operational point Siding_BBD9.",
"Give me the country of operational point labeled as Siding_BBD9.",
"list all siding values for the Siding_BBD9 operational point.",
"Please give me all section of lines and then filter results to find bournemouth.",
"which country has the longest length of section of lines?."
]

In [None]:
# Generate the in-context prompts to be fed to the LLMs: the same question is
# rendered with each of the three prompt builders.

prompt1 = MyVocab_prompt(nl_input,ontology_information)
prompt2 = prompt_ntGraph(nl_input,reduced_graph)
prompt3 = structured_prompt(nl_input,reduced_graph)

prompt_list = [prompt1,prompt2,prompt3]

# Run every prompt in prompt_list through the OpenAI API and print the
# message content of the first returned choice.
print("# " + nl_input)
print("# gpt-3.5-turbo")
for item in prompt_list:
 answer = openai_api(item)
 print(answer.choices[0].message.content)
 print("----------------------------------------------\n")

# Run every prompt in prompt_list through the Gemini API; the response texts
# are printed and also collected in response_list.
print("# " + nl_input)
print("# Gemini" )
response_list = []
for item in prompt_list:
  response = gemini_api(item)
  response_list.append(response.text)

  print(response.text)
  print("----------------------------------------------\n")
