In [1]:
import pandas as pd
import os, glob

from owlready2 import *
import owlready2
print(owlready2.VERSION)


import importlib.util
import sys
spec = importlib.util.spec_from_file_location("rdfutils", "../../../utils/rdfutils.py")
u = importlib.util.module_from_spec(spec)
sys.modules["rdfutils"] = u
spec.loader.exec_module(u)

from datetime import datetime

def NOW():
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    return "Current Time = "+ str(current_time)

%load_ext autoreload
%autoreload 2




0.40


In [4]:
from dotenv import load_dotenv
load_dotenv('.env')
import json, requests
import importlib.util
import sys

spec = importlib.util.spec_from_file_location("llm", "../../../utils/llm.py")
h = importlib.util.module_from_spec(spec)
sys.modules["llm"] = h
spec.loader.exec_module(h)

URL = os.getenv("KG_URL_FCT")
TOK3N = os.getenv("KG_TOKEN")

In [2]:
onto = get_ontology("WIP.owl").load()
dIDct = u.createDict(onto) 

PBNThing
BenefReturn
Benef
Article
Risk
ISO_Scale
RiskHealth
RiskType
Stakeholder
Stakeholder_Type
Technology
ISO_Impact
ISO_Purpose
StakeholderGroup
TechGroup
Mitigation
BP_Enabler
BP_Transmission
Blueprint
BP_Scale
BP_Phase
BP_Permanent
BP_Type
BP_Intervention
PBNCategory
RiskGroup
CAO_item
BP_Theme
BP_Category
aBlueprint


In [3]:
comments = u.checkComments(onto)

ID: 0 	 Author: Luc Jonveaux
ID: 1 	 License: CC BY-NC-SA
ID: 2 	 Language: English
ID: 3 	 Project: PROBONO
ID: 4 	 Task: T3.5
ID: 5 	 Description: Creation of a knowledge graph based on a litterature review, augmented by use of LLMs.
ID: 6 	 Changes from: 0.0
ID: 7 	 Changes: Creation of the knowledge graph
ID: 8 	 Changes from: 0.1
ID: 9 	 Changes: Adding relations, simplified ontology, closest items, and descriptions
ID: 10 	 Repository: https://github.com/mm80843/T3.5/
ID: 11 	 TODOs: Quite a number! Creating metaRisks, making it accessible..
ID: 12 	 Creation: 11/11/2023
ID: 13 	 Version: 0.3
ID: 14 	 VersionComment: New blueprints added in 0.3
ID: 15 	 Library: owlready2==0.45


# Testing custom SPARQL queries

In [102]:
from IPython.display import display, Markdown

In [108]:
def EX(REQ):
    return list(default_world.sparql(REQ))
    
def ask(QUESTION,prefix="sparql",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed=""):
    CONTEXT  = "You are an expert in the sparql language."
    CONTEXT += "You will have to provide  a sparql request that counts the number of classes in a knowledge graph, \
          please answer with the request as between ```sparql ``` tags, and then provide details of how the request is built."
    REQ = {
        "context": CONTEXT,
        "question": QUESTION,
        "model": MODEL,
        "token": TOK3N,
        "overwrite": overwrite,
        "source": "local-sparqlqueries",
        "seed" : seed
    }

    H = h.hashme(CONTEXT+QUESTION+seed)
    cached = "cache/"+prefix+"_"+H+".json"
    if not os.path.isfile(cached) or overwrite:
        x = requests.post(URL+"ask/", json = REQ)
        #print(x.text)
        answer = json.loads(x.text)["answer"]
        h.svt(cached,answer)
    else:
        answer = h.ldt(cached)
    A = [x.replace("sparql\n","").strip() for x in answer.split("```") if len(x)]

    display(Markdown((A[-1])))
    display(Markdown("--------"))
    display(Markdown("```sparql\n"+(A[-2])+"```"))
    display(Markdown("--------"))
    B = EX(A[-2])
    return B

In [119]:
specifics ="""The ontology we are reviewing has several classes:
* 'Risk': A specific risk
* 'Mitigation': A mitigation against a risk
* 'Technology': A technology used to mitigate a risk
* 'TechGroup' : A category used to classify Technology
* 'Stakeholder': People related to risks
* 'Article': Sources of information
They are linked using the following properties:
* A 'Risk' has a 'Mitigation' : noted with the property : 'has_RiskMitigation' 
* A 'Risk' has a 'Technology' : noted with the property : 'has_RiskTechnology'
* A 'Risk' impacts a 'Stakeholder' : noted with the property : Risk->'has_RiskSubject'->Stakeholder . Inverse is 'has_SubjectRisk' .
* A 'Risk' can be mitigated by a 'Stakeholder' :  noted with the property : 'has_RiskOwner'
* A 'TechGroup' contains 'Technology', noted with the property:  Technology->'has_TechGroup'->TechGroup . Inverse is 'has_GroupTech' .
Note that all classes have capital letters as part of their names.

This ontology can be added in the sparql request introducing:
PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>
"""

In [109]:
A = ask(specifics+"How do I count the number of classes",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed="")
A

In the given ontology, the classes are represented by instances of the rdf:type predicate. To count the number of classes, the Sparql query uses the "SELECT" and "COUNT" keywords to count the distinct instances of rdf:type associated with the ontology's namespace. The PREFIX command is then used to define the namespace for the ontology, and the "WHERE" clause is utilized to specify the pattern of the triple. The FILTER clause is included to ensure that the queried instances belong to the specified namespace.

--------

```sparql
PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>

SELECT (COUNT(DISTINCT ?class) AS ?classCount)
WHERE {
  ?s a ?class 
  FILTER(REGEX(STR(?class), "pbn", "i"))
}```

--------

[[29]]

In [110]:
A = ask(specifics+"How do I count how many Technology belong to each TechGroup, and sort in ascending order (largest TechGroup first)?",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed="3") # gpt-4-0613
A

In this SPARQL request, we use the `PREFIX` declaration to define the namespace `pbn` as the ontology's URI. The `SELECT` statement is used to select the variables we are interested in, which in this case are `?techGroup` and the count of `?technology` as `?count`. 

The `WHERE` clause is used to specify the patterns to match against the data. Here, we're looking for instances of `pbn:Technology` that have a relationship with their corresponding `?techGroup` using the property `pbn:has_TechGroup`.

The `GROUP BY` clause is used to group the results by the `?techGroup`, and the `COUNT` function is used to count the number of instances for each group.

Finally, the `ORDER BY` clause is used to sort the results in descending order based on the count of technologies in each group.

--------

```sparql
PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>

SELECT ?techGroup (COUNT(?technology) as ?count)
WHERE {
  ?technology a pbn:Technology ;
              pbn:has_TechGroup ?techGroup .
}
GROUP BY ?techGroup
ORDER BY DESC(?count)```

--------

[[WIP.PBN__TechGroup_4, 5805],
 [WIP.PBN__TechGroup_15, 3326],
 [WIP.PBN__TechGroup_2, 2131],
 [WIP.PBN__TechGroup_28, 2018],
 [WIP.PBN__TechGroup_6, 1814],
 [WIP.PBN__TechGroup_37, 1685],
 [WIP.PBN__TechGroup_30, 1271],
 [WIP.PBN__TechGroup_46, 1130],
 [WIP.PBN__TechGroup_0, 1062],
 [WIP.PBN__TechGroup_7, 877],
 [WIP.PBN__TechGroup_33, 716],
 [WIP.PBN__TechGroup_60, 555],
 [WIP.PBN__TechGroup_44, 472],
 [WIP.PBN__TechGroup_13, 333],
 [WIP.PBN__TechGroup_51, 265],
 [WIP.PBN__Technology_2164, 197],
 [WIP.PBN__Technology_2100, 176],
 [WIP.PBN__Technology_1316, 136],
 [WIP.PBN__Technology_2226, 128],
 [WIP.PBN__Technology_2270, 122],
 [WIP.PBN__Technology_2274, 108],
 [WIP.PBN__Technology_2215, 101],
 [WIP.PBN__Technology_572, 96],
 [WIP.PBN__Technology_277, 94],
 [WIP.PBN__Technology_0, 90],
 [WIP.PBN__Technology_896, 85],
 [WIP.PBN__Technology_1447, 84],
 [WIP.PBN__Technology_231, 76],
 [WIP.PBN__Technology_2255, 67],
 [WIP.PBN__Technology_2191, 61],
 [WIP.PBN__Technology_2238, 58],
 [W

In [122]:
A = ask(specifics+"How do I identify the TechGroup, and its label, that has the most Technology items linked to a Risk?",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed="5")
A

{"answer":"```sparql\nPREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>\n\nSELECT ?techGroup (COUNT(?technology) AS ?numTechnologies)\nWHERE {\n  ?risk a pbn:Risk ;\n        pbn:has_RiskTechnology ?technology .\n  ?technology pbn:has_TechGroup ?techGroup .\n}\nGROUP BY ?techGroup\nORDER BY DESC(?numTechnologies)\nLIMIT 1\n```\n\nThis SPARQL query is identifying the 'TechGroup' and its label that has the most 'Technology' items linked to a 'Risk'. The query consists of the following elements:\n\n1. First, we specify the prefix for our ontology using `PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>`.\n\n2. We then use a SELECT query to retrieve the 'TechGroup' and count the number of 'Technology' items associated with a 'Risk'. We alias the count as `?numTechnologies`.\n\n3. In the WHERE clause, we specify the pattern where a 'Risk' is linked to a 'Technology' through the property `pbn:has_RiskTechnology`, and the 'Technology' is linked to a 'TechG

This SPARQL query is identifying the 'TechGroup' and its label that has the most 'Technology' items linked to a 'Risk'. The query consists of the following elements:

1. First, we specify the prefix for our ontology using `PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>`.

2. We then use a SELECT query to retrieve the 'TechGroup' and count the number of 'Technology' items associated with a 'Risk'. We alias the count as `?numTechnologies`.

3. In the WHERE clause, we specify the pattern where a 'Risk' is linked to a 'Technology' through the property `pbn:has_RiskTechnology`, and the 'Technology' is linked to a 'TechGroup' using the property `pbn:has_TechGroup`.

4. We use the GROUP BY clause to group the results by the 'TechGroup'.

5. We then use the ORDER BY clause to sort the results in descending order based on the count of 'Technology' items.

6. Finally, we use the LIMIT 1 clause to retrieve only the top 'TechGroup' with the most linked 'Technology' items.

--------

```sparql
PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>

SELECT ?techGroup (COUNT(?technology) AS ?numTechnologies)
WHERE {
  ?risk a pbn:Risk ;
        pbn:has_RiskTechnology ?technology .
  ?technology pbn:has_TechGroup ?techGroup .
}
GROUP BY ?techGroup
ORDER BY DESC(?numTechnologies)
LIMIT 1```

--------

[[WIP.PBN__TechGroup_4, 12740]]

In [127]:
A = ask(specifics+"Let's take the 'pbn:PBN__Risk_1', one of the Risk instance. How do I identify the different individual TechGroup to which the Technology instances linked to the Risk (through has_RiskTechnology) ?",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed="5")
A

{"answer":"```sparql\nPREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>\n\nSELECT (COUNT(DISTINCT ?class) as ?classCount)\nWHERE {\n    ?s a ?class\n}\n```\n\nIn the SPARQL request provided, the PREFIX statement is used to define a namespace 'pbn' for the ontology. In this case, it is linked to the ontology in question. \nThe SELECT statement is then used to count the number of distinct classes in the knowledge graph. The COUNT and DISTINCT functions are employed within the SELECT statement to achieve this. The COUNT function counts the number of instances found, while the DISTINCT function ensures that only unique instances are counted. The WHERE statement is utilized to specify the pattern to match, which in this case is any instance (?s) that belongs to a class (?class).\nOnce the request is executed against the knowledge graph, it will return the count of distinct classes present in the ontology."}


In the SPARQL request provided, the PREFIX statement is used to define a namespace 'pbn' for the ontology. In this case, it is linked to the ontology in question. 
The SELECT statement is then used to count the number of distinct classes in the knowledge graph. The COUNT and DISTINCT functions are employed within the SELECT statement to achieve this. The COUNT function counts the number of instances found, while the DISTINCT function ensures that only unique instances are counted. The WHERE statement is utilized to specify the pattern to match, which in this case is any instance (?s) that belongs to a class (?class).
Once the request is executed against the knowledge graph, it will return the count of distinct classes present in the ontology.

--------

```sparql
PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>

SELECT (COUNT(DISTINCT ?class) as ?classCount)
WHERE {
    ?s a ?class
}```

--------

[[34]]

In [129]:
A = ask("How do I count the top 5 classes that have the children with most instances?",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed="")
A

In the given SPARQL request:
1. We use the `SELECT` clause to select the `?class` and count the instances of its children.
2. The `WHERE` clause fetches all the triples where a child is a class.
3. The `GROUP BY` clause groups the results by the class.
4. The `COUNT` function counts the instances of children for each class.
5. The `ORDER BY` clause orders the results by the count of instances in descending order.
6. Finally, the `LIMIT` clause restricts the results to the top 5 classes with the most instances as children.

--------

```sparql
SELECT ?class (COUNT(?child) as ?count)
WHERE {
  ?child a ?class .
}
GROUP BY ?class
ORDER BY DESC(?count)
LIMIT 5```

--------

[[owl.NamedIndividual, 88050],
 [WIP.Technology, 23103],
 [WIP.Mitigation, 22926],
 [WIP.Risk, 21127],
 [WIP.Stakeholder, 16094]]

In [130]:
A = ask("I have a specific Risk. How can I search for the list of Stakeholder impacted?",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed="")

{"answer":"To retrieve the list of stakeholders impacted by a specific risk, you can use the following SPARQL query:\n\n```sparql\nSELECT ?stakeholder\nWHERE {\n  ?risk a <your-ontology-prefix:Risk> ;\n        <your-ontology-prefix:impactsStakeholder> ?stakeholder .\n}\n```\n\nIn this query:\n- `SELECT ?stakeholder` specifies that we want to retrieve the stakeholders.\n- `WHERE` block is used to specify the pattern that the data should match.\n- `?risk` is a variable representing the specific risk.\n- `<your-ontology-prefix:Risk>` is a specific class representing the risk in your ontology.\n- `<your-ontology-prefix:impactsStakeholder>` is a specific property that represents the impact of a risk on stakeholders.\n- `?stakeholder` is a variable representing the stakeholders impacted by the specific risk.\n\nWhen you run this query on your knowledge graph, it will return the list of stakeholders impacted by the specific risk."}


In this query:
- `SELECT ?stakeholder` specifies that we want to retrieve the stakeholders.
- `WHERE` block is used to specify the pattern that the data should match.
- `?risk` is a variable representing the specific risk.
- `<your-ontology-prefix:Risk>` is a specific class representing the risk in your ontology.
- `<your-ontology-prefix:impactsStakeholder>` is a specific property that represents the impact of a risk on stakeholders.
- `?stakeholder` is a variable representing the stakeholders impacted by the specific risk.

When you run this query on your knowledge graph, it will return the list of stakeholders impacted by the specific risk.

--------

```sparql
SELECT ?stakeholder
WHERE {
  ?risk a <your-ontology-prefix:Risk> ;
        <your-ontology-prefix:impactsStakeholder> ?stakeholder .
}```

--------

ValueError: No existing entity for IRI 'your-ontology-prefix:Risk'! (use error_on_undefined_entities=False to accept unknown entities in SPARQL queries)

In [135]:
EX("""PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>
   SELECT ?Stakeholder
WHERE {
  ?risk a pbn:Risk ;
        pbn:has_RiskSubject ?stakeholder .
}""")

ValueError: Cannot select 'VAR:'?Stakeholder''!