In [2]:
import pandas as pd
import os, glob

from owlready2 import *
import owlready2
print(owlready2.VERSION)


import importlib.util
import sys
# Load the rdfutils helper module directly from its file path and register it
# in sys.modules under the name "rdfutils"; the module object is bound to `u`.
spec = importlib.util.spec_from_file_location("rdfutils", "../../../utils/rdfutils.py")
u = importlib.util.module_from_spec(spec)
sys.modules["rdfutils"] = u
# Execute the module body so its definitions become attributes of `u`.
spec.loader.exec_module(u)

from datetime import datetime

def NOW():
    """Return the current local time formatted as ``"Current Time = HH:MM:SS"``."""
    timestamp = datetime.now().strftime("%H:%M:%S")
    return f"Current Time = {timestamp}"

%load_ext autoreload
%autoreload 2


0.40




In [3]:
from dotenv import load_dotenv
# Load variables from the local .env file into the process environment.
load_dotenv('.env')
import json, requests
import importlib.util
import sys

# Load the llm helper module directly from its file path and register it in
# sys.modules under the name "llm"; the module object is bound to `h`.
spec = importlib.util.spec_from_file_location("llm", "../../../utils/llm.py")
h = importlib.util.module_from_spec(spec)
sys.modules["llm"] = h
spec.loader.exec_module(h)

# Base URL and access token of the remote service, read from the environment.
# NOTE(review): os.getenv returns None when a variable is unset; URL is later
# concatenated with "ask/" in ask(), so a missing KG_URL_FCT would fail there.
URL = os.getenv("KG_URL_FCT")
TOK3N = os.getenv("KG_TOKEN")

In [4]:
# Load the working ontology from the OWL file next to this notebook.
onto = get_ontology("WIP_w_SPARQL.owl").load()
# Build a lookup from the ontology with the rdfutils helper.
# NOTE(review): the structure of the returned object is not visible here — TODO confirm.
dIDct = u.createDict(onto) 

In [None]:
# Run the rdfutils comment check on the ontology; the recorded output below
# lists the ontology metadata entries as "ID: n <text>" lines.
comments = u.checkComments(onto)

ID: 0 	 Description: Creation of a knowledge graph based on a litterature review, augmented by use of LLMs.
ID: 1 	 Version: 0.4
ID: 2 	 Library: owlready2==0.45
ID: 3 	 Changes from: 0.3
ID: 4 	 Next: Check formulas for selecting most repeated items in groups
ID: 5 	 Changes: Adding new risks, groups of items, and new relationships
ID: 6 	 Creation: 22/01/2024
ID: 7 	 TODOs: Adding synonyms and solving classification with synonyms
ID: 8 	 VersionComment: Grouping of items added in 0.4
ID: 9 	 Project: PROBONO
ID: 10 	 Next: Linking benefits to mitigations groups
ID: 11 	 License: CC BY-NC-SA
ID: 12 	 Task: T3.5
ID: 13 	 Repository: https://github.com/mm80843/T3.5/
ID: 14 	 Author: Luc Jonveaux
ID: 15 	 Language: English


# Simplified versions

## Only blueprints

In [5]:
# Build the "blueprints only" export: every populated class whose name is not
# Blueprint*, BP_* or PBNThing is removed together with its individuals, and
# the result is written to the prod folder.
pathrdfin  = "WIP_w_SPARQL.owl"
pathrdfout = "../../prod/pbn_t3_5_BP.owl"

ontoOne = owlready2.get_ontology(pathrdfin).load()
# NOTE(review): index 5 is the "Changes" entry in the metadata listing printed
# earlier in this notebook; it is replaced wholesale here — TODO confirm intended.
ontoOne.metadata.comment[5] = 'limited to blueprints'

with ontoOne:
    # Snapshot the classes before deleting anything: destroying entities while
    # walking the live classes() generator mutates what is being iterated and
    # can make the loop skip classes.
    for k in list(ontoOne.classes()):
        I = list(k.instances())
        if not I:
            # Classes without individuals are left untouched.
            continue
        # Short class name, i.e. the part after the namespace prefix.
        K = str(k).split(".")[-1]
        if K.startswith(("Blueprint", "BP_")) or K == "PBNThing":
            continue
        # Remove the individuals first, then the class itself.
        for i in I:
            owlready2.destroy_entity(i)
        owlready2.destroy_entity(k)

ontoOne.save(file = pathrdfout)
# Release the loaded copy so the next export starts from a fresh load of the file.
ontoOne.destroy()
del(ontoOne)
print("File saved at "+pathrdfout)

File saved at ../../prod/pbn_t3_5_BP.owl


In [None]:
# Build the "simple" export: the blueprint classes and the other class names in
# `banned` are removed together with their individuals, and the result is
# written to the prod folder.
# This cell uses the `owlready2` module name, so the import cell at the top of
# the notebook must have been executed first (otherwise it raises NameError).
pathrdfin  = "WIP_w_SPARQL.owl"
pathrdfout = "../../prod/pbn_t3_5_simple.owl"

ontoBis = owlready2.get_ontology(pathrdfin).load()
# NOTE(review): index 5 is the "Changes" entry in the metadata listing printed
# earlier in this notebook; it is replaced wholesale here — TODO confirm intended.
ontoBis.metadata.comment[5] = 'limited simple, no blueprints'

# Short names of the classes to drop from this export.
banned = ['BP_Enabler',
 'BP_Transmission',
 'Blueprint',
 'BP_Scale',
 'BP_Phase',
 'BP_Permanent',
 'BP_Type',
 'BP_Theme',
 'BP_Category',
 "Benef",
 "BenefReturn",
 "Risk","Mitigation","Technology","Stakeholder"]

with ontoBis:
    # Snapshot the classes before deleting anything: destroying entities while
    # walking the live classes() generator mutates what is being iterated and
    # can make the loop skip classes.
    for k in list(ontoBis.classes()):
        I = list(k.instances())
        if not I:
            # Classes without individuals are left untouched.
            continue
        # Short class name, i.e. the part after the namespace prefix.
        K = str(k).split(".")[-1]
        print(K)
        if K in banned:
            print(K,"is banned.")
            # Remove the individuals first, then the class itself.
            for i in I:
                owlready2.destroy_entity(i)
            owlready2.destroy_entity(k)

ontoBis.save(file = pathrdfout)
# Release the loaded copy so later cells do not see the pruned ontology.
ontoBis.destroy()
del(ontoBis)
print("File saved at "+pathrdfout)

NameError: name 'owlready2' is not defined

## Removing Risk, Stakeholder, Mitigations, Benef 

# Testing custom SPARQL queries

## Creating the support helper

In [7]:
from IPython.display import display, Markdown

In [8]:
def EX(REQ):
    """Run the SPARQL query string ``REQ`` on ``default_world`` and return the result rows as a list."""
    rows = default_world.sparql(REQ)
    return list(rows)
    
def _split_llm_answer(answer):
    """Split an LLM answer into ``(sparql_code, explanation)``.

    The answer is expected to contain the query inside a ``` fence, optionally
    tagged ``sparql``. The last non-empty fenced segment is taken as the query;
    all text outside the fences is joined to form the explanation.

    Raises:
        ValueError: if the answer contains no fenced block at all.
    """
    segments = answer.split("```")
    # After splitting on the fence marker, fenced content sits at the odd indices.
    fenced = [seg for seg in segments[1::2] if seg.strip()]
    if not fenced:
        raise ValueError("No ``` fenced SPARQL block found in the answer: " + repr(answer[:200]))
    code = fenced[-1].strip()
    # Drop the optional language tag that directly follows the opening fence.
    first_line, _, remainder = code.partition("\n")
    if first_line.strip().lower() == "sparql":
        code = remainder.strip()
    explanation = "\n\n".join(seg.strip() for seg in segments[0::2] if seg.strip())
    return code, explanation


def ask(QUESTION,prefix="sparql",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed=""):
    """Ask the remote LLM service for a SPARQL query, display it, run it and return the rows.

    The raw answer is cached under ``cache/<prefix>_<hash>.json`` where the hash
    is computed from CONTEXT + QUESTION + seed. The service is only called when
    no cache file exists or ``overwrite`` is true.

    Args:
        QUESTION: full prompt text sent as the question.
        prefix: file-name prefix of the cache entry.
        overwrite: when true, ignore any cached answer and query the service again.
        MODEL: model name forwarded to the service.
        seed: extra string forwarded to the service and mixed into the cache key.

    Returns:
        The rows returned by ``EX`` for the generated query.

    Raises:
        requests.HTTPError: if the service answers with an error status.
        ValueError: if the answer contains no fenced query.

    NOTE(review): MODEL is not part of the cache key, so changing only the model
    reuses the cached answer unless ``overwrite=True``.
    NOTE(review): CONTEXT always asks for a class-counting request, whatever
    QUESTION says; it is kept unchanged because it is part of the cache key.
    """
    CONTEXT  = "You are an expert in the sparql language."
    CONTEXT += "You will have to provide  a sparql request that counts the number of classes in a knowledge graph, \
          please answer with the request as between ```sparql ``` tags, and then provide details of how the request is built."
    REQ = {
        "context": CONTEXT,
        "question": QUESTION,
        "model": MODEL,
        "token": TOK3N,
        "overwrite": overwrite,
        "source": "local-sparqlqueries",
        "seed" : seed
    }

    H = h.hashme(CONTEXT+QUESTION+seed)
    cached = "cache/"+prefix+"_"+H+".json"

    if not os.path.isfile(cached) or overwrite:
        x = requests.post(URL+"ask/", json = REQ)
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        x.raise_for_status()
        answer = x.json()["answer"]
        # Make sure the cache folder exists before the helper writes into it.
        os.makedirs(os.path.dirname(cached), exist_ok=True)
        h.svt(cached,answer)
    else:
        answer = h.ldt(cached)

    code, explanation = _split_llm_answer(answer)
    display(Markdown("### Explanation"))
    display(Markdown(explanation))
    display(Markdown("--------\n### Code\n"))
    display(Markdown("```sparql\n"+code+"\n```"))
    display(Markdown("--------\n### Request result"))
    return EX(code)

In [9]:
# Ontology primer prepended to each question passed to ask() in the cells below:
# it lists the classes, the linking properties and the pbn: prefix to use.
specifics ="""The ontology we are reviewing has several classes:
* 'Risk': A specific risk
* 'Mitigation': A mitigation against a risk
* 'Technology': A technology used to mitigate a risk
* 'TechGroup' : A category used to classify Technology
* 'Stakeholder': People related to risks
* 'Article': Sources of information
They are linked using the following properties:
* A 'Risk' has a 'Mitigation' : noted with the property : 'has_RiskMitigation' 
* A 'Risk' has a 'Technology' : noted with the property : 'has_RiskTechnology'
* A 'Risk' impacts a 'Stakeholder' : noted with the property : Risk->'has_RiskSubject'->Stakeholder . Inverse is 'has_SubjectRisk' .
* A 'Risk' can be mitigated by a 'Stakeholder' :  noted with the property : 'has_RiskOwner'
* A 'TechGroup' contains 'Technology', noted with the property:  Technology->'has_TechGroup'->TechGroup . Inverse is 'has_GroupTech' .
Note that all classes have capital letters as part of their names.

This ontology can be added in the sparql request introducing:
PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>
"""

## Testing the helper

In [10]:
# First check of the helper: request a class-count query, run it and show the rows.
A = ask(specifics+"How do I count the number of classes",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed="")
A

### Explanation

To count the number of classes in the knowledge graph, the SPARQL query uses a SELECT clause with the COUNT aggregate function to count the instances of a class. The WHERE clause specifies that the ?class variable must be an instance of the owl:Class. By using the PREFIX directive, the query associates the pbn namespace with the base ontology URI, enabling the use of the ontology's classes.

The query does not include specific class names as it aims to retrieve a general count of all classes in the ontology. This way, it can capture any additional classes that might be added to the ontology in the future.

--------
### Code


```sparql
PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>

SELECT (COUNT(?class) AS ?classCount)
WHERE {
  ?class a owl:Class
}
```

--------
### Request result

[[0]]

In [11]:
# NOTE(review): the prompt asks for "ascending order" but also "largest TechGroup
# first", which is descending. The question text is part of the cache key inside
# ask(), so rewording it triggers a new LLM call.
A = ask(specifics+"How do I count how many Technology belong to each TechGroup, and sort in ascending order (largest TechGroup first), keeping the top 7?",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed="3") # gpt-4-0613
A

### Explanation

This SPARQL query accomplishes the task of counting how many technologies belong to each TechGroup and sorting them in ascending order (largest TechGroup first) while keeping the top 7. Here's a breakdown of the query:

1. The SPARQL query starts with a PREFIX declaration to define the namespace prefix for the ontology: `PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>`. This prefix will be used to simplify the notation of the ontology classes and properties in the query.

2. The SELECT clause includes the variables `?techGroup` and `(COUNT(?technology) AS ?techCount)`. This part of the query specifies the variables to be selected in the results. `?techGroup` will represent the label of the TechGroup, and `?techCount` will represent the count of technologies associated with each TechGroup.

3. The WHERE clause includes the triple patterns to match the relationships between technologies and TechGroups. It specifies that a `?technology` entity is of type `pbn:Technology` and has the property `pbn:has_TechGroup` with the object `?techGroupObj`. Additionally, it retrieves the label of the `?techGroupObj` using the `rdfs:label` property and assigns it to the variable `?techGroup`.

4. The GROUP BY clause groups the results based on the variable `?techGroup`, which represents the label of the TechGroup.

5. The ORDER BY clause sorts the results in descending order of `?techCount`, which represents the count of technologies for each TechGroup.

6. The LIMIT 7 clause ensures that only the top 7 results are returned, representing the largest TechGroups based on the count of technologies.

Overall, this SPARQL query effectively counts the number of technologies belonging to each TechGroup and provides the results in the desired order and limit.

--------
### Code


```sparql
PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>
SELECT ?techGroup (COUNT(?technology) AS ?techCount)
WHERE {
  ?technology a pbn:Technology ;
             pbn:has_TechGroup ?techGroupObj .
  ?techGroupObj a pbn:TechGroup ;
               rdfs:label ?techGroup .
} 
GROUP BY ?techGroup
ORDER BY DESC(?techCount)
LIMIT 7
```

--------
### Request result

[]

In [12]:
# Hand-written rerun of the TechGroup count query, with the ?TechGroup /
# ?TechGroupObj variables capitalised, executed directly through EX().
EX("""PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>
SELECT ?TechGroup (COUNT(?technology) AS ?techCount)
WHERE {
  ?technology a pbn:Technology ;
             pbn:has_TechGroup ?TechGroupObj .
  ?TechGroupObj a pbn:TechGroup ;
               rdfs:label ?TechGroup .
} 
GROUP BY ?TechGroup
ORDER BY DESC(?techCount)
LIMIT 7""")

[]

In [13]:
# Ask for the TechGroup (and its label) with the most Technology items linked to a Risk.
A = ask(specifics+"How do I identify the TechGroup, and its label, that has the most Technology items linked to a Risk?",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed="6")
A

### Explanation

In this SPARQL request, we first define the prefix `pbn` to represent the ontology namespace. We then use a SELECT query to retrieve the `?techGroup` and count the number of `?technology` items linked to a `?risk` through the properties `has_RiskTechnology`. 

We also include the `has_RiskMitigation` property to ensure that we are only considering risks that have mitigations. We then group the results by `?techGroup` and order the count of technologies in descending order. Finally, we limit the result to only 1 row, which will give us the tech group with the most linked technologies to a risk.

--------
### Code


```sparql
PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>
SELECT ?techGroup (COUNT(?technology) as ?numTechnologies)
WHERE {
  ?risk a pbn:Risk ;
        pbn:has_RiskTechnology ?technology ;
        pbn:has_RiskMitigation ?mitigation .
  ?technology pbn:has_TechGroup ?techGroup .
}
GROUP BY ?techGroup
ORDER BY DESC(?numTechnologies)
LIMIT 1
```

--------
### Request result

[]

In [14]:
# Ask for the five classes with the most instances.
A = ask(specifics+"How do I count the top 5 classes that have the children with most instances?",overwrite=False,MODEL="gpt-3.5-turbo-1106",seed="")
A

### Explanation

In the given sparql query, we use the provided ontology prefix to specify the namespace of the classes and properties. The query starts by selecting the variables ?class and ?child. We then match all instances ?child of each class ?class using the triple pattern ?child a ?class. The COUNT function is used to count the number of instances for each class, and the results are grouped by ?class. The GROUP BY clause ensures that the count is performed for each unique class. We then order the results by the count in descending order using ORDER BY DESC(?count). Finally, we limit the results to the top 5 classes using the LIMIT 5 clause.

--------
### Code


```sparql
PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>

SELECT ?class (COUNT(?child) AS ?count)
WHERE {
  ?child a ?class .
} 
GROUP BY ?class
ORDER BY DESC(?count)
LIMIT 5
```

--------
### Request result

[[80, 1]]

In [15]:
import owlready2.sparql.parser
# Replace the SPARQL parser's private _DATA_PROPS with an empty set before
# running the hand-written queries below.
# NOTE(review): this patches a private owlready2 attribute and the reason is not
# recorded here — TODO confirm why it is needed and for which owlready2 version.
owlready2.sparql.parser._DATA_PROPS = set()

In [16]:
# Count Technology bindings per TechSubgroup for risks in PBN__RiskGroup_4,
# keeping the five largest counts.
# NOTE(review): ?riskGroup and ?techGroup are selected but are not in GROUP BY,
# and the `?riskGroup a pbn:RiskGroup` pattern appears twice — TODO confirm intended.
A = EX("""PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>

SELECT ?riskGroup ?techGroup ?techSubgroup (COUNT(?technology) AS ?techCount)
WHERE {

  ?riskGroup a pbn:RiskGroup .
  ?techSubgroup a pbn:TechSubgroup .
  ?technology a pbn:Technology . 
  ?riskGroup a pbn:RiskGroup . 
  ?techGroup a pbn:TechGroup .

  ?risk pbn:has_RiskGroup ?riskGroup .
  ?risk pbn:has_RiskTechnology ?technology .
  ?technology pbn:has_TechSubgroup ?techSubgroup . 
  ?technology pbn:has_TechGroup ?techGroup . 
       
  FILTER (?riskGroup = pbn:PBN__RiskGroup_4) 
}
GROUP BY ?techSubgroup
ORDER BY DESC(?techCount)
LIMIT 5""")
A

[]

In [17]:
# Same query as the previous cell but grouped by ?techGroup instead of
# ?techSubgroup, keeping the five largest counts.
# NOTE(review): ?riskGroup and ?techSubgroup are selected but are not in GROUP BY,
# and the `?riskGroup a pbn:RiskGroup` pattern appears twice — TODO confirm intended.
A = EX("""PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>

SELECT ?riskGroup ?techGroup ?techSubgroup (COUNT(?technology) AS ?techCount)
WHERE {

  ?riskGroup a pbn:RiskGroup .
  ?techSubgroup a pbn:TechSubgroup .
  ?technology a pbn:Technology . 
  ?riskGroup a pbn:RiskGroup . 
  ?techGroup a pbn:TechGroup .

  ?risk pbn:has_RiskGroup ?riskGroup .
  ?risk pbn:has_RiskTechnology ?technology .
  ?technology pbn:has_TechSubgroup ?techSubgroup . 
  ?technology pbn:has_TechGroup ?techGroup . 
       
  FILTER (?riskGroup = pbn:PBN__RiskGroup_4) 
}
GROUP BY ?techGroup
ORDER BY DESC(?techCount)
LIMIT 5""")
A

[]

In [18]:
# Count Technology bindings per TechGroup for risks in PBN__RiskGroup_4,
# without the TechSubgroup join, keeping the three largest counts.
# NOTE(review): ?riskGroup is selected but is not in GROUP BY, and the
# `?riskGroup a pbn:RiskGroup` pattern appears twice — TODO confirm intended.
A = EX("""PREFIX pbn: <https://github.com/mm80843/T3.5/raw/main/pbn_t3_5.owl#>

SELECT ?riskGroup ?techGroup (COUNT(?technology) AS ?techCount)
WHERE {

  ?riskGroup a pbn:RiskGroup .
  ?techGroup a pbn:TechGroup .
  ?technology a pbn:Technology . 
  ?riskGroup a pbn:RiskGroup . 

  ?risk pbn:has_RiskGroup ?riskGroup .
  ?risk pbn:has_RiskTechnology ?technology .
  ?technology pbn:has_TechGroup ?techGroup . 

  FILTER (?riskGroup = pbn:PBN__RiskGroup_4) 
}
GROUP BY ?techGroup
ORDER BY DESC(?techCount)
LIMIT 3""")
A

[]