In [1]:
import pandas as pd
import os, glob, re

from owlready2 import *
import owlready2
print(owlready2.VERSION)


import importlib.util
import sys
spec = importlib.util.spec_from_file_location("rdfutils", "../../../utils/rdfutils.py")
u = importlib.util.module_from_spec(spec)
sys.modules["rdfutils"] = u
spec.loader.exec_module(u)

spec = importlib.util.spec_from_file_location("llm", "../../../utils/llm.py")
h = importlib.util.module_from_spec(spec)
sys.modules["llm"] = h
spec.loader.exec_module(h)

from datetime import datetime

def NOW():
    """Return the local wall-clock time as 'Current Time = HH:MM:SS'."""
    return "Current Time = " + datetime.now().strftime("%H:%M:%S")

%load_ext autoreload
%autoreload 2


0.40




In [2]:
import requests, json

In [3]:
# Load version 0.4 of the ontology from the parent directory.
onto = get_ontology("../pbn_t3_5_v0.4.owl").load()
# Project helper from utils/rdfutils.py; judging by the cell output it lists the
# ontology-level metadata comments (version, author, TODOs, ...) — confirm in rdfutils.
comments = u.checkComments(onto)

ID: 0 	 Description: Creation of a knowledge graph based on a litterature review, augmented by use of LLMs.
ID: 1 	 Version: 0.4
ID: 2 	 Library: owlready2==0.45
ID: 3 	 Changes from: 0.3
ID: 4 	 Next: Check formulas for selecting most repeated items in groups
ID: 5 	 Changes: Adding new risks, groups of items, and new relationships
ID: 6 	 Creation: 22/01/2024
ID: 7 	 TODOs: Adding synonyms and solving classification with synonyms
ID: 8 	 VersionComment: Grouping of items added in 0.4
ID: 9 	 Project: PROBONO
ID: 10 	 Next: Linking benefits to mitigations groups
ID: 11 	 License: CC BY-NC-SA
ID: 12 	 Task: T3.5
ID: 13 	 Repository: https://github.com/mm80843/T3.5/
ID: 14 	 Author: Luc Jonveaux
ID: 15 	 Language: English


# Asking the BOK API.

The API queried here is the one used in the current report; it is served by the FastAPI app in `bok/data/main_api.py`.

In [4]:
def askDef(term,k=10,temp=0.1,overwrite=False,seed=""):
    """Ask the local BOK API for a two-paragraph definition of ``term``, with an on-disk cache.

    The cache file is ``cache/<hash>.md``, where the hash is computed by
    ``h.hashme`` from the question text, the seed, ``k`` and ``temp``.
    The model name and the ``source`` tag are not part of the cache key.

    Parameters
    ----------
    term : str
        Term to define; it is inserted verbatim into the prompt.
    k : int
        Forwarded to the API as ``"k"`` (presumably the number of retrieved
        passages — confirm against the API).
    temp : float
        Forwarded to the API as ``"temp"``.
    overwrite : bool
        When True the API is queried even if a cached answer exists, and the
        cache file is rewritten.
    seed : str
        Forwarded to the API as ``"seed"`` and mixed into the cache key.

    Returns
    -------
    str
        The answer text followed by a blank line and ``Sources: [...]``, the
        sorted list of unique references returned by the API.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    URL = "http://localhost:5000/ask/"
    Q = "You are working on the topic of contagious diseases in smart and sustainable cities and neighbourhoods. In this context, what could be a definition or description of the following term: '"+term+"'?\nWrite two paragraphs and avoid bullet point lists."
    REQ = {
    "question": Q,
    "model": "gpt-3.5-turbo-1106",
    "temp": temp,
    "k": k,
    "overwrite": overwrite,
    "source": "local_risk_def",
    "seed": seed
    }

    H = h.hashme(REQ["question"]+str(REQ["seed"])+str(k)+str(temp))
    cached = "cache/"+H+".md"

    # Serve from the cache unless the caller explicitly asks for a fresh answer.
    if os.path.isfile(cached) and not overwrite:
        return h.ldt(cached)

    response = requests.post(URL, json = REQ)
    # Fail with the HTTP error itself rather than an opaque JSON/KeyError further down.
    response.raise_for_status()
    payload = json.loads(response.text)  # parse the body once
    REFS = sorted(set(payload["refs"]))
    answer = payload["answer"]+"\n\nSources: "+str(REFS)
    h.svt(cached,answer)
    return answer

# Defining new properties

In [5]:
# Declare two new properties inside the loaded ontology. `onto.PBNThing >> str` is
# owlready2's Domain >> Range shorthand: both properties attach string values to PBNThing.
with onto:
    # Holds the generated description text (filled by the askDef cells below).
    class has_Description(onto.PBNThing >> str):
        label = ["Short description"]
        pass
    # Free-text note from the author; not assigned anywhere in the visible cells.
    class has_Note(onto.PBNThing >> str):
        label = ["Author note"]
        pass

# Getting the description

In [6]:
# Ask the BOK API for a description of every risk subgroup and store it in has_Description.
# Individuals without a label are skipped: there is no term to ask about and label[0]
# would raise IndexError.
# NOTE: overwrite=True makes askDef query the API again even when a cached answer exists.
r = [indiv for indiv in onto.RiskSubgroup.instances() if len(indiv.label) >= 1]
for rsg in r:
    rsg.has_Description = [askDef(rsg.label[0],k=15,temp=0.1,overwrite=True,seed="1")]

In [7]:
# Ask the BOK API for a description of every technology subgroup and store it in has_Description.
# Individuals without a label are skipped: there is no term to ask about and label[0]
# would raise IndexError.
# NOTE: overwrite=True makes askDef query the API again even when a cached answer exists.
r = [indiv for indiv in onto.TechSubgroup.instances() if len(indiv.label) >= 1]
for rsg in r:
    rsg.has_Description = [askDef(rsg.label[0],k=15,temp=0.1,overwrite=True,seed="1")]

In [8]:
# Ask the BOK API for a description of every stakeholder subgroup and store it in has_Description.
# Individuals without a label are skipped: there is no term to ask about and label[0]
# would raise IndexError.
# NOTE: overwrite=True makes askDef query the API again even when a cached answer exists.
r = [indiv for indiv in onto.StakeholderSubgroup.instances() if len(indiv.label) >= 1]
for rsg in r:
    rsg.has_Description = [askDef(rsg.label[0],k=15,temp=0.1,overwrite=True,seed="1")]

In [21]:
# Ask the BOK API for a description of every BP_Intervention individual and store it in has_Description.
# Individuals without a label are skipped: there is no term to ask about and label[0]
# would raise IndexError.
# NOTE: overwrite=True makes askDef query the API again even when a cached answer exists.
r = [indiv for indiv in onto.BP_Intervention.instances() if len(indiv.label) >= 1]
for rsg in r:
    rsg.has_Description = [askDef(rsg.label[0],k=15,temp=0.1,overwrite=True,seed="1")]

# Removing old articles

In [95]:
# Example of a malformed article label: the citation key is repeated and interleaved with "nan" tokens.
onto.PBN__Article_310.label

['Rahim_impact_2022 rahim_impact_2022 nan rahim_impact_2022 nan nan rahim_impact_2022 nan nan nan rahim_impact_2022 nan nan nan nan rahim_impact_2022']

In [98]:
# Collect the Article individuals whose first label contains a " nan " token,
# like the malformed example inspected above.
# NOTE(review): x.label[0] raises IndexError for an article without any label.
a =  list(onto.Article.instances())
gap_articles = [x for x in a if " nan " in x.label[0]]

In [99]:
# Delete the malformed articles from the ontology (owlready2 destroy_entity).
# This only changes the in-memory ontology until it is saved.
for indiv in gap_articles :
    destroy_entity(indiv)

# CAO

In [39]:
# CAO framework vocabulary: the keys are the level-1 categories and each list holds the
# level-2 sub-categories offered to the classifier for that category.
DICTS_CAO = {}

DICTS_CAO["Structure"] = ["Settlement","Biodiversity","Air","Soil","Water","Communication Network", "Water Cycle", "Energy Cycle","Mobility Network","Nature","Dwelling","Buildings/Blocks","Neighbourhoods/Districts","City/Metropolis","Public Space"," Land Use"]
DICTS_CAO["Interactions"] = ["Living","Working","Shopping","Transport","Health","Education","Arts","Sports","Security","Wealth production", "Wealth Distribution","Commerce/Trade","Finances", "Competitiveness", "Entrepreneurship","Culture/Diversity","Social Expression","Heritage", "Tools&Apps","Open data", "Data in/out", "Performance (equity, resilience, self-sufficiency)"]
DICTS_CAO["Society"] = ["Person", "Family","Visitors","Organizations","Business","Participation","Capacity Development", "Leadership","Visions&Priorities", "Laws&Regulations","Accountability"]
# NOTE(review): the bare 1 below is a no-op expression statement — it looks like a stray keystroke.
1
# Prompt preamble describing the three level-1 categories to the model.
DESC_Step1 = "'Structures' contains Environment (Settlements, Biodiversity, Air, Soil, Water), Infrastructure (communication networks, water, energy, matter cycle, mobility,), and Build domain (Dwelling, housing, land use, public spaces). "
DESC_Step1 += "'Interactions' contains Functions, Economy, Culture and Information, focusing on companies, economic sphere, societal, social, economic and information topics. "
DESC_Step1 += "'Society' contains Citizens and Government (inc Persons, familities, organizations, businesses, leadership, vision, laws and regulation, accountability)."
# NOTE(review): the next line is a plain assignment, so the three sentences built above are
# discarded and only this one reaches the prompt. Confirm whether += was intended; switching
# would change the prompt text and therefore the cache keys derived from it.
# NOTE(review): the text says 'Structures' while the category key offered to the model is "Structure".
DESC_Step1 = "Use 'Structures' if the risk is in the physical world, 'Interactions' if it is about 'societal, social, economic and information' aspects and Society if talking about governance. "

def classifFwk_step1(lst):
    """Build the function-calling schema used to classify items into categories.

    `lst` is the list of allowed category names; it is placed as-is (same
    object, not a copy) in the `enum` of the `category` field. The result is a
    one-element list holding the `classify_CAO_framework` function
    description, whose single parameter `sorted_items` is an array of
    {name, category, confidence} objects.
    """
    name_field = {
        "type": "string",
        "description": "The item being considered.",
    }
    category_field = {
        "type": "string",
        "enum": lst,
        "description": "The category of item that matches the item being considered. ",
    }
    confidence_field = {
        "type": "string",
        "enum": ["High", "Medium-high", "Medium", "Low"],
        "description": "Confidence that this is the correct item category.",
    }

    # Schema of one classified item.
    item_schema = {
        "type": "object",
        "description": "A type of item",
        "properties": {
            "name": name_field,
            "category": category_field,
            "confidence": confidence_field,
        },
        "required": ["name", "category", "confidence"],
    }

    parameters = {
        "type": "object",
        "properties": {
            "sorted_items": {"type": "array", "items": item_schema},
        },
        "required": ["sorted_items"],
    }

    function_spec = {
        "name": "classify_CAO_framework",
        "description": "Function used to classify items according to the evaluation framework, based on what they  impact the most . ",
        "parameters": parameters,
    }
    return [function_spec]

In [40]:
# Level-1 category names: the keys of DICTS_CAO, in insertion order.
CAO_Cats = list(DICTS_CAO.keys())

In [41]:
# Read the endpoint and token of the function-calling API from a local .env file.
from dotenv import load_dotenv
load_dotenv('.env')

# NOTE(review): os.getenv returns None when a variable is missing; URL+"fct/" further down
# would then raise a TypeError that the classification loops catch and report only as "error".
URL = os.getenv("KG_URL_FCT")
TOK3N = os.getenv("KG_TOKEN")

# Maximum number of chunks sent to the API per loop pass in the classification cells.
NBCAP = 2

# Ontology classes whose string form contains "roup", i.e. the *Group / *Subgroup classes listed below.
classify = [k for k in onto.classes() if "roup" in str(k)]
classify

[pbn_t3_5_v0.4.StakeholderGroup,
 pbn_t3_5_v0.4.StakeholderSubgroup,
 pbn_t3_5_v0.4.TechGroup,
 pbn_t3_5_v0.4.TechSubgroup,
 pbn_t3_5_v0.4.RiskGroup,
 pbn_t3_5_v0.4.RiskSubgroup,
 pbn_t3_5_v0.4.CAO_Group,
 pbn_t3_5_v0.4.CAO_Subgroup]

In [42]:
# Level-1 CAO classification of every labelled individual of the group/subgroup classes.
# Items are sent to the function-calling endpoint in chunks of NChunks; each response is
# cached on disk and the accumulated table is written to newCAO_classified_lv1.parquet.gzip.
for klass in classify:
    N = klass.name

    RISKS = [x.label[0] for x in klass.instances() if len(x.label) >= 1]

    RES = []
    NChunks = 50
    # Reuse earlier results: only items missing from the parquet file are sent to the API.
    if os.path.isfile("newCAO_classified_lv1.parquet.gzip"):
        GOOD = pd.read_parquet("newCAO_classified_lv1.parquet.gzip")
        alreadythere = GOOD["name"].unique()
        # NOTE(review): GOOD is narrowed to the current class before being written back
        # below, so rows belonging to other classes are dropped from the file whenever
        # this class has new items — confirm this is intended.
        GOOD = GOOD[GOOD.name.isin(RISKS)]
        newRisks = [x for x in RISKS if x not in alreadythere]
        CATS_FWK = [GOOD]
    else:
        newRisks = RISKS
        CATS_FWK = []
    if len(newRisks):
        # Ceiling division: "// NChunks + 1" would add an extra, empty chunk (and a wasted
        # request) whenever the number of items is an exact multiple of NChunks.
        NB = (len(newRisks) + NChunks - 1)//NChunks
        # NBCAP limits how many chunks are handled for this class in one run.
        for k in range(NB)[:NBCAP]:
            try:
                QUESTION =  "The items are:\n\n* "+"\n* ".join(newRisks[(k*NChunks):(k+1)*NChunks]) 
                prefix ="sh_"
                overwrite = False
                REQ = {
                    "context": DESC_Step1+"\n\nOut the following list, classify the items from a high level perspective.\n\n",
                    "question": QUESTION,
                    "functions": classifFwk_step1(CAO_Cats),
                    "token": TOK3N,
                    "overwrite": overwrite,
                    "source": "local-itemsT3.5",
                    "seed" : ""
                }

                # The cache key covers the prompt only (context + question).
                H = h.hashme(REQ["context"]+REQ["question"])
                cached = "cache/"+prefix+"_"+H+".json"
                if not os.path.isfile(cached) or overwrite:
                    x = requests.post(URL+"fct/", json = REQ)
                    answer = json.loads(x.text)["messages"][-1]
                    h.svt(cached,answer)
                else:
                    answer = h.ldt(cached)
                d = pd.DataFrame(json.loads(answer["function_call"]["arguments"])["sorted_items"])
                # Drop rows whose category is not one of the allowed level-1 categories.
                d = d [d["category"].isin(CAO_Cats)]
                CATS_FWK.append(d)
                DFC = pd.concat(CATS_FWK).reset_index(drop=True)
                # Persist after every chunk so progress survives a later failure.
                DFC.to_parquet("newCAO_classified_lv1.parquet.gzip",compression="gzip")
                print("Done:",k)
            except Exception as exc:
                # Best-effort: report the failing chunk with its cause and move on.
                # Catching Exception (not a bare except) lets KeyboardInterrupt stop the cell.
                print("error", k, type(exc).__name__, exc)
    else:
        print("All Risks CAO covered")

Done: 0
Done: 0
Done: 1
Done: 0


In [None]:
# Reload the level-1 results from disk and count the classified items per category.
DFC = pd.read_parquet("newCAO_classified_lv1.parquet.gzip")
DFC.groupby(["category"]).name.count()

In [None]:
# Level-2 CAO classification: for every level-1 category, ask the API to assign each
# not-yet-classified item one of that category's sub-categories (DICTS_CAO[CAT]).
# Results are written into the "subcategory" / "confidence_lv2" columns of the same parquet file.
DFC = pd.read_parquet("newCAO_classified_lv1.parquet.gzip")
CATs = list(DFC.category.unique())
if "subcategory" not in DFC.columns:
    DFC["subcategory"] = None
if "confidence_lv2" not in DFC.columns:
    DFC["confidence_lv2"] = None
for CAT in CATs:
    df = DFC[DFC.category == CAT]
    # Items that already carry a subcategory are not sent again.
    DONE = list(df[~df.subcategory.isna()].name.unique())
    df = df[~df.name.isin(DONE)]
    subtechs = DICTS_CAO[CAT]
    newRisks = list(df.name)
    RES = []
    NChunks = 50

    if len(newRisks):
        newRisks = [str(x) for x in newRisks]
        # Ceiling division: "// NChunks + 1" would add an extra, empty chunk (and a wasted
        # request) whenever the number of items is an exact multiple of NChunks.
        NB = (len(newRisks) + NChunks - 1)//NChunks
        # NBCAP limits how many chunks are handled for this category in one run.
        for k in range(NB)[:NBCAP]:
            try:
                QUESTION =  "The items are:\n\n* "+"\n* ".join(newRisks[(k*NChunks):(k+1)*NChunks]) 
                prefix ="CAO_items_"
                overwrite = False
                REQ = {
                    "context": "Out the following list, classify the items from a high level perspective.\n\n",
                    "question": QUESTION,
                    "functions": classifFwk_step1(subtechs),
                    "token": TOK3N,
                    "overwrite": overwrite,
                    "source": "local-evalTechsT3.5",
                    "seed" : ""
                }

                # The cache key covers the prompt only (context + question).
                H = h.hashme(REQ["context"]+REQ["question"])
                cached = "cache/"+prefix+"_"+H+".json"
                if not os.path.isfile(cached) or overwrite:
                    x = requests.post(URL+"fct/", json = REQ)
                    answer = json.loads(x.text)["messages"][-1]
                    h.svt(cached,answer)
                else:
                    answer = h.ldt(cached)
                d = pd.DataFrame(json.loads(answer["function_call"]["arguments"])["sorted_items"])
                # Drop rows whose category is not one of this category's sub-categories.
                d = d [d["category"].isin(subtechs)]
                # Rename by column name: a positional relabel would mislabel the columns
                # whenever the returned JSON lists its keys in a different order.
                d = d.rename(columns={"category": "subcategory", "confidence": "confidence_lv2"})
                for ix, row in d.iterrows():
                    DFC.loc[DFC.name == row["name"], "subcategory"] = row["subcategory"]
                    DFC.loc[DFC.name == row["name"], "confidence_lv2"] = row["confidence_lv2"]
                # Persist after every chunk so progress survives a later failure.
                DFC.to_parquet("newCAO_classified_lv1.parquet.gzip",compression="gzip")
                print("Done:",k)
            except Exception as exc:
                # Best-effort: report the failing chunk with its cause and move on.
                # Catching Exception (not a bare except) lets KeyboardInterrupt stop the cell.
                print("error", CAT, k, type(exc).__name__, exc)

# Saving

In [101]:
# Write the modified ontology (new properties, descriptions, removed articles) to a work-in-progress file.
onto.save("WIP.owl")