In [1]:
import pandas as pd
import os, glob

from owlready2 import *
import owlready2
print(owlready2.VERSION)


import importlib.util
import sys
# Load the helper module from an explicit file path and bind it to `u`.
spec = importlib.util.spec_from_file_location("rdfutils", "../../../utils/rdfutils.py")
u = importlib.util.module_from_spec(spec)
# Register the module object under the name "rdfutils" before its code is executed.
sys.modules["rdfutils"] = u
spec.loader.exec_module(u)

from datetime import datetime

def NOW():
    """Return the current local time formatted as "Current Time = HH:MM:SS"."""
    stamp = datetime.now().strftime("%H:%M:%S")
    return f"Current Time = {stamp}"

%load_ext autoreload
%autoreload 2


0.40




In [2]:
# Load the working ontology from the local OWL file.
onto = get_ontology("WIP_377.owl").load()
# Helper-built lookup that is handed to every u.addItem(...) call further down.
# NOTE(review): its exact structure is defined in rdfutils.createDict, not visible here.
dIDct = u.createDict(onto) 

In [3]:
# Overview of the ontology before any additions (the output lists instance counts per class).
u.cOnto(onto)


# Overview

*  _PBNThing_  --  86872 instances.
*  _BenefReturn_  --  1606 instances.
*  _Benef_  --  1409 instances.
*  _Article_  --  377 instances.
*  _Risk_  --  20899 instances.
*  _ISO_Scale_  --  5 instances.
*  _RiskHealth_  --  14 instances.
*  _RiskType_  --  22 instances.
*  _Stakeholder_  --  16002 instances.
*  _Stakeholder_Type_  --  3 instances.
*  _Technology_  --  23093 instances.
*  _ISO_Impact_  --  24 instances.
*  _ISO_Purpose_  --  18 instances.
*  _Mitigation_  --  22885 instances.
*  _BP_Enabler_  --  33 instances.
*  _BP_Transmission_  --  12 instances.
*  _Blueprint_  --  74 instances.
*  _BP_Scale_  --  3 instances.
*  _BP_Phase_  --  4 instances.
*  _BP_Permanent_  --  4 instances.
*  _BP_Type_  --  2 instances.
*  _BP_Intervention_  --  40 instances.
*  _PBNCategory_  --  330 instances.
*  _BP_Theme_  --  10 instances.
*  _BP_Category_  --  3 instances.
*  _aBlueprint_  --  50 instances.


# Adding the classification to the RDF

In [4]:
# Declares the classification schema inside the ontology: grouping classes for
# stakeholders, technologies and risks, each with a pair of inverse object
# properties (group -> member and member -> group), plus the link between
# mitigations and blueprint interventions.
with onto:
    # Data property: string synonyms attached to any PBNThing.
    class has_for_synonym(onto.PBNThing >> str):
        label = ["synonyms for this element"]

    # --- Stakeholders ---------------------------------------------------
    class StakeholderGroup(onto.PBNThing):
        pass
    class StakeholderSubgroup(onto.PBNThing):
        pass
    class has_GroupStakeholder(StakeholderGroup >> onto.Stakeholder):
        label = ["Stakeholders in this stakeholder group"]
    class has_SubgroupStakeholder(StakeholderSubgroup >> onto.Stakeholder):
        label = ["Stakeholders in this stakeholder subgroup"]
    class has_StakeholderGroup(onto.Stakeholder >> StakeholderGroup):
        inverse_property = has_GroupStakeholder
        label = ["This Stakeholder belongs to this Stakeholder Group"]
    class has_StakeholderSubgroup(onto.Stakeholder >> StakeholderSubgroup):
        inverse_property = has_SubgroupStakeholder
        label = ["This Stakeholder belongs to this Stakeholder Subgroup"]

    # --- Technologies ---------------------------------------------------
    class TechGroup(onto.PBNThing):
        pass
    class TechSubgroup(onto.PBNThing):
        pass
    class has_GroupTech(TechGroup >> onto.Technology):
        label = ["Technology in the TechnologyGroup"]
    class has_SubgroupTech(TechSubgroup >> onto.Technology):
        # This property goes from the subgroup to its technologies; the previous
        # label described the inverse direction (already covered by has_TechSubgroup).
        label = ["Technologies in this TechSubgroup"]
    class has_TechGroup(onto.Technology >> TechGroup):
        inverse_property = has_GroupTech
        label = ["This Technology belongs to this TechGroup"]
    class has_TechSubgroup(onto.Technology >> TechSubgroup):
        inverse_property = has_SubgroupTech
        label = ["This Technology belongs to this TechSubgroup"]

    # --- Risks ----------------------------------------------------------
    class RiskGroup(onto.PBNThing):
        pass
    class RiskSubgroup(onto.PBNThing):
        pass
    class has_GroupRisk(RiskGroup >> onto.Risk):
        label = ["Risk in the RiskGroup"]
    class has_SubgroupRisk(RiskSubgroup >> onto.Risk):
        label = ["Risks in this RiskSubgroup"]
    class has_RiskGroup(onto.Risk >> RiskGroup):
        inverse_property = has_GroupRisk
        label = ["This Risk belongs to this RiskGroup"]
    class has_RiskSubgroup(onto.Risk >> RiskSubgroup):
        inverse_property = has_SubgroupRisk
        label = ["This Risk belongs to this RiskSubgroup"]

    # --- Mitigation <-> blueprint intervention --------------------------
    class has_MitigIntervention(onto.Mitigation >> onto.BP_Intervention):
        label = ["Main intervention linked to the Mitigation"]
    class has_InterventionMitig(onto.BP_Intervention >> onto.Mitigation):
        inverse_property = has_MitigIntervention
        label = ["Mitigations linked to this intervention"]


## Adding technology classification

In [5]:
# Technology classification table; rows missing a name, category or subcategory are dropped.
df = (
    pd.read_parquet("../../support/newtechs_classified_lv1.parquet.gzip")
    .dropna(subset=["name", "category", "subcategory"])
)
df.head(3)

Unnamed: 0,name,category,confidence,co```nfidence,&confidence,subcategory,confidence_lv2
0,-80°c freezers,Healthcare,High,,,Vaccination Development and Distribution,High
1,14-day quarantine,Healthcare,High,,,Social Distancing Technologies,High
2,24-hour mechanical ventilation systems,Healthcare,Medium-high,,,Physical Health Monitoring,High


In [6]:
# Register every classified technology and link it to its group and subgroup.
# Timing notes kept from earlier runs of this cell: about 4 min with the max()
# method, about 23 s with the len() method to add elements.
tech_rows = df[["name", "category", "subcategory"]].itertuples(index=False, name=None)
for tech_name, category, subcategory in tech_rows:
    newTech = u.addItem("Technology", tech_name, dIDct, onto)
    if category:
        newGroup = u.addItem("TechGroup", category, dIDct, onto)
        # Append only when missing: the link stays unique without reassigning the
        # whole property through list(set(...)), which also left values in arbitrary order.
        if newGroup not in newTech.has_TechGroup:
            newTech.has_TechGroup.append(newGroup)
    if subcategory:
        newGroup = u.addItem("TechSubgroup", subcategory, dIDct, onto)
        if newGroup not in newTech.has_TechSubgroup:
            newTech.has_TechSubgroup.append(newGroup)

## Adding Risks

In [7]:
# Risk classification table; rows missing a name, category or subcategory are dropped.
df = pd.read_parquet("../../support/newrisks_classified_lv1.parquet.gzip")
df = df.dropna(subset=["name", "category", "subcategory"])

# Register every classified risk and link it to its group and subgroup.
# (The former mid-cell df.head() displayed nothing and has been removed.)
risk_rows = df[["name", "category", "subcategory"]].itertuples(index=False, name=None)
for risk_name, category, subcategory in risk_rows:
    newRisk = u.addItem("Risk", risk_name, dIDct, onto)
    if category:
        newGroup = u.addItem("RiskGroup", category, dIDct, onto)
        # Append only when missing: the link stays unique without reassigning the
        # whole property through list(set(...)), which also left values in arbitrary order.
        if newGroup not in newRisk.has_RiskGroup:
            newRisk.has_RiskGroup.append(newGroup)
    if subcategory:
        newGroup = u.addItem("RiskSubgroup", subcategory, dIDct, onto)
        if newGroup not in newRisk.has_RiskSubgroup:
            newRisk.has_RiskSubgroup.append(newGroup)


In [8]:
# Spot-check: group and subgroup links of the first three Risk individuals.
first_risks = list(onto.Risk.instances())[:3]
for risk in first_risks:
    print(risk.has_RiskGroup, risk.has_RiskSubgroup)

[WIP_377.PBN__RiskGroup_2] [WIP_377.PBN__RiskSubgroup_58]
[WIP_377.PBN__RiskGroup_2] [WIP_377.PBN__RiskSubgroup_58]
[WIP_377.PBN__RiskGroup_2] [WIP_377.PBN__RiskSubgroup_58]


## Adding stakeholders

In [9]:
import pandas as pd
# Stakeholder classification table; rows missing a name, category or subcategory are dropped.
df = (
    pd.read_parquet("../../support/newstakeholders_classified_lv1.parquet.gzip")
    .dropna(subset=["name", "category", "subcategory"])
)
print(df.shape[0])
df.head(3)

15644


Unnamed: 0,name,category,confidence,subcategory,confidence_lv2
0,0-4 year olds,General public,Medium,General population,Medium
1,2.2 billion people without safe drinking water,Public Entities,High,Water utilities,Medium
2,25 participants specific to their sector,Workers,Medium-high,Project managers,High


In [10]:
# Display the stakeholder frame (the rendering is truncated to the first and last rows).
df

Unnamed: 0,name,category,confidence,subcategory,confidence_lv2
0,0-4 year olds,General public,Medium,General population,Medium
1,2.2 billion people without safe drinking water,Public Entities,High,Water utilities,Medium
2,25 participants specific to their sector,Workers,Medium-high,Project managers,High
3,3d printing companies,Manufacturing and production sector,Medium-high,Non-medical cechnology manufacturers,High
4,4.2 billion people without sanitation systems,Public Entities,High,Water utilities,Medium-high
...,...,...,...,...,...
16065,Younger potential participants,General public,Medium,General population,Medium
16066,Youth,General public,High,Fragile and/or isolated population,High
16067,Youth sports organizations,General public,Medium,Recreational and fitness enthusiasts,High
16068,Youths,General public,High,General population,High


In [11]:
# Drop any stakeholder link whose target's first is_a entry is not the expected
# grouping class (subgroup links first, then group links).
subgroup_cls = onto.StakeholderSubgroup
group_cls = onto.StakeholderGroup
for C in list(onto.Stakeholder.instances()):
    kept_subgroups = [target for target in C.has_StakeholderSubgroup if target.is_a[0] == subgroup_cls]
    C.has_StakeholderSubgroup = kept_subgroups
    kept_groups = [target for target in C.has_StakeholderGroup if target.is_a[0] == group_cls]
    C.has_StakeholderGroup = kept_groups

# Subgroup links of the last stakeholder visited by the loop.
C.has_StakeholderSubgroup

[]

In [12]:
# Stakeholder classification table; rows missing a name, category or subcategory are dropped.
df = pd.read_parquet("../../support/newstakeholders_classified_lv1.parquet.gzip")
df = df.dropna(subset=["name", "category", "subcategory"])

# Register every classified stakeholder and link it to its group and subgroup.
stakeholder_rows = df[["name", "category", "subcategory"]].itertuples(index=False, name=None)
for stakeholder_name, category, subcategory in stakeholder_rows:
    newSH = u.addItem("Stakeholder", stakeholder_name, dIDct, onto)
    if category:
        newGroup = u.addItem("StakeholderGroup", category, dIDct, onto)
        # Append only when missing: the link stays unique without reassigning the
        # whole property through list(set(...)), which also left values in arbitrary order.
        if newGroup not in newSH.has_StakeholderGroup:
            newSH.has_StakeholderGroup.append(newGroup)
    if subcategory:
        newGroup = u.addItem("StakeholderSubgroup", subcategory, dIDct, onto)
        if newGroup not in newSH.has_StakeholderSubgroup:
            newSH.has_StakeholderSubgroup.append(newGroup)

In [13]:
# Spot-check: group and subgroup links of the first three Stakeholder individuals.
first_stakeholders = list(onto.Stakeholder.instances())[:3]
for stakeholder in first_stakeholders:
    print(stakeholder.has_StakeholderGroup, stakeholder.has_StakeholderSubgroup)

[WIP_377.PBN__StakeholderGroup_12] [WIP_377.PBN__StakeholderSubgroup_21]
[WIP_377.PBN__StakeholderGroup_6] [WIP_377.PBN__StakeholderSubgroup_48]
[WIP_377.PBN__StakeholderGroup_13] [WIP_377.PBN__StakeholderSubgroup_42]


## Adding mitigations

In [14]:
import pandas as pd

In [15]:
# NOTE(review): at this point `df` still holds the stakeholder table loaded earlier;
# the mitigation file is only read in the next cell, so this display is not mitigation data.
df

Unnamed: 0,name,category,confidence,subcategory,confidence_lv2
0,0-4 year olds,General public,Medium,General population,Medium
1,2.2 billion people without safe drinking water,Public Entities,High,Water utilities,Medium
2,25 participants specific to their sector,Workers,Medium-high,Project managers,High
3,3d printing companies,Manufacturing and production sector,Medium-high,Non-medical cechnology manufacturers,High
4,4.2 billion people without sanitation systems,Public Entities,High,Water utilities,Medium-high
...,...,...,...,...,...
16065,Younger potential participants,General public,Medium,General population,Medium
16066,Youth,General public,High,Fragile and/or isolated population,High
16067,Youth sports organizations,General public,Medium,Recreational and fitness enthusiasts,High
16068,Youths,General public,High,General population,High


In [16]:
# Mitigation classification table; rows missing a name or category are dropped.
df = pd.read_parquet("../../support/newmitigation_classified_lv1.parquet.gzip")
df = df.dropna(subset=["name", "category"])

# Link each mitigation to the blueprint intervention named by its category.
# One membership set per intervention, seeded lazily from the links it already has,
# replaces the former list(set(...)) rebuild of an ever-growing list on every row.
linked_mitigs = {}
for mitig_name, category in df[["name", "category"]].itertuples(index=False, name=None):
    mitig = u.addItem("Mitigation", mitig_name, dIDct, onto)
    interv = u.addItem("BP_Intervention", category, dIDct, onto)
    if interv not in linked_mitigs:
        linked_mitigs[interv] = set(interv.has_InterventionMitig)
    known = linked_mitigs[interv]
    if mitig not in known:
        interv.has_InterventionMitig.append(mitig)
        known.add(mitig)

# Adding CAO

In [17]:
# CAO classification schema: two grouping classes and, for each, a pair of
# inverse object properties linking them to Risk individuals.
with onto:
    class CAO_Group(onto.PBNThing):
        pass

    class CAO_Subgroup(onto.PBNThing):
        pass

    # Grouping -> risks.
    # NOTE(review): both labels below read "in the this"; kept verbatim here.
    class has_categoryCAORisk(CAO_Group >> onto.Risk):
        label = ["Risks in the this CAO category"]

    class has_SubcategoryCAORisk(CAO_Subgroup >> onto.Risk):
        label = ["Risks in the this CAO subcategory"]

    # Risk -> grouping, declared as inverses of the two properties above.
    class has_RiskCAOGroup(onto.Risk >> CAO_Group):
        inverse_property = has_categoryCAORisk
        label = ["This Risk belongs to this CAO group"]

    class has_RiskCAOSubgroup(onto.Risk >> CAO_Subgroup):
        inverse_property = has_SubcategoryCAORisk
        label = ["This Risk belongs to this CAO subgroup"]

In [18]:
import pandas as pd
# CAO classification table; rows missing a name, category or subcategory are dropped.
df = (
    pd.read_parquet("../../support/newCAO_classified_lv1.parquet.gzip")
    .dropna(subset=["name", "category", "subcategory"])
)
df

Unnamed: 0,name,category,confidence,subcategory,confidence_lv2,confidence.1
0,Spread of sars-cov-2 virus,Structure,Medium-high,Settlement,Medium,
1,Rapid spread of sars-cov-2 virus,Structure,Medium-high,Settlement,Medium,
2,Spread of sars-cov-2 infection,Structure,Medium-high,Settlement,Medium,
3,Spread of sars-cov-2,Structure,Medium-high,Settlement,Medium,
4,Spread of sars-cov-2 in hospitals,Structure,Medium-high,Settlement,Medium,
...,...,...,...,...,...,...
21102,Healthcare facility comfort improvements in bacs,Structure,Medium-high,Neighbourhoods/Districts,Medium,
21103,Large-scale deployment of wsn without proper s...,Interactions,High,Security,High,
21104,Extended survival of bioaerosols due to pm5 an...,Structure,Medium,Air,Medium,
21105,Reducción del contacto social y la interacción...,Interactions,Medium-high,Health,High,


In [19]:
# CAO classification table; rows missing a name, category or subcategory are dropped.
df = pd.read_parquet("../../support/newCAO_classified_lv1.parquet.gzip")
df = df.dropna(subset=["name", "category", "subcategory"])

# Register every classified risk and link it to its CAO group and subgroup.
cao_rows = df[["name", "category", "subcategory"]].itertuples(index=False, name=None)
for risk_name, category, subcategory in cao_rows:
    newRisk = u.addItem("Risk", risk_name, dIDct, onto)
    if category:
        newCAO = u.addItem("CAO_Group", category, dIDct, onto)
        # Append only when missing: the link stays unique without reassigning the
        # whole property through list(set(...)), which also left values in arbitrary order.
        if newCAO not in newRisk.has_RiskCAOGroup:
            newRisk.has_RiskCAOGroup.append(newCAO)
    if subcategory:
        newCAO = u.addItem("CAO_Subgroup", subcategory, dIDct, onto)
        if newCAO not in newRisk.has_RiskCAOSubgroup:
            newRisk.has_RiskCAOSubgroup.append(newCAO)


In [20]:
# Spot-check: CAO group and subgroup links of the first three Risk individuals.
first_risks = list(onto.Risk.instances())[:3]
for risk in first_risks:
    print(risk.has_RiskCAOGroup, risk.has_RiskCAOSubgroup)

[WIP_377.PBN__CAO_Group_0] [WIP_377.PBN__CAO_Subgroup_0]
[WIP_377.PBN__CAO_Group_0] [WIP_377.PBN__CAO_Subgroup_0]
[WIP_377.PBN__CAO_Group_0] [WIP_377.PBN__CAO_Subgroup_0]


In [21]:
# Rows of the CAO table whose name contains "Use of wastewater".
wastewater_mask = df["name"].str.contains("Use of wastewater")
df[wastewater_mask]

Unnamed: 0,name,category,confidence,subcategory,confidence_lv2,confidence.1
3550,Use of wastewater samples for early detection ...,Interactions,Medium-high,Health,Medium,
13311,Use of wastewater to detect pathogenic organis...,Interactions,High,Health,High,
20386,Use of wastewater ’analysis’ for monitoring co...,Interactions,Medium-high,Health,Medium,


In [22]:
# String form of every ontology class whose text contains "Subgroup".
[class_text for class_text in map(str, onto.classes()) if "Subgroup" in class_text]

['WIP_377.StakeholderSubgroup',
 'WIP_377.TechSubgroup',
 'WIP_377.RiskSubgroup',
 'WIP_377.CAO_Subgroup']

In [23]:
# Overview of the ontology after the additions above (the output lists instance counts per class).
u.cOnto(onto)


# Overview

*  _PBNThing_  --  87838 instances.
*  _BenefReturn_  --  1606 instances.
*  _Benef_  --  1409 instances.
*  _Article_  --  377 instances.
*  _Risk_  --  21145 instances.
*  _ISO_Scale_  --  5 instances.
*  _RiskHealth_  --  14 instances.
*  _RiskType_  --  22 instances.
*  _Stakeholder_  --  16125 instances.
*  _Stakeholder_Type_  --  3 instances.
*  _Technology_  --  23140 instances.
*  _ISO_Impact_  --  24 instances.
*  _ISO_Purpose_  --  18 instances.
*  _Mitigation_  --  22950 instances.
*  _BP_Enabler_  --  33 instances.
*  _BP_Transmission_  --  12 instances.
*  _Blueprint_  --  74 instances.
*  _BP_Scale_  --  3 instances.
*  _BP_Phase_  --  4 instances.
*  _BP_Permanent_  --  4 instances.
*  _BP_Type_  --  2 instances.
*  _BP_Intervention_  --  46 instances.
*  _PBNCategory_  --  330 instances.
*  _BP_Theme_  --  10 instances.
*  _BP_Category_  --  3 instances.
*  _aBlueprint_  --  50 instances.
*  _StakeholderGroup_  --  18 instances.
*  _StakeholderSubgroup_  --

In [24]:
# Write the enriched ontology to WIP_Step1.owl, a different file from the WIP_377.owl input.
onto.save("WIP_Step1.owl")

In [25]:
# Final cell: prints a marker once execution reaches the end of the notebook.
print("DONE")

DONE
