# Extracting ontology classes and their annotations (label and comment) from an ontology file using the owlready2 library

In [6]:
pip install owlready2

Note: you may need to restart the kernel to use updated packages.


In [13]:
from owlready2 import get_ontology
import pandas as pd

In [14]:

def read_ontology(path):
    """Load an ontology and return its classes without duplicates.

    Parameters
    ----------
    path : str
        Location of the ontology; passed unchanged to ``get_ontology``.

    Returns
    -------
    list
        Every object yielded by ``onto.classes()``, each appearing once, in the
        order in which it was first yielded.
    """
    onto = get_ontology(path)
    onto.load()

    # dict.fromkeys removes duplicates just like set() would, but keeps the
    # iteration order, so repeated runs list the classes in the same order
    # instead of in hash order.
    return list(dict.fromkeys(onto.classes()))



def get_mappings(filename):
    """Collect the XML attributes of every ``Class`` element in a file.

    Parameters
    ----------
    filename : str
        Path of the XML file to parse.

    Returns
    -------
    list of dict
        One attribute dictionary per element returned by
        ``soup.find_all('Class')``, in that order.

    Notes
    -----
    NOTE(review): ``BeautifulSoup`` is not imported in the visible cells;
    confirm that ``from bs4 import BeautifulSoup`` runs before this is called.
    """
    with open(filename) as f:
        soup = BeautifulSoup(f, 'xml')

    # The earlier version appended ``cell.get_attribute_list`` without calling
    # it, so the result held bound-method objects rather than attribute data.
    # ``attrs`` is the element's attribute mapping; copy it into a plain dict.
    return [dict(cell.attrs) for cell in soup.find_all('Class')]




def get_path(cl):
    """Build the '/'-separated ancestor path of a class, root first.

    Starting at ``cl``, the first named entry of ``is_a`` is followed
    repeatedly and each visited name is recorded. The walk stops when a class
    has no named parent or when a parent was already visited.

    Parameters
    ----------
    cl : object
        A class-like object exposing ``name`` (str) and ``is_a`` (sequence of
        parents).

    Returns
    -------
    str
        The names joined with '/', the most distant ancestor first and
        ``cl.name`` last.
    """
    names = [cl.name]
    visited = {id(cl)}  # guards against cyclic is_a chains

    while True:
        # Entries without a string ``name`` are skipped instead of raising
        # AttributeError (presumably anonymous constructs such as
        # restrictions; confirm against the ontology in use).
        parent = next(
            (p for p in cl.is_a if isinstance(getattr(p, 'name', None), str)),
            None,
        )
        # No explicit root test is needed: the earlier ``cl == 'owl.Thing'``
        # check compared a class object with a string and never matched, so
        # the walk already ended only when ``is_a`` ran out of parents.
        if parent is None or id(parent) in visited:
            break

        names.append(parent.name)
        visited.add(id(parent))
        cl = parent

    return '/'.join(reversed(names))




def get_classes_df(ont_path):
    """Tabulate the classes of an ontology as a pandas DataFrame.

    Parameters
    ----------
    ont_path : str
        Ontology location, forwarded to ``read_ontology``.

    Returns
    -------
    pandas.DataFrame
        One row per class with the columns ``Name``, ``Path``, ``label`` and
        ``comment``; every value is lower-cased. ``label`` and ``comment`` are
        the class's entries joined with no separator.
    """
    columns = ['Name', 'Path', 'label', 'comment']

    # One lower-cased record per class returned by read_ontology
    rows = [
        (
            entity.name.lower(),
            get_path(entity).lower(),
            ''.join(entity.label).lower(),
            ''.join(entity.comment).lower(),
        )
        for entity in read_ontology(ont_path)
    ]

    return pd.DataFrame(rows, columns=columns)

In [15]:
# Raw string: the Windows backslashes stay literal instead of depending on
# unrecognised escape sequences, which newer Python versions warn about.
df = get_classes_df(r"D:\Thesis\Ontology file\Wind energy\saref4ener.rdf")

In [16]:
df

Unnamed: 0,Name,Path,label,comment
0,minduration,thing/temporalentity/minduration,min duration,"if a slot has a configurable duration, it shal..."
1,energymin,thing/property/energy/energy/energymin,energy min,a possible type of energy in a slot that repre...
2,energyskewness,thing/property/energy/energy/energyskewness,energy skewness,a possible type of energy in a slot that repre...
3,powerskewness,thing/property/power/power/powerskewness,power skewness,a possible type of power in a slot that repres...
4,slot,thing/slot,slot,the single steps of a power sequence are repre...
...,...,...,...,...
60,loadcontroleventstate,thing/state/state/loadcontroleventstate,load control event state,"in the load control, it expresses the possible..."
61,energymax,thing/property/energy/energy/energymax,energy max,a possible type of energy in a slot that repre...
62,pausedurationmin,thing/temporalentity/pausedurationmin,pause duration min,the minimum duration a power sequence can paus...
63,timeperiod,thing/temporalentity/timeperiod,time period,the time period associated with load control e...


Saving the extracted DataFrame to a CSV file

In [28]:
# Persist the extracted class table as CSV, leaving out the row index

df.to_csv(path_or_buf="ontology_data.csv", index=False)

In [17]:
import numpy as np
import re
import nltk
from sklearn.datasets import load_files
nltk.download('stopwords')
import pickle
from nltk.corpus import stopwords
import pandas as pd

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [20]:
# Raw string: keeps the Windows backslashes literal rather than relying on
# unrecognised escape sequences, which newer Python versions warn about.
ontology = pd.read_csv(r'D:\Thesis\Ontology file\ontology_data.csv')

In [21]:
ontology

Unnamed: 0,Name,Path,label,comment
0,alternativesgroup,thing/alternativesgroup,alternatives group,a collection of power sequences for a certain ...
1,temporalentity,thing/temporalentity,,
2,elapsedslottime,thing/temporalentity/elapsedslottime,elapsed slot time,if state is set to 'running' or 'paused' and t...
3,loadcontroleventstate,thing/state/state/loadcontroleventstate,load control event state,"in the load control, it expresses the possible..."
4,powerexpected,thing/property/power/power/powerexpected,power expected,a possible type of power in a slot that repres...
...,...,...,...,...
60,powerprofile,thing/profile/powerprofile,power profile,a way to model curves of power and energy over...
61,activedurationmax,thing/temporalentity/activedurationmax,active duration max,the active maximum duration a power sequence c...
62,activedurationdescription,thing/durationdescription/activedurationdescri...,active duration description,the duration description for the active durati...
63,defaultduration,thing/temporalentity/defaultduration,default duration,the duration of a slot (shall be present in ca...


In [22]:
# Check missing values: print the per-column counts, because an expression that
# is not the last line of a cell is otherwise discarded without being shown.
print(ontology.isna().sum())

# Drop every row that has a missing value in any column
ontology = ontology.dropna()

In [23]:
ontology

Unnamed: 0,Name,Path,label,comment
0,alternativesgroup,thing/alternativesgroup,alternatives group,a collection of power sequences for a certain ...
2,elapsedslottime,thing/temporalentity/elapsedslottime,elapsed slot time,if state is set to 'running' or 'paused' and t...
3,loadcontroleventstate,thing/state/state/loadcontroleventstate,load control event state,"in the load control, it expresses the possible..."
4,powerexpected,thing/property/power/power/powerexpected,power expected,a possible type of power in a slot that repres...
5,slottimedurationdescription,thing/durationdescription/slottimedurationdesc...,slot time duration description,the duration description for the slot time dur...
...,...,...,...,...
60,powerprofile,thing/profile/powerprofile,power profile,a way to model curves of power and energy over...
61,activedurationmax,thing/temporalentity/activedurationmax,active duration max,the active maximum duration a power sequence c...
62,activedurationdescription,thing/durationdescription/activedurationdescri...,active duration description,the duration description for the active durati...
63,defaultduration,thing/temporalentity/defaultduration,default duration,the duration of a slot (shall be present in ca...
