In [1]:
import xml.etree.ElementTree as ET
import pandas as pd

In [2]:
def extract_metric(tree_element):
    metric_family = tree_element.tag
    #print(metric_family)
    for child in tree_element:
        #print("---- ", child)
        metric_document = {}
        metric_document["metric_family"] =  metric_family
        metric_document["metric_name"] = child.tag
        metric_document["value"] = child.text 
        #print(metric_document)
        yield metric_document

In [3]:
def load_metrics(filename: str, ontology_name: str) -> pd.DataFrame:
    tree = ET.parse(filename)
    root = tree.getroot()
    metrics = []
    for child in root[0]:
        if child.tag == "Classmetrics": ## we don't handle class metrics
            continue
        for metric_document in extract_metric(child):
            metrics.append(metric_document)
    df = pd.DataFrame(metrics).rename(columns={"metric_name": "metric_code", "value": ontology_name})
    return df


In [4]:
def add_metrics(df_metrics: pd.DataFrame, df_new_metrics: pd.DataFrame) -> pd.DataFrame:
    selected_columns = list(set(df_new_metrics.columns) -  set(['metric_family']))
    df_new_table = df_metrics.merge(df_new_metrics[selected_columns],how='left', on= "metric_code")
    return df_new_table

## To compute metrics use webservice:

http://opi.informatik.uni-rostock.de/api?url<your_ontology_url>

In our case: http://opi.informatik.uni-rostock.de/api?url=http://schema.linkalab-cloud.com/tao.ttl 

Or use web interface:
https://ontometrics.informatik.uni-rostock.de/ontologymetrics/index.jsp



In [6]:
df_tao_solo = load_metrics("tao_metrics.xml","tao")
df_hontology = load_metrics("hontology_metrics.xml","hontology")
df_acco = load_metrics("accommodation_ontology_metrics.xml","acco")

In [8]:
df_metrics_table = pd.read_excel("metrics_labels.xlsx")
df_metrics_table = df_metrics_table[["metric_name","metric_code","description","evaluation_criteria"]]

In [9]:
df_metrics_table = add_metrics(df_metrics_table,df_tao_solo)
df_metrics_table = add_metrics(df_metrics_table,df_hontology)
df_metrics_table = add_metrics(df_metrics_table,df_acco)
df_metrics_table.drop(["metric_code"], axis=1, inplace=True)

In [10]:
df_metrics_table

Unnamed: 0,metric_name,description,evaluation_criteria,tao,hontology,acco
0,# axioms,The total number of axioms defined for classes...,,3853,1453,344
1,# logical axioms,Number of axioms which affect the logical mean...,,1222,448,111
2,# classes,The total number of classes defined in the ont...,,590,284,31
3,# object properties,The total number of object properties defined ...,,16,8,21
4,# datatype properties,The total number of datatype properties define...,,3,31,14
5,# annotation assertions,The total number of annotations in the ontology,,1982,682,161
6,DL expressivity,The description logics expressivity of the ont...,,SROIQ(D),ALCHQ(D),ALUH(D)
7,Inheritance Richness,Inheritance Richness (IR) measure describes th...,low => vertical&#10;high => orizontal&#10;,1.172881,0.961268,0.741935
8,Relationship Richness,This metric reflects the diversity of the type...,low => less information&#10;High => more infor...,0.411565,0.320896,0.477273
9,Axiom Class Ratio,This metric describes the ratio between axioms...,low (near 0) => poor axiomatisation&#10;Higher...,6.530508,5.116197,11.096774


In [11]:
df_metrics_table[0:7]

Unnamed: 0,metric_name,description,evaluation_criteria,tao,hontology,acco
0,# axioms,The total number of axioms defined for classes...,,3853,1453,344
1,# logical axioms,Number of axioms which affect the logical mean...,,1222,448,111
2,# classes,The total number of classes defined in the ont...,,590,284,31
3,# object properties,The total number of object properties defined ...,,16,8,21
4,# datatype properties,The total number of datatype properties define...,,3,31,14
5,# annotation assertions,The total number of annotations in the ontology,,1982,682,161
6,DL expressivity,The description logics expressivity of the ont...,,SROIQ(D),ALCHQ(D),ALUH(D)


In [14]:
df_metrics_table.to_excel("validation_metrics_table.xls", index=False)

  df_metrics_table.to_excel("validation_metrics_table.xls", index=False)


In [15]:
from functools import partial
def use_f_2(x, num_decimals):
    try:
        n = int(str(x))
        return n
    except ValueError:
        try:
            n = float(str(x))
            return f"%.{num_decimals}f" % float(x)
        except Exception as e:
            return x

# the number of columns can be passed to this function
use_f = lambda x: partial(use_f_2, num_decimals=x)

In [16]:
df_base_metrics = df_metrics_table[0:7][['metric_name','tao','hontology','acco']]
df_base_metrics.rename(columns={'metric_name':'metric name'}, inplace=True)

In [17]:
caption = "Base metrics."
label="tab:base-metrics"
with pd.option_context("max_colwidth", 1000, "display.precision", 3):
    df_base_metrics.to_latex("base_metrics.tex",  multicolumn=True, header=True, index_names=False,
              index=False, column_format='p{3.5cm}|p{1.2cm}p{1.2cm}p{1.2cm}', caption=caption, label=label)

  df_base_metrics.to_latex("base_metrics.tex",  multicolumn=True, header=True, index_names=False,


In [18]:
df_schema_and_graph_metrics = df_metrics_table[7:][['metric_name','evaluation_criteria','tao','hontology','acco']]
df_schema_and_graph_metrics.rename(columns={'metric_name':'metric name', 'evaluation_criteria': 'evaluation criteria'}, inplace=True)

In [19]:
#### Add number of external class metric
### Number of external classes is evalutated using Protegé 
description = """The interpretation of NoC values depends on the number of classes in the ontology. For
example, if NoC is near the total number of internal classes a large fraction of the ontology depends on concepts defined in other places.
Thus the change in the external ontologies can influence the intended semantics to a great extent.
We report (i) the absolute NoC values and (ii) the ratios between NoC and the # of classes among parenthesis"""
new_row = {'metric name':'NoC', 'evaluation criteria':description,'tao':'19','hontology':'0','acco':'2'}
new_row_s = pd.DataFrame(new_row, index=[0])
df_schema_and_graph_metrics = pd.concat([df_schema_and_graph_metrics.loc[7:12],new_row_s,df_schema_and_graph_metrics.loc[13:17]]).reset_index(drop=True)

In [20]:
### update NoR and NoL metrics with relative values inside parenthesis
num_classes = [ int(v) for v in df_base_metrics.iloc[2:3, 1:4].values.flatten().tolist()]
nor = [ int(v) for v in df_schema_and_graph_metrics.iloc[4:5,2:5].values.flatten().tolist()]
nol = [ int(v) for v in df_schema_and_graph_metrics.iloc[5:6,2:5].values.flatten().tolist()]
noc = [ int(v) for v in df_schema_and_graph_metrics.iloc[6:7,2:5].values.flatten().tolist()]
for i,v in enumerate(num_classes):
    new_nor = "%s (%1.2f)" % (nor[i],int(nor[i])/int(num_classes[i]))
    df_schema_and_graph_metrics.iloc[4:5,2+i:3+i] = new_nor
    new_nol = "%s (%1.2f)" % (nol[i],int(nol[i])/int(num_classes[i]))
    df_schema_and_graph_metrics.iloc[5:6,2+i:3+i] = new_nol
    new_noc = "%s (%1.2f)" % (noc[i],int(noc[i])/int(num_classes[i]))
    df_schema_and_graph_metrics.iloc[6:7,2+i:3+i] = new_noc

In [21]:
df_schema_and_graph_metrics

Unnamed: 0,metric name,evaluation criteria,tao,hontology,acco
0,Inheritance Richness,low => vertical&#10;high => orizontal&#10;,1.172881,0.961268,0.741935
1,Relationship Richness,low => less information&#10;High => more infor...,0.411565,0.320896,0.477273
2,Axiom Class Ratio,low (near 0) => poor axiomatisation&#10;Higher...,6.530508,5.116197,11.096774
3,Class/propery ratio,Low values (i.e near 0) indicate an ontology w...,0.501701,0.706468,0.704545
4,NoR,The interpretation of NoR values depends on th...,15 (0.03),17 (0.06),13 (0.42)
5,NoL,The interpretation of NoL values depends on th...,496 (0.84),247 (0.87),23 (0.74)
6,NoC,The interpretation of NoC values depends on th...,19 (0.03),0 (0.00),2 (0.06)
7,ADIT-LN,The interpretation of the values depends on th...,3.913007,2.725424,2.439394
8,Max depth,The interpretation of max depth is similar to ...,6,5,3
9,Average breadth,The value should be interpreted relatively to ...,6.614525,7.375,5.076923


In [22]:
new_row = {'metric name':'NoC', 'evaluation criteria':'Description','tao':'19','hontology':'0','acco':'2'}
new_row_s = pd.DataFrame(new_row, index=[0])
pd.concat([df_schema_and_graph_metrics.loc[7:12],new_row_s,df_schema_and_graph_metrics.loc[13:17]]).reset_index(drop=True)

Unnamed: 0,metric name,evaluation criteria,tao,hontology,acco
0,ADIT-LN,The interpretation of the values depends on th...,3.913007,2.725424,2.439394
1,Max depth,The interpretation of max depth is similar to ...,6.0,5.0,3.0
2,Average breadth,The value should be interpreted relatively to ...,6.614525,7.375,5.076923
3,Max breadth,The value should be interpreted relatively to ...,54.0,29.0,13.0
4,Tangledness,Values for tangledness range from 0 (i.e. no t...,0.176271,0.017606,0.096774
5,NoC,Description,19.0,0.0,2.0


In [23]:
df_schema_and_graph_metrics_no_description = df_schema_and_graph_metrics[["metric name","tao","hontology","acco"]]

In [24]:
caption = "Topology metrics."
label="tab:topology-metrics"
with pd.option_context("max_colwidth", 1000):
    df_schema_and_graph_metrics_no_description.to_latex("schema_and_graph_metrics.tex",  
        multicolumn=True, header=True, index_names=False,
        index=False, column_format='p{2.5cm}|p{1cm}p{1cm}p{1cm}', 
        caption=caption, label=label, 
        formatters=[None, use_f(3), use_f(3), use_f(3)]
        )

  df_schema_and_graph_metrics_no_description.to_latex("schema_and_graph_metrics.tex",


In [25]:
df_metrics_table.to_latex("validation_metrics_table.tex", index=False)

  df_metrics_table.to_latex("validation_metrics_table.tex", index=False)
