In [3]:
from owlready2 import *
import rdflib
import os

import numpy as np
import random
from copy import copy
import pickle

In [4]:
PATH = "../datasets/anatomy/"

MOUSEFILE = 'mouse.owl'
HUMANFILE = 'human.owl'
REFFILE   = 'reference.rdf'

BASE = 'base_similarity.rdf'

In [5]:
mouse_onto = get_ontology(PATH+MOUSEFILE).load()
human_onto = get_ontology(PATH+HUMANFILE).load()

mc = list(mouse_onto.classes())
hc = list(human_onto.classes())

In [6]:
mouse_entities = {}
human_entities = {}
entities       = {}
entities_names = {}
labels  = {}

# We need to save the object 'owl.Thing' in the entity dictionaries, 
# Find the owl.Thing object (typically in position 0) and save its position within the mouse classes
for i, m in enumerate(mc):
    if m is owl.Thing:
        ot = i
        print("owl.Thing is located at position:", ot)
        break

# insert the owl.Thing object as the first object within the dictionary
mouse_entities[mc[ot]]      = 0
entities[mc[ot]]            = 0
human_entities[mc[ot]]      = 0
entities_names[mc[ot].name] = 0

for c in mc:
    if "MA" in c.name: # filter only the classes, prevent "OboInOwl" types being included in the dictionary
        if c not in entities:
            labels[c.label[0]] = len(entities)
            mouse_entities[c] = len(entities)
            entities[c] = len(entities)
            entities_names[c.name] = len(entities_names)
            

for c in hc:
    if "NCI" in c.name: # filter only the classes, prevent "OboInOwl" types being included in the dictionary
        if c not in entities:
            labels[c.label[0]] = len(entities)
            human_entities[c] = len(entities)
            entities[c] = len(entities)
            entities_names[c.name] = len(entities_names)
            
idx_to_labels = dict((v,k) for k,v in labels.items())
idx_to_entity = dict((v,k) for k,v in entities.items())
idx_to_entity_names = dict((v,k) for k,v in entities_names.items())
idx_to_mouse_entity = dict((v,k) for k,v in mouse_entities.items())
idx_to_human_entity = dict((v,k) for k,v in human_entities.items())

owl.Thing is located at position: 0


In [7]:
def Read_RDF(file):
    g = rdflib.Graph()
    g.load(PATH+file)

    # Create a list of triples that contain:
    # (Subject, Predicate, Object)
    SPOtriple = []

    for sub, pred, obj in g:
        SPOtriple.append((sub, pred, obj))
        
    alignment_subjects = {}
    alignments  = []

    # Make a count of how often subjects occur in this list of triples
    for t in SPOtriple:
        if t[0] in alignment_subjects:
            alignment_subjects[t[0]] += 1
        else:
            alignment_subjects[t[0]] = 1

    # Loop through every alignment subject (one alignment between two nodes)
    for i, key in enumerate(alignment_subjects):
        # Make a list to get the two aligned nodes
        alignment_pair = [] 

        # Search for the two objects that contain the same subject alignment number
        for t in SPOtriple:
            if key in t[0]:
                if "alignmententity" in t[1]:
                    # Save the index of the entity to the alignment pair 
                    get_iri = human_onto.search(iri=t[2])[0]
                    if "MA" in get_iri.name or "NCI" in get_iri.name:
                        alignment_pair.append(entities[get_iri])

        # There are some SPOtriples that do not actually contain an alignment, so we ignore those.
        if alignment_pair:
            # Save 'is_a' relations in both directions, as alignments are bi-directional
            alignments.append(tuple(alignment_pair))
            alignments.append(tuple(alignment_pair[::-1])) # saves the reverse of the tuple
            
    return alignments
    



In [8]:
reference = Read_RDF(REFFILE)

base = Read_RDF(BASE)

In [9]:
s = set(reference)

In [10]:
def Metrics(test_rdf):
    print("The length of reference RDF is:", len(reference))
    reference_set = set(reference)
    
    true_num = 0
    false_num  = 0
    for pair in test_rdf:
        if pair in reference_set:
            true_num += 1
        else:
            false_num += 1
    
    print("This matching algorithm predict "+str(true_num)+" correct alignments and "+str(false_num)+" false alignments")

In [11]:
#Base
Metrics(base)

reference

The length of reference RDF is: 3032
This matching algorithm predict 1908 correct alignments and 6 false alignments


[(5116, 506),
 (506, 5116),
 (2923, 331),
 (331, 2923),
 (139, 3004),
 (3004, 139),
 (3506, 381),
 (381, 3506),
 (4934, 1376),
 (1376, 4934),
 (2118, 3483),
 (3483, 2118),
 (2338, 5899),
 (5899, 2338),
 (460, 2870),
 (2870, 460),
 (5591, 1919),
 (1919, 5591),
 (67, 5424),
 (5424, 67),
 (5870, 1229),
 (1229, 5870),
 (3973, 978),
 (978, 3973),
 (5605, 1640),
 (1640, 5605),
 (1555, 2843),
 (2843, 1555),
 (5429, 1802),
 (1802, 5429),
 (676, 2855),
 (2855, 676),
 (2817, 147),
 (147, 2817),
 (4049, 442),
 (442, 4049),
 (2301, 6004),
 (6004, 2301),
 (4457, 1375),
 (1375, 4457),
 (1198, 5722),
 (5722, 1198),
 (3086, 18),
 (18, 3086),
 (1536, 4443),
 (4443, 1536),
 (190, 3370),
 (3370, 190),
 (3798, 2561),
 (2561, 3798),
 (1082, 4166),
 (4166, 1082),
 (4605, 103),
 (103, 4605),
 (5156, 522),
 (522, 5156),
 (4163, 293),
 (293, 4163),
 (5352, 2676),
 (2676, 5352),
 (1834, 5679),
 (5679, 1834),
 (49, 4417),
 (4417, 49),
 (451, 3786),
 (3786, 451),
 (3353, 377),
 (377, 3353),
 (5665, 1289),
 (1289,

In [14]:
from os import listdir

for file in listdir(PATH):
    if file.split(".")[1] == "rdf" and file.split(".")[0] != "reference":
        print("Now showing performance for "+file.split(".")[0]+" matching:")
        target = Read_RDF(file)
        Metrics(target)
        print("")

Now showing performance for abstract matching:
The length of reference RDF is: 3032
This matching algorithm predict 0 correct alignments and 0 false alignments

Now showing performance for AllZero matching:
The length of reference RDF is: 3032
This matching algorithm predict 0 correct alignments and 0 false alignments

Now showing performance for base_similarity matching:
The length of reference RDF is: 3032
This matching algorithm predict 1908 correct alignments and 6 false alignments

Now showing performance for concept matching:
The length of reference RDF is: 3032
This matching algorithm predict 0 correct alignments and 0 false alignments

Now showing performance for Copy matching:
The length of reference RDF is: 3032
This matching algorithm predict 0 correct alignments and 0 false alignments

Now showing performance for Descendants_similarity_inheritance matching:
The length of reference RDF is: 3032
This matching algorithm predict 268 correct alignments and 0 false alignments

No