# EDOAL-2-CSV Tutorial

In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import re
import os
from utils import Parser

## Conference example

There are two types of parsers, depending on whether the EDOAL namespace in your EDOAL file is "http://ns.inria.org/edoal/1.0/" or "http://ns.inria.org/edoal/1.0/#". Alignments that use the latter EDOAL namespace are usually reference alignments.

In [2]:
# Two parser instances, one per EDOAL namespace variant described above.
# Parser(False): for files whose EDOAL namespace is "http://ns.inria.org/edoal/1.0/" (no trailing "#").
p = Parser(False)
# Parser(True): for files whose EDOAL namespace is "http://ns.inria.org/edoal/1.0/#" (usually reference alignments).
pref = Parser(True)

Load the EDOAL alignment you want to convert, extract its mappings, and then save them as a CSV file.

In [3]:
# Parse the PGARM cmt-conference alignment with `p` (the parser for the namespace without "#").
path = os.path.join("data", "cmt-conference-PGARM.edoal")
# The result is used like a pandas DataFrame in this notebook (.head(), .to_csv(), column indexing).
df_PGARM = p.extract_mappings(path)
# Preview the first rows only; the bare last expression is what the cell displays.
df_PGARM.head()

Unnamed: 0,ent1_type,entity1,constructor1,ent2_type,entity2,constructor2,type_of_alignment,measure,relation
0,Class,http://cmt#Paper,-,Class,http://conference#Conference_contribution,-,Simple,1.0,=
1,Class,http://cmt#ProgramCommitteeMember,-,AttributeDomainRestriction,onAttribute: Relation(http://conference#was_a_...,-,Complex,1.0,=
2,Class,http://cmt#PaperAbstract,-,Class,http://conference#Extended_abstract,-,Simple,1.0,=
3,Class,http://cmt#Co-author,-,Class,http://conference#Contribution_co-author,-,Simple,1.0,=
4,Class,http://cmt#ExternalReviewer,-,AttributeOccurenceRestriction,onAttribute: Relation(http://conference#invite...,-,Complex,1.0,=


In [4]:
# Optional step: write the extracted PGARM mappings to a CSV file next to the
# input data. index=False keeps the row index out of the file.
csv_path = os.path.join("data", "cmt-conference-PGARM.csv")
df_PGARM.to_csv(csv_path, index=False)

### Evaluation

A simple exact-match evaluation is also available in this tool. There are three modes:
- Missing: shows mappings that are in the reference alignment but not in the candidate alignment (i.e. false negatives)
- Found: shows mappings that are in both the candidate and the reference alignments (i.e. true positives)
- Full: shows all mappings that appear in either the candidate or the reference alignment

In [5]:
# Evaluate the PGARM candidate against the reference alignment, once per mode.
# `pref` is used because the reference file is read with the "#"-namespace parser.
# Resulting frames, in order: df_eval1 -> "missing", df_eval2 -> "found", df_eval3 -> "full".
ref_path = os.path.join("data", "cmt-conference-REF.edoal")
df_eval1, df_eval2, df_eval3 = (
    pref.evaluate(df_PGARM, ref_path, mode)
    for mode in ("missing", "found", "full")
)

In [6]:
# Summarise each evaluation view by its number of rows (one row per mapping).
print("Missing", len(df_eval1), "mappings from the reference")
print("Found", len(df_eval2), "mappings from the reference")
print("Total of", len(df_eval3), "mappings")

Missing 26 mappings from the reference
Found 9 mappings from the reference
Total of 56 mappings


## Enslaved example

In [7]:
# Same extraction step as in the conference example, applied to the Enslaved alignment file.
# Note: this rebinds `path`, which an earlier cell pointed at the PGARM file.
path = os.path.join("data","enslaved.rdf")
df_enslaved = p.extract_mappings(path)
# Preview the first rows only.
df_enslaved.head()

Unnamed: 0,ent1_type,entity1,constructor1,ent2_type,entity2,constructor2,type_of_alignment,measure,relation
0,Class,https://enslaved.org/ontology/PlaceTypeCV,-,Class,https://lod.enslaved.org/entity/Q205,-,Simple,1.0,>
1,Class,https://enslaved.org/ontology/Place,-,Class,https://lod.enslaved.org/entity/Q301,-,Simple,1.0,=
2,Class,https://enslaved.org/ontology/EntityWithProven...,-,Class,https://lod.enslaved.org/entity/Q67,-,Simple,1.0,=
3,Class,https://enslaved.org/ontology/Event,-,Class,https://lod.enslaved.org/entity/Q238,-,Simple,1.0,=
4,Class,https://enslaved.org/ontology/AgeRecord,-,Class,http://wikiba.se/ontology#Statement,-,Simple,1.0,<


Entities from the second ontology (entity2) have IDs instead of proper names, so we can use a method to fetch more intelligible labels from the ontology file.

In [8]:
# Read labels from the ontology file; the result is indexed by entity IRI below.
labels = p.get_labels(os.path.join("data","wikidataKG.owl"))
# Spot-check one entity to confirm that a human-readable label comes back.
labels['https://lod.enslaved.org/entity/Q375']

'Person Status'

In [9]:
# Substitute the fetched labels for the IRIs in the "entity2" column and preview the result.
# NOTE(review): not visible here whether replace_labels returns a new frame or also
# modifies df_enslaved in place — confirm in utils before relying on either behaviour.
p.replace_labels(df_enslaved, "entity2", labels).head()

Unnamed: 0,ent1_type,entity1,constructor1,ent2_type,entity2,constructor2,type_of_alignment,measure,relation
0,Class,https://enslaved.org/ontology/PlaceTypeCV,-,Class,Controlled Vocabulary for Place Type,-,Simple,1.0,>
1,Class,https://enslaved.org/ontology/Place,-,Class,Place,-,Simple,1.0,=
2,Class,https://enslaved.org/ontology/EntityWithProven...,-,Class,Entity with Provenance,-,Simple,1.0,=
3,Class,https://enslaved.org/ontology/Event,-,Class,Event,-,Simple,1.0,=
4,Class,https://enslaved.org/ontology/AgeRecord,-,Class,http://wikiba.se/ontology#Statement,-,Simple,1.0,<


# Some statistics

### Mapping types

In [10]:
# Tabulate the PGARM mappings by (ent1_type, ent2_type) pair; the displayed table
# has one row per pair with a Count column.
p.check_mapping_types(df_PGARM)

Unnamed: 0,ent1_type,ent2_type,Count
0,AttributeDomainRestriction,Class,1
1,AttributeOccurenceRestriction,Class,1
2,Class,AttributeDomainRestriction,2
3,Class,AttributeOccurenceRestriction,1
4,Class,Class,17
5,Relation,Relation,8


### Simple vs complex mappings

In [11]:
# Count how many PGARM mappings carry each value of the type_of_alignment column.
df_PGARM['type_of_alignment'].value_counts()

Complex    17
Simple     13
Name: type_of_alignment, dtype: int64