In [1]:
import csv
import ast

# Ontology Definitions


In [11]:
# Object properties: relation type -> ([allowed_start_types], [allowed_end_types]).
# A triple (start, relation, end) conforms to the ontology when the start
# entity's type is in the first list and the end entity's type is in the second.
ontology_relations = {
    "sentBy": (["Email"], ["Person", "Journal", "Conference"]),
    "receivedBy": (["Email"], ["Person"]),
    "hasAuthor": (["Paper"], ["Person"]),
    "identifies": (["SubmissionID"], ["Paper"]),
    "inVenue": (["SubmissionID"], ["Conference", "Journal"]),
    "mentions": (["Email"], ["Dataset", "Method", "Metric", "Task"]),
    "notifies": (["Email"], ["Meeting", "SubmissionID"]),
    "partOf": (["Email"], ["MailThread"]),
    "movesTo": (["SubmissionID", "PaperStatus"], ["PaperStatus"]),
    "usedFor": (["Method", "Dataset"], ["Task"]),
    "evaluates": (["Metric"], ["Method", "Task", "Dataset"]),
    # End type must be spelled "Dataset" to match the type name used by the
    # other relations; a misspelling makes every "uses" triple fail validation.
    "uses": (["Method"], ["Dataset"]),
}

# Loading data

In [12]:
# Load Entities
#
# Builds `entity_types`, a mapping from entity id to entity type, from the
# "id" and "type" columns of the entities CSV. Rows with an empty id are skipped.

entity_file = "./Entities.csv"
entity_types = {}      # id -> type
#entity_properties = {} # id -> dict of properties

# newline="" lets the csv module do its own line-ending handling, as the csv
# documentation recommends for files passed to csv readers.
with open(entity_file, "r", encoding="utf-8", newline="") as f:
    reader = csv.DictReader(f)
    for row in reader:
        # Strip surrounding whitespace, as the relations loader does for its
        # ids; a padded id or type here would otherwise never match during
        # validation. `or ""` covers cells missing from a short row (None).
        eid = (row["id"] or "").strip()
        etype = (row["type"] or "").strip()
        if eid:
            entity_types[eid] = etype


In [13]:
# Load Relations
#
# Builds `triples`, a list of (start_id, end_id, relation) tuples read from the
# "start_id", "end_id" and "relation" columns of the relations CSV.

relations_file = "./Relations.csv"
triples = []
# newline="" lets the csv module do its own line-ending handling, as the csv
# documentation recommends for files passed to csv readers.
with open(relations_file, "r", encoding="utf-8", newline="") as f:
    reader = csv.DictReader(f)
    for row in reader:
        # DictReader fills cells missing from a short row with None; fall back
        # to "" so such a row becomes a triple with empty fields instead of
        # raising AttributeError on .strip().
        start_id = (row["start_id"] or "").strip()
        end_id = (row["end_id"] or "").strip()
        rel_type = (row["relation"] or "").strip()
        triples.append((start_id, end_id, rel_type))

# Validation of triples

In [14]:
# Check every triple against the ontology. Each offending triple is recorded
# exactly once as (start_id, end_id, relation, reason), so len(invalid_triples)
# counts invalid triples rather than individual violations.
invalid_triples = []

for s_id, e_id, r_type in triples:
    # .get() yields None for ids absent from the entity table; None is never an
    # allowed type, so such triples are reported as invalid below.
    s_type = entity_types.get(s_id)
    e_type = entity_types.get(e_id)

    if r_type not in ontology_relations:
        invalid_triples.append((s_id, e_id, r_type, "Unknown relation type"))
        continue

    allowed_starts, allowed_ends = ontology_relations[r_type]

    # Collect all violations for this triple before recording it.
    problems = []
    if s_type not in allowed_starts:
        problems.append(f"Invalid start type: {s_type}")
    if e_type not in allowed_ends:
        problems.append(f"Invalid end type: {e_type}")

    if problems:
        # A triple whose start AND end are both wrong yields a single entry
        # with both reasons joined, not two separate entries.
        invalid_triples.append((s_id, e_id, r_type, "; ".join(problems)))

# Precision Calculation

In [15]:
# Precision = share of loaded triples that have no entry in invalid_triples.
total_triples = len(triples)
valid_triples = total_triples - len(invalid_triples)

if total_triples > 0:
    precision = valid_triples / total_triples
else:
    # No triples loaded: report 0 rather than dividing by zero.
    precision = 0

# --- Summary report ---
print(f"Total triples: {total_triples}")
print(f"Invalid triples: {len(invalid_triples)}")
print(f"Precision: {precision:.2f}")

# Show at most the first ten invalid triples, one per line.
if invalid_triples:
    print("\nSample invalid triples:")
    print(*invalid_triples[:10], sep="\n")

Total triples: 2413
Invalid triples: 161
Precision: 0.93

Sample invalid triples:
('e1', 'pa1', 'mentions', 'Invalid end type: Paper')
('e11', 'pa1', 'mentions', 'Invalid end type: Paper')
('e12', 'j1', 'mentions', 'Invalid end type: Journal')
('e12', 'pa2', 'mentions', 'Invalid end type: Paper')
('e14', 'pa1', 'mentions', 'Invalid end type: Paper')
('e14', 'j1', 'mentions', 'Invalid end type: Journal')
('e17', 'j1', 'mentions', 'Invalid end type: Journal')
('e17', 'pa1', 'mentions', 'Invalid end type: Paper')
('e19', 'j1', 'mentions', 'Invalid end type: Journal')
('e19', 'pa1', 'mentions', 'Invalid end type: Paper')
