Evaluation of NER against Ontology

In [1]:
import pandas as pd
import numpy as np

In [None]:
# Step 1: Load the NER output and the ontology into DataFrames
# Both paths are placeholders -- point them at the actual files before running.
ner_path = 'ner_output.csv'
ontology_path = 'ontology_output.csv'

ner_df = pd.read_csv(ner_path)
ontology_df = pd.read_csv(ontology_path)

# Step 2: Create the ontology dictionary for optimized evaluation
# Nested lookup: head type -> tail type -> set of relationships the ontology
# allows for that pair, e.g.
# ontology_dict = {
#     "head_type_1": {
#         "tail_type_1": {"relationship_1", "relationship_2", ...},
#         "tail_type_2": {"relationship_3", ...},
#         ...
#     },
#     ...
# }
# Each ontology row is expected to contribute one (head_type, tail_type,
# valid_relationship) triple.
ontology_columns = ['head_type', 'tail_type', 'valid_relationship']

# Rows with a missing cell cannot describe a complete triple. Keeping them
# would store NaN keys/members, and a NaN member can compare "in" the set by
# object identity, letting a missing NER relationship pass as valid.
complete_ontology_rows = ontology_df.dropna(subset=ontology_columns)

ontology_dict = {}

# Zip the three columns instead of calling iterrows(): no per-row Series is
# built and the values keep their column dtype.
for head_type, tail_type, relationship in zip(
    complete_ontology_rows['head_type'],
    complete_ontology_rows['tail_type'],
    complete_ontology_rows['valid_relationship'],
):
    # setdefault creates the inner dict / set the first time a key is seen.
    ontology_dict.setdefault(head_type, {}).setdefault(tail_type, set()).add(relationship)

# Step 3: Check every NER row against the ontology and tally the outcome
total_valid = 0
total_invalid = 0

for _, ner_row in ner_df.iterrows():
    # The entity strings are read here so a real type lookup can use them;
    # the placeholder lookup below does not consult them yet.
    head_entity = ner_row['head']
    tail_entity = ner_row['tail']
    relationship = ner_row['relationship']

    # Placeholder entity typing: swap these two constants for a real lookup,
    # for example
    #     head_type = get_entity_type(head_entity)
    #     tail_type = get_entity_type(tail_entity)
    # or, using spaCy (or a similar NER tool):
    #     head_type = nlp(head_entity).ents[0].label_ if nlp(head_entity).ents else 'Unknown'
    #     tail_type = nlp(tail_entity).ents[0].label_ if nlp(tail_entity).ents else 'Unknown'
    head_type = "Example_Head_Type"  # Replace with actual method to get head type
    tail_type = "Example_Tail_Type"  # Replace with actual method to get tail type

    # Relationships the ontology permits for this (head_type, tail_type) pair;
    # falls back to an empty collection when either type is absent.
    allowed_relationships = ontology_dict.get(head_type, {}).get(tail_type, ())

    if relationship not in allowed_relationships:
        # Not permitted by the ontology: count it and report the triple.
        total_invalid += 1
        print(f"Invalid relationship detected: {head_type} -[{relationship}]-> {tail_type}")
        continue

    total_valid += 1

# Step 4: Report the tallies and the share of rows that passed validation
total_rows = total_valid + total_invalid

# With no rows at all the percentage is reported as 0 rather than dividing by zero.
if total_rows > 0:
    accuracy = (total_valid / total_rows) * 100
else:
    accuracy = 0

summary_lines = (
    "\nValidation Summary:",
    f"Total valid relationships: {total_valid}",
    f"Total invalid relationships: {total_invalid}",
    f"Accuracy: {accuracy:.2f}%",
)
for summary_line in summary_lines:
    print(summary_line)
