In [89]:
# Evaluator is a library I've created (https://github.com/LLCampos/evaluator)
from Evaluator import Evaluator

import os
import re

In [90]:
# Normalized format is "{index_begin}:{index_end}:{entity_name}". For example:
# "191:200:carcinoma"

# Normalize a single MER annotation line: surrounding whitespace is dropped
# and the tab separators are swapped for colons.
# Example: "191\t200\tcarcinoma\n" becomes "191:200:carcinoma"
def normalize_tsv_mer_annotations(mer_annotation):
    fields = mer_annotation.strip().split('\t')
    return ':'.join(fields)

# Convert a Standoff annotation line into the normalized format.
# For example, converts "[40::58]\tHP_0000006 | autosomal dominant\n" into
# "40:58:autosomal dominant".
#
# The offsets come from the leading "[begin::end]" span; the entity name is
# everything after the first "| " that follows that span. All three parts are
# stripped of surrounding whitespace so that "\r\n" line endings do not leak
# into the entity name.
#
# Raises ValueError (naming the offending line) if the line does not have the
# "[begin::end] ... | name" shape.
def normalize_standoff_annotation(standoff_annotation):
    # Non-greedy groups keep each capture inside its own delimiters, so an
    # entity name that itself contains "]" or "::" cannot corrupt the offsets.
    match = re.search(r'\[(.*?)::(.*?)\].*?\| (.*)', standoff_annotation)
    if match is None:
        raise ValueError(
            'Malformed standoff annotation: {!r}'.format(standoff_annotation))

    index_begin, index_end, entity_name = (part.strip() for part in match.groups())

    return '{}:{}:{}'.format(index_begin, index_end, entity_name)
    

In [91]:
# Directory with the MER output: one "<document_id>.tsv" file per document.
# The trailing slash is required because file paths are built below by plain
# string concatenation.
path_to_mer_annotations = 'annotations/'
# Directory with the gold Standoff files; its file names are used as the
# document ids. The trailing slash is required here as well.
path_to_gold_annotations = '../stand-off/'

In [92]:
# Each regular, non-hidden file in the gold directory is one document id.
# os.listdir() returns names in arbitrary order and also lists directories and
# hidden files (e.g. ".DS_Store"), so the listing is filtered and sorted to make
# the evaluation loops reproducible and to avoid opening non-documents.
document_ids = sorted(
    name for name in os.listdir(path_to_gold_annotations)
    if not name.startswith('.')
    and os.path.isfile(os.path.join(path_to_gold_annotations, name))
)

# Micro Evaluation

In [93]:
# Score every document on its own, then average the per-document scores.
# NOTE(review): averaging per-document scores is usually called *macro*-averaging.
# The "Micro" variable names and printed labels are kept as they were so the
# recorded output still matches -- confirm which naming is intended.
precisions = []
recalls = []
fscores = []
number_documents = len(document_ids)

for document_id in document_ids:
    # The MER file for a document is the gold file name plus a ".tsv" suffix.
    with open(path_to_mer_annotations + document_id + '.tsv') as f:
        mer_annotations_tsv = f.readlines()

    with open(path_to_gold_annotations + document_id) as f:
        gold_annotations_standoff = f.readlines()

    # Blank lines carry no annotation: on the MER side they would become a bogus
    # '' predicted term, on the gold side they cannot be parsed at all.
    mer_annotations = [normalize_tsv_mer_annotations(line)
                       for line in mer_annotations_tsv if line.strip()]
    gold_annotations = [normalize_standoff_annotation(line)
                        for line in gold_annotations_standoff if line.strip()]
    # Sets drop duplicate annotations within a document before they are compared.
    ev = Evaluator.Evaluator(gold_terms=set(gold_annotations), pred_terms=set(mer_annotations))

    precisions.append(ev.precision())
    recalls.append(ev.recall())
    fscores.append(ev.f1_score())

# float() guarantees true division even if every per-document score is an int.
micro_precision = sum(precisions) / float(number_documents)
micro_recall = sum(recalls) / float(number_documents)
micro_f1_score = sum(fscores) / float(number_documents)

# Single-argument print(...) produces the same output on Python 2 and Python 3.
print("Micro Precision: " + str(micro_precision))
print("Micro Recall: " + str(micro_recall))
print("Micro F1-Score: " + str(micro_f1_score))

Micro Precision: 0.482382936761
Micro Recall: 0.484478222459
Micro F1-Score: 0.449974182127


# Macro Evaluation

In [94]:
# To calculate this evaluation I use a hack with the Evaluator module: the
# private _y_pred / _y_true attributes of each per-document Evaluator are
# accumulated into one master Evaluator, and the metrics are computed once over
# the pooled data.
# NOTE(review): pooling all documents before scoring is usually called
# *micro*-averaging. The "Macro" variable names and printed labels are kept as
# they were so the recorded output still matches -- confirm which naming is intended.

evaluator_master = Evaluator.Evaluator(set(), set())

for document_id in document_ids:
    # The MER file for a document is the gold file name plus a ".tsv" suffix.
    with open(path_to_mer_annotations + document_id + '.tsv') as f:
        mer_annotations_tsv = f.readlines()

    with open(path_to_gold_annotations + document_id) as f:
        gold_annotations_standoff = f.readlines()

    # Blank lines carry no annotation: on the MER side they would become a bogus
    # '' predicted term, on the gold side they cannot be parsed at all.
    mer_annotations = [normalize_tsv_mer_annotations(line)
                       for line in mer_annotations_tsv if line.strip()]
    gold_annotations = [normalize_standoff_annotation(line)
                        for line in gold_annotations_standoff if line.strip()]
    ev = Evaluator.Evaluator(gold_terms=set(gold_annotations), pred_terms=set(mer_annotations))

    # Relies on Evaluator internals (presumably parallel label lists -- TODO
    # confirm against the Evaluator source).
    evaluator_master._y_pred += ev._y_pred
    evaluator_master._y_true += ev._y_true

macro_precision = evaluator_master.precision()
macro_recall = evaluator_master.recall()
macro_f1_score = evaluator_master.f1_score()

# Single-argument print(...) produces the same output on Python 2 and Python 3.
print("Macro Precision: " + str(macro_precision))
print("Macro Recall: " + str(macro_recall))
print("Macro F1-Score: " + str(macro_f1_score))

Macro Precision: 0.5141536684
Macro Recall: 0.471398305085
Macro F1-Score: 0.491848576955
