In [54]:
import re
import misaka
import spacy
from bs4 import BeautifulSoup
from collections import defaultdict

In [67]:
# Read the example markdown document and keep only the part before the first
# 'Legend:' marker (the whole file if the marker is absent).
# The encoding is given explicitly so decoding -- and with it every character
# offset later computed on `md` -- does not depend on the platform's default
# locale. NOTE(review): assumes example.md is stored as UTF-8 -- confirm.
with open('./example.md', encoding='utf-8') as f:
    md = f.read().partition('Legend:')[0]
# spaCy pipeline used by extract() through the module-level name `nlp`.
nlp = spacy.load("en_core_web_lg")

In [68]:
def get_ground_truth(raw_md):
    """Collect the annotated character spans of the ``<mark>`` tags in ``raw_md``.

    Every ``<mark ...>text</mark>`` whose text consists only of word characters
    and whitespace contributes one ``(start, end)`` pair, filed under the
    mark's ``id`` attribute. Offsets are relative to ``raw_md`` with all mark
    tags removed: the length of every opening and closing mark tag seen so far
    is subtracted from the raw match position.

    Mark tags that do not form such an annotation (text with punctuation,
    empty text, nested markup) are not recorded, but their length is still
    discounted. Previously they were ignored entirely, which shifted every
    later span to the right by the length of the skipped tags.

    Args:
        raw_md: Markdown source containing inline ``<mark>`` annotations.

    Returns:
        A ``defaultdict(set)`` mapping mark id to a set of ``(start, end)``
        tuples, ``end`` being exclusive. The key is ``None`` for a mark
        without an ``id`` attribute.
    """
    truth = defaultdict(set)
    # One left-to-right pass over every mark tag:
    #   groups 1-3 -> a complete annotation that can be recorded;
    #   no groups  -> a lone opening/closing mark tag that only adds overhead.
    # The opening tag is restricted to <mark ...>; any other tag would make
    # soup.find('mark') below return None and crash on .get().
    marks = re.compile(
        r'(<mark\b[^>]*>)([\w\s]+)(</mark>)'
        r'|<mark\b[^>]*>|</mark>'
    )
    position_overhead = 0

    for m in marks.finditer(raw_md):
        if m.group(2) is None:
            # Not a recordable annotation, but the tag is still absent from
            # the tag-free text, so its length has to be discounted.
            position_overhead += len(m.group(0))
            continue

        markdown, opening_tag, closing_tag = m.group(0), m.group(1), m.group(3)
        # Render just this snippet and read the id attribute off the parsed tag.
        soup = BeautifulSoup(misaka.html(markdown), 'html.parser')
        idx = soup.find('mark').get('id')

        # The opening tag precedes the text, so discount it before converting
        # the raw offsets; the closing tag only affects later matches.
        position_overhead += len(opening_tag)
        pos_start = m.start(2) - position_overhead
        pos_end = m.end(2) - position_overhead
        position_overhead += len(closing_tag)
        truth[idx].add((pos_start, pos_end))

    return truth

In [69]:
def extract(text):
    """Return the character spans of all PERSON entities found in ``text``.

    Runs the module-level ``nlp`` pipeline on ``text`` (passing 'textcat' as a
    disabled component) and keeps the entities whose label is 'PERSON'.

    Args:
        text: Plain text to run the pipeline on.

    Returns:
        A ``defaultdict(set)`` mapping the lower-cased entity text to a set of
        ``(start_char, end_char)`` tuples.
    """
    prediction = defaultdict(set)

    for ent in nlp(text, disable=['textcat']).ents:
        if ent.label_ != 'PERSON':
            continue
        span = (ent.start_char, ent.end_char)
        prediction[ent.text.lower()].add(span)

    return prediction

In [70]:
def getRecall(tp, fn):
    """Return recall, ``tp / (tp + fn)``.

    Args:
        tp: Number of true positives.
        fn: Number of false negatives.

    Returns:
        The recall, or 0 when both counts are zero (nothing to recall).
    """
    return tp / (tp + fn) if (tp or fn) else 0

def getPrecision(tp, fp):
    """Return precision, ``tp / (tp + fp)``.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.

    Returns:
        The precision, or 0 when both counts are zero (nothing was predicted).
    """
    return tp / (tp + fp) if (tp or fp) else 0

def getFScore(truth, prediction):
    """Return the F1 score of ``prediction`` measured against ``truth``.

    An item counts as a true positive when it occurs in both collections, as a
    false positive when it occurs only in ``prediction`` and as a false
    negative when it occurs only in ``truth``.

    Args:
        truth: Collection of expected items (the notebook passes sets of
            ``(start, end)`` tuples).
        prediction: Collection of predicted items of the same kind.

    Returns:
        The harmonic mean of precision and recall, or 0 when both are zero.
    """
    tp = [val for val in truth if val in prediction]
    # A predicted item is a true positive exactly when it also occurs in
    # `truth`, so test against `truth` itself rather than scanning the `tp`
    # list once per prediction (which was quadratic in the input size).
    fp = [val for val in prediction if val not in truth]
    fn = [val for val in truth if val not in prediction]

    precision = getPrecision(len(tp), len(fp))
    recall = getRecall(len(tp), len(fn))

    # Both zero: the harmonic mean is undefined, report 0 like the helpers do.
    if precision + recall == 0:
        return 0

    return 2 * (precision * recall) / (precision + recall)

In [71]:
# Ground-truth spans per mark id, read from the <mark> annotations in `md`.
truth = get_ground_truth(md)

In [72]:
# Render the markdown to HTML, strip the tags again and run the entity
# extraction on the remaining plain text.
# NOTE(review): get_ground_truth() measures offsets on the raw markdown with
# only the mark tags removed; they line up with this text only if rendering
# changes nothing else (headings, emphasis, entities, ...) -- confirm.
rendered_html = misaka.html(md)
soup = BeautifulSoup(rendered_html, 'html.parser')
plain_text = soup.get_text()
prediction = extract(plain_text)

In [75]:
# Hand-edited and tidied version of the extraction output. This assignment
# replaces whatever `prediction` held before (e.g. the result of extract()).
# Keys are lower-cased entity names, values are sets of (start, end) spans.
prediction = {
    "robb": {
        (150, 154), (1444, 1448), (3231, 3235),
        (3651, 3655), (3708, 3712), (3738, 3742),
    },
    "jon": {
        (159, 162), (2062, 2070), (2756, 2759), (2901, 2904),
        (2924, 2927), (3424, 3432), (3533, 3536), (3686, 3689),
    },
    "winterfell": {(416, 426), (930, 940), (1930, 1940), (3008, 3018)},
    "ned": {(1278, 1290)},
    "theon": {(1315, 1328), (2620, 2625), (2784, 2791)},
    "valyrian": {(1464, 1472), (1538, 1546)},
    "jory": {(1606, 1617)},
    "robert": {(1717, 1723)},
    "baratheon": {(1737, 1746)},
    "andals": {(1783, 1789)},
    "stark": {(3525, 3530)},
}

In [74]:
# Per-entity F1: one score for every id in the ground truth. An id with no
# entry in `prediction` scores 0; ids that occur only in `prediction` are
# not listed here.
result = {
    key: getFScore(value, prediction[key]) if key in prediction else 0
    for key, value in truth.items()
}

result

{'deserter': 0,
 'ned': 0.07692307692307693,
 'robb': 0.7058823529411764,
 'jon': 0.8,
 'bran': 0,
 'theon': 0.6666666666666666,
 'jory': 1.0,
 'robert': 1.0}

In [64]:
# Average F1: unweighted mean of the per-entity scores in `result`.
sum(result.values()) / len(result)

0.531184012066365

In [65]:
# Overall F1: pool the spans of all entities into one set per side and score
# once. Entity ids are dropped here, so only the span positions are compared.
# set().union(...) is used instead of set.union(...) because the latter raises
# TypeError when the dict is empty and there is nothing to unpack.
truth_combined = set().union(*truth.values())
prediction_combined = set().union(*prediction.values())
getFScore(truth_combined, prediction_combined)

0.35593220338983045