In [1]:
from spacy.tokens import DocBin, Doc, Span, Token
from spacy.training import Example
import verb_cue_classifier
import content_classifier
import source_classifier
import content_resolver
import source_resolver
import quote_resolver
import baseline
import spacy

from sklearn.metrics import classification_report
from spacy.training import offsets_to_biluo_tags


from bratcorpus import BratCorpus
import spacy_udpipe

In [2]:
def init_pipeline(nlp, text_features=False, ner=False):
    """Append the quote-attribution components to ``nlp`` and return it.

    Args:
        nlp: Pipeline object exposing ``add_pipe``; it is modified in place.
        text_features: When true, the content and source stages use the
            ``*_text_features`` / ``*_text_classifier`` components instead
            of the plain ``content_classifier`` / ``source_classifier``.
        ner: When true, the ``ner`` component of the model stored at
            ``verb-cue-classifier/output/model-best`` is added first under
            the name ``ner_vcc``, positioned before the pipe named ``ner``.

    Returns:
        The same ``nlp`` object that was passed in.
    """
    if ner:
        trained_cue_model = spacy.load("verb-cue-classifier/output/model-best")
        nlp.add_pipe("ner", source=trained_cue_model, name="ner_vcc", before="ner")

    # Pick the classifier variant once; both stages switch together.
    if text_features:
        content_stage = ('content_classifier_text_features', 'content_text_classifier')
        source_stage = ('source_classifier_text_features', 'source_text_classifier')
    else:
        content_stage = ('content_classifier',)
        source_stage = ('source_classifier',)

    # Components are appended in exactly this sequence.
    component_names = (
        'verb_cue_classifier',
        'content_classifier_features',
        *content_stage,
        'source_classifier_features',
        *source_stage,
        'content_resolver',
        'source_resolver',
        'quote_resolver',
    )
    for component_name in component_names:
        nlp.add_pipe(component_name)
    return nlp
def span_after_alignment(span, example):
    """Map ``span`` through ``example.alignment.x2y`` onto ``example.reference``.

    The new span starts at the first index aligned with the span's first
    token and ends one past the last index aligned with the span's last
    token. ``span`` is assumed to index tokens on the x side of the
    alignment (TODO confirm with callers).

    Returns:
        A ``Span`` over ``example.reference``.
    """
    x2y = example.alignment.x2y
    ref_start = x2y[span.start][0]
    # span.end is exclusive, so the last covered token is span.end - 1.
    ref_end = x2y[span.end - 1][-1] + 1
    return Span(example.reference, ref_start, ref_end)

def is_span_in(span, arr):
    """Tell whether ``arr`` contains a span with the same boundaries as ``span``.

    Only the ``start`` and ``end`` attributes are compared.

    Returns:
        True if at least one element of ``arr`` matches, otherwise False.
    """
    return any(
        span.start == candidate.start and span.end == candidate.end
        for candidate in arr
    )

def get_other_span(span, arr):
    """Return the first element of ``arr`` whose boundaries equal those of ``span``.

    Only the ``start`` and ``end`` attributes are compared.

    Returns:
        The matching element from ``arr``, or None when there is none.
    """
    same_boundaries = (
        candidate
        for candidate in arr
        if span.start == candidate.start and span.end == candidate.end
    )
    return next(same_boundaries, None)

def f1_bbc(corpus):
    """Compute precision, recall and F1 over predicted cue attributions.

    Every example's ``predicted`` doc is passed through the module-level
    ``nlp`` pipeline (it must be defined before this function is called).
    A predicted cue counts as a true positive when, after mapping through
    the example alignment, the cue matches a reference verb cue and every
    predicted content span and every predicted source span for that cue is
    found among the reference spans of the matched cue. Any other predicted
    cue counts as a false positive. When a cue was matched but its spans
    were not, the predicted and reference spans are printed.

    Args:
        corpus: Iterable of examples exposing ``predicted``, ``reference``
            and ``alignment``.

    Returns:
        Tuple ``(precision, recall, f1)``. A metric whose denominator is
        zero (empty corpus, no predictions, no reference cues) is reported
        as 0.0 instead of raising ``ZeroDivisionError``.
    """
    tp = 0
    fp = 0
    true_count = 0
    for ex in corpus:
        doc = nlp(ex.predicted)
        other = ex.reference
        # Recall denominator: number of reference cues that have content.
        true_count += len(other._.cue_to_content)
        for cue, content_spans in doc._.cue_to_content.items():
            match = True

            # Resolve the predicted cue to the reference cue object, if any.
            other_cue = get_other_span(span_after_alignment(cue, ex), other._.verb_cues)
            if other_cue is None:
                match = False
            else:
                for content in content_spans:
                    other_content = span_after_alignment(content, ex)
                    if not is_span_in(other_content, other._.cue_to_content[other_cue]):
                        match = False
                        break
                for source in doc._.cue_to_source[cue]:
                    other_source = span_after_alignment(source, ex)
                    if not is_span_in(other_source, other._.cue_to_source[other_cue]):
                        match = False
                        break
            if match:
                tp += 1
            else:
                if other_cue is not None:
                    # Cue was found but its spans differ: show both sides.
                    print('sentence', doc.text)
                    print('pred content', content_spans)
                    print('true content', other._.cue_to_content[other_cue])
                    print('pred source', doc._.cue_to_source[cue])
                    print('true source', other._.cue_to_source[other_cue])
                    print('---------------')
                fp += 1

    # Guard each ratio: an undefined metric is reported as 0.0.
    predicted_count = tp + fp
    precision = tp / predicted_count if predicted_count else 0.0
    recall = tp / true_count if true_count else 0.0
    pr_sum = precision + recall
    f1 = 2 * precision * recall / pr_sum if pr_sum else 0.0
    return precision, recall, f1

In [23]:
def spans_to_label(example, span_f):
    """Turn the spans of an example into aligned per-token binary labels.

    Each token of ``example.predicted`` and ``example.reference`` is tagged
    ``'lbl'`` when it falls inside one of the spans returned by ``span_f``
    for that doc, and ``'None'`` otherwise. The reference tags are then
    projected onto the predicted tokenization through
    ``example.alignment.x2y``: a predicted token gets ``'lbl'`` as its true
    tag when at least one of its aligned reference tokens is labelled.

    Args:
        example: Object exposing ``predicted``, ``reference`` and
            ``alignment``.
        span_f: Callable taking a doc and returning an iterable of spans
            that have ``start_char`` and ``end_char``.

    Returns:
        Tuple ``(tags_pred, tags_true)``; both lists have one entry per
        token of ``example.predicted``.

    Raises:
        ValueError: Propagated from tagging the reference doc, after the
            reference has been printed for diagnosis.
    """
    label = 'lbl'

    def _token_labels(doc):
        # One 'lbl'/'None' entry per token: anything but BILUO 'O' is labelled.
        offsets = [(s.start_char, s.end_char, label) for s in span_f(doc)]
        biluo_tags = offsets_to_biluo_tags(doc, offsets)
        return ['None' if tag == 'O' else label for tag in biluo_tags]

    tags_pred = _token_labels(example.predicted)

    # The reference tags used to be computed twice, with the first call
    # outside the try, so this diagnostic could never run. Compute once and
    # re-raise: there are no reference tags to continue with.
    try:
        reference_tags = _token_labels(example.reference)
    except ValueError:
        print(example.reference)
        raise

    x2y = example.alignment.x2y
    # t -> [lbl, lbl, None] labels
    # if at least one matching tag is labelled, then we consider t labelled
    tags_true = [
        label if any(reference_tags[i] == label for i in x2y[idx]) else 'None'
        for idx in range(len(tags_pred))
    ]
    return tags_pred, tags_true

def per_label_metrics(examples):
    """Print token-level classification reports for source and cue spans.

    Each example's ``predicted`` doc is passed through the module-level
    ``nlp`` pipeline (it must be defined before this function is called),
    then ``spans_to_label`` produces predicted/true token labels for
    ``doc._.source_spans`` and ``doc._.verb_cues``. The labels of all
    examples are concatenated and reported with ``classification_report``.
    Content spans are not evaluated here.

    Args:
        examples: Iterable of examples accepted by ``spans_to_label``.

    Returns:
        Always 0; the metrics are only printed.
    """
    def cue_spans(doc):
        return doc._.verb_cues

    def source_spans(doc):
        return doc._.source_spans

    cue_pred, cue_true = [], []
    source_pred, source_true = [], []

    for ex in examples:
        # The returned doc is not used; presumably the pipeline annotates
        # ex.predicted in place, which spans_to_label then reads (confirm).
        nlp(ex.predicted)

        ex_cue_pred, ex_cue_true = spans_to_label(ex, cue_spans)
        cue_pred.extend(ex_cue_pred)
        cue_true.extend(ex_cue_true)

        ex_source_pred, ex_source_true = spans_to_label(ex, source_spans)
        source_pred.extend(ex_source_pred)
        source_true.extend(ex_source_true)

    print("Source")
    print(classification_report(source_true, source_pred))

    print("Cue")
    print(classification_report(cue_true, cue_pred))

    return 0

In [22]:
# Corpus object for the data under 'sir/data/triple_manual/'; later cells
# call it as cs(nlp) to obtain the examples passed to per_label_metrics.
cs = BratCorpus('sir/data/triple_manual/')

In [14]:
# Build the attribution pipeline on top of the UDPipe "cs" model, using the
# classifiers without text features. `nlp` is a module-level name that
# f1_bbc and per_label_metrics read directly, so this cell must run first.
nlp = init_pipeline(spacy_udpipe.load("cs"), text_features=False)
# Disabled debugging / F1 evaluation calls, kept for reference:
#for ex in cs(nlp):
#    print(ex)
#print(f1_bbc(cs(nlp)))



In [11]:
# Prints the "Source" and "Cue" reports for the current global `nlp`.
# per_label_metrics always returns 0, so `p` carries no metrics.
p = per_label_metrics(cs(nlp))

Mismatch sir/data/triple_manual/doc-5866923.xml.ann informoval | T15	PHRASE 203 212	informova
Mismatch sir/data/triple_manual/doc-6706118.xml.ann vydavatelství Redbird Music | T6	official-non-political 1383 1409	vydavatelství Redbird Musi
Mismatch sir/data/triple_manual/doc-7901307.xml.ann Polští hasiči | T4	anonymous-partial 843 855	Polští hasič
Mismatch sir/data/triple_manual/doc-8404622.xml.ann informoval | T4	PHRASE 162 171	informova
Source
              precision    recall  f1-score   support

        None       0.98      0.85      0.91     14493
         lbl       0.15      0.56      0.24       703

    accuracy                           0.83     15196
   macro avg       0.56      0.70      0.57     15196
weighted avg       0.94      0.83      0.88     15196

Cue
              precision    recall  f1-score   support

        None       0.99      1.00      0.99     14926
         lbl       0.78      0.48      0.59       270

    accuracy                           0.99     15196
  

In [24]:
# Rebuild the global pipeline with the text-feature classifier variants and
# rerun the same per-label evaluation (per_label_metrics returns 0).
nlp = init_pipeline(spacy_udpipe.load("cs"), text_features=True)
p = per_label_metrics(cs(nlp))



Source
              precision    recall  f1-score   support

        None       0.97      0.92      0.94     14493
         lbl       0.20      0.42      0.27       703

    accuracy                           0.89     15196
   macro avg       0.58      0.67      0.60     15196
weighted avg       0.93      0.89      0.91     15196

Cue
              precision    recall  f1-score   support

        None       0.99      1.00      0.99     14926
         lbl       0.78      0.48      0.59       270

    accuracy                           0.99     15196
   macro avg       0.88      0.74      0.79     15196
weighted avg       0.99      0.99      0.99     15196



In [25]:
# Baseline: rebind the global `nlp` to a fresh UDPipe "cs" model with only
# the 'rule_based_attribution' component added (the cell output shows a
# baseline.RuleBasedAttribution instance).
nlp = spacy_udpipe.load("cs")
nlp.add_pipe('rule_based_attribution')



<baseline.RuleBasedAttribution at 0x7f9fc2268880>

In [26]:
# Evaluate the rule-based baseline with the same per-label reports.
# per_label_metrics always returns 0, so `p` carries no metrics.
p = per_label_metrics(cs(nlp))

1 quotation marks found, indicating an unclosed quotation; given the limitations of this method, it's safest to bail out rather than guess which quotation is unclosed
7 quotation marks found, indicating an unclosed quotation; given the limitations of this method, it's safest to bail out rather than guess which quotation is unclosed
Source
              precision    recall  f1-score   support

        None       0.95      1.00      0.98     14493
         lbl       0.75      0.00      0.01       703

    accuracy                           0.95     15196
   macro avg       0.85      0.50      0.49     15196
weighted avg       0.94      0.95      0.93     15196

Cue
              precision    recall  f1-score   support

        None       0.98      1.00      0.99     14926
         lbl       0.75      0.01      0.02       270

    accuracy                           0.98     15196
   macro avg       0.87      0.51      0.51     15196
weighted avg       0.98      0.98      0.97     15196

