# **Simple Yet Powerful: An Overlooked Architecture for Nested Named Entity Recognition - Task-specific metrics**

In [None]:
import numpy as np
import ast
from collections import defaultdict

# **Predictions**

Create the following folders: `layered`, `exhaustive`, `boundary`, `tacl`, and `mlc`, and put each model's prediction files in the corresponding folder. For MLC, additionally create the folders `genia`, `germ`, and `wl` inside the `mlc` folder, and place in each of them the prediction files for each entity type. In addition, load the original files of each dataset to make a fair comparison with the models that cannot cover any of the nesting types.

**Baseline 1: Layered**

In [None]:
def read_layered_predictions(filepath):
  """Parse a Layered-baseline result file into predicted and gold entities.

  Lines starting with ``predict`` or ``gold`` are split on ``|``; the first
  field is discarded and every remaining non-empty field is read as
  ``start,end,type``. All other lines are ignored. The end offset stored in
  the file is decremented by one (presumably exclusive -> inclusive).

  Args:
    filepath: path of the result file, read as UTF-8.

  Returns:
    A ``(layered_pred, layered_gold)`` pair. Each is a list with one item per
    predict/gold line pair, holding ``[type, start, end]`` lists.
  """
  def parse_entities(line):
    # Fields look like "start,end,type"; empty fields (e.g. after a trailing
    # '|') are skipped.
    parsed = []
    for entity in line.split('|')[1:]:
      if entity == '':
        continue
      entity_info = entity.split(',')
      label = entity_info[2]
      start = int(entity_info[0])
      end = int(entity_info[1]) - 1
      parsed.append([label, start, end])
    return parsed

  # The context manager guarantees the handle is closed.
  with open(filepath, 'r', encoding='utf-8') as handle:
    lines = handle.read().split('\n')

  predict = [line for line in lines if line.startswith('predict')]
  gold = [line for line in lines if line.startswith('gold')]

  layered_pred = []
  layered_gold = []
  # zip pairs the i-th predict line with the i-th gold line.
  for p, t in zip(predict, gold):
    layered_pred.append(parse_entities(p))
    layered_gold.append(parse_entities(t))
  return layered_pred, layered_gold

**Baseline 2: Exhaustive**

In [None]:
def read_exhaustive_predictions(filepath):
  """Parse an Exhaustive-baseline result file.

  Lines starting with ``Gold:`` / ``Pred:`` are expected to hold, after their
  first six characters, a Python dict literal mapping ``(start, end)`` to an
  entity type. The end offset is decremented by one.

  Args:
    filepath: path of the result file, read as UTF-8 like the other readers
      in this file.

  Returns:
    A ``(pred_entities, gold_entities)`` pair of per-sentence lists of
    ``[type, start, end]``.
  """
  def parse_entities(line):
    # literal_eval only evaluates Python literals, never arbitrary code.
    spans = ast.literal_eval(line[6:])
    return [[label, int(span[0]), int(span[1]) - 1] for span, label in spans.items()]

  with open(filepath, 'r', encoding='utf-8') as handle:
    lines = handle.read().split('\n')

  gold = [line for line in lines if line.startswith('Gold:')]
  predict = [line for line in lines if line.startswith('Pred:')]

  pred_entities = []
  gold_entities = []
  for p, t in zip(predict, gold):
    pred_entities.append(parse_entities(p))
    gold_entities.append(parse_entities(t))
  return pred_entities, gold_entities

**Baseline 3: Boundary**

In [None]:
def get_boundary_sentences(path):
  """Split a Boundary-baseline result file into records of three lines.

  Args:
    path: path of the result file, read as UTF-8.

  Returns:
    A list of 3-element lists of consecutive lines. Trailing lines that do
    not fill a complete record are dropped.
  """
  group_size = 3
  with open(path, 'r', encoding='utf-8') as handle:
    lines = handle.read().splitlines()
  # Only complete records are kept.
  usable = len(lines) - len(lines) % group_size
  return [lines[i:i + group_size] for i in range(0, usable, group_size)]

In [None]:
def read_boundary_predictions(filepath):
  """Read the predicted entities of a Boundary-baseline result file.

  The second line of every record returned by ``get_boundary_sentences`` is
  evaluated as a dict literal mapping ``(start, end)`` to an entity type.

  Args:
    filepath: path of the result file.

  Returns:
    One list per record with ``[type, start, end - 1]`` items. Only
    predictions are returned; gold entities are not read here.
  """
  predictions = []
  for record in get_boundary_sentences(filepath):
    spans = ast.literal_eval(record[1])
    predictions.append([[label, span[0], span[1] - 1] for span, label in spans.items()])
  return predictions

**Baseline 4: Recursive-CRF**

In [None]:
def get_tacl_sentences(path):
  """Split a Recursive-CRF result file into records of four lines.

  Args:
    path: path of the result file, read as UTF-8.

  Returns:
    A list of 4-element lists of consecutive lines. Trailing lines that do
    not fill a complete record are dropped.
  """
  group_size = 4
  with open(path, 'r', encoding='utf-8') as handle:
    lines = handle.read().splitlines()
  # Only complete records are kept.
  usable = len(lines) - len(lines) % group_size
  return [lines[i:i + group_size] for i in range(0, usable, group_size)]

In [None]:
def get_tacl_entities(line):
  """Parse one entity line of a Recursive-CRF result file.

  Args:
    line: ``|``-separated items of the form ``start,end type``; may be empty.

  Returns:
    A list of ``[type, start, end - 1]``; empty when ``line`` is empty.
  """
  if line == '':
    return []

  parsed = []
  for chunk in line.split('|'):
    fields = chunk.split(',')
    begin = int(fields[0])
    # The second field carries "end type", separated by a single space.
    tail = fields[1].split(' ')
    finish = int(tail[0]) - 1
    label = fields[1].split(' ')[1]
    parsed.append([label, begin, finish])
  return parsed

In [None]:
def read_tacl_predictions(filepath):
  """Read predicted and gold entities from a Recursive-CRF result file.

  In every record the second line is parsed as the gold entities and the
  third line as the predicted entities.

  Args:
    filepath: path of the result file.

  Returns:
    A ``(predicted, gold)`` pair of per-record entity lists.
  """
  gold_by_sentence = []
  pred_by_sentence = []
  for record in get_tacl_sentences(filepath):
    gold_by_sentence.append(get_tacl_entities(record[1]))
    pred_by_sentence.append(get_tacl_entities(record[2]))
  return pred_by_sentence, gold_by_sentence

**Baseline 5: Pyramid**

In [None]:
def get_pyramid_sentences(path):
  """Split a Pyramid result file into records of four lines.

  Args:
    path: path of the result file, read as UTF-8.

  Returns:
    A list of 4-element lists of consecutive lines. Trailing lines that do
    not fill a complete record are dropped.
  """
  group_size = 4
  with open(path, 'r', encoding='utf-8') as handle:
    lines = handle.read().splitlines()
  # Only complete records are kept.
  usable = len(lines) - len(lines) % group_size
  return [lines[i:i + group_size] for i in range(0, usable, group_size)]

def get_pyramid_entities(line):
  """Parse one entity line of a Pyramid result file.

  Args:
    line: items of the form ``start,end,type``, each followed by ``|``. The
      piece after the last ``|`` is always discarded.

  Returns:
    A list of ``[type, start, end]``; offsets are used as written.
  """
  if line == '':
    return []

  parsed = []
  for chunk in line.split('|')[:-1]:
    fields = chunk.split(',')
    begin = int(fields[0])
    finish = int(fields[1])
    parsed.append([fields[2], begin, finish])
  return parsed

def read_pyramid_predictions(filepath):
  """Read predicted and gold entities from a Pyramid result file.

  In every record the second line is parsed as the gold entities and the
  third line as the predicted entities.

  Args:
    filepath: path of the result file.

  Returns:
    A ``(predicted, gold)`` pair of per-record entity lists.
  """
  gold_by_sentence = []
  pred_by_sentence = []
  for record in get_pyramid_sentences(filepath):
    gold_by_sentence.append(get_pyramid_entities(record[1]))
    pred_by_sentence.append(get_pyramid_entities(record[2]))
  return pred_by_sentence, gold_by_sentence

**Baseline 6: Biaffine**

In [None]:
def get_biaffine_sentences(path):
  """Split a Biaffine result file into records of four lines.

  Args:
    path: path of the result file, read as UTF-8.

  Returns:
    A list of 4-element lists of consecutive lines. Trailing lines that do
    not fill a complete record are dropped.
  """
  group_size = 4
  with open(path, 'r', encoding='utf-8') as handle:
    lines = handle.read().splitlines()
  # Only complete records are kept.
  usable = len(lines) - len(lines) % group_size
  return [lines[i:i + group_size] for i in range(0, usable, group_size)]

def get_biaffine_entities(line):
  """Parse one entity line of a Biaffine result file.

  Args:
    line: items of the form ``start,end,type``, each followed by ``|``. The
      piece after the last ``|`` is always discarded.

  Returns:
    A list of ``[type, start, end]``; offsets are used as written.
  """
  if line == '':
    return []

  parsed = []
  for chunk in line.split('|')[:-1]:
    fields = chunk.split(',')
    begin = int(fields[0])
    finish = int(fields[1])
    parsed.append([fields[2], begin, finish])
  return parsed

def read_biaffine_predictions(filepath):
  """Read predicted and gold entities from a Biaffine result file.

  In every record the second line is parsed as the gold entities and the
  third line as the predicted entities.

  Args:
    filepath: path of the result file.

  Returns:
    A ``(predicted, gold)`` pair of per-record entity lists.
  """
  gold_by_sentence = []
  pred_by_sentence = []
  for record in get_biaffine_sentences(filepath):
    gold_by_sentence.append(get_biaffine_entities(record[1]))
    pred_by_sentence.append(get_biaffine_entities(record[2]))
  return pred_by_sentence, gold_by_sentence

**MLC**

In [None]:

def merge_files(entities, dataset):
    """Merge the per-entity-type MLC prediction files into one file.

    For every name in ``entities`` the file ``mlc/{dataset}/{name}_test.tsv``
    is read. On each non-blank line the first whitespace-separated column is
    the token and the third column is the prediction. Output line ``j`` of
    ``mlc/{dataset}_pred.tsv`` holds the token (from the first entity file)
    followed by one prediction per entity type, in the order of ``entities``.
    Blank input lines become blank output lines.

    Args:
        entities: entity-type names, one input file each.
        dataset: dataset folder name under ``mlc/``.
    """
    columns = defaultdict(list)
    for i, entity in enumerate(entities):
        # Context manager closes each input handle.
        with open(f'mlc/{dataset}/{entity}_test.tsv', 'r', encoding='utf-8') as handle:
            predictions = handle.read()
        for j, line in enumerate(predictions.splitlines()):
            # Whitespace-only lines are treated as separators too; they would
            # otherwise fail on data[0].
            if not line.strip():
                columns[j].append('EOS')
                continue
            data = line.split()
            if i == 0:
                # Only the first file contributes the token column.
                columns[j].append(data[0])
            columns[j].append(data[2])

    # The output is opened only after every input was read, so a missing
    # input no longer leaves a truncated output file behind.
    with open(f'mlc/{dataset}_pred.tsv', 'w', encoding='utf-8') as output_file:
        for row in columns.values():
            if row[0] == 'EOS':
                output_file.write("\n")
            else:
                output_file.write(f"{row[0]} {' '.join(row[1:])}\n")

def get_entities(seq, suffix=False):
    """Gets entities from sequence.

    Args:
        seq (list): sequence of labels, or a list of such sequences (these
            are flattened with an 'O' appended after each one).
        suffix (bool): when True the chunk tag is the last character of each
            label (e.g. 'PER-B') instead of the first (e.g. 'B-PER').
    Returns:
        list: list of [chunk_type, chunk_start, chunk_end] lists, with an
            inclusive chunk_end.
    Example:
        >>> seq = ['B-PER', 'I-PER', 'O', 'B-LOC']
        >>> get_entities(seq)
        [['PER', 0, 1], ['LOC', 3, 3]]
    """
    # Imported locally: the file never imports ``warnings`` at module level,
    # so the warning path used to raise NameError instead of warning.
    import warnings

    def _validate_chunk(chunk, suffix):
        # Bare tags are accepted as-is; anything else must carry a
        # recognised tag prefix (or suffix) or a warning is issued.
        if chunk in ['O', 'B', 'I', 'E', 'S']:
            return
        if suffix:
            if not chunk.endswith(('-B', '-I', '-E', '-S')):
                warnings.warn('{} seems not to be NE tag.'.format(chunk))

        else:
            if not chunk.startswith(('B-', 'I-', 'E-', 'S-')):
                warnings.warn('{} seems not to be NE tag.'.format(chunk))

    # for nested list
    if any(isinstance(s, list) for s in seq):
        seq = [item for sublist in seq for item in sublist + ['O']]

    prev_tag = 'O'
    prev_type = ''
    begin_offset = 0
    chunks = []
    # The extra trailing 'O' closes a chunk that runs to the end of seq.
    for i, chunk in enumerate(seq + ['O']):
        _validate_chunk(chunk, suffix)

        if suffix:
            tag = chunk[-1]
            type_ = chunk[:-1].rsplit('-', maxsplit=1)[0] or '_'
        else:
            tag = chunk[0]
            type_ = chunk[1:].split('-', maxsplit=1)[-1] or '_'

        if end_of_chunk(prev_tag, tag, prev_type, type_):
            chunks.append([prev_type, begin_offset, i - 1])
        if start_of_chunk(prev_tag, tag, prev_type, type_):
            begin_offset = i
        prev_tag = tag
        prev_type = type_

    return chunks

def end_of_chunk(prev_tag, tag, prev_type, type_):
    """Tell whether a chunk closes between the previous and the current word.

    Args:
        prev_tag: previous chunk tag.
        tag: current chunk tag.
        prev_type: previous type.
        type_: current type.
    Returns:
        bool: True when a chunk ended.
    """
    # 'E' and 'S' always terminate the chunk they belong to.
    if prev_tag in ('E', 'S'):
        return True
    # 'B' or 'I' followed by a tag that cannot continue the chunk.
    if prev_tag in ('B', 'I') and tag in ('B', 'S', 'O'):
        return True
    # A change of type closes any chunk that was open.
    return prev_tag not in ('O', '.') and prev_type != type_

def start_of_chunk(prev_tag, tag, prev_type, type_):
    """Tell whether a chunk opens between the previous and the current word.

    Args:
        prev_tag: previous chunk tag.
        tag: current chunk tag.
        prev_type: previous type.
        type_: current type.
    Returns:
        bool: True when a chunk started.
    """
    # 'B' and 'S' always open a chunk.
    if tag in ('B', 'S'):
        return True
    # 'E' or 'I' right after a closed chunk or outside any chunk.
    if prev_tag in ('E', 'S', 'O') and tag in ('E', 'I'):
        return True
    # A change of type opens a chunk unless the current word is outside.
    return tag not in ('O', '.') and prev_type != type_

def get_entities_from_multiconll(sents):
  """Extract entities from sentences written in a multi-column CoNLL layout.

  Every line of a sentence is split on whitespace; the first column is
  skipped and each remaining column is collected as its own tag sequence,
  which is then decoded with ``get_entities``.

  Args:
    sents: sentence strings, one token per line.

  Returns:
    One list per sentence with the entities of all columns, in column order.
  """
  all_entities = []
  for sentence in sents:
    tags_by_column = defaultdict(list)
    for row in sentence.splitlines():
      for column, tag in enumerate(row.split()[1:]):
        tags_by_column[column].append(tag)

    all_entities.append(
      [chunk for tags in tags_by_column.values() for chunk in get_entities(tags)]
    )
  return all_entities

def read_file(path):
  """Read a UTF-8 text file and split it on blank lines.

  Args:
    path: file to read.

  Returns:
    The pieces separated by ``'\\n\\n'``. A file ending in a blank line
    yields a trailing empty string.
  """
  # Context manager closes the handle, which the previous version left open.
  with open(path, 'r', encoding='utf-8') as handle:
    return handle.read().split('\n\n')


def read_mlc_predictions(dataset):
  """Load MLC predictions and gold entities for one dataset.

  Gold sentences come from ``{dataset}.test.iob2`` and predictions from
  ``mlc/{dataset}_pred.tsv``; both are decoded with
  ``get_entities_from_multiconll``.

  Args:
    dataset: one of ``'genia'``, ``'germ'`` or ``'wl'``.

  Returns:
    A ``(pred_entities, golden_entities)`` pair of per-sentence entity lists.

  Raises:
    ValueError: if ``dataset`` is not a supported name (this used to surface
      as an UnboundLocalError at the return statement).
  """
  # Maps each dataset to whether the last chunk of its gold file is dropped.
  # NOTE(review): 'wl' keeps the last gold chunk while the other two drop it;
  # preserved from the original code, presumably because of how that file
  # ends - confirm against the data.
  drop_last_gold_chunk = {'genia': True, 'germ': True, 'wl': False}
  if dataset not in drop_last_gold_chunk:
    raise ValueError(f"Unknown dataset {dataset!r}; expected one of {sorted(drop_last_gold_chunk)}")

  gold_sentences = read_file(f'{dataset}.test.iob2')
  if drop_last_gold_chunk[dataset]:
    gold_sentences = gold_sentences[:-1]
  golden_entities = get_entities_from_multiconll(gold_sentences)

  # The last chunk of the merged prediction file is always dropped.
  pred_sentences = read_file(f'mlc/{dataset}_pred.tsv')[:-1]
  pred_entities = get_entities_from_multiconll(pred_sentences)

  return pred_entities, golden_entities

In [None]:
# Build mlc/<dataset>_pred.tsv for every dataset from its per-entity-type files.
for dataset_name, entities in (
  ('wl', ['Disease', 'Abbreviation', 'Medication', 'Body_Part', 'Family_Member', 'Procedure', 'Finding']),
  ('genia', ['DNA', 'RNA',  'protein', 'cell_line', 'cell_type']),
  ('germ', ['ORG', 'PER',  'LOC', 'OTH', 'DERIV', 'PART']),
):
  merge_files(entities, dataset_name)

# **Metric definitions**

In [None]:
# Function used to obtain complete nestings (internal and external entities).
def get_nestings(entities):
  """Group the entities of one sentence into nestings.

  A nesting is recorded for every entity that is not strictly contained in
  another entity's span and whose span contains at least one other entity
  (an entity with the identical span counts as contained).

  Args:
    entities: list of ``[type, start, end]`` with inclusive offsets.

  Returns:
    A ``(nestings, total)`` pair. ``nestings`` is a list of entity groups,
    each sorted by descending ``(span length, type)`` and listed only once;
    ``total`` holds every entity that takes part in a containment, once each.
  """
  nestings = []
  total = []

  for candidate in entities:
    inside_other = False
    group = [candidate]

    for other in entities:
      if candidate == other:
        continue
      c_start, c_end = candidate[1], candidate[2]
      o_start, o_end = other[1], other[2]

      # candidate lies within other and the two spans are not identical
      if o_start <= c_start and c_end <= o_end and (c_start, c_end) != (o_start, o_end):
        inside_other = True

      # other lies within candidate (identical spans included)
      if c_start <= o_start and o_end <= c_end:
        for member in (candidate, other):
          if member not in total:
            total.append(member)
        group.append(other)

    # Only outermost entities that actually contain something form a nesting.
    if inside_other or len(group) == 1:
      continue

    group.sort(key=lambda ent: (ent[2] - ent[1], ent[0]), reverse=True)
    if group not in nestings:
      nestings.append(group)
  return nestings, total

**Metric 0: Standard metric**

In [None]:
def metric(pred, gold):
  """Compute entity-level precision, recall and F1 over all sentences.

  An entity counts as correct when an equal ``[type, start, end]`` item is
  present in the gold list of the same sentence.

  Args:
    pred: per-sentence lists of predicted entities.
    gold: per-sentence lists of gold entities; paired with ``pred`` by
      position (extra sentences on either side are ignored by ``zip``).

  Returns:
    ``(precision, recall, f1, support)`` where ``support`` is the number of
    gold entities. A score whose denominator is zero is reported as 0, as
    the multilabel/SNT/DNT metrics in this file do, instead of raising
    ZeroDivisionError.
  """
  tp = 0
  fn = 0
  fp = 0
  support = 0
  for p, g in zip(pred, gold):
    for entity in p:
      if entity in g:
        tp += 1
      else:
        fp += 1

    support += len(g)
    fn += sum(1 for entity in g if entity not in p)

  precision = tp/(tp+fp) if tp+fp != 0 else 0
  recall = tp/(tp+fn) if tp+fn != 0 else 0
  f1 = (2*precision*recall)/(precision+recall) if precision+recall != 0 else 0

  return precision, recall, f1, support

**Metric 1: Nestings.**

In [None]:
def nesting_metric(pred_labels, true_labels):
  """Score complete nestings: a nesting is correct only if it matches exactly.

  Args:
    pred_labels: per-sentence lists of predicted ``[type, start, end]``.
    true_labels: per-sentence lists of gold ``[type, start, end]``.

  Returns:
    ``(precision, recall, f1, support)`` where ``support`` is the number of
    gold nestings. A score whose denominator is zero is reported as 0
    instead of raising ZeroDivisionError.
  """
  nesting_tp = 0
  nesting_fn = 0
  nesting_fp = 0
  support = 0
  for sent_pred_labels, sent_test_labels in zip(pred_labels, true_labels):
    pred_nestings, _ = get_nestings(sent_pred_labels)
    test_nestings, _ = get_nestings(sent_test_labels)

    support += len(test_nestings)
    for nesting in test_nestings:
      if nesting in pred_nestings:
        nesting_tp += 1
      else:
        nesting_fn += 1

    nesting_fp += sum(1 for nesting in pred_nestings if nesting not in test_nestings)

  nesting_precision = nesting_tp/(nesting_tp+nesting_fp) if nesting_tp+nesting_fp != 0 else 0
  nesting_recall = nesting_tp/(nesting_tp+nesting_fn) if nesting_tp+nesting_fn != 0 else 0
  nesting_f1 = 2*(nesting_precision*nesting_recall)/(nesting_precision+nesting_recall) if nesting_precision+nesting_recall != 0 else 0
  return nesting_precision, nesting_recall, nesting_f1, support

**Metric 2: Nested entities**

In [None]:
def nested_metric(pred_labels, true_labels):
  """Score every entity that belongs to a nesting (outer and inner alike).

  Args:
    pred_labels: per-sentence lists of predicted ``[type, start, end]``.
    true_labels: per-sentence lists of gold ``[type, start, end]``.

  Returns:
    ``(precision, recall, f1, support)`` where ``support`` counts the
    entities of all gold nestings. A score whose denominator is zero is
    reported as 0 instead of raising ZeroDivisionError.
  """
  nested_tp = 0
  nested_fn = 0
  nested_fp = 0
  support = 0

  for sent_pred_labels, sent_test_labels in zip(pred_labels, true_labels):
    pred_nestings, _ = get_nestings(sent_pred_labels)
    test_nestings, _ = get_nestings(sent_test_labels)

    # NOTE(review): an entity that belongs to several nestings is counted
    # once per nesting - kept as in the original; confirm this is intended.
    for nesting in test_nestings:
      for entity in nesting:
        support += 1
        if entity in sent_pred_labels:
          nested_tp += 1
        else:
          nested_fn += 1

    for nesting in pred_nestings:
      for entity in nesting:
        if entity not in sent_test_labels:
          nested_fp += 1

  nested_precision = nested_tp/(nested_tp+nested_fp) if nested_tp+nested_fp != 0 else 0
  nested_recall = nested_tp/(nested_tp+nested_fn) if nested_tp+nested_fn != 0 else 0
  nested_f1 = 2*(nested_precision*nested_recall)/(nested_precision+nested_recall) if nested_precision+nested_recall != 0 else 0
  return nested_precision, nested_recall, nested_f1, support

**Metric 3: Inner entities**

In [None]:
def inner_metric(pred_labels, true_labels):
  """Score the inner entities of nestings (every element but the first).

  Args:
    pred_labels: per-sentence lists of predicted ``[type, start, end]``.
    true_labels: per-sentence lists of gold ``[type, start, end]``.

  Returns:
    ``(precision, recall, f1, support)`` where ``support`` counts the inner
    entities of all gold nestings. A score whose denominator is zero is
    reported as 0 instead of raising ZeroDivisionError.
  """
  support = 0
  inner_tp = 0
  inner_fn = 0
  inner_fp = 0

  for sent_pred_labels, sent_test_labels in zip(pred_labels, true_labels):
    pred_nestings, _ = get_nestings(sent_pred_labels)
    test_nestings, _ = get_nestings(sent_test_labels)

    # nesting[0] is the outermost entity; the rest are treated as inner.
    for nesting in test_nestings:
      for entity in nesting[1:]:
        support += 1
        if entity in sent_pred_labels:
          inner_tp += 1
        else:
          inner_fn += 1

    for nesting in pred_nestings:
      for entity in nesting[1:]:
        if entity not in sent_test_labels:
          inner_fp += 1

  inner_precision = inner_tp/(inner_tp+inner_fp) if inner_tp+inner_fp != 0 else 0
  inner_recall = inner_tp/(inner_tp+inner_fn) if inner_tp+inner_fn != 0 else 0
  inner_f1 = 2*(inner_precision*inner_recall)/(inner_precision+inner_recall) if inner_precision+inner_recall != 0 else 0
  return inner_precision, inner_recall, inner_f1, support

**Metric 4: Outer entities**

In [None]:
def outer_metric(pred_labels, true_labels):
  """Score the outermost entity (first element) of every nesting.

  Args:
    pred_labels: per-sentence lists of predicted ``[type, start, end]``.
    true_labels: per-sentence lists of gold ``[type, start, end]``.

  Returns:
    ``(precision, recall, f1, support)`` where ``support`` is the number of
    gold nestings. A score whose denominator is zero is reported as 0
    instead of raising ZeroDivisionError.
  """
  outer_tp = 0
  outer_fn = 0
  outer_fp = 0
  support = 0
  for sent_pred_labels, sent_test_labels in zip(pred_labels, true_labels):
    pred_nestings, _ = get_nestings(sent_pred_labels)
    test_nestings, _ = get_nestings(sent_test_labels)

    support += len(test_nestings)
    for nesting in test_nestings:
      if nesting[0] in sent_pred_labels:
        outer_tp += 1
      else:
        outer_fn += 1

    outer_fp += sum(1 for nesting in pred_nestings if nesting[0] not in sent_test_labels)

  outer_precision = outer_tp/(outer_tp+outer_fp) if outer_tp+outer_fp != 0 else 0
  outer_recall = outer_tp/(outer_tp+outer_fn) if outer_tp+outer_fn != 0 else 0
  outer_f1 = 2*(outer_precision*outer_recall)/(outer_precision+outer_recall) if outer_precision+outer_recall != 0 else 0
  return outer_precision, outer_recall, outer_f1, support

**Metric 5: Flat entities**

In [None]:
def flat_metric(pred_labels, true_labels):
  """Score flat entities, i.e. entities that belong to no nesting.

  Args:
    pred_labels: per-sentence lists of predicted ``[type, start, end]``.
    true_labels: per-sentence lists of gold ``[type, start, end]``.

  Returns:
    ``(precision, recall, f1, support)`` where ``support`` is the number of
    gold flat entities. A score whose denominator is zero is reported as 0
    instead of raising ZeroDivisionError.
  """
  def flat_entities(labels):
    # Keep the entities that appear in none of the sentence's nestings.
    nestings, _ = get_nestings(labels)
    return [entity for entity in labels
            if not any(entity in nesting for nesting in nestings)]

  flat_tp = 0
  flat_fn = 0
  flat_fp = 0
  support = 0
  for sent_pred_labels, sent_test_labels in zip(pred_labels, true_labels):
    pred_flat_entities = flat_entities(sent_pred_labels)
    test_flat_entities = flat_entities(sent_test_labels)

    # A gold flat entity is a hit if it was predicted at all, whether or not
    # the prediction is flat on the predicted side.
    for entity in test_flat_entities:
      support += 1
      if entity in sent_pred_labels:
        flat_tp += 1
      else:
        flat_fn += 1

    for entity in pred_flat_entities:
      if entity not in sent_test_labels:
        flat_fp += 1

  flat_precision = flat_tp/(flat_tp+flat_fp) if flat_tp+flat_fp != 0 else 0
  flat_recall = flat_tp/(flat_tp+flat_fn) if flat_tp+flat_fn != 0 else 0
  flat_f1 = 2*(flat_precision*flat_recall)/(flat_precision+flat_recall) if flat_precision+flat_recall != 0 else 0
  return flat_precision, flat_recall, flat_f1, support

**Metric 6: Multilabel entities**

In [None]:
def multilabel_metric(pred_labels, true_labels):
  """Score multilabel entities: spans that carry more than one entity type.

  A gold multilabel span is a true positive only when all of its types were
  predicted for that span. A predicted multilabel span is a false positive
  when at least one of its types is absent from the gold entities.

  Args:
    pred_labels: per-sentence lists of predicted ``[type, start, end]``.
    true_labels: per-sentence lists of gold ``[type, start, end]``.

  Returns:
    ``(precision, recall, f1, support)`` where ``support`` is the number of
    gold multilabel spans; scores with a zero denominator are 0.
  """
  def labels_by_span(nestings):
    # An entity shared by several nestings is visited once per nesting, so
    # each label is recorded once per span. Without this a single-label span
    # showed up with a repeated label and was counted as multilabel.
    spans = defaultdict(list)
    for nesting in nestings:
      for entity in nesting:
        labels = spans[(entity[1], entity[2])]
        if entity[0] not in labels:
          labels.append(entity[0])
    return spans

  multilabel_tp = 0
  multilabel_fn = 0
  multilabel_fp = 0
  support = 0
  for sent_pred_labels, sent_test_labels in zip(pred_labels, true_labels):
    pred_nestings, _ = get_nestings(sent_pred_labels)
    test_nestings, _ = get_nestings(sent_test_labels)

    for span, labels in labels_by_span(test_nestings).items():
      if len(labels) > 1:
        support += 1
        if all([label, span[0], span[1]] in sent_pred_labels for label in labels):
          multilabel_tp += 1
        else:
          multilabel_fn += 1

    for span, labels in labels_by_span(pred_nestings).items():
      if len(labels) > 1:
        if not all([label, span[0], span[1]] in sent_test_labels for label in labels):
          multilabel_fp += 1

  multilabel_precision = multilabel_tp/(multilabel_tp+multilabel_fp) if multilabel_tp+multilabel_fp!=0 else 0
  multilabel_recall = multilabel_tp/(multilabel_tp+multilabel_fn) if multilabel_tp+multilabel_fn!=0 else 0
  multilabel_f1 = 2*(multilabel_precision*multilabel_recall)/(multilabel_precision+multilabel_recall) if multilabel_precision+multilabel_recall!=0 else 0
  return multilabel_precision, multilabel_recall, multilabel_f1, support

**Metric 7: Same type nesting**

In [None]:
def same_nesting_type_metric(pred_labels, true_labels):
  """Score same-type nestings (SNT).

  For each nesting, the outermost entity is grouped with the inner entities
  that share its type; groups with no such inner entity are discarded. A
  gold group is a true positive when all of its entities were predicted; a
  predicted group is a false positive when any of its entities is not gold.

  Args:
    pred_labels: per-sentence lists of predicted ``[type, start, end]``.
    true_labels: per-sentence lists of gold ``[type, start, end]``.

  Returns:
    ``(precision, recall, f1, support)`` where ``support`` is the number of
    gold same-type groups; scores with a zero denominator are 0.
  """
  def same_type_groups(nestings):
    groups = []
    for nesting in nestings:
      head = nesting[0]
      group = [head] + [member for member in nesting[1:] if member[0] == head[0]]
      if len(group) > 1:
        groups.append(group)
    return groups

  snt_tp = 0
  snt_fn = 0
  snt_fp = 0
  support = 0

  for sent_pred_labels, sent_test_labels in zip(pred_labels, true_labels):
    pred_nestings, _ = get_nestings(sent_pred_labels)
    test_nestings, _ = get_nestings(sent_test_labels)

    gold_groups = same_type_groups(test_nestings)
    pred_groups = same_type_groups(pred_nestings)

    for group in gold_groups:
      if all(member in sent_pred_labels for member in group):
        snt_tp += 1
      else:
        snt_fn += 1

    for group in pred_groups:
      if any(member not in sent_test_labels for member in group):
        snt_fp += 1

    support += len(gold_groups)

  snt_precision = snt_tp/(snt_tp+snt_fp) if snt_tp+snt_fp!=0 else 0
  snt_recall = snt_tp/(snt_tp+snt_fn) if snt_tp+snt_fn!=0 else 0
  snt_f1 = 2*(snt_precision*snt_recall)/(snt_precision+snt_recall) if snt_precision+snt_recall!=0 else 0
  return snt_precision, snt_recall, snt_f1, support

**Metric 8: Different type nesting**

In [None]:
def is_multilabel_entity(nesting):
  """Return True when every entity of the nesting has the first entity's span.

  An empty nesting yields True.
  """
  return all(
    entity[1] == nesting[0][1] and entity[2] == nesting[0][2]
    for entity in nesting
  )

def different_nesting_type_metric(pred_labels, true_labels):
  """Score different-type nestings (DNT).

  Nestings whose entities all share one span (see ``is_multilabel_entity``)
  are skipped. For the rest, the outermost entity is grouped with the inner
  entities whose type differs from its own; groups with no such inner entity
  are discarded. A gold group is a true positive when all of its entities
  were predicted; a predicted group is a false positive when any of its
  entities is not gold.

  Args:
    pred_labels: per-sentence lists of predicted ``[type, start, end]``.
    true_labels: per-sentence lists of gold ``[type, start, end]``.

  Returns:
    ``(precision, recall, f1, support)`` where ``support`` is the number of
    gold different-type groups; scores with a zero denominator are 0.
  """
  def different_type_groups(nestings):
    groups = []
    for nesting in nestings:
      if is_multilabel_entity(nesting):
        continue
      head = nesting[0]
      group = [head] + [member for member in nesting[1:] if member[0] != head[0]]
      if len(group) > 1:
        groups.append(group)
    return groups

  # Counters of the metric
  dnt_tp = 0
  dnt_fn = 0
  dnt_fp = 0
  support = 0

  for sent_pred_labels, sent_test_labels in zip(pred_labels, true_labels):

    # Collect all the nestings of the sentence
    pred_nestings, _ = get_nestings(sent_pred_labels)
    test_nestings, _ = get_nestings(sent_test_labels)

    gold_groups = different_type_groups(test_nestings)
    pred_groups = different_type_groups(pred_nestings)

    for group in gold_groups:
      if all(member in sent_pred_labels for member in group):
        dnt_tp += 1
      else:
        dnt_fn += 1

    for group in pred_groups:
      if any(member not in sent_test_labels for member in group):
        dnt_fp += 1

    support += len(gold_groups)

  dnt_precision = dnt_tp/(dnt_tp+dnt_fp) if dnt_tp+dnt_fp!=0 else 0
  dnt_recall = dnt_tp/(dnt_tp+dnt_fn) if dnt_tp+dnt_fn!=0 else 0
  dnt_f1 = 2*(dnt_precision*dnt_recall)/(dnt_precision+dnt_recall) if dnt_precision+dnt_recall!=0 else 0
  return dnt_precision, dnt_recall, dnt_f1, support

# **Metrics**

**Layered**

In [None]:
# GENIA
# Load the Layered predictions for GENIA and print the F1 score and support
# of every task-specific metric.
print('GENIA Layered Nested Metrics')
layered_genia_pred, layered_genia_gold = read_layered_predictions('layered/layered.genia.result.txt')

_, _, f1, support = metric(layered_genia_pred, layered_genia_gold)
print(f'Layered GENIA f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(layered_genia_pred, layered_genia_gold)
print(f'Layered GENIA Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(layered_genia_pred, layered_genia_gold)
print(f'Layered GENIA SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(layered_genia_pred, layered_genia_gold)
print(f'Layered GENIA DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(layered_genia_pred, layered_genia_gold)
print(f'Layered GENIA Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(layered_genia_pred, layered_genia_gold)
print(f'Layered GENIA Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(layered_genia_pred, layered_genia_gold)
print(f'Layered GENIA Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(layered_genia_pred, layered_genia_gold)
# The support is now printed here as on every other metric line.
print(f'Layered GENIA Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

# GERM
# Load the Layered predictions for GERM and print the F1 score and support
# of every task-specific metric.
print('GERM Layered Nested Metrics')
layered_germ_pred, layered_germ_gold = read_layered_predictions('layered/layered.germ.result.txt')

# Standard entity-level metric over all entities.
_, _, f1, support = metric(layered_germ_pred, layered_germ_gold)
print(f'Layered GERM f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(layered_germ_pred, layered_germ_gold)
print(f'Layered GERM Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(layered_germ_pred, layered_germ_gold)
print(f'Layered GERM SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(layered_germ_pred, layered_germ_gold)
print(f'Layered GERM DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(layered_germ_pred, layered_germ_gold)
print(f'Layered GERM Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(layered_germ_pred, layered_germ_gold)
print(f'Layered GERM Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(layered_germ_pred, layered_germ_gold)
print(f'Layered GERM Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(layered_germ_pred, layered_germ_gold)
print(f'Layered GERM Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

# WL
# Load the Layered predictions for the Chilean Waiting List and print the F1
# score and support of every metric. This section reports the multilabel
# metric and does not report SNT.
print('Chilean Waiting List Layered Nested Metrics')
layered_wl_pred, layered_wl_gold = read_layered_predictions('layered/layered.wl.result.txt')
# NOTE(review): only this call drops the last gold sentence ([:-1]); since
# metric() pairs sentences with zip, the last predicted sentence is ignored
# here too, while the metrics below use the full lists - confirm intent.
_, _ , f1, support = metric(layered_wl_pred, layered_wl_gold[:-1])
print(f'Layered WL f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(layered_wl_pred, layered_wl_gold)
print(f'Layered WL Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, multilabel_f1, support = multilabel_metric(layered_wl_pred, layered_wl_gold)
print(f'Layered WL Multilabel f1-score: {np.round(multilabel_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(layered_wl_pred, layered_wl_gold)
print(f'Layered WL DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(layered_wl_pred, layered_wl_gold)
print(f'Layered WL Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(layered_wl_pred, layered_wl_gold)
print(f'Layered WL Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(layered_wl_pred, layered_wl_gold)
print(f'Layered WL Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(layered_wl_pred, layered_wl_gold)
print(f'Layered WL Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')


**Exhaustive**

In [None]:
# GENIA
# Load the Exhaustive predictions for GENIA and print the F1 score and
# support of every task-specific metric.
print('GENIA Exhaustive Nested Metrics')
exhaustive_genia_pred, exhaustive_genia_gold = read_exhaustive_predictions('exhaustive/exhaustive.genia.result.txt')

p, r, f1, support = metric(exhaustive_genia_pred, exhaustive_genia_gold)
print(f'Exhaustive GENIA f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(exhaustive_genia_pred, exhaustive_genia_gold)
print(f'Exhaustive GENIA Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(exhaustive_genia_pred, exhaustive_genia_gold)
# Label corrected: this score is computed on GENIA but was printed as "GERM".
print(f'Exhaustive GENIA SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(exhaustive_genia_pred, exhaustive_genia_gold)
print(f'Exhaustive GENIA DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(exhaustive_genia_pred, exhaustive_genia_gold)
print(f'Exhaustive GENIA Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(exhaustive_genia_pred, exhaustive_genia_gold)
print(f'Exhaustive GENIA Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(exhaustive_genia_pred, exhaustive_genia_gold)
print(f'Exhaustive GENIA Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(exhaustive_genia_pred, exhaustive_genia_gold)
print(f'Exhaustive GENIA Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

# GERM
# Load the Exhaustive predictions for GERM and print the F1 score and
# support of every task-specific metric.
print('GERM Exhaustive Nested Metrics')
exhaustive_germ_pred, exhaustive_germ_gold = read_exhaustive_predictions('exhaustive/exhaustive.germ.result.txt')

# Standard entity-level metric over all entities.
p, r, f1, support = metric(exhaustive_germ_pred, exhaustive_germ_gold)
print(f'Exhaustive GERM f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(exhaustive_germ_pred, exhaustive_germ_gold)
print(f'Exhaustive GERM Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(exhaustive_germ_pred, exhaustive_germ_gold)
print(f'Exhaustive GERM SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(exhaustive_germ_pred, exhaustive_germ_gold)
print(f'Exhaustive GERM DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(exhaustive_germ_pred, exhaustive_germ_gold)
print(f'Exhaustive GERM Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(exhaustive_germ_pred, exhaustive_germ_gold)
print(f'Exhaustive GERM Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(exhaustive_germ_pred, exhaustive_germ_gold)
print(f'Exhaustive GERM Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(exhaustive_germ_pred, exhaustive_germ_gold)
print(f'Exhaustive GERM Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

# WL
# Exhaustive baseline on the Chilean Waiting List corpus. The gold value
# returned by the reader is discarded; every metric is computed against
# layered_wl_gold, which is defined outside this section.
print('Chilean Waiting List Exhaustive Nested Metrics')
exhaustive_wl_pred, _ = read_exhaustive_predictions(
    'exhaustive/exhaustive.wl.result.txt'
)
# Bundle predictions and gold once so each metric receives the same pair.
exhaustive_wl_pair = (exhaustive_wl_pred, layered_wl_gold)

_, _, f1, support = metric(*exhaustive_wl_pair)
print(f'Exhaustive WL f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*exhaustive_wl_pair)
print(f'Exhaustive WL Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, multilabel_f1, support = multilabel_metric(*exhaustive_wl_pair)
print(f'Exhaustive WL Multilabel f1-score: {np.round(multilabel_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*exhaustive_wl_pair)
print(f'Exhaustive WL SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*exhaustive_wl_pair)
print(f'Exhaustive WL DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*exhaustive_wl_pair)
print(f'Exhaustive WL Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*exhaustive_wl_pair)
print(f'Exhaustive WL Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*exhaustive_wl_pair)
print(f'Exhaustive WL Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*exhaustive_wl_pair)
print(f'Exhaustive WL Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')

**Boundary**

In [None]:
# GENIA
# Boundary model on GENIA: the overall F1 comes first, then one F1 per
# task-specific metric. Every metric call is unpacked into four values; only
# the third (F1) and the fourth (support) are printed.
print('GENIA Boundary Nested Metrics')
boundary_genia_pred, boundary_genia_gold = read_boundary_predictions('boundary/boundary.genia.result.txt')

p, r, f1, support = metric(boundary_genia_pred, boundary_genia_gold)
print(f'Boundary GENIA f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(boundary_genia_pred, boundary_genia_gold)
print(f'Boundary GENIA Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

# The label previously read "Boundary GERM SNT" although the score is computed
# on the GENIA predictions; it now names the correct dataset.
_, _, snt_f1, support = same_nesting_type_metric(boundary_genia_pred, boundary_genia_gold)
print(f'Boundary GENIA SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(boundary_genia_pred, boundary_genia_gold)
print(f'Boundary GENIA DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(boundary_genia_pred, boundary_genia_gold)
print(f'Boundary GENIA Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(boundary_genia_pred, boundary_genia_gold)
print(f'Boundary GENIA Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(boundary_genia_pred, boundary_genia_gold)
print(f'Boundary GENIA Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(boundary_genia_pred, boundary_genia_gold)
print(f'Boundary GENIA Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

# GERM
# Boundary model on GERM: the overall F1 comes first, then one F1 per
# task-specific metric. Every metric call is unpacked into four values; only
# the third (F1) and the fourth (support) are printed.
print('GERM boundary Nested Metrics')
boundary_germ_pred, boundary_germ_gold = read_boundary_predictions(
    'boundary/boundary.germ.result.txt'
)
# Bundle predictions and gold once so each metric receives the same pair.
boundary_germ_pair = (boundary_germ_pred, boundary_germ_gold)

p, r, f1, support = metric(*boundary_germ_pair)
print(f'Boundary GERM f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*boundary_germ_pair)
print(f'Boundary GERM Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*boundary_germ_pair)
print(f'Boundary GERM SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*boundary_germ_pair)
print(f'Boundary GERM DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*boundary_germ_pair)
print(f'Boundary GERM Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*boundary_germ_pair)
print(f'Boundary GERM Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*boundary_germ_pair)
print(f'Boundary GERM Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*boundary_germ_pair)
print(f'Boundary GERM Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

# WL
# Boundary model on the Chilean Waiting List corpus. The gold value returned
# by the reader is discarded; every metric is computed against
# layered_wl_gold, which is defined outside this section.
print('Chilean Waiting List Boundary Nested Metrics')
boundary_wl_pred, _ = read_boundary_predictions('boundary/boundary.wl.result.txt')

_, _, f1, support = metric(boundary_wl_pred, layered_wl_gold)
print(f'Boundary WL f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(boundary_wl_pred, layered_wl_gold)
print(f'Boundary WL Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, multilabel_f1, support = multilabel_metric(boundary_wl_pred, layered_wl_gold)
print(f'Boundary WL Multilabel f1-score: {np.round(multilabel_f1*100,1)}, support: {support}')

# This call previously passed boundary_wl_gold, a name this section never
# binds (the reader's gold is discarded above). It now uses layered_wl_gold
# like every other WL metric in this section.
_, _, snt_f1, support = same_nesting_type_metric(boundary_wl_pred, layered_wl_gold)
print(f'Boundary WL SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(boundary_wl_pred, layered_wl_gold)
print(f'Boundary WL DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(boundary_wl_pred, layered_wl_gold)
print(f'Boundary WL Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(boundary_wl_pred, layered_wl_gold)
print(f'Boundary WL Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(boundary_wl_pred, layered_wl_gold)
print(f'Boundary WL Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(boundary_wl_pred, layered_wl_gold)
print(f'Boundary WL Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')

In [None]:
# Ad-hoc inspection of the Boundary WL result file.
# read_boundary_predictions is unpacked into two values at its other call
# sites, so the whole returned pair is stored under its own name here rather
# than rebinding boundary_wl_pred, which would replace the prediction list
# with that pair.
boundary_wl_result = read_boundary_predictions('boundary/boundary.wl.result.txt')
# [-1] selects the last element of the returned pair, i.e. the value the other
# call sites bind as gold.
# NOTE(review): if the last sentence's predictions were intended, index the
# first element of the pair instead - confirm.
print(boundary_wl_result[-1])

**Tacl**

In [None]:
# GENIA
# Tacl model on GENIA: the overall F1 comes first, then one F1 per
# task-specific metric. Every metric call is unpacked into four values; only
# the third (F1) and the fourth (support) are printed.
print('GENIA Tacl Nested Metrics')
tacl_genia_pred, tacl_genia_gold = read_tacl_predictions(
    'tacl/tacl.genia.result.txt'
)
# Bundle predictions and gold once so each metric receives the same pair.
tacl_genia_pair = (tacl_genia_pred, tacl_genia_gold)

_, _, f1, support = metric(*tacl_genia_pair)
print(f'Tacl GENIA f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*tacl_genia_pair)
print(f'Tacl GENIA Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*tacl_genia_pair)
print(f'Tacl GENIA SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*tacl_genia_pair)
print(f'Tacl GENIA DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*tacl_genia_pair)
print(f'Tacl GENIA Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*tacl_genia_pair)
print(f'Tacl GENIA Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*tacl_genia_pair)
print(f'Tacl GENIA Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*tacl_genia_pair)
print(f'Tacl GENIA Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

# GERM
# Tacl model on GERM: the overall F1 comes first, then one F1 per
# task-specific metric. Every metric call is unpacked into four values; only
# the third (F1) and the fourth (support) are printed.
print('GERM Tacl Nested Metrics')
tacl_germ_pred, tacl_germ_gold = read_tacl_predictions(
    'tacl/tacl.germ.result.txt'
)
# Bundle predictions and gold once so each metric receives the same pair.
tacl_germ_pair = (tacl_germ_pred, tacl_germ_gold)

_, _, f1, support = metric(*tacl_germ_pair)
print(f'Tacl GERM f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*tacl_germ_pair)
print(f'Tacl GERM Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*tacl_germ_pair)
print(f'Tacl GERM SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*tacl_germ_pair)
print(f'Tacl GERM DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*tacl_germ_pair)
print(f'Tacl GERM Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*tacl_germ_pair)
print(f'Tacl GERM Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*tacl_germ_pair)
print(f'Tacl GERM Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*tacl_germ_pair)
print(f'Tacl GERM Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

# WL
# Tacl model on WL, scored against the gold returned by its own reader.
# Every metric call is unpacked into four values; only the third (F1) and the
# fourth (support) are printed.
# NOTE(review): no SNT (same_nesting_type_metric) score is reported here,
# unlike the other WL sections in this file - confirm that is intentional.
print('WL Tacl Nested Metrics')
tacl_wl_pred, tacl_wl_gold = read_tacl_predictions(
    'tacl/tacl.wl.result.txt'
)
# Bundle predictions and gold once so each metric receives the same pair.
tacl_wl_pair = (tacl_wl_pred, tacl_wl_gold)

_, _, f1, support = metric(*tacl_wl_pair)
print(f'Tacl WL f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*tacl_wl_pair)
print(f'Tacl WL Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, multilabel_f1, support = multilabel_metric(*tacl_wl_pair)
print(f'Tacl WL Multilabel f1-score: {np.round(multilabel_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*tacl_wl_pair)
print(f'Tacl WL DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*tacl_wl_pair)
print(f'Tacl WL Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*tacl_wl_pair)
print(f'Tacl WL Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*tacl_wl_pair)
print(f'Tacl WL Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*tacl_wl_pair)
print(f'Tacl WL Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')

**Pyramid**

In [None]:
# GENIA
# Pyramid model on GENIA. For the overall metric the first two returned values
# (p and r, presumably precision and recall) are printed on their own lines
# ahead of the F1 line; the remaining metrics print only F1 and support.
print('GENIA Pyramid Nested Metrics')
pyramid_genia_pred, pyramid_genia_gold = read_pyramid_predictions(
    'genia_predictions_pyramid_bert_78_0.txt'
)
# Bundle predictions and gold once so each metric receives the same pair.
pyramid_genia_pair = (pyramid_genia_pred, pyramid_genia_gold)

p, r, f1, support = metric(*pyramid_genia_pair)
print(np.round(p*100,1))
print(np.round(r*100,1))
print(f'Pyramid GENIA f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*pyramid_genia_pair)
print(f'Pyramid GENIA Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*pyramid_genia_pair)
print(f'Pyramid GENIA SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*pyramid_genia_pair)
print(f'Pyramid GENIA DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*pyramid_genia_pair)
print(f'Pyramid GENIA Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*pyramid_genia_pair)
print(f'Pyramid GENIA Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*pyramid_genia_pair)
print(f'Pyramid GENIA Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*pyramid_genia_pair)
print(f'Pyramid GENIA Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

In [None]:
# GermEval
# Pyramid model on GermEval. For the overall metric the first two returned
# values (p and r, presumably precision and recall) are printed on their own
# lines ahead of the F1 line; the remaining metrics print only F1 and support.
print('GermEval Pyramid Nested Metrics')
pyramid_germ_pred, pyramid_germ_gold = read_pyramid_predictions(
    'germ_predictions_pyramid_bert_86_7.txt'
)
# Bundle predictions and gold once so each metric receives the same pair.
pyramid_germ_pair = (pyramid_germ_pred, pyramid_germ_gold)

p, r, f1, support = metric(*pyramid_germ_pair)
print(np.round(p*100,1))
print(np.round(r*100,1))
print(f'Pyramid GERM f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*pyramid_germ_pair)
print(f'Pyramid GERM Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*pyramid_germ_pair)
print(f'Pyramid GERM SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*pyramid_germ_pair)
print(f'Pyramid GERM DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*pyramid_germ_pair)
print(f'Pyramid GERM Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*pyramid_germ_pair)
print(f'Pyramid GERM Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*pyramid_germ_pair)
print(f'Pyramid GERM Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*pyramid_germ_pair)
print(f'Pyramid GERM Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

In [None]:
# WL
# Pyramid model on WL, scored against the gold returned by its own reader.
# For the overall metric the first two returned values (p and r, presumably
# precision and recall) are printed on their own lines ahead of the F1 line;
# the remaining metrics print only F1 and support.
print('WL Pyramid Nested Metrics')
pyramid_wl_pred, pyramid_wl_gold = read_pyramid_predictions(
    'wl_predictions_pyramid_flair_78.6.txt'
)
# Bundle predictions and gold once so each metric receives the same pair.
pyramid_wl_pair = (pyramid_wl_pred, pyramid_wl_gold)

p, r, f1, support = metric(*pyramid_wl_pair)
print(np.round(p*100,1))
print(np.round(r*100,1))
print(f'Pyramid WL f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*pyramid_wl_pair)
print(f'Pyramid WL Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*pyramid_wl_pair)
print(f'Pyramid WL SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*pyramid_wl_pair)
print(f'Pyramid WL DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*pyramid_wl_pair)
print(f'Pyramid WL Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, multilabel_f1, support = multilabel_metric(*pyramid_wl_pair)
print(f'Pyramid WL Multilabel f1-score: {np.round(multilabel_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*pyramid_wl_pair)
print(f'Pyramid WL Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*pyramid_wl_pair)
print(f'Pyramid WL Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*pyramid_wl_pair)
print(f'Pyramid WL Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

**Biaffine**

In [None]:
# GENIA
# Biaffine model on GENIA. For the overall metric the first two returned
# values (p and r, presumably precision and recall) are printed on their own
# lines ahead of the F1 line; the remaining metrics print only F1 and support.
print('GENIA Biaffine Nested Metrics')
biaffine_genia_pred, biaffine_genia_gold = read_biaffine_predictions(
    'genia_predictions_biaffine_bert_78_1.txt'
)
# Bundle predictions and gold once so each metric receives the same pair.
biaffine_genia_pair = (biaffine_genia_pred, biaffine_genia_gold)

p, r, f1, support = metric(*biaffine_genia_pair)
print(np.round(p*100,1))
print(np.round(r*100,1))
print(f'Biaffine GENIA f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*biaffine_genia_pair)
print(f'Biaffine GENIA Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*biaffine_genia_pair)
print(f'Biaffine GENIA SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*biaffine_genia_pair)
print(f'Biaffine GENIA DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*biaffine_genia_pair)
print(f'Biaffine GENIA Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*biaffine_genia_pair)
print(f'Biaffine GENIA Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*biaffine_genia_pair)
print(f'Biaffine GENIA Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*biaffine_genia_pair)
print(f'Biaffine GENIA Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

In [None]:
# Germeval
# Biaffine model on GERM. For the overall metric the first two returned values
# (p and r, presumably precision and recall) are printed on their own lines
# ahead of the F1 line; the remaining metrics print only F1 and support.
print('GERM Biaffine Nested Metrics')
biaffine_germ_pred, biaffine_germ_gold = read_biaffine_predictions(
    'germ_predictions_biaffine_bert_86_6.txt'
)
# Bundle predictions and gold once so each metric receives the same pair.
biaffine_germ_pair = (biaffine_germ_pred, biaffine_germ_gold)

p, r, f1, support = metric(*biaffine_germ_pair)
print(np.round(p*100,1))
print(np.round(r*100,1))
print(f'Biaffine GERM f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*biaffine_germ_pair)
print(f'Biaffine GERM Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*biaffine_germ_pair)
print(f'Biaffine GERM SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*biaffine_germ_pair)
print(f'Biaffine GERM DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*biaffine_germ_pair)
print(f'Biaffine GERM Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*biaffine_germ_pair)
print(f'Biaffine GERM Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*biaffine_germ_pair)
print(f'Biaffine GERM Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*biaffine_germ_pair)
print(f'Biaffine GERM Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

In [None]:
# WL
# Biaffine model on WL, scored against the gold returned by its own reader.
# For the overall metric the first two returned values (p and r, presumably
# precision and recall) are printed on their own lines ahead of the F1 line;
# the remaining metrics print only F1 and support.
print('WL Biaffine Nested Metrics')
biaffine_wl_pred, biaffine_wl_gold = read_biaffine_predictions(
    'wl_predictions_biaffine_bert.txt'
)
# Bundle predictions and gold once so each metric receives the same pair.
biaffine_wl_pair = (biaffine_wl_pred, biaffine_wl_gold)

p, r, f1, support = metric(*biaffine_wl_pair)
print(np.round(p*100,1))
print(np.round(r*100,1))
print(f'Biaffine WL f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*biaffine_wl_pair)
print(f'Biaffine WL Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*biaffine_wl_pair)
print(f'Biaffine WL SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*biaffine_wl_pair)
print(f'Biaffine WL DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, multilabel_f1, support = multilabel_metric(*biaffine_wl_pair)
print(f'Biaffine WL Multilabel f1-score: {np.round(multilabel_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*biaffine_wl_pair)
print(f'Biaffine WL Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*biaffine_wl_pair)
print(f'Biaffine WL Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*biaffine_wl_pair)
print(f'Biaffine WL Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*biaffine_wl_pair)
print(f'Biaffine WL Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

**MLC**

In [None]:
# GENIA
# MLC model on GENIA: the overall F1 comes first, then one F1 per
# task-specific metric. Every metric call is unpacked into four values; only
# the third (F1) and the fourth (support) are printed.
print('GENIA MLC Nested Metrics')
mlc_genia_pred, mlc_genia_gold = read_mlc_predictions('genia')
# Bundle predictions and gold once so each metric receives the same pair.
mlc_genia_pair = (mlc_genia_pred, mlc_genia_gold)

_, _, f1, support = metric(*mlc_genia_pair)
print(f'MLC GENIA f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*mlc_genia_pair)
print(f'MLC GENIA Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*mlc_genia_pair)
print(f'MLC GENIA SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*mlc_genia_pair)
print(f'MLC GENIA DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*mlc_genia_pair)
print(f'MLC GENIA Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*mlc_genia_pair)
print(f'MLC GENIA Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*mlc_genia_pair)
print(f'MLC GENIA Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*mlc_genia_pair)
print(f'MLC GENIA Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

# GERM
# MLC model on GERM: the overall F1 comes first, then one F1 per
# task-specific metric. Every metric call is unpacked into four values; only
# the third (F1) and the fourth (support) are printed.
print('GERM MLC Nested Metrics')
mlc_germ_pred, mlc_germ_gold = read_mlc_predictions('germ')
# Bundle predictions and gold once so each metric receives the same pair.
mlc_germ_pair = (mlc_germ_pred, mlc_germ_gold)

_, _, f1, support = metric(*mlc_germ_pair)
print(f'MLC GERM f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*mlc_germ_pair)
print(f'MLC GERM Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*mlc_germ_pair)
print(f'MLC GERM SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*mlc_germ_pair)
print(f'MLC GERM DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*mlc_germ_pair)
print(f'MLC GERM Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*mlc_germ_pair)
print(f'MLC GERM Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*mlc_germ_pair)
print(f'MLC GERM Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*mlc_germ_pair)
print(f'MLC GERM Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')
print()

# WL
# MLC model on WL, scored against the gold returned by its own reader.
# Every metric call is unpacked into four values; only the third (F1) and the
# fourth (support) are printed.
print('WL MLC Nested Metrics')
mlc_wl_pred, mlc_wl_gold = read_mlc_predictions('wl')
# Bundle predictions and gold once so each metric receives the same pair.
mlc_wl_pair = (mlc_wl_pred, mlc_wl_gold)

_, _, f1, support = metric(*mlc_wl_pair)
print(f'MLC WL f1-score: {np.round(f1*100,1)}, support: {support}')

_, _, flat_f1, support = flat_metric(*mlc_wl_pair)
print(f'MLC WL Flat f1-score: {np.round(flat_f1*100,1)}, support: {support}')

_, _, multilabel_f1, support = multilabel_metric(*mlc_wl_pair)
print(f'MLC WL Multilabel f1-score: {np.round(multilabel_f1*100,1)}, support: {support}')

_, _, snt_f1, support = same_nesting_type_metric(*mlc_wl_pair)
print(f'MLC WL SNT f1-score: {np.round(snt_f1*100,1)}, support: {support}')

_, _, dnt_f1, support = different_nesting_type_metric(*mlc_wl_pair)
print(f'MLC WL DNT f1-score: {np.round(dnt_f1*100,1)}, support: {support}')

_, _, nesting_f1, support = nesting_metric(*mlc_wl_pair)
print(f'MLC WL Nesting f1-score: {np.round(nesting_f1*100,1)}, support: {support}')

_, _, nested_f1, support = nested_metric(*mlc_wl_pair)
print(f'MLC WL Nested f1-score: {np.round(nested_f1*100,1)}, support: {support}')

_, _, inner_f1, support = inner_metric(*mlc_wl_pair)
print(f'MLC WL Inner f1-score: {np.round(inner_f1*100,1)}, support: {support}')

_, _, outer_f1, support = outer_metric(*mlc_wl_pair)
print(f'MLC WL Outer f1-score: {np.round(outer_f1*100,1)}, support: {support}')