### Setting the environment

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os 
os.chdir('/content/drive/MyDrive/NEREL/Cross_Domain_Nested_NER')

In [3]:
!pip install fastNLP==1.0.1 -q
!pip install transformers -q
!pip install sparse==0.13.0 -q
!pip install torch==1.11.0+cu113 --extra-index-url https://download.pytorch.org/whl/cu113 -q
!pip install llvmlite==0.38.1 --user -q
!pip install numba==0.55.2 --user -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/644.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━[0m [32m378.9/644.7 kB[0m [31m11.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m644.7/644.7 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m217.3/217.3 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.1/51.1 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for fastNLP (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m63.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━

In [4]:
import torch
print(torch.__version__)

1.11.0+cu113


In [5]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


In [6]:
!pip install torch-scatter --no-index -f https://data.pyg.org/whl/torch-{torch.__version__}.html 

Looking in links: https://data.pyg.org/whl/torch-1.11.0+cu113.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.11.0%2Bcu113/torch_scatter-2.0.9-cp310-cp310-linux_x86_64.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.0.9


In [7]:
import json
import warnings
import argparse
import collections

if 'p' in os.environ:
    os.environ['CUDA_VISIBLE_DEVICES'] = os.environ['p']
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
os.environ['MKL_THREADING_LAYER'] = 'GNU'
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import fastNLP
from fastNLP import cache_results, prepare_torch_dataloader
from fastNLP import Evaluator
from fastNLP import SortedSampler, BucketedBatchSampler

from model.model import CNNNer
from model.metrics import NERMetric
from preprocessing.ner_pipe import SpanNerPipe
from preprocessing.padder import Torch3DMatrixPadder
from model.metrics_utils import decode
from fastNLP import print as pr
from tqdm.notebook import tqdm


seed_value=42
np.random.seed(seed_value)
torch.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value)
os.environ['FASTNLP_GLOBAL_SEED'] = str(seed_value)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
def calculate(path, count=False):
    max_sent_len = 0
    total_lengths = 0
    total_ent_length = 0
    max_ent_length = 0
    num_ents = 0
    overlapped_ent_num = 0
    num_sent = 0
    counts = {}
    with open(path, 'r') as f:
        for line in f:
            num_sent += 1
            data = json.loads(line.strip())
            max_sent_len = max(max_sent_len, len(data['tokens']))
            total_lengths += len(data['tokens'])
            flags = [0]*len(data['tokens'])
            for ent in data['entity_mentions']:
                num_ents += 1
                if ent['entity_type'] not in counts.keys():
                    counts[ent['entity_type']] = 1
                else:
                    counts[ent['entity_type']] += 1
                start, end = ent['start'], ent['end']
                total_ent_length += ent['end'] - ent['start']
                max_ent_length = max(max_ent_length, ent['end'] - ent['start'])
                for i in range(start, end):
                    flags[i] += 1
            for ent in data['entity_mentions']:
                start, end = ent['start'], ent['end']
                if any([flags[i]>1 for i in range(start, end)]):
                    overlapped_ent_num += 1

    pr(f"For {path}")
    pr("total sentence ", num_sent)
    pr("average sentence length ", total_lengths/num_sent)
    pr("max sentence length ", max_sent_len)

    pr('num_entities ', num_ents)
    pr('average entity length  ', total_ent_length/num_ents)
    pr('max entity length  ', max_ent_length)
    pr("Number of nested entity ", overlapped_ent_num)

    pr("Number of tokens ", total_lengths)
    pr()
    if count:
        return counts

def get_stats(directory): 
    for name in ['train', 'dev', 'test']:
        path = f'{directory}/{name}.jsonl'
        if name == 'test':
            counts = calculate(path, count=True)
        else:
            calculate(path)
    return counts

In [9]:
counts_nerel = get_stats('datasets/outputs/nerel_common_labels')

In [10]:
counts_nerel_bio = get_stats('datasets/outputs/nerel_bio_common_labels')

### Fetching and preprocessing the dataset

In [11]:
###### HYPERPARAMS ######
lr = 5e-6
batch_size = 4
n_epochs = 10
warmup = 0.1
model_name = 'DeepPavlov/rubert-base-cased'
# model_name = 'cointegrated/rubert-tiny2'
cnn_depth = 3
cnn_dim = 100
logit_drop = 0
biaffine_size = 200
n_head = 4
accumulation_steps = 1
########################
non_ptm_lr_ratio = 100
schedule = 'linear'
weight_decay = 1e-2
size_embed_dim = 25
ent_thres = 0.5
kernel_size = 3
########################

def get_data(dataset_name, model_name):
    paths = f'datasets/outputs/{dataset_name}'
    pipe = SpanNerPipe(model_name=model_name)
    dl = pipe.process_from_file(paths)
    return dl, pipe.matrix_segs

def densify(x):
    x = x.todense().astype(np.float32)
    return x
    
def preprocess_data(dataset_name, model_name):

    dl, matrix_segs = get_data(dataset_name, model_name)

    dl.apply_field(densify, field_name='matrix', new_field_name='matrix', progress_bar='Densify')

    print(dl)

    label2idx = getattr(dl, 'ner_vocab') if hasattr(dl, 'ner_vocab') else getattr(dl, 'label2idx')
    print(f"{len(label2idx)} labels: {label2idx}, matrix_segs:{matrix_segs}")

    dls = {}
    for name, ds in dl.iter_datasets():
        ds.set_pad('matrix', pad_fn=Torch3DMatrixPadder(pad_val=ds.collator.input_fields['matrix']['pad_val'],
                                                        num_class=matrix_segs['ent'],
                                                        batch_size=batch_size))

        if name in ['train', 'dev']:
            _dl = prepare_torch_dataloader(ds, batch_size=batch_size, num_workers=0,
                                           batch_sampler=BucketedBatchSampler(ds, 'input_ids',
                                                                              batch_size=batch_size,
                                                                              num_batch_per_bucket=30),
                                           pin_memory=True, shuffle=True)
        else:
            _dl = prepare_torch_dataloader(ds, batch_size=1, num_workers=0, pin_memory=True, shuffle=False)

        dls[name] = _dl
    
    return dls, label2idx, matrix_segs

In [12]:
dls_nerel, label2idx_nerel, matrix_segs_nerel = preprocess_data('nerel_common_labels', model_name)

Downloading (…)okenizer_config.json:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/1.65M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



In total 3 datasets:
	dev has 476 instances.
	test has 471 instances.
	train has 3984 instances.

16 labels: {'AGE': 0, 'CITY': 1, 'COUNTRY': 2, 'DATE': 3, 'DISEASE': 4, 'FACILITY': 5, 'LOCATION': 6, 'NUMBER': 7, 'ORDINAL': 8, 'ORGANIZATION': 9, 'PERCENT': 10, 'PERSON': 11, 'PRODUCT': 12, 'PROFESSION': 13, 'STATE_OR_PROVINCE': 14, 'TIME': 15}, matrix_segs:{'ent': 16}


In [13]:
dls_nerel_bio, label2idx_nerel_bio, matrix_segs_nerel_bio = preprocess_data('nerel_bio_common_labels', model_name)



In total 3 datasets:
	dev has 663 instances.
	test has 662 instances.
	train has 4767 instances.

16 labels: {'AGE': 0, 'CITY': 1, 'COUNTRY': 2, 'DATE': 3, 'DISEASE': 4, 'FACILITY': 5, 'LOCATION': 6, 'NUMBER': 7, 'ORDINAL': 8, 'ORGANIZATION': 9, 'PERCENT': 10, 'PERSON': 11, 'PRODUCT': 12, 'PROFESSION': 13, 'STATE_OR_PROVINCE': 14, 'TIME': 15}, matrix_segs:{'ent': 16}


### Loading and evaluating the models

In [14]:
model_nerel_1 = CNNNer(model_name, num_ner_tag=matrix_segs_nerel['ent'], cnn_dim=cnn_dim, biaffine_size=biaffine_size,
               size_embed_dim=size_embed_dim, logit_drop=logit_drop,
               kernel_size=kernel_size, n_head=n_head, cnn_depth=cnn_depth)

model_nerel_1 = torch.load('weights/model_best_common_labels_bertbase_cnn_8965.pkl')

counter = collections.Counter()
for name, param in model_nerel_1.named_parameters():
    counter[name.split('.')[0]] += torch.numel(param)
print(counter)
print("Total param ", sum(counter.values()))

Downloading pytorch_model.bin:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of the model checkpoint at DeepPavlov/rubert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Counter({'pretrain_model': 177853440, 'cnn': 360600, 'multi_head_biaffine': 250000, 'head_mlp': 153800, 'tail_mlp': 153800, 'W': 42700, 'down_fc': 1616, 'size_embedding': 750})
Total param  178816706


In [15]:
# model_nerel_2 = CNNNer(model_name, num_ner_tag=matrix_segs_nerel_bio['ent'], cnn_dim=cnn_dim, biaffine_size=biaffine_size,
#                size_embed_dim=size_embed_dim, logit_drop=logit_drop,
#                kernel_size=kernel_size, n_head=n_head, cnn_depth=cnn_depth)

# model_nerel_2 = torch.load('weights/model_best_common_labels_berttiny_cnn_8036.pkl')

# counter = collections.Counter()
# for name, param in model_nerel_2.named_parameters():
#     counter[name.split('.')[0]] += torch.numel(param)
# print(counter)
# print("Total param ", sum(counter.values()))

In [16]:
allow_nested = True
metrics = {'ner': NERMetric(matrix_segs=matrix_segs_nerel, ent_thres=ent_thres, allow_nested=allow_nested)}

In [17]:
tester1 = fastNLP.Evaluator(model=model_nerel_1, driver='torch', dataloaders=dls_nerel['test'], metrics=metrics, device=0)
tester1.run()

Output()

{'f#ner': 89.23, 'rec#ner': 91.09, 'pre#ner': 87.45}

In [18]:
tester2 = fastNLP.Evaluator(model=model_nerel_1, driver='torch', dataloaders=dls_nerel_bio['test'], metrics=metrics, device=0)
tester2.run()

Output()

{'f#ner': 63.55, 'rec#ner': 53.59, 'pre#ner': 78.05}

### Getting predictions

In [19]:
from model.metrics_utils import decode

# given dataloader and model, return all predictions, sentence lengths, tokens and targets
def predict(dataloader, model):
    model.eval()
    outputs_list = []
    wordlen_list = []
    tokens_list = []
    targets_list = []
    with torch.no_grad():
        for i, item in enumerate(tqdm(dataloader)):
            inputs = item['input_ids'].to(device)
            indexes = item['indexes'].to(device)
            bpe_len = item['bpe_len'].to(device)
            word_len = item['word_len'].to(device)
            matrix = item['matrix'].to(device)
            tokens = item['tokens']
            target = item['ent_target']
            outputs = model(inputs, bpe_len, indexes, matrix)
            outputs_list.append(outputs['scores'])
            wordlen_list.append(word_len)
            tokens_list.append(tokens)
            targets_list.append(target)
    return outputs_list, wordlen_list, tokens_list, targets_list

# util function
def get_key_by_value(dictionary, value):
    return list(dictionary.keys())[list(dictionary.values()).index(value)]

# converting integer labels to string labels
def labels_to_str(pred_list, label2idx):
    pred_list_str = [None]*len(pred_list)

    for i in range(len(pred_list)):
        for s, e, ent_type in pred_list[i]:
            if pred_list_str[i] is not None:
                pred_list_str[i].append([s, e+1, get_key_by_value(label2idx, ent_type)])
            else:
                pred_list_str[i] = [[s, e+1, get_key_by_value(label2idx, ent_type)]]
        if pred_list_str[i] is None:
            pred_list_str[i] = []
    return pred_list_str
    
# decode score matrix into actual predictions
def decode_outputs(scores, word_lens):

    span_ids = [None]*len(scores)
    pred_list = [None]*len(scores)

    for i in range(len(scores)):
        # getting score matrix containing probabilities vector for each span cell
        scores[i] = scores[i].sigmoid()
        scores[i] = (scores[i] + scores[i].transpose(1, 2))/2
        # getting valid spans from score matrix based on max probability in prob vector
        span_ids[i] = decode(scores[i].max(dim=-1)[0], word_lens[i], allow_nested=True, thres=0.5)

        batch_preds = []
        for span_id, pred_score in zip(span_ids[i], scores[i].cpu().numpy()):
            temp_preds = set()
            for s, e, l in span_id:
                score = pred_score[s, e]
                ent_type = score.argmax()
                if score[ent_type]>=0.5:
                    temp_preds.add((s, e, ent_type))

            batch_preds.append(list(temp_preds))

        pred_list[i] = batch_preds[0]   
    
    return pred_list

### Calculating metrics per each category

In [20]:
def split_by_label(preds, targets, label2idx):
    ppl = {key:set() for key in range(len(label2idx))}
    tpl = {key:set() for key in range(len(label2idx))}

    for i in range(len(preds)):
        cur_target = set(map(tuple, targets[i]))
        cur_pred = set(map(tuple, preds[i]))
        for item in cur_pred:
            ppl[item[2]].add(item)
        for item in cur_target:
            tpl[item[2]].add(item)
    return ppl, tpl


def metrics_to_str(f1, pre, rec, label2idx):
    f1_str, pre_str, rec_str = {}, {}, {}

    for i in range(len(f1)):
#         if pre[i] != 0.0 else '-'
        f1_str[get_key_by_value(label2idx, i)] = f1[i]
        pre_str[get_key_by_value(label2idx, i)] = pre[i] 
        rec_str[get_key_by_value(label2idx, i)] = rec[i]
        
    return f1_str, pre_str, rec_str


def metrics_by_label(ppl, tpl, str_label=False, label2idx=None):
    tp = {key:0 for key in range(len(ppl))}
    pre = {key:0 for key in range(len(ppl))}
    rec = {key:0 for key in range(len(ppl))}
    f1 = {key:0 for key in range(len(ppl))}
    
    for i in range(len(ppl)):
        tp[i] += len(tpl[i].intersection(ppl[i]))
        pre[i] += len(ppl[i])
        rec[i] += len(tpl[i])
    
    for i in range(len(ppl)):
        pre[i] = tp[i]/(pre[i]+1e-6)
        rec[i] = tp[i]/(rec[i]+1e-6)
        f1[i] = 2*pre[i]*rec[i]/(pre[i]+rec[i]+1e-6)
        
        f1[i] = round(f1[i] * 100, 2)
        pre[i] = round(pre[i] * 100, 2)
        rec[i] = round(rec[i] * 100, 2)
    
    if str_label:
        f1_str, pre_str, rec_str = metrics_to_str(f1, pre, rec, label2idx)
        return f1_str, pre_str, rec_str
    else:
        return f1, pre, rec


def metrics_micro(pred_list, targets):
    tp = 0
    pre = 0
    rec = 0
    for pred, target in zip(pred_list, targets):
        cur_target = set(map(tuple, target))
        tp += len(cur_target.intersection(pred))
        pre += len(pred)
        rec += len(cur_target)
        
    pre = tp/(pre+1e-6)
    rec = tp/(rec+1e-6)
    f1 = 2*pre*rec/(pre+rec+1e-6)
    
    return f1, pre, rec

def squeeze_batch_dim(input_list):
    res = input_list
    for i in range(len(input_list)):
        res[i] = res[i][0]
    return res

In [21]:
# predictiong on NEREL using NEREL model
scores_nerel, word_lens_nerel, tokens_nerel, targets_nerel = predict(dls_nerel['test'], model_nerel_1)

tokens_nerel = squeeze_batch_dim(tokens_nerel)
targets_nerel = squeeze_batch_dim(targets_nerel)

pred_list_nerel = decode_outputs(scores_nerel, word_lens_nerel)

preds_per_label_nerel, targets_per_label_nerel = split_by_label(pred_list_nerel, targets_nerel, label2idx_nerel)
f1_score_nerel, precision_nerel, recall_nerel = metrics_by_label(preds_per_label_nerel, targets_per_label_nerel, str_label=True, label2idx=label2idx_nerel)

# predicting on NEREL-BIO using NEREL model
scores_nerel_bio, word_lens_nerel_bio, tokens_nerel_bio, targets_nerel_bio = predict(dls_nerel_bio['test'], model_nerel_1)

tokens_nerel_bio = squeeze_batch_dim(tokens_nerel_bio)
targets_nerel_bio = squeeze_batch_dim(targets_nerel_bio)

pred_list_nerel_bio = decode_outputs(scores_nerel_bio, word_lens_nerel_bio)

preds_per_label_nerel_bio, targets_per_label_nerel_bio = split_by_label(pred_list_nerel_bio, targets_nerel_bio, label2idx_nerel_bio)
f1_score_nerel_bio, precision_nerel_bio, recall_nerel_bio = metrics_by_label(preds_per_label_nerel_bio, targets_per_label_nerel_bio, str_label=True, label2idx=label2idx_nerel_bio)

  0%|          | 0/471 [00:00<?, ?it/s]

  0%|          | 0/662 [00:00<?, ?it/s]

In [22]:
common_labels = ['AGE', 'CITY', 'COUNTRY', 'DATE', 'DISO', 'DISEASE', 'FACILITY', 'LOCATION', 'NUMBER', 'ORDINAL', 'ORGANIZATION', 'PERCENT', 'PERSON', 'PRODUCT', 'PROFESSION', 'STATE_OR_PROVINCE', 'TIME']

In [23]:
metrics_nerel = {'f1': list(f1_score_nerel.values()), 'precision': list(precision_nerel.values()), 'recall': list(recall_nerel.values())}

df_metrics_nerel = pd.DataFrame(data=metrics_nerel, index=list(f1_score_nerel.keys()))

counts_nerel = dict(sorted(counts_nerel.items()))
df_metrics_nerel['counts'] = list(counts_nerel.values())

df_metrics_nerel = df_metrics_nerel[df_metrics_nerel.index.isin(common_labels)]
df_metrics_nerel

Unnamed: 0,f1,precision,recall,counts
AGE,96.92,96.92,96.92,111
CITY,96.34,94.05,98.75,184
COUNTRY,98.16,97.56,98.77,351
DATE,94.12,92.31,96.0,387
DISEASE,83.72,80.0,87.8,48
FACILITY,75.56,68.0,85.0,44
LOCATION,75.68,77.78,73.68,48
NUMBER,91.53,92.05,91.01,170
ORDINAL,84.71,87.8,81.82,74
ORGANIZATION,91.02,90.08,91.98,515


In [24]:
metrics_nerel_bio = {'f1': list(f1_score_nerel_bio.values()), 'precision': list(precision_nerel_bio.values()), 'recall': list(recall_nerel_bio.values())}


df_metrics_nerel_bio = pd.DataFrame(data=metrics_nerel_bio, index=list(f1_score_nerel_bio.keys()))

# counts_nerel_bio['GENE'] = 0
counts_nerel_bio = dict(sorted(counts_nerel_bio.items()))
df_metrics_nerel_bio['counts'] = list(counts_nerel_bio.values())

# df_metrics_nerel_bio = df_metrics_nerel_bio[df_metrics_nerel_bio.index.isin(common_labels)]
# df_metrics_nerel_bio = df_metrics_nerel_bio.rename(index={'DISO': 'DISEASE'})
df_metrics_nerel_bio

Unnamed: 0,f1,precision,recall,counts
AGE,83.67,85.42,82.0,63
CITY,66.67,66.67,66.67,7
COUNTRY,79.39,96.3,67.53,137
DATE,78.97,79.31,78.63,177
DISEASE,87.96,94.71,82.11,940
FACILITY,0.0,0.0,0.0,37
LOCATION,78.26,75.0,81.82,39
NUMBER,83.14,88.33,78.52,467
ORDINAL,87.5,83.33,92.11,84
ORGANIZATION,66.67,86.54,54.22,108


### Rendering and analyzing predictions using SpaCy

In [25]:
from spacy import displacy
from pathlib import Path
import random

def get_colors(labels): 
    colors = dict.fromkeys(labels)
    for key in colors.keys():
        rand = lambda: random.randint(100, 220)
        colors[key] = '#%02X%02X%02X' % (rand(), rand(), rand())
    return colors

def render_one_pred(inputs, preds, toFile=False, colors=None, filename=None):
    span_dict = {}
    span_dict["tokens"] = inputs
    span_dict["spans"] = []
    if preds:
        for i in range(len(preds)): 
            start = preds[i][0]
            end = preds[i][1]
            ent = preds[i][2]
            span_dict["spans"].append({"start_token": start, "end_token": end, "label": ent})
            
    if colors is None:
        colors = {'AGE': '#AC9D90', 'AWARD': '#CD6777', 'CITY': '#758B73', 'COUNTRY': '#AC6479', 'CRIME': '#72D5D6',
                  'DATE': '#CAAE9C', 'DISEASE': '#65B8C2', 'DISTRICT': '#BC64B1', 'EVENT': '#CA8F75', 'FACILITY': '#6DA283', 
                  'FAMILY': '#7688D8', 'IDEOLOGY': '#B9B072', 'LANGUAGE': '#919E96', 'LAW': '#7CA6CC', 'LOCATION': '#D5C1B1', 
                  'MONEY': '#926F6E', 'NATIONALITY': '#708DD8', 'NUMBER': '#AF75CB', 'ORDINAL': '#C76585', 'ORGANIZATION': '#7394BA',
                  'PENALTY': '#948A67', 'PERCENT': '#729384', 'PERSON': '#B2DABF', 'PRODUCT': '#CFA4C1', 'PROFESSION': '#968870', 
                  'RELIGION': '#90AEA4', 'STATE_OR_PROVINCE': '#A295A3', 'TIME': '#BCC89B', 'WORK_OF_ART': '#D9C187'}
    
    options = {"colors": colors, "compact": False}
    if toFile:
        svg = displacy.render(span_dict, style="span", manual=True, page=True, jupyter=False, options=options)
        if filename is not None:
            output_path = Path(f"{filename}.html")
        else:
            output_path = Path("no_filename.html")
        output_path.open("w", encoding="utf-8").write(svg)
    else:
        displacy.render(span_dict, style="span", manual=True, jupyter=True, options=options)
        
def render_all_preds(inputs, preds):
    for i in range(len(inputs)):
        render_one_pred(inputs[i], preds[i], toFile=False)
        print('-------------------------------------------------------------------------------------------------------------------------')
        
        
def filter_labels(annotations, keep):
    new_annotations = []
    for anns in annotations:
        temp = list(filter(lambda ann: ann[2] in keep, anns))
        new_annotations.append(temp)
    return new_annotations

In [26]:
pred_list_nerel_str = labels_to_str(pred_list_nerel, label2idx_nerel)
targets_nerel_str = labels_to_str(targets_nerel, label2idx_nerel)

# render_all_preds(tokens_nerel, pred_list_nerel_str)

In [27]:
pred_list_nerel_bio_str = labels_to_str(pred_list_nerel_bio, label2idx_nerel_bio)
targets_nerel_bio_str = labels_to_str(targets_nerel_bio, label2idx_nerel)

# render_all_preds(tokens_nerel_bio, pred_list_nerel_bio_str)

In [28]:
def render_one_comparison(tokens, preds, targets, toFile=False, filename='none'):
    if toFile == True:
        pred_filename = filename + '_pred'
        target_filename = filename + '_target'
    else:
        pred_filename = None
        target_filename = None
    print('--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------')
    render_one_pred(tokens, preds, toFile=toFile, filename=pred_filename)
    print('---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------')
    render_one_pred(tokens, targets, toFile=toFile, filename=target_filename)
    print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
    print()                         
    print()

In [29]:
def compare_preds_targets(all_tokens, all_preds, all_targets, error='FN'):
    j = 0
    for tokens, preds, targets in zip(all_tokens, all_preds, all_targets):
        preds = list(map(tuple, preds))
        targets = list(map(tuple, targets))
        
        if error == 'FN':
            if len(preds) < len(targets): # and ('DISEASE' in [entry[2] for entry in preds]) or ('DISEASE' in [entry[2] for entry in targets]):
                print(j)
                render_one_comparison(tokens, preds, targets, toFile=False)   
                
        elif error == 'FP':
            if len(preds) > len(targets): # and ('DISEASE' in [entry[2] for entry in preds]) or ('DISEASE' in [entry[2] for entry in targets]):
                print(j)
                render_one_comparison(tokens, preds, targets, toFile=False)         
        else:
            if sorted(preds) != sorted(targets):
                if len(preds) == len(targets): 
                    print(j)
                    render_one_comparison(tokens, preds, targets, toFile=False)
        j += 1

### NEREL-C

In [30]:
# FN
print(' '.join(tokens_nerel[468]))
print(pred_list_nerel_str[468])
print(targets_nerel_str[468])

render_one_comparison(tokens_nerel[468], pred_list_nerel_str[468], targets_nerel_str[468])

Словацкий тренер Жолт Хорняк стал новым главным тренером футбольного клуба " Бананц " ( Ереван ) . Контракт с 40 летним тренером был подписан по системе " 1 + 1 " , - сообщает пресс - служба столичного клуба .
[[6, 13, 'PROFESSION'], [1, 2, 'PROFESSION'], [19, 21, 'AGE'], [14, 15, 'CITY'], [11, 12, 'ORGANIZATION'], [2, 4, 'PERSON'], [6, 8, 'PROFESSION'], [21, 22, 'PROFESSION']]
[[2, 4, 'PERSON'], [6, 13, 'PROFESSION'], [11, 12, 'ORGANIZATION'], [14, 15, 'CITY'], [19, 21, 'AGE'], [1, 2, 'PROFESSION'], [6, 8, 'PROFESSION'], [21, 22, 'PROFESSION'], [7, 8, 'PROFESSION']]
--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [31]:
# FP

print(' '.join(tokens_nerel[8]))
print(pred_list_nerel_str[8])
print(targets_nerel_str[8])
      
render_one_comparison(tokens_nerel[8], pred_list_nerel_str[8], targets_nerel_str[8])

Рокфеллер также основал несколько международных и благотворительных организаций : Americas Society , влиятельную Трёхстороннюю комиссию и некоммерческую организацию помощи малообеспеченным ньюйоркцам New York City Partnership .
[[21, 25, 'ORGANIZATION'], [22, 23, 'CITY'], [9, 11, 'ORGANIZATION'], [9, 10, 'COUNTRY'], [20, 21, 'CITY'], [13, 15, 'ORGANIZATION'], [21, 24, 'CITY'], [21, 23, 'CITY'], [0, 1, 'PERSON']]
[[0, 1, 'PERSON'], [9, 11, 'ORGANIZATION'], [13, 15, 'ORGANIZATION'], [20, 21, 'CITY'], [21, 25, 'ORGANIZATION'], [21, 24, 'CITY']]
--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [32]:
# CLOS

print(' '.join(tokens_nerel[211]))
print(pred_list_nerel_str[211])
print(targets_nerel_str[211])
      
render_one_comparison(tokens_nerel[211], pred_list_nerel_str[211], targets_nerel_str[211])
render_one_comparison(tokens_nerel[318], pred_list_nerel_str[318], targets_nerel_str[318], toFile=False, filename=None)
render_one_comparison(tokens_nerel[370], pred_list_nerel_str[370], targets_nerel_str[370], toFile=False, filename=None)

Инновационный туалет — результат одного из исследовательских проектов , финансируемых благотворительным фондом Билла Гейтса и его супруги Мелинды . По словам Гейтса , работа над проектом заняла годы и сейчас он готов к выходу на рынок . За семь лет его фонд инвестировал около $ 200 млн в исследовательские проекты , разрабатывающие способы утилизации человеческих отходов : Должен признать , десять лет назад я и не думал , что буду так много знать о человеческих фекалиях . И уж тем более не думал , что Мелинде придётся просить меня перестать говорить о туалетах и отходах за обеденным столом .
[[37, 40, 'DATE'], [17, 18, 'PERSON'], [60, 63, 'DATE'], [10, 14, 'ORGANIZATION'], [85, 86, 'PERSON'], [21, 22, 'PERSON'], [27, 28, 'DATE'], [12, 14, 'PERSON']]
[[12, 14, 'PERSON'], [17, 18, 'PERSON'], [21, 22, 'PERSON'], [27, 28, 'DATE'], [60, 63, 'DATE'], [85, 86, 'PERSON'], [38, 39, 'NUMBER'], [10, 18, 'ORGANIZATION']]
--------------------------------------------------- PREDICTED ANNOTATION -----

---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [33]:
# WLCS

print(' '.join(tokens_nerel[11]))
print(pred_list_nerel_str[11])
print(targets_nerel_str[11])
      
render_one_comparison(tokens_nerel[11], pred_list_nerel_str[11], targets_nerel_str[11])

40-летний премьер - министр Чувашии Иван Моторин награждён медалью ордена " За заслуги перед Чувашской Республикой " . Как передаёт корреспондент ИА REGNUM , соответствующий указ подписал глава Чувашии Михаил Игнатьев . В указе отмечено , что награда присуждается " за многолетний плодотворный труд во благо Чувашской Республики " .
[[1, 5, 'PROFESSION'], [0, 1, 'AGE'], [21, 23, 'ORGANIZATION'], [5, 7, 'PERSON'], [4, 5, 'STATE_OR_PROVINCE'], [46, 48, 'STATE_OR_PROVINCE'], [1, 4, 'PROFESSION'], [28, 29, 'STATE_OR_PROVINCE'], [27, 29, 'PROFESSION'], [29, 31, 'PERSON'], [14, 16, 'COUNTRY']]
[[0, 1, 'AGE'], [1, 4, 'PROFESSION'], [5, 7, 'PERSON'], [21, 23, 'ORGANIZATION'], [27, 29, 'PROFESSION'], [29, 31, 'PERSON'], [46, 48, 'STATE_OR_PROVINCE'], [1, 5, 'PROFESSION'], [14, 16, 'STATE_OR_PROVINCE'], [20, 23, 'PROFESSION'], [28, 29, 'STATE_OR_PROVINCE'], [4, 5, 'STATE_OR_PROVINCE'], [20, 21, 'PROFESSION']]
--------------------------------------------------- PREDICTED ANNOTATION ----------------

---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [34]:
# Triple WLCS 

print(' '.join(tokens_nerel[358]))
print(pred_list_nerel_str[358])
print(targets_nerel_str[358])
      
render_one_comparison(tokens_nerel[358], pred_list_nerel_str[358], targets_nerel_str[358])

Куинджи нашли в Заречье Архип Куинджи « Ай - Петри . Крым » ; 1898 по 1908 годы ; Бумага на холсте , масло ; 39 × 53 ; Государственный Русский музей В посёлке Заречье Одинцовского района задержан преступник , похитивший накануне картину Архипа Куинджи « Ай - Петри . Крым » из Третьяковской галереи в Москве . Картина также обнаружена , она не повреждена . Злоумышленник прятал её на территории строящегося объекта . По предварительным данным , преступление совершено с корыстными целями .
[[43, 45, 'PERSON'], [50, 51, 'LOCATION'], [25, 28, 'NUMBER'], [34, 35, 'CITY'], [53, 54, 'PERSON'], [53, 55, 'FACILITY'], [29, 32, 'ORGANIZATION'], [27, 28, 'NUMBER'], [56, 57, 'CITY'], [46, 49, 'FACILITY'], [11, 12, 'LOCATION'], [41, 42, 'DATE'], [7, 10, 'FACILITY'], [0, 1, 'PERSON'], [14, 18, 'DATE'], [4, 6, 'PERSON']]
[[0, 1, 'PERSON'], [14, 18, 'DATE'], [25, 26, 'NUMBER'], [27, 28, 'NUMBER'], [29, 32, 'ORGANIZATION'], [43, 45, 'PERSON'], [53, 55, 'ORGANIZATION'], [56, 57, 'CITY'], [3, 4, 'CITY'], [4,

---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [35]:
# ERROR IN ANNOTATION and FN

print(' '.join(tokens_nerel[423]))
print(pred_list_nerel_str[423])
print(targets_nerel_str[423])
      
render_one_comparison(tokens_nerel[423], pred_list_nerel_str[423], targets_nerel_str[423])

Работал в посольствах Перу во Франции , Великобритании , Боливии и Бразилии . В 1964 году назначен послом Перу в Швейцарии , в 1969—1971 — посол Перу в СССР и Польше , а в 1977—1979 — в Венесуэле . Кроме того , в 1971—1982 годах Хавьер Перес де Куэльяр возглавлял перуанскую делегацию в ООН .
[[20, 21, 'COUNTRY'], [9, 10, 'COUNTRY'], [50, 51, 'COUNTRY'], [28, 29, 'COUNTRY'], [17, 18, 'PROFESSION'], [11, 12, 'COUNTRY'], [37, 38, 'COUNTRY'], [42, 45, 'DATE'], [3, 4, 'COUNTRY'], [7, 8, 'COUNTRY'], [53, 54, 'ORGANIZATION'], [26, 27, 'COUNTRY'], [30, 31, 'COUNTRY'], [13, 16, 'DATE'], [33, 35, 'DATE'], [22, 24, 'DATE'], [45, 49, 'PERSON'], [5, 6, 'COUNTRY'], [17, 21, 'PROFESSION'], [18, 19, 'COUNTRY'], [25, 26, 'PROFESSION']]
[[5, 6, 'COUNTRY'], [7, 8, 'COUNTRY'], [9, 10, 'COUNTRY'], [11, 12, 'COUNTRY'], [13, 16, 'DATE'], [17, 18, 'PROFESSION'], [20, 21, 'COUNTRY'], [22, 24, 'DATE'], [25, 26, 'PROFESSION'], [28, 29, 'COUNTRY'], [30, 31, 'COUNTRY'], [33, 35, 'DATE'], [37, 38, 'COUNTRY'], [42, 

---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [36]:
# QUESTIONABLE (CLOS) or (FP)

print(' '.join(tokens_nerel[308]))
print(pred_list_nerel_str[308])
print(targets_nerel_str[308])
      
render_one_comparison(tokens_nerel[308], pred_list_nerel_str[308], targets_nerel_str[308])

Юань Лин ( фото ) — кандидат в доктора наук в области финансов престижного Пекинского университета , магистр статистики Университета Райса в Хьюстоне , штат Техас . Получив степень магистра в 2013 году , Юань вернулась в Китай , работала в компании Zhengren Investment в Тяньцзине , в 2015 году переехала в Пекин , чтобы продолжить учёбу .
[[17, 19, 'PROFESSION'], [6, 13, 'PROFESSION'], [22, 23, 'CITY'], [25, 26, 'STATE_OR_PROVINCE'], [47, 50, 'DATE'], [14, 15, 'CITY'], [52, 53, 'CITY'], [19, 21, 'ORGANIZATION'], [17, 18, 'PROFESSION'], [30, 33, 'DATE'], [37, 38, 'COUNTRY'], [34, 35, 'PERSON'], [42, 44, 'ORGANIZATION'], [17, 21, 'PROFESSION'], [14, 16, 'ORGANIZATION'], [8, 13, 'PROFESSION'], [0, 2, 'PERSON'], [45, 46, 'CITY']]
[[0, 2, 'PERSON'], [6, 13, 'PROFESSION'], [14, 15, 'CITY'], [17, 19, 'PROFESSION'], [22, 23, 'CITY'], [25, 26, 'STATE_OR_PROVINCE'], [30, 33, 'DATE'], [34, 35, 'PERSON'], [37, 38, 'COUNTRY'], [42, 44, 'ORGANIZATION'], [45, 46, 'CITY'], [47, 50, 'DATE'], [52, 53, 'C

---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [37]:
# WLOS

print(' '.join(tokens_nerel[420]))
print(pred_list_nerel_str[420])
print(targets_nerel_str[420])
      
render_one_comparison(tokens_nerel[420], pred_list_nerel_str[420], targets_nerel_str[420])
render_one_comparison(tokens_nerel[149], pred_list_nerel_str[149], targets_nerel_str[149])

Похороны пройдут в субботу , 7 марта 2020 года , на кладбище Presbítero Maestro в Лиме и будут сопровождаться соответствующими главе государства почестями .
[[2, 9, 'DATE'], [20, 22, 'PROFESSION'], [11, 14, 'FACILITY'], [15, 16, 'CITY'], [12, 14, 'FACILITY']]
[[2, 9, 'DATE'], [11, 14, 'FACILITY'], [15, 16, 'CITY'], [12, 13, 'PROFESSION'], [13, 14, 'PERSON'], [20, 22, 'PROFESSION']]
--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [38]:
# compare_preds_targets(tokens_nerel, pred_list_nerel_str, targets_nerel_str, error=None)

### NEREL-BIO-C

In [39]:
# FN

print(' '.join(tokens_nerel_bio[41]))
print(pred_list_nerel_bio_str[41])
print(targets_nerel_bio_str[41])
      
render_one_comparison(tokens_nerel_bio[41], pred_list_nerel_bio_str[41], targets_nerel_bio_str[41])

Среди пациентов с исходно пониженной АТ ( 2-я группа , n=17 ) имелась подгруппа с отсутствием спонтанной агрегации ( n=9 ) и с дизрегуляцией системы гемостаза в виде повышения спонтанной АТ и фибриногена ( n=8 ) , у которых восстановление неврологических функций было ниже , чем в 1-й группе .
[[19, 20, 'NUMBER'], [10, 11, 'NUMBER'], [7, 8, 'ORDINAL'], [47, 48, 'ORDINAL'], [34, 35, 'NUMBER']]
[[7, 8, 'ORDINAL'], [47, 48, 'ORDINAL'], [1, 2, 'PERSON'], [23, 24, 'DISEASE'], [7, 9, 'PERSON'], [8, 9, 'PERSON'], [13, 14, 'PERSON'], [47, 49, 'PERSON'], [48, 49, 'PERSON']]
--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [40]:
# FP

print(' '.join(tokens_nerel_bio[635]))
print(pred_list_nerel_bio_str[635])
print(targets_nerel_bio_str[635])
      
render_one_comparison(tokens_nerel_bio[635], pred_list_nerel_bio_str[635], targets_nerel_bio_str[635])

Помимо стандартного офтальмологического обследования , всем пациентам проводили количественную оценку увеосклерального оттока по разработанной и обоснованной нами методике .   Было исследовано влияние различных гипотензивных препаратов на увеосклеральный отток : селективного бета-1-адреноблокатора бетаксолола 0,5 % , ингибитора карбоангидразы бринзоламида 1 % , аналога простагландинов травопроста 0,004 % и фиксированной комбинации неселективного бета - блокатора тимолола 0,5 % с аналогом простагландинов травопростом 0,004 % ( ФКТТ ) в сравнении .
[[60, 61, 'PRODUCT'], [39, 41, 'PERCENT'], [32, 33, 'PRODUCT'], [55, 57, 'PERCENT'], [54, 55, 'PRODUCT'], [44, 45, 'PRODUCT'], [64, 65, 'PRODUCT'], [61, 63, 'PERCENT'], [33, 35, 'PERCENT'], [27, 29, 'DISEASE'], [45, 47, 'PERCENT'], [38, 39, 'PRODUCT'], [10, 12, 'DISEASE']]
[[6, 7, 'PERSON'], [33, 35, 'PERCENT'], [39, 41, 'PERCENT'], [45, 47, 'PERCENT'], [55, 57, 'PERCENT'], [61, 63, 'PERCENT']]
-------------------------------------------------

---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [41]:
# WLOS

print(' '.join(tokens_nerel_bio[622]))
print(pred_list_nerel_bio_str[622])
print(targets_nerel_bio_str[622])
      
render_one_comparison(tokens_nerel_bio[622], pred_list_nerel_bio_str[622], targets_nerel_bio_str[622])

В качестве вторичных антител применяли биотинилированные антитела к иммуноглобулинам мыши и кролика ( " Dako " LSAB + KIT , PEROXIDASE ) .
[[20, 21, 'PRODUCT'], [14, 19, 'PRODUCT']]
[[14, 15, 'ORGANIZATION']]
--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [42]:
# WLCS

print(' '.join(tokens_nerel_bio[115]))
print(pred_list_nerel_bio_str[115])
print(targets_nerel_bio_str[115])
      
render_one_comparison(tokens_nerel_bio[115], pred_list_nerel_bio_str[115], targets_nerel_bio_str[115])
render_one_comparison(tokens_nerel_bio[179], pred_list_nerel_bio_str[179], targets_nerel_bio_str[179])

Основная доля прямых медицинских расходов на лечение T1DM приходилась на медицинские технологии .
[[7, 8, 'PRODUCT']]
[[7, 8, 'DISEASE']]
--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [43]:
# CLOS

print(' '.join(tokens_nerel_bio[599]))
print(pred_list_nerel_bio_str[599])
print(targets_nerel_bio_str[599])
      
render_one_comparison(tokens_nerel_bio[599], pred_list_nerel_bio_str[599], targets_nerel_bio_str[599])

На протяжении 2011–2012 годов в Руанде была проведена вакцинация 227   246 девочек всеми тремя дозами вакцины против вируса папилломы человека ( ВПЧ ) .
[[22, 23, 'DISEASE'], [11, 12, 'NUMBER'], [18, 21, 'DISEASE'], [5, 6, 'COUNTRY'], [14, 15, 'NUMBER'], [9, 12, 'NUMBER'], [0, 4, 'DATE']]
[[9, 12, 'NUMBER'], [14, 15, 'NUMBER'], [12, 13, 'PERSON'], [2, 4, 'DATE'], [5, 6, 'COUNTRY'], [19, 21, 'DISEASE'], [19, 20, 'DISEASE']]
--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [44]:
# QUESTIONABLE (CLOS or FP)

print(' '.join(tokens_nerel_bio[125]))
print(pred_list_nerel_bio_str[125])
print(targets_nerel_bio_str[125])
      
render_one_comparison(tokens_nerel_bio[125], pred_list_nerel_bio_str[125], targets_nerel_bio_str[125])

Представлены результаты эндоваскулярных вмешательств , выполненных у 220 пациентов с хронической ишемией нижних конечностей и окклюзионно - стенотическими поражениями аорто - подвздошного артериального сегмента .
[[7, 8, 'NUMBER'], [18, 19, 'DISEASE'], [15, 19, 'DISEASE'], [11, 14, 'DISEASE'], [11, 12, 'DISEASE'], [10, 14, 'DISEASE']]
[[7, 8, 'NUMBER'], [10, 14, 'DISEASE'], [8, 9, 'PERSON'], [15, 19, 'DISEASE'], [11, 12, 'DISEASE'], [10, 12, 'DISEASE']]
--------------------------------------------------- PREDICTED ANNOTATION ----------------------------------------------------


---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [45]:
# ERROR IN ANNOTATION

print(' '.join(tokens_nerel_bio[186]))
print(pred_list_nerel_bio_str[186])
print(targets_nerel_bio_str[186])
      
render_one_comparison(tokens_nerel_bio[186], pred_list_nerel_bio_str[186], targets_nerel_bio_str[186])

Изучено функциональное состояние периферического микроциркуляторного кровотока ( МЦК ) методом лазер - допплеровской флоуметрии и уровень парциального давления кислорода ( ТсрО2 ) у 82 больных хронической ишемией нижних конечностей ( ХИНК ) IIБ - IV стадий до чрескожной транслюминальной баллонной ангиопластики ( ЧТБА ) со стентированием артерий нижних конечностей и в отдаленные сроки после реваскуляризации ( через 12 месяцев ) .
[[32, 35, 'ORDINAL'], [26, 29, 'DISEASE'], [50, 53, 'DATE'], [23, 24, 'NUMBER'], [26, 27, 'DISEASE'], [37, 41, 'DISEASE'], [40, 41, 'DISEASE'], [30, 31, 'DISEASE'], [56, 59, 'DATE'], [34, 35, 'ORDINAL'], [25, 29, 'DISEASE']]
[[23, 24, 'NUMBER'], [24, 25, 'PERSON'], [25, 29, 'DISEASE'], [26, 27, 'DISEASE'], [25, 27, 'DISEASE'], [30, 31, 'DISEASE'], [32, 33, 'ORDINAL'], [34, 35, 'ORDINAL'], [25, 29, 'DISEASE'], [32, 36, 'DISEASE'], [56, 59, 'DATE']]
--------------------------------------------------- PREDICTED ANNOTATION ------------------------------------------

---------------------------------------------------- TARGET ANNOTATION ------------------------------------------------------


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




In [46]:
# compare_preds_targets(tokens_nerel_bio, pred_list_nerel_bio_str, targets_nerel_bio_str, error=None)