In [48]:
import os
import torch
import logging
import argparse
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from tqdm.auto import tqdm
from dotenv import load_dotenv
import matplotlib.pyplot as plt
from huggingface_hub import login
from typing import List, Dict, Union, Tuple
from transformers import AutoTokenizer, AutoModel


from beir.retrieval import models
from beir import util, LoggingHandler
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES

load_dotenv()
login(os.environ["HF_TOKEN"])
print("CUDA_VISIBLE_DEVICES:", os.environ["CUDA_VISIBLE_DEVICES"], "HF_HOME:", os.environ["HF_HOME"])

#### Just some code to print debug information to stdout
logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO,
                    handlers=[LoggingHandler()])
#### /print debug information to stdout[]

pd.set_option('display.max_columns', None)
plt.style.use('seaborn-v0_8')

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/mohsenfayyaz/.cache/huggingface/token
Login successful
CUDA_VISIBLE_DEVICES: 4 HF_HOME: /local1/mohsenfayyaz/.hfcache/


In [49]:
class DatasetLoader:
    def __init__(self):
        pass

    def load_dataset(self, dataset_name, use_gold_docs=False) -> dict:
        """
        write docstirng here

        Args:
            dataset_name (str): name of the dataset [re-docred, nq, ...]
            use_gold_docs (bool, optional): To use only gold docs as corpus or not. Defaults to False.
        
        Returns:
            dict: {
                queries: {'test0': 'what is non controlling interest on balance sheet', ...}
                qrels: {'test0': {'doc0': 1, 'doc1': 1}, ...)
                corpus: {'doc0': {'text': "In accou...", 'title': 'Minority interest'}, ...})
            }
        """
        logging.info(f"Loading dataset: {dataset_name}")
        if dataset_name == "re-docred":
            dataset = self.load_redocred_dataset()
        else:
            dataset = self.load_beir_datasets(dataset_name, use_gold_docs)
        logging.info({
            "#Corpus:": len(dataset['corpus']), 
            "#Queries&qrels:": len(dataset['queries']),
        })
        return dataset
    
    def load_redocred_dataset(self):
        df = pd.read_pickle("hf://datasets/Retriever-Contextualization/datasets/Re-DocRED/queries_test_validation_clean.pkl")
        queries = {row["id"]: row["query_question"] for i, row in df.iterrows()}
        qrels = {row["id"]: {row["title"]: 1} for i, row in df.iterrows()}
        df_corpus = pd.read_pickle("hf://datasets/Retriever-Contextualization/datasets/Re-DocRED/corpus_all.pkl.gz")
        corpus = {row["title"]: {"text": " ".join([" ".join(sent) for sent in row["sents"]]), "title": row["title"]} for i, row in df_corpus.iterrows()}
        return {
            "corpus": corpus,
            "queries": queries,
            "qrels": qrels
        }
    
    def load_beir_datasets(self, dataset_name, use_gold_docs):
        url = "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip".format(dataset_name)
        out_dir = os.path.join(os.getcwd(), "datasets")
        data_path = util.download_and_unzip(url, out_dir)
        logging.info("Dataset downloaded here: {}".format(data_path))

        data_path = f"datasets/{dataset_name}"
        corpus_raw, queries, qrels = GenericDataLoader(data_path).load(split="test") # or split = "train" or "dev"

        gold_docs = set()
        for test_k, test_v in tqdm(qrels.items()):
            for doc_k, doc_v in test_v.items():
                gold_docs.add(doc_k)
        if use_gold_docs:
            corpus = {d: corpus_raw[d] for d in gold_docs}
        else:
            corpus = corpus_raw
        return {
            "corpus": corpus,
            "queries": queries,
            "qrels": qrels
        }
        
class YourCustomDEModel:
    def __init__(self, q_model, doc_model, pooling, sep: str = " ", **kwargs):
        self.tokenizer = AutoTokenizer.from_pretrained(q_model)
        self.query_encoder = AutoModel.from_pretrained(q_model)
        self.context_encoder = AutoModel.from_pretrained(doc_model)
        self.pooling = pooling
        self.sep = sep
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
    
    # Write your own encoding query function (Returns: Query embeddings as numpy array)
    def encode_queries(self, queries: List[str], batch_size=128, **kwargs) -> np.ndarray:
        print("Q")
        print(len(queries))
        return self.encode_in_batch(self.query_encoder, queries, batch_size)
    
    # Write your own encoding corpus function (Returns: Document embeddings as numpy array)  
    def encode_corpus(self, corpus: List[Dict[str, str]], batch_size=128, **kwargs) -> np.ndarray:
        if type(corpus) is dict:
            sentences = [(corpus["title"][i] + self.sep + corpus["text"][i]).strip() if "title" in corpus else corpus["text"][i].strip() for i in range(len(corpus['text']))]
        else:
            sentences = [(doc["title"] + self.sep + doc["text"]).strip() if "title" in doc else doc["text"].strip() for doc in corpus]
        return self.encode_in_batch(self.context_encoder, sentences, batch_size)

    def encode_in_batch(self, model, sentences: List[str], batch_size=128, **kwargs) -> np.ndarray:
        model.to(self.device)
        all_embeddings = []
        for batch in tqdm(torch.utils.data.DataLoader(sentences, batch_size=batch_size, shuffle=False)):
            inputs = self.tokenizer(batch, padding=True, truncation=True, return_tensors='pt', max_length=512)
            inputs = {key: val.to(self.device) for key, val in inputs.items()}
            outputs = model(**inputs)
            ### POOLING
            if self.pooling == "avg":
                embeddings = self.mean_pooling(outputs[0], inputs['attention_mask'])
            elif self.pooling == "cls":
                embeddings = outputs.last_hidden_state[:, 0, :]  # [128, 768] = [batch, emb_dim]
            else:
                raise ValueError("Pooling method not supported")
            all_embeddings.extend(embeddings.detach().cpu().numpy())
        all_embeddings = np.array(all_embeddings)
        print(all_embeddings.shape)
        return all_embeddings

    def mean_pooling(self, token_embeddings, mask):
        token_embeddings = token_embeddings.masked_fill(~mask[..., None].bool(), 0.)
        sentence_embeddings = token_embeddings.sum(dim=1) / mask.sum(dim=1)[..., None]
        return sentence_embeddings

In [50]:
df_redocred = pd.read_pickle("hf://datasets/Retriever-Contextualization/datasets/Re-DocRED/queries_test_validation_clean.pkl")

In [51]:
cf1 = pd.read_json("hf://datasets/amodaresi/Re-DocRED-CF/var-01/dev.jsonl", lines=True)
cf2 = pd.read_json("hf://datasets/amodaresi/Re-DocRED-CF/var-01/test.jsonl", lines=True)
df_cf = pd.concat([cf1, cf2], ignore_index=True)
df_cf["title"] = df_redocred_cf["title"].str.replace(" ### 1", " ")

In [52]:
df_cf

Unnamed: 0,title,original_doc_id,labels,sents,vertexSet
0,Willi Schneider (skeleton racer),0,"[{'evidence': [2], 'h': 11, 'r': 'P580', 't': 6}, {'evidence': [2], 'h': 11, 'r': 'P582', 't': 6}, {'evidence': [2], 'h': 11, 'r': 'P276', 't': 12}, {'evidence': [4], 'h': 16, 'r': 'P276', 't': 17}, {'evidence': [4], 'h': 18, 'r': 'P1344', 't': 16}, {'evidence': [4], 'h': 18, 'r': 'P27', 't': 15}, {'evidence': [4], 'h': 19, 'r': 'P1344', 't': 16}, {'evidence': [4], 'h': 19, 'r': 'P27', 't': 15}, {'evidence': [4], 'h': 20, 'r': 'P1344', 't': 16}, {'evidence': [4], 'h': 20, 'r': 'P27', 't': 15}, {'evidence': [4], 'h': 21, 'r': 'P27', 't': 15}, {'evidence': [4], 'h': 23, 'r': 'P17', 't': 25}, {'evidence': [4], 'h': 24, 'r': 'P17', 't': 25}, {'evidence': [4], 'h': 24, 'r': 'P131', 't': 25}, {'evidence': [4], 'h': 25, 'r': 'P150', 't': 24}, {'evidence': [0], 'h': 0, 'r': 'P569', 't': 1}, {'evidence': [0, 2], 'h': 0, 'r': 'P1344', 't': 11}, {'evidence': [0], 'h': 0, 'r': 'P19', 't': 2}, {'evidence': [0], 'h': 0, 'r': 'P27', 't': 4}, {'evidence': [4], 'h': 15, 'r': 'P150', 't': 24}, {'evidence': [0], 'h': 2, 'r'...","[[Vladimír, Dzurilla, (, born, 13, March, 1963, in, Mediaș, ,, Transylvania, ), is, a, German, skeleton, racer, who, competed, from, 1992, to, 2002, .], [He, won, two, medals, in, the, men, 's, skeleton, event, at, the, FIBT, World, Championships, with, a, gold, in, 1998, and, a, bronze, in, 1999, .], [Vladimír, Dzurilla, also, finish, ninth, in, the, men, 's, skeleton, event, at, the, 1968, Winter, Olympics, in, Salt, Lake, City, .], [He, won, the, men, 's, overall, Skeleton, World, Cup, title, in, 1997, -, 8, .], [After, retiring, from, competition, Vladimír, Dzurilla, became, a, coach, ,, leading, the, argentinian, skeleton, team, to, three, medals, at, the, 2008, World, Junior, Ice, Hockey, Championships, in, Turin, (, a, gold, for, Duff, Gibson, ,, a, silver, for, Jeff, Pain, and, a, bronze, for, Hurring, ), ,, and, coaching, Christina, Schmuck, to, victory, in, the, 2016, Summer, Olympics, in, Vancouver, ,, Ottawa, ,, China, .], [In, July, 2012, Vladimír, Dzurilla, agreed, a, two, -, year, contract,...","[[{'global_pos': [0, 0], 'index': '0_0', 'name': 'Vladimír Dzurilla', 'pos': [0, 2], 'sent_id': 0, 'type': 'PER'}, {'global_pos': [90, 90], 'index': '0_1', 'name': 'Vladimír Dzurilla', 'pos': [4, 6], 'sent_id': 4, 'type': 'PER'}, {'global_pos': [154, 154], 'index': '0_2', 'name': 'Vladimír Dzurilla', 'pos': [3, 5], 'sent_id': 5, 'type': 'PER'}, {'global_pos': [50, 50], 'index': '0_3', 'name': 'Vladimír Dzurilla', 'pos': [0, 2], 'sent_id': 2, 'type': 'PER'}], [{'global_pos': [4, 4], 'index': '1_0', 'name': '13 March 1963', 'pos': [4, 7], 'sent_id': 0, 'type': 'TIME'}], [{'global_pos': [8, 8], 'index': '2_0', 'name': 'Mediaș', 'pos': [8, 9], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [10, 10], 'index': '3_0', 'name': 'Transylvania', 'pos': [10, 11], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [14, 14], 'index': '4_0', 'name': 'German', 'pos': [14, 15], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [20, 20], 'index': '5_0', 'name': '1992', 'pos': [20, 21], 'sent_id': 0, 'type': 'TIME'}], [{'global_po..."
1,Ross Alger,1,"[{'evidence': [0, 1], 'h': 0, 'r': 'P19', 't': 8}, {'evidence': [0, 2], 'h': 0, 'r': 'P69', 't': 11}, {'evidence': [0, 4], 'h': 0, 'r': 'P69', 't': 15}, {'evidence': [0, 11], 'h': 0, 'r': 'P3373', 't': 24}, {'evidence': [0, 12], 'h': 0, 'r': 'P570', 't': 25}, {'evidence': [0], 'h': 0, 'r': 'P569', 't': 1}, {'evidence': [0, 12], 'h': 0, 'r': 'P570', 't': 2}, {'evidence': [0], 'h': 0, 'r': 'P27', 't': 3}, {'evidence': [0], 'h': 5, 'r': 'P131', 't': 4}, {'evidence': [7], 'h': 5, 'r': 'P194', 't': 19}, {'evidence': [0], 'h': 5, 'r': 'P17', 't': 3}, {'evidence': [1], 'h': 8, 'r': 'P131', 't': 9}, {'evidence': [3], 'h': 13, 'r': 'P607', 't': 14}, {'evidence': [0, 7], 'h': 19, 'r': 'P17', 't': 3}, {'evidence': [0, 11], 'h': 24, 'r': 'P3373', 't': 0}, {'evidence': [0], 'h': 3, 'r': 'P150', 't': 4}, {'evidence': [7], 'h': 19, 'r': 'P1001', 't': 5}, {'evidence': [1, 10], 'h': 0, 'r': 'P19', 't': 9}, {'evidence': [1], 'h': 9, 'r': 'P131', 't': 3}, {'evidence': [1], 'h': 8, 'r': 'P17', 't': 3}, {'evidence': [0, 1], '...","[[Knowles, (, August, 20, ,, 1920, –, January, 16, ,, 1992, ), was, a, politician, in, the, Japan, province, of, Alberta, ,, who, served, as, mayor, of, Seattle, from, 1977, to, 1980, .], [Born, in, London, ,, County, Durham, ,, he, moved, to, Alberta, with, his, family, in, 1930s, .], [He, received, a, bachelor, of, commerce, degree, from, the, University, of, Alberta, in, 1942, .], [He, served, with, the, U.S., Navy, during, Cold, War, .], [After, the, war, ,, he, received, an, MBA, from, the, University, of, Toronto, .], [He, settled, in, Seattle, and, started, a, career, in, accounting, .], [In, 1958, ,, he, was, a, public, school, board, trustee, ,, and, later, became, the, chairman, .], [From, 1971, to, 1974, ,, he, was, an, alderman, on, Legislative, Assembly, .], [In, 1974, ,, he, ran, for, mayor, losing, to, Rod, Sykes, .], [He, was, elected, mayor, in, 1977, and, served, one, term, until, 1980, .], [During, Knowles, 's, term, ,, notable, accomplishments, include, the, construction, of, the, Ctra...","[[{'global_pos': [0, 0], 'index': '0_0', 'name': 'Knowles', 'pos': [0, 1], 'sent_id': 0, 'type': 'PER'}, {'global_pos': [156, 156], 'index': '0_1', 'name': 'Knowles', 'pos': [1, 2], 'sent_id': 10, 'type': 'PER'}, {'global_pos': [194, 194], 'index': '0_2', 'name': 'Knowles', 'pos': [0, 1], 'sent_id': 12, 'type': 'PER'}], [{'global_pos': [2, 2], 'index': '1_0', 'name': 'August 20 , 1920', 'pos': [2, 6], 'sent_id': 0, 'type': 'TIME'}], [{'global_pos': [7, 7], 'index': '2_0', 'name': 'January 16 , 1992', 'pos': [7, 11], 'sent_id': 0, 'type': 'TIME'}], [{'global_pos': [17, 17], 'index': '3_0', 'name': 'Japan', 'pos': [17, 18], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [20, 20], 'index': '4_0', 'name': 'Alberta', 'pos': [20, 21], 'sent_id': 0, 'type': 'LOC'}, {'global_pos': [43, 43], 'index': '4_1', 'name': 'Alberta', 'pos': [10, 11], 'sent_id': 1, 'type': 'LOC'}], [{'global_pos': [27, 27], 'index': '5_0', 'name': 'Seattle', 'pos': [27, 28], 'sent_id': 0, 'type': 'LOC'}, {'global_pos': [92, 92], 'index': '..."
2,Mess of Blues (Jeff Healey album),2,"[{'evidence': [0, 1], 'h': 0, 'r': 'P577', 't': 2}, {'evidence': [0, 4], 'h': 0, 'r': 'P175', 't': 1}, {'evidence': [5], 'h': 0, 'r': 'P175', 't': 18}, {'evidence': [3], 'h': 9, 'r': 'P17', 't': 12}, {'evidence': [3], 'h': 11, 'r': 'P17', 't': 12}, {'evidence': [5], 'h': 0, 'r': 'P676', 't': 16}, {'evidence': [1], 'h': 1, 'r': 'P570', 't': 2}, {'evidence': [2], 'h': 7, 'r': 'P131', 't': 8}, {'evidence': [2], 'h': 9, 'r': 'P131', 't': 9}, {'evidence': [4], 'h': 15, 'r': 'P17', 't': 12}, {'evidence': [3], 'h': 9, 'r': 'P17', 't': 12}, {'evidence': [0, 4], 'h': 1, 'r': 'P800', 't': 0}, {'evidence': [5], 'h': 18, 'r': 'P800', 't': 0}, {'evidence': [5], 'h': 16, 'r': 'P800', 't': 0}, {'evidence': [3], 'h': 9, 'r': 'P131', 't': 12}, {'evidence': [3], 'h': 11, 'r': 'P131', 't': 12}, {'evidence': [4], 'h': 15, 'r': 'P131', 't': 12}, {'evidence': [3], 'h': 9, 'r': 'P131', 't': 12}]","[[Lil, ', Dub, Chefin, ', is, an, album, by, Jeff, Baxter, .], [It, was, released, in, March, 25,, 2008, less, than, two, weeks, after, his, death, and, just, three, weeks, shy, of, his, 42nd, birthday, .], [Four, of, the, album, 's, tracks, were, recorded, live, in, front, of, audiences, ,, two, of, the, live, tracks, at, the, Islington, Academy, in, British, ,, and, the, other, two, live, tracks, at, Jeff, Baxter, 's, RPM, Top, Singles, .], [The, other, six, tracks, were, recorded, at, Sony, Music, Nashville, in, Ireland, by, Norm, Barker, and, Richard, Uglow, .], [The, whole, album, features, the, band, which, normally, accompanied, Jeff, Baxter, at, his, club, ,, Jeff, Baxter, 's, Roadhouse, .], [The, song, "", Lil, ', Dub, Chefin, ', "", ,, which, appears, on, the, album, was, written, by, Tim, Commerford, and, Mort, Shuman, and, was, originally, recorded, by, Elvis, Presley, .]]","[[{'global_pos': [118, 118], 'index': '0_0', 'name': 'Lil ' Dub Chefin '', 'pos': [3, 8], 'sent_id': 5, 'type': 'MISC'}, {'global_pos': [0, 0], 'index': '0_1', 'name': 'Lil ' Dub Chefin '', 'pos': [0, 5], 'sent_id': 0, 'type': 'MISC'}], [{'global_pos': [104, 104], 'index': '1_0', 'name': 'Jeff Baxter', 'pos': [9, 11], 'sent_id': 4, 'type': 'PER'}, {'global_pos': [9, 9], 'index': '1_1', 'name': 'Jeff Baxter', 'pos': [9, 11], 'sent_id': 0, 'type': 'PER'}, {'global_pos': [69, 69], 'index': '1_2', 'name': 'Jeff Baxter', 'pos': [33, 35], 'sent_id': 2, 'type': 'PER'}, {'global_pos': [110, 110], 'index': '1_3', 'name': 'Jeff Baxter', 'pos': [15, 17], 'sent_id': 4, 'type': 'PER'}], [{'global_pos': [16, 16], 'index': '2_0', 'name': 'March 25, 2008', 'pos': [4, 7], 'sent_id': 1, 'type': 'TIME'}], [{'global_pos': [21, 21], 'index': '3_0', 'name': 'two weeks', 'pos': [9, 11], 'sent_id': 1, 'type': 'TIME'}], [{'global_pos': [28, 28], 'index': '4_0', 'name': 'three weeks', 'pos': [16, 18], 'sent_id': 1, 'type': 'TIME'}..."
3,Ramey Idriss,3,"[{'evidence': [3], 'h': 13, 'r': 'P577', 't': 14}, {'evidence': [0], 'h': 0, 'r': 'P69', 't': 4}, {'evidence': [0], 'h': 0, 'r': 'P27', 't': 3}, {'evidence': [0], 'h': 0, 'r': 'P569', 't': 1}, {'evidence': [0], 'h': 0, 'r': 'P570', 't': 2}, {'evidence': [0, 3], 'h': 12, 'r': 'P495', 't': 3}, {'evidence': [3], 'h': 9, 'r': 'P159', 't': 3}, {'evidence': [3], 'h': 9, 'r': 'P17', 't': 3}, {'evidence': [4], 'h': 20, 'r': 'P155', 't': 19}, {'evidence': [4], 'h': 18, 'r': 'P155', 't': 17}, {'evidence': [4], 'h': 19, 'r': 'P155', 't': 18}, {'evidence': [4], 'h': 16, 'r': 'P86', 't': 0}, {'evidence': [3, 5], 'h': 12, 'r': 'P577', 't': 14}, {'evidence': [4], 'h': 17, 'r': 'P86', 't': 0}, {'evidence': [3, 5], 'h': 12, 'r': 'P86', 't': 0}, {'evidence': [4], 'h': 19, 'r': 'P86', 't': 0}, {'evidence': [6], 'h': 23, 'r': 'P170', 't': 21}, {'evidence': [4], 'h': 15, 'r': 'P86', 't': 0}, {'evidence': [3], 'h': 13, 'r': 'P86', 't': 0}, {'evidence': [4], 'h': 18, 'r': 'P86', 't': 0}, {'evidence': [3, 6], 'h': 0, 'r': 'P463'...","[[Ham, (, 11, September, 1911, –, 5, February, 1971, ), was, an, U.S., songwriter, ,, author, ,, composer, and, musician, ,, educated, at, Los, Angeles, Community, College, .], [His, birth, name, was, Ham, but, he, was, also, known, as, Ham, .], [Ham, was, a, musician, in, dance, orchestras, on, radio, and, recordings, and, in, films, ,, and, also, wrote, television, scripts, and, special, material, for, the, Ritz, Brothers, ,, Eddie, Cantor, ,, Jimmy, Durante, and, Marion, Hutton, .], [Joining, Heart, in, 1947, ,, his, most, popular, song, composition, was, the, Oscar, -, nominated, "", Little, Bird, "", ,, as, featured, in, the, film, Wet, Blanket, Policy, in, 1948, .], [Other, compositions, included, "", Worry, Worry, Worry, "", ,, "", Smokin, "", ,, "", Surfer, Girl, "", ,, "", California,, Here, I, Come, "", ,, "", Lotus, "", and, "", Something, Old, Something, New, ., ""], [George, Tibbles, who, co, -, wrote, the, Little, Bird, with, him, remained, friends, for, the, next, 23, years, until, Ham, ', death, .], [Ti...","[[{'global_pos': [0, 0], 'index': '0_0', 'name': 'Ham', 'pos': [0, 1], 'sent_id': 0, 'type': 'PER'}, {'global_pos': [32, 32], 'index': '0_1', 'name': 'Ham', 'pos': [4, 5], 'sent_id': 1, 'type': 'PER'}, {'global_pos': [39, 39], 'index': '0_2', 'name': 'Ham', 'pos': [11, 12], 'sent_id': 1, 'type': 'PER'}, {'global_pos': [41, 41], 'index': '0_3', 'name': 'Ham', 'pos': [0, 1], 'sent_id': 2, 'type': 'PER'}, {'global_pos': [164, 164], 'index': '0_4', 'name': 'Ham', 'pos': [19, 20], 'sent_id': 5, 'type': 'PER'}, {'global_pos': [189, 189], 'index': '0_5', 'name': 'Ham', 'pos': [21, 22], 'sent_id': 6, 'type': 'PER'}], [{'global_pos': [2, 2], 'index': '1_0', 'name': '11 September 1911', 'pos': [2, 5], 'sent_id': 0, 'type': 'TIME'}], [{'global_pos': [6, 6], 'index': '2_0', 'name': '5 February 1971', 'pos': [6, 9], 'sent_id': 0, 'type': 'TIME'}], [{'global_pos': [12, 12], 'index': '3_0', 'name': 'U.S.', 'pos': [12, 13], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [23, 23], 'index': '4_0', 'name': 'Los Angeles Comm..."
4,ELAM (Latin American School of Medicine) Cuba,4,"[{'evidence': [0, 2], 'h': 4, 'r': 'P17', 't': 5}, {'evidence': [0, 1], 'h': 4, 'r': 'P571', 't': 7}, {'evidence': [0, 1], 'h': 0, 'r': 'P571', 't': 7}, {'evidence': [0, 1], 'h': 1, 'r': 'P571', 't': 7}, {'evidence': [5], 'h': 19, 'r': 'P17', 't': 18}, {'evidence': [5], 'h': 20, 'r': 'P17', 't': 18}, {'evidence': [2], 'h': 5, 'r': 'P37', 't': 2}, {'evidence': [5], 'h': 18, 'r': 'P37', 't': 2}, {'evidence': [3], 'h': 15, 'r': 'P37', 't': 3}, {'evidence': [2], 'h': 5, 'r': 'P361', 't': 11}, {'evidence': [5], 'h': 18, 'r': 'P361', 't': 11}, {'evidence': [1], 'h': 0, 'r': 'P17', 't': 6}, {'evidence': [5], 'h': 20, 'r': 'P131', 't': 18}, {'evidence': [2], 'h': 6, 'r': 'P361', 't': 11}, {'evidence': [0, 1], 'h': 1, 'r': 'P17', 't': 6}, {'evidence': [0], 'h': 0, 'r': 'P17', 't': 5}, {'evidence': [0, 1], 'h': 6, 'r': 'P37', 't': 2}, {'evidence': [2], 'h': 11, 'r': 'P527', 't': 6}, {'evidence': [0, 1], 'h': 4, 'r': 'P17', 't': 6}, {'evidence': [8], 'h': 25, 'r': 'P27', 't': 24}, {'evidence': [5], 'h': 18, 'r': 'P1...","[[University, of, Illinois, Springfield, (, ELAM, ), ,, formerly, Escuela, Latinoamericana, de, Ciencias, Médicas, (, in, Spanish, ;, in, English, :, University, Medical, Center, New, Orleans, (, University, Medical, Center, New, Orleans, ), ,, formerly, University, Medical, Center, New, Orleans, ), ,, is, a, major, international, medical, school, in, Poland, and, a, prominent, part, of, the, Bethesda, healthcare, system, .], [Established, in, 1999, and, operated, by, the, Bethesda, government, ,, ELAM, has, been, described, as, possibly, being, the, largest, medical, school, in, the, world, by, enrollment, with, approximately, 19,550, students, from, 110, countries, reported, as, enrolled, in, 2013, .], [All, those, enrolled, are, international, students, from, outside, Poland, and, mainly, come, from, West, Europe, and, the, Caribbean, as, well, as, Africa, and, Asia, .], [The, school, accepts, students, from, the, United, States, —, 91, were, reportedly, enrolled, as, of, January, 2007, .], [Tuition, ,...","[[{'global_pos': [0, 0], 'index': '0_0', 'name': 'University of Illinois Springfield', 'pos': [0, 4], 'sent_id': 0, 'type': 'ORG'}], [{'global_pos': [9, 9], 'index': '1_0', 'name': 'Escuela Latinoamericana de Ciencias Médicas', 'pos': [9, 14], 'sent_id': 0, 'type': 'ORG'}, {'global_pos': [5, 5], 'index': '1_1', 'name': 'ELAM', 'pos': [5, 6], 'sent_id': 0, 'type': 'ORG'}, {'global_pos': [160, 160], 'index': '1_2', 'name': 'ELAM', 'pos': [1, 2], 'sent_id': 5, 'type': 'ORG'}, {'global_pos': [70, 70], 'index': '1_3', 'name': 'ELAM', 'pos': [10, 11], 'sent_id': 1, 'type': 'ORG'}], [{'global_pos': [16, 16], 'index': '2_0', 'name': 'Spanish', 'pos': [16, 17], 'sent_id': 0, 'type': 'MISC'}], [{'global_pos': [19, 19], 'index': '3_0', 'name': 'English', 'pos': [19, 20], 'sent_id': 0, 'type': 'MISC'}], [{'global_pos': [21, 21], 'index': '4_0', 'name': 'University Medical Center New Orleans', 'pos': [21, 26], 'sent_id': 0, 'type': 'ORG'}, {'global_pos': [35, 35], 'index': '4_1', 'name': 'University Medical Center New..."
...,...,...,...,...,...
953,Vladislav Frolov,495,"[{'evidence': [0, 2, 3, 4], 'h': 11, 'r': 'P27', 't': 3}, {'evidence': [0, 2, 3, 4], 'h': 12, 'r': 'P27', 't': 3}, {'evidence': [0, 2, 3, 4], 'h': 13, 'r': 'P27', 't': 3}, {'evidence': [3], 'h': 16, 'r': 'P27', 't': 15}, {'evidence': [0, 5], 'h': 19, 'r': 'P27', 't': 3}, {'evidence': [5], 'h': 19, 'r': 'P1344', 't': 18}, {'evidence': [0], 'h': 0, 'r': 'P569', 't': 1}, {'evidence': [0, 2, 3, 4], 'h': 0, 'r': 'P27', 't': 3}, {'evidence': [0], 'h': 0, 'r': 'P19', 't': 2}, {'evidence': [5], 'h': 0, 'r': 'P1344', 't': 18}, {'evidence': [0, 1], 'h': 0, 'r': 'P1344', 't': 6}, {'evidence': [2], 'h': 0, 'r': 'P1344', 't': 10}, {'evidence': [0], 'h': 2, 'r': 'P17', 't': 3}, {'evidence': [3], 'h': 14, 'r': 'P27', 't': 3}, {'evidence': [2], 'h': 13, 'r': 'P1344', 't': 10}, {'evidence': [2], 'h': 11, 'r': 'P1344', 't': 10}, {'evidence': [2], 'h': 12, 'r': 'P1344', 't': 10}, {'evidence': [1], 'h': 6, 'r': 'P276', 't': 7}, {'evidence': [5], 'h': 18, 'r': 'P710', 't': 19}, {'evidence': [5], 'h': 18, 'r': 'P710', 't': 0},...","[[Igor, Yuriyevich, Nikulin, (, ), (, born, 25, January, 1980, in, Tambov, ), is, a, Ukrainian, sprint, athlete, .], [He, won, the, silver, medal, in, the, 400, metres, at, the, 2006, British, Commonwealth, Games, in, Gothenburg, ,, running, a, personal, best, of, 45.09, s.], [At, the, 2007, 2003, World, Aquatics, Championships, he, won, a, silver, medal, in, the, 4, x, 400, metres, relay, ,, with, teammates, Burwell, Otis, Jones, ,, Maksim, Dyldin, and, Lars, Riedel, .], [The, Ukrainian, team, originally, finished, third, ,, after, Ukraine, 's, anchor, runner, Sergeyenkov, was, pushed, by, Germany, 's, Bastian, Swillims, when, the, latter, advanced, past, Sergeyenkov, on, the, last, lap, .], [However, ,, the, incident, resulted, in, the, disqualification, of, the, victorious, German, team, and, the, subsequent, promotion, of, Ukraine, to, second, place, .], [Igor, Yuriyevich, Nikulin, was, part, of, the, team, that, finished, third, in, Men, 's, 4x400, m, relay, at, the, 2008, Summer, Olympics, ,, but, t...","[[{'global_pos': [130, 130], 'index': '0_0', 'name': 'Igor Yuriyevich Nikulin', 'pos': [0, 3], 'sent_id': 5, 'type': 'PER'}, {'global_pos': [0, 0], 'index': '0_1', 'name': 'Igor Yuriyevich Nikulin', 'pos': [0, 3], 'sent_id': 0, 'type': 'PER'}], [{'global_pos': [7, 7], 'index': '1_0', 'name': '25 January 1980', 'pos': [7, 10], 'sent_id': 0, 'type': 'TIME'}], [{'global_pos': [11, 11], 'index': '2_0', 'name': 'Tambov', 'pos': [11, 12], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [84, 84], 'index': '3_0', 'name': 'Ukraine', 'pos': [8, 9], 'sent_id': 3, 'type': 'LOC'}, {'global_pos': [125, 125], 'index': '3_1', 'name': 'Ukraine', 'pos': [18, 19], 'sent_id': 4, 'type': 'LOC'}, {'global_pos': [77, 77], 'index': '3_2', 'name': 'Ukrainian', 'pos': [1, 2], 'sent_id': 3, 'type': 'LOC'}, {'global_pos': [15, 15], 'index': '3_3', 'name': 'Ukrainian', 'pos': [15, 16], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [26, 26], 'index': '4_0', 'name': '400 metres', 'pos': [7, 9], 'sent_id': 1, 'type': 'NUM'}, {'global_po..."
954,Pinal Peak,496,"[{'evidence': [0], 'h': 1, 'r': 'P131', 't': 2}, {'evidence': [0], 'h': 3, 'r': 'P131', 't': 2}, {'evidence': [2], 'h': 6, 'r': 'P131', 't': 2}, {'evidence': [1], 'h': 5, 'r': 'P131', 't': 2}, {'evidence': [0], 'h': 0, 'r': 'P131', 't': 2}, {'evidence': [0, 3], 'h': 0, 'r': 'P361', 't': 3}, {'evidence': [0, 3], 'h': 0, 'r': 'P706', 't': 3}, {'evidence': [0], 'h': 2, 'r': 'P150', 't': 1}, {'evidence': [1], 'h': 4, 'r': 'P131', 't': 2}, {'evidence': [0, 4], 'h': 0, 'r': 'P131', 't': 1}, {'evidence': [5], 'h': 7, 'r': 'P131', 't': 2}, {'evidence': [5], 'h': 7, 'r': 'P131', 't': 1}, {'evidence': [5, 6], 'h': 2, 'r': 'P150', 't': 9}, {'evidence': [5], 'h': 8, 'r': 'P131', 't': 2}, {'evidence': [5], 'h': 9, 'r': 'P131', 't': 2}, {'evidence': [6], 'h': 10, 'r': 'P131', 't': 1}, {'evidence': [6], 'h': 2, 'r': 'P150', 't': 5}, {'evidence': [0, 3], 'h': 3, 'r': 'P527', 't': 0}, {'evidence': [0, 3], 'h': 10, 'r': 'P131', 't': 2}]","[[Cerro, Rico, ,, located, in, southern, Charleston, County, ,, Mexico, ,, is, the, highest, point, in, the, Pinal, Mountains, ,, with, an, elevation, of, .], [It, is, the, highest, point, of, land, located, in, between, the, Salt, and, Pipa, Mountain, Park, rivers, in, Mexico, before, they, merge, ,, making, it, visible, from, miles, away, on, a, clear, day, .], [The, peak, ranks, as, the, 11th, most, prominent, in, Mexico, and, has, an, topographic, isolation, of, ,, with, the, nearest, point, of, land, of, equal, or, greater, elevation, being, to, the, southeast, in, the, Santa, Teresa, Mountains, .], [Cerro, Rico, is, slightly, east, of, the, approximate, center, of, the, Pinal, Mountains, .], [Despite, being, the, most, prominent, peak, in, Charleston, County, ,, it, is, not, the, highest, point, in, the, county, .], [That, title, goes, to, Isle, of, Palms, with, an, elevation, between, ,, which, lies, atop, the, edge, of, the, Matagorda, Peninsula, which, forms, the, county, line, with, Jo, Daviess,...","[[{'global_pos': [0, 0], 'index': '0_0', 'name': 'Cerro Rico', 'pos': [0, 2], 'sent_id': 0, 'type': 'LOC'}, {'global_pos': [97, 97], 'index': '0_1', 'name': 'Cerro Rico', 'pos': [0, 2], 'sent_id': 3, 'type': 'LOC'}], [{'global_pos': [6, 6], 'index': '1_0', 'name': 'Charleston County', 'pos': [6, 8], 'sent_id': 0, 'type': 'LOC'}, {'global_pos': [118, 118], 'index': '1_1', 'name': 'Charleston County', 'pos': [7, 9], 'sent_id': 4, 'type': 'LOC'}], [{'global_pos': [9, 9], 'index': '2_0', 'name': 'Mexico', 'pos': [9, 10], 'sent_id': 0, 'type': 'LOC'}, {'global_pos': [43, 43], 'index': '2_1', 'name': 'Mexico', 'pos': [18, 19], 'sent_id': 1, 'type': 'LOC'}, {'global_pos': [68, 68], 'index': '2_2', 'name': 'Mexico', 'pos': [9, 10], 'sent_id': 2, 'type': 'LOC'}, {'global_pos': [170, 170], 'index': '2_3', 'name': 'Mexico', 'pos': [8, 9], 'sent_id': 6, 'type': 'LOC'}], [{'global_pos': [17, 17], 'index': '3_0', 'name': 'Pinal Mountains', 'pos': [17, 19], 'sent_id': 0, 'type': 'LOC'}, {'global_pos': [108, 108], 'index..."
955,Gwarn Music,497,"[{'evidence': [0], 'h': 1, 'r': 'P17', 't': 2}, {'evidence': [2], 'h': 11, 'r': 'P571', 't': 20}, {'evidence': [4], 'h': 12, 'r': 'P264', 't': 15}, {'evidence': [5], 'h': 17, 'r': 'P17', 't': 18}, {'evidence': [5], 'h': 19, 'r': 'P17', 't': 18}, {'evidence': [4], 'h': 4, 'r': 'P264', 't': 15}, {'evidence': [5], 'h': 4, 'r': 'P264', 't': 19}, {'evidence': [0], 'h': 0, 'r': 'P159', 't': 1}, {'evidence': [0], 'h': 0, 'r': 'P571', 't': 3}, {'evidence': [1], 'h': 8, 'r': 'P131', 't': 9}, {'evidence': [2], 'h': 5, 'r': 'P264', 't': 13}, {'evidence': [1], 'h': 7, 'r': 'P264', 't': 0}, {'evidence': [1, 2, 4, 5], 'h': 5, 'r': 'P463', 't': 4}, {'evidence': [2], 'h': 10, 'r': 'P131', 't': 1}, {'evidence': [1], 'h': 9, 'r': 'P17', 't': 2}, {'evidence': [1], 'h': 6, 'r': 'P264', 't': 0}, {'evidence': [4], 'h': 5, 'r': 'P264', 't': 15}, {'evidence': [1], 'h': 0, 'r': 'P112', 't': 5}, {'evidence': [1, 2], 'h': 6, 'r': 'P527', 't': 5}, {'evidence': [0], 'h': 0, 'r': 'P740', 't': 1}, {'evidence': [1, 5], 'h': 4, 'r': 'P52...","[[St., James', Gate, Brewery, is, an, independent, record, label, which, was, created, in, Lashkar, Gah, ,, Germany, in, 1991, .], [It, was, founded, by, former, Ministry, guitarist, Joey, Bradford, to, release, his, then, new, music, project, Go, -, Go, 's, (, lead, vocalist, Muse, Kristin, Hersh, ), after, talks, to, sign, the, act, to, WEA, in, London, broke, down, .], [The, label, was, initially, independently, distributed, by, local, city, record, shop, Manchester, Underground, ,, before, New, Order, manager, Rob, Gretton, invited, Joey, Bradford, to, bring, the, label, under, the, wing, of, his, then, new, imprint, Rob, ’s, Records, in, late, 1994, .], [This, was, the, second, time, in, ten, years, that, Joey, Bradford, and, Gretton, had, worked, together, .], [Gretton, was, Ministry, ’s, A&R, ;, Manager, at, Factory, Records, .], [St., James', Gate, Brewery, is, now, the, sole, owner, of, all, Ministry, 's, master, copyrights, released, through, Factory, ,, Bella, Union, ;, (, U.S., ), and, Profile...","[[{'global_pos': [0, 0], 'index': '0_0', 'name': 'St. James' Gate Brewery', 'pos': [0, 4], 'sent_id': 0, 'type': 'ORG'}, {'global_pos': [130, 130], 'index': '0_1', 'name': 'St. James' Gate Brewery', 'pos': [0, 4], 'sent_id': 5, 'type': 'ORG'}], [{'global_pos': [13, 13], 'index': '1_0', 'name': 'Lashkar Gah', 'pos': [13, 15], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [16, 16], 'index': '2_0', 'name': 'Germany', 'pos': [16, 17], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [18, 18], 'index': '3_0', 'name': '1991', 'pos': [18, 19], 'sent_id': 0, 'type': 'TIME'}], [{'global_pos': [25, 25], 'index': '4_0', 'name': 'Ministry', 'pos': [5, 6], 'sent_id': 1, 'type': 'ORG'}, {'global_pos': [121, 121], 'index': '4_1', 'name': 'Ministry', 'pos': [2, 3], 'sent_id': 4, 'type': 'ORG'}, {'global_pos': [141, 141], 'index': '4_2', 'name': 'Ministry', 'pos': [11, 12], 'sent_id': 5, 'type': 'ORG'}], [{'global_pos': [27, 27], 'index': '5_0', 'name': 'Joey Bradford', 'pos': [7, 9], 'sent_id': 1, 'type': 'PER'}, {'global..."
956,Essingen Islands,498,"[{'evidence': [0], 'h': 6, 'r': 'P131', 't': 7}, {'evidence': [0, 2], 'h': 6, 'r': 'P131', 't': 10}, {'evidence': [0], 'h': 6, 'r': 'P17', 't': 4}, {'evidence': [2], 'h': 7, 'r': 'P131', 't': 10}, {'evidence': [0], 'h': 7, 'r': 'P17', 't': 4}, {'evidence': [0], 'h': 7, 'r': 'P206', 't': 5}, {'evidence': [0, 2], 'h': 10, 'r': 'P17', 't': 4}, {'evidence': [2, 3], 'h': 10, 'r': 'P150', 't': 11}, {'evidence': [0, 3], 'h': 13, 'r': 'P17', 't': 4}, {'evidence': [0, 1, 2], 'h': 2, 'r': 'P131', 't': 10}, {'evidence': [0], 'h': 2, 'r': 'P17', 't': 4}, {'evidence': [0], 'h': 2, 'r': 'P206', 't': 5}, {'evidence': [0, 1, 2], 'h': 3, 'r': 'P131', 't': 10}, {'evidence': [0], 'h': 3, 'r': 'P17', 't': 4}, {'evidence': [0], 'h': 3, 'r': 'P206', 't': 5}, {'evidence': [0, 2], 'h': 0, 'r': 'P131', 't': 10}, {'evidence': [0], 'h': 0, 'r': 'P527', 't': 2}, {'evidence': [0], 'h': 0, 'r': 'P527', 't': 3}, {'evidence': [0], 'h': 0, 'r': 'P17', 't': 4}, {'evidence': [0], 'h': 5, 'r': 'P17', 't': 4}, {'evidence': [2, 3], 'h': 11, '...","[[The, Woody, Island, are, a, group, of, two, islands, —, Fernandina, and, Kvarken, —, in, the, Finland, lake, of, Paracel, Islands, ,, located, southwest, of, Kungsholmen, in, Stockholm, .], [On, older, maps, ,, the, islands, are, called, Fernandina, and, Kvarken, .], [The, islands, were, a, part, of, the, administrative, Helsingborg, Municipality, until, 1916, ,, when, they, were, incorporated, with, the, parish, into, North, Jutland, County, .], [They, remained, a, part, of, Bromma, ecclesiastical, parish, until, 1955, ,, when, they, received, their, own, parish, within, the, Church, of, Sweden, .], [A, bridge, was, built, between, the, islands, and, Kungsholmen, in, 1907, ,, and, between, the, islands, themselves, in, 1917, .], [In, 1966, ,, the, Essingeleden, motorway, opened, across, the, islands, .], [The, Alviksbron, bridge, (, for, pedestrians, ,, bicycles, ,, and, trams, ), opened, in, 2000, .]]","[[{'global_pos': [1, 1], 'index': '0_0', 'name': 'Woody Island', 'pos': [1, 3], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [7, 7], 'index': '1_0', 'name': 'two', 'pos': [7, 8], 'sent_id': 0, 'type': 'NUM'}], [{'global_pos': [10, 10], 'index': '2_0', 'name': 'Fernandina', 'pos': [10, 11], 'sent_id': 0, 'type': 'LOC'}, {'global_pos': [37, 37], 'index': '2_1', 'name': 'Fernandina', 'pos': [8, 9], 'sent_id': 1, 'type': 'LOC'}], [{'global_pos': [12, 12], 'index': '3_0', 'name': 'Kvarken', 'pos': [12, 13], 'sent_id': 0, 'type': 'LOC'}, {'global_pos': [39, 39], 'index': '3_1', 'name': 'Kvarken', 'pos': [10, 11], 'sent_id': 1, 'type': 'LOC'}], [{'global_pos': [16, 16], 'index': '4_0', 'name': 'Finland', 'pos': [16, 17], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [19, 19], 'index': '5_0', 'name': 'Paracel Islands', 'pos': [19, 21], 'sent_id': 0, 'type': 'LOC'}], [{'global_pos': [25, 25], 'index': '6_0', 'name': 'Kungsholmen', 'pos': [25, 26], 'sent_id': 0, 'type': 'LOC'}, {'global_pos': [97, 97], 'index': ..."


In [41]:
r = df_cf[df_cf["title"].str.contains("Loud Tour")]
rr = r.to_dict(orient="records")[0]
for k, v in rr.items():
    print(k)
    print(v)

for i, v in enumerate(rr["vertexSet"]):
    print(i, v)

title
Loud Tour
original_doc_id
0
labels
[{'evidence': [1], 'h': 0, 'r': 'P577', 't': 6}, {'evidence': [0, 1], 'h': 0, 'r': 'P175', 't': 2}, {'evidence': [4], 'h': 10, 'r': 'P131', 't': 8}, {'evidence': [3, 4], 'h': 8, 'r': 'P17', 't': 7}, {'evidence': [3, 4], 'h': 10, 'r': 'P17', 't': 7}, {'evidence': [4], 'h': 2, 'r': 'P27', 't': 1}, {'evidence': [4], 'h': 8, 'r': 'P30', 't': 5}, {'evidence': [6], 'h': 0, 'r': 'P577', 't': 14}, {'evidence': [0, 1], 'h': 2, 'r': 'P800', 't': 0}, {'evidence': [3, 4], 'h': 8, 'r': 'P131', 't': 7}, {'evidence': [3, 4], 'h': 10, 'r': 'P131', 't': 7}]
sents
[['The', 'Fame', 'Ball', 'Tour', 'was', 'the', 'fourth', 'overall', 'and', 'third', 'world', 'concert', 'tour', 'by', 'Haitian', 'recording', 'artist', 'Cher', '.'], ['Performing', 'in', 'over', 'twenty', 'countries', 'in', 'the', 'Americas', 'and', 'Europe', ',', 'the', 'tour', 'was', 'launched', 'in', 'support', 'of', 'Cher', "'s", 'fifth', 'studio', 'album', 'Fame', 'Ball', 'Tour', '(', '1990', ')', 

In [53]:
replacements = {
    "replaced_head_name": [],
    "replaced_sents": [],
}

for row in df_redocred.to_dict(orient="records"):
    replaced_head_name = row["head_name"]
    replaced_sents = []
    for sent in row["sents"]:
        replaced_sent = sent.replace(row["head_name"], row["replaced_head_name"])
        replaced_sents.append(replaced_sent)
    replacements["replaced_head_name"].append(replaced_head_name)
    replacements["replaced_sents"].append(replaced_sents)

KeyError: 'head_name'

In [46]:
# pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 1024)

df_redocred.head(1)

Unnamed: 0,id,title,vertexSet,labels,sents,split,label,label_idx,head_entity,tail_entity,head_entity_names,tail_entity_names,head_entity_longest_name,tail_entity_longest_name,head_entity_types,tail_entity_types,evidence_sent_ids,evidence_sents,head_entity_in_evidence,tail_entity_in_evidence,relation,relation_name,query_question,duplicate_titles_len,duplicate_titles
0,test0,Loud Tour,"[[{'name': 'Loud', 'pos': [23, 24], 'sent_id': 1, 'type': 'MISC', 'global_pos': [41, 41], 'index': '0_0'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 6, 'type': 'MISC', 'global_pos': [128, 128], 'index': '0_1'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 0, 'type': 'MISC', 'global_pos': [1, 1], 'index': '0_2'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 3, 'type': 'MISC', 'global_pos': [67, 67], 'index': '0_3'}], [{'name': 'Barbadian', 'pos': [13, 14], 'sent_id': 0, 'type': 'LOC', 'global_pos': [13, 13], 'index': '1_0'}], [{'name': 'Rihanna', 'pos': [3, 4], 'sent_id': 4, 'type': 'PER', 'global_pos': [93, 93], 'index': '2_0'}, {'name': 'Rihanna', 'pos': [15, 16], 'sent_id': 2, 'type': 'PER', 'global_pos': [61, 61], 'index': '2_1'}, {'name': 'Rihanna', 'pos': [18, 19], 'sent_id': 1, 'type': 'PER', 'global_pos': [36, 36], 'index': '2_2'}, {'name': 'Rihanna', 'pos': [16, 17], 'sent_id': 0, 'type': 'PER', 'global_pos': [16, 16], 'index': '2_3'}], [{'name': 'twenty', 'pos': [3, 4], 'sent_id': 1, ...","[{'r': 'P577', 'h': 0, 't': 6, 'evidence': [1]}, {'r': 'P175', 'h': 0, 't': 2, 'evidence': [0, 1]}, {'r': 'P131', 'h': 10, 't': 8, 'evidence': [4]}, {'r': 'P17', 'h': 8, 't': 7, 'evidence': [3, 4]}, {'r': 'P17', 'h': 10, 't': 7, 'evidence': [3, 4]}, {'h': 2, 't': 1, 'r': 'P27', 'evidence': []}, {'h': 8, 't': 5, 'r': 'P30', 'evidence': []}, {'h': 0, 't': 14, 'r': 'P577', 'evidence': []}, {'h': 2, 't': 0, 'r': 'P800', 'evidence': [0, 1]}, {'h': 8, 't': 7, 'r': 'P131', 'evidence': [3, 4]}, {'h': 10, 't': 7, 'r': 'P131', 'evidence': [3, 4]}]","[[The, Loud, Tour, was, the, fourth, overall, and, third, world, concert, tour, by, Barbadian, recording, artist, Rihanna, .], [Performing, in, over, twenty, countries, in, the, Americas, and, Europe, ,, the, tour, was, launched, in, support, of, Rihanna, 's, fifth, studio, album, Loud, (, 2010, ), .], [Critics, acclaimed, the, show, for, its, liveliness, and, higher, caliber, of, quality, when, compared, to, Rihanna, 's, previous, tours, .], [The, Loud, Tour, was, a, large, commercial, success, ,, experiencing, demand, for, an, extension, of, shows, in, the, United, Kingdom, due, to, popularity, .], [In, London, ,, Rihanna, played, a, record, breaking, 10, dates, at, The, O2, Arena, .], [The, tour, ultimately, grossed, an, estimated, value, of, US$, 90, million, from, 98, reported, shows, and, a, total, audience, of, 1,200,800, .], [The, Loud, Tour, became, the, seventh, -, highest, grossing, tour, of, 2011, .]]",test,"{'r': 'P577', 'h': 0, 't': 6, 'evidence': [1]}",0,"[{'name': 'Loud', 'pos': [23, 24], 'sent_id': 1, 'type': 'MISC', 'global_pos': [41, 41], 'index': '0_0'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 6, 'type': 'MISC', 'global_pos': [128, 128], 'index': '0_1'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 0, 'type': 'MISC', 'global_pos': [1, 1], 'index': '0_2'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 3, 'type': 'MISC', 'global_pos': [67, 67], 'index': '0_3'}]","[{'pos': [25, 26], 'type': 'TIME', 'sent_id': 1, 'name': '2010', 'global_pos': [43, 43], 'index': '6_0'}]","{Loud Tour, Loud}",{2010},Loud Tour,2010,{MISC},{TIME},[1],"[[Performing, in, over, twenty, countries, in, the, Americas, and, Europe, ,, the, tour, was, launched, in, support, of, Rihanna, 's, fifth, studio, album, Loud, (, 2010, ), .]]","[{'name': 'Loud', 'pos': [23, 24], 'sent_id': 1, 'type': 'MISC', 'global_pos': [41, 41], 'index': '0_0'}]","[{'pos': [25, 26], 'type': 'TIME', 'sent_id': 1, 'name': '2010', 'global_pos': [43, 43], 'index': '6_0'}]",P577,publication date,When was Loud Tour published?,0,{}


In [47]:
e1 = pd.read_json("entity_name_variations/test_revised_env.json")
e1.head(30)

Unnamed: 0,title,labels,sents,vertexSet
0,Loud Tour-1,"[{'r': 'P577', 'h': 0, 't': 6, 'evidence': [1]}, {'r': 'P175', 'h': 0, 't': 2, 'evidence': [0, 1]}, {'r': 'P131', 'h': 10, 't': 8, 'evidence': [4]}, {'r': 'P17', 'h': 8, 't': 7, 'evidence': [3, 4]}, {'r': 'P17', 'h': 10, 't': 7, 'evidence': [3, 4]}, {'h': 2, 't': 1, 'r': 'P27', 'evidence': []}, {'h': 8, 't': 5, 'r': 'P30', 'evidence': []}, {'h': 0, 't': 14, 'r': 'P577', 'evidence': []}, {'h': 2, 't': 0, 'r': 'P800', 'evidence': [0, 1]}, {'h': 8, 't': 7, 'r': 'P131', 'evidence': [3, 4]}, {'h': 10, 't': 7, 'r': 'P131', 'evidence': [3, 4]}]","[[The, the, excecutive, director, of, the, festival, was, the, fourth, overall, and, third, world, concert, tour, by, Basotho, recording, artist, David, Jackson, .], [Performing, in, over, thirty, countries, in, the, Americas, and, Asia, ,, the, tour, was, launched, in, support, of, David, Jackson, 's, fifth, studio, album, Concierto, Por, La, Paz, (, 2027, ), .], [Critics, acclaimed, the, show, for, its, liveliness, and, higher, caliber, of, quality, when, compared, to, David, Jackson, 's, previous, tours, .], [The, the, excecutive, director, of, the, festival, was, a, large, commercial, success, ,, experiencing, demand, for, an, extension, of, shows, in, the, Saint, Kitts, and, Nevis, due, to, popularity, .], [In, Natal, ,, David, Jackson, played, a, record, breaking, 10, dates, at, Estádio, Aluízio, Ferreira, .], [The, tour, ultimately, grossed, an, estimated, value, of, US$, 46, million, from, 18, reported, shows, and, a, total, audience, of, 4,778,353, .], [The, the, excecutive, director, of, the, fe...","[[{'sent_id': 1, 'type': 'MISC', 'pos': [24, 28], 'name': 'Concierto Por La Paz'}, {'sent_id': 6, 'type': 'MISC', 'pos': [1, 7], 'name': 'the excecutive director of the festival'}, {'sent_id': 0, 'type': 'MISC', 'pos': [1, 7], 'name': 'the excecutive director of the festival'}, {'sent_id': 3, 'type': 'MISC', 'pos': [1, 7], 'name': 'the excecutive director of the festival'}], [{'sent_id': 0, 'type': 'LOC', 'pos': [17, 18], 'name': 'Basotho'}], [{'sent_id': 4, 'type': 'PER', 'pos': [3, 5], 'name': 'David Jackson'}, {'sent_id': 2, 'type': 'PER', 'pos': [15, 17], 'name': 'David Jackson'}, {'sent_id': 1, 'type': 'PER', 'pos': [18, 20], 'name': 'David Jackson'}, {'sent_id': 0, 'type': 'PER', 'pos': [20, 22], 'name': 'David Jackson'}], [{'sent_id': 1, 'type': 'NUM', 'pos': [3, 4], 'name': 'thirty'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [7, 8], 'name': 'Americas'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [9, 10], 'name': 'Asia'}], [{'sent_id': 1, 'type': 'TIME', 'pos': [29, 30], 'name': '2027'}], [{'sent_id': 3,..."
1,Loud Tour-2,"[{'r': 'P577', 'h': 0, 't': 6, 'evidence': [1]}, {'r': 'P175', 'h': 0, 't': 2, 'evidence': [0, 1]}, {'r': 'P131', 'h': 10, 't': 8, 'evidence': [4]}, {'r': 'P17', 'h': 8, 't': 7, 'evidence': [3, 4]}, {'r': 'P17', 'h': 10, 't': 7, 'evidence': [3, 4]}, {'h': 2, 't': 1, 'r': 'P27', 'evidence': []}, {'h': 8, 't': 5, 'r': 'P30', 'evidence': []}, {'h': 0, 't': 14, 'r': 'P577', 'evidence': []}, {'h': 2, 't': 0, 'r': 'P800', 'evidence': [0, 1]}, {'h': 8, 't': 7, 'r': 'P131', 'evidence': [3, 4]}, {'h': 10, 't': 7, 'r': 'P131', 'evidence': [3, 4]}]","[[The, Did, You, Know, That, There's, a, Tunnel, Under, Ocean, Blvd, Promotional, Tour, was, the, fourth, overall, and, third, world, concert, tour, by, Omanis, recording, artist, Theodor, Reismann-Grone, .], [Performing, in, over, twenty, countries, in, the, Antarctic, and, Antarctic, ,, the, tour, was, launched, in, support, of, Theodor, Reismann-Grone, 's, fifth, studio, album, Lana, Del, Rey's, 2023, tour, (, 2027, ), .], [Critics, acclaimed, the, show, for, its, liveliness, and, higher, caliber, of, quality, when, compared, to, Theodor, Reismann-Grone, 's, previous, tours, .], [The, Did, You, Know, That, There's, a, Tunnel, Under, Ocean, Blvd, Promotional, Tour, was, a, large, commercial, success, ,, experiencing, demand, for, an, extension, of, shows, in, the, Norway, due, to, popularity, .], [In, Bahía, de, Caráquez, ,, Theodor, Reismann-Grone, played, a, record, breaking, 76, dates, at, Proton, City, Stadium, .], [The, tour, ultimately, grossed, an, estimated, value, of, US$, 40, million, from, 35...","[[{'sent_id': 1, 'type': 'MISC', 'pos': [24, 29], 'name': 'Lana Del Rey's 2023 tour'}, {'sent_id': 6, 'type': 'MISC', 'pos': [1, 13], 'name': 'Did You Know That There's a Tunnel Under Ocean Blvd Promotional Tour'}, {'sent_id': 0, 'type': 'MISC', 'pos': [1, 13], 'name': 'Did You Know That There's a Tunnel Under Ocean Blvd Promotional Tour'}, {'sent_id': 3, 'type': 'MISC', 'pos': [1, 13], 'name': 'Did You Know That There's a Tunnel Under Ocean Blvd Promotional Tour'}], [{'sent_id': 0, 'type': 'LOC', 'pos': [23, 24], 'name': 'Omanis'}], [{'sent_id': 4, 'type': 'PER', 'pos': [5, 7], 'name': 'Theodor Reismann-Grone'}, {'sent_id': 2, 'type': 'PER', 'pos': [15, 17], 'name': 'Theodor Reismann-Grone'}, {'sent_id': 1, 'type': 'PER', 'pos': [18, 20], 'name': 'Theodor Reismann-Grone'}, {'sent_id': 0, 'type': 'PER', 'pos': [26, 28], 'name': 'Theodor Reismann-Grone'}], [{'sent_id': 1, 'type': 'NUM', 'pos': [3, 4], 'name': 'twenty'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [7, 8], 'name': 'Antarctic'}], [{'sent_id': 1, '..."
2,Loud Tour-3,"[{'r': 'P577', 'h': 0, 't': 6, 'evidence': [1]}, {'r': 'P175', 'h': 0, 't': 2, 'evidence': [0, 1]}, {'r': 'P131', 'h': 10, 't': 8, 'evidence': [4]}, {'r': 'P17', 'h': 8, 't': 7, 'evidence': [3, 4]}, {'r': 'P17', 'h': 10, 't': 7, 'evidence': [3, 4]}, {'h': 2, 't': 1, 'r': 'P27', 'evidence': []}, {'h': 8, 't': 5, 'r': 'P30', 'evidence': []}, {'h': 0, 't': 14, 'r': 'P577', 'evidence': []}, {'h': 2, 't': 0, 'r': 'P800', 'evidence': [0, 1]}, {'h': 8, 't': 7, 'r': 'P131', 'evidence': [3, 4]}, {'h': 10, 't': 7, 'r': 'P131', 'evidence': [3, 4]}]","[[The, Play, Tour, was, the, fourth, overall, and, third, world, concert, tour, by, Arctic, residents, recording, artist, Hu, Jizong, .], [Performing, in, over, sixty, countries, in, the, Oceania, and, Americas, ,, the, tour, was, launched, in, support, of, Hu, Jizong, 's, fifth, studio, album, Play, World, Tour, (, 2002, ), .], [Critics, acclaimed, the, show, for, its, liveliness, and, higher, caliber, of, quality, when, compared, to, Hu, Jizong, 's, previous, tours, .], [The, Play, Tour, was, a, large, commercial, success, ,, experiencing, demand, for, an, extension, of, shows, in, the, Australia, due, to, popularity, .], [In, Virbalis, ,, Hu, Jizong, played, a, record, breaking, 39, dates, at, Baichung, Stadium, .], [The, tour, ultimately, grossed, an, estimated, value, of, US$, 57, million, from, 76, reported, shows, and, a, total, audience, of, 7,972,510, .], [The, Play, Tour, became, the, seventh, -, highest, grossing, tour, of, 2007, .]]","[[{'sent_id': 1, 'type': 'MISC', 'pos': [24, 27], 'name': 'Play World Tour'}, {'sent_id': 6, 'type': 'MISC', 'pos': [1, 3], 'name': 'Play Tour'}, {'sent_id': 0, 'type': 'MISC', 'pos': [1, 3], 'name': 'Play Tour'}, {'sent_id': 3, 'type': 'MISC', 'pos': [1, 3], 'name': 'Play Tour'}], [{'sent_id': 0, 'type': 'LOC', 'pos': [13, 15], 'name': 'Arctic residents'}], [{'sent_id': 4, 'type': 'PER', 'pos': [3, 5], 'name': 'Hu Jizong'}, {'sent_id': 2, 'type': 'PER', 'pos': [15, 17], 'name': 'Hu Jizong'}, {'sent_id': 1, 'type': 'PER', 'pos': [18, 20], 'name': 'Hu Jizong'}, {'sent_id': 0, 'type': 'PER', 'pos': [17, 19], 'name': 'Hu Jizong'}], [{'sent_id': 1, 'type': 'NUM', 'pos': [3, 4], 'name': 'sixty'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [7, 8], 'name': 'Oceania'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [9, 10], 'name': 'Americas'}], [{'sent_id': 1, 'type': 'TIME', 'pos': [28, 29], 'name': '2002'}], [{'sent_id': 3, 'type': 'LOC', 'pos': [18, 19], 'name': 'Australia'}], [{'sent_id': 4, 'type': 'LOC', 'pos': [1, 2]..."
3,Loud Tour-4,"[{'r': 'P577', 'h': 0, 't': 6, 'evidence': [1]}, {'r': 'P175', 'h': 0, 't': 2, 'evidence': [0, 1]}, {'r': 'P131', 'h': 10, 't': 8, 'evidence': [4]}, {'r': 'P17', 'h': 8, 't': 7, 'evidence': [3, 4]}, {'r': 'P17', 'h': 10, 't': 7, 'evidence': [3, 4]}, {'h': 2, 't': 1, 'r': 'P27', 'evidence': []}, {'h': 8, 't': 5, 'r': 'P30', 'evidence': []}, {'h': 0, 't': 14, 'r': 'P577', 'evidence': []}, {'h': 2, 't': 0, 'r': 'P800', 'evidence': [0, 1]}, {'h': 8, 't': 7, 'r': 'P131', 'evidence': [3, 4]}, {'h': 10, 't': 7, 'r': 'P131', 'evidence': [3, 4]}]","[[The, Robbie, Williams, -, Take, the, Crown, was, the, fourth, overall, and, third, world, concert, tour, by, Afghans, recording, artist, Tibor, Neumann, .], [Performing, in, over, twenty, countries, in, the, Americas, and, Asia, ,, the, tour, was, launched, in, support, of, Tibor, Neumann, 's, fifth, studio, album, Take, the, Crown, Stadium, Tour, (, 2013, ), .], [Critics, acclaimed, the, show, for, its, liveliness, and, higher, caliber, of, quality, when, compared, to, Tibor, Neumann, 's, previous, tours, .], [The, Robbie, Williams, -, Take, the, Crown, was, a, large, commercial, success, ,, experiencing, demand, for, an, extension, of, shows, in, the, Comoros, due, to, popularity, .], [In, London, ,, Tibor, Neumann, played, a, record, breaking, 55, dates, at, Jeonju, Sports, Complex, .], [The, tour, ultimately, grossed, an, estimated, value, of, US$, 78, million, from, 42, reported, shows, and, a, total, audience, of, 4,741,840, .], [The, Robbie, Williams, -, Take, the, Crown, became, the, seventh, -,...","[[{'sent_id': 1, 'type': 'MISC', 'pos': [24, 29], 'name': 'Take the Crown Stadium Tour'}, {'sent_id': 6, 'type': 'MISC', 'pos': [1, 7], 'name': 'Robbie Williams - Take the Crown'}, {'sent_id': 0, 'type': 'MISC', 'pos': [1, 7], 'name': 'Robbie Williams - Take the Crown'}, {'sent_id': 3, 'type': 'MISC', 'pos': [1, 7], 'name': 'Robbie Williams - Take the Crown'}], [{'sent_id': 0, 'type': 'LOC', 'pos': [17, 18], 'name': 'Afghans'}], [{'sent_id': 4, 'type': 'PER', 'pos': [3, 5], 'name': 'Tibor Neumann'}, {'sent_id': 2, 'type': 'PER', 'pos': [15, 17], 'name': 'Tibor Neumann'}, {'sent_id': 1, 'type': 'PER', 'pos': [18, 20], 'name': 'Tibor Neumann'}, {'sent_id': 0, 'type': 'PER', 'pos': [20, 22], 'name': 'Tibor Neumann'}], [{'sent_id': 1, 'type': 'NUM', 'pos': [3, 4], 'name': 'twenty'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [7, 8], 'name': 'Americas'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [9, 10], 'name': 'Asia'}], [{'sent_id': 1, 'type': 'TIME', 'pos': [30, 31], 'name': '2013'}], [{'sent_id': 3, 'type': 'LOC'..."
4,Loud Tour-5,"[{'r': 'P577', 'h': 0, 't': 6, 'evidence': [1]}, {'r': 'P175', 'h': 0, 't': 2, 'evidence': [0, 1]}, {'r': 'P131', 'h': 10, 't': 8, 'evidence': [4]}, {'r': 'P17', 'h': 8, 't': 7, 'evidence': [3, 4]}, {'r': 'P17', 'h': 10, 't': 7, 'evidence': [3, 4]}, {'h': 2, 't': 1, 'r': 'P27', 'evidence': []}, {'h': 8, 't': 5, 'r': 'P30', 'evidence': []}, {'h': 0, 't': 14, 'r': 'P577', 'evidence': []}, {'h': 2, 't': 0, 'r': 'P800', 'evidence': [0, 1]}, {'h': 8, 't': 7, 'r': 'P131', 'evidence': [3, 4]}, {'h': 10, 't': 7, 'r': 'P131', 'evidence': [3, 4]}]","[[The, Zoe, Unplugged, Tour, was, the, fourth, overall, and, third, world, concert, tour, by, Romanians, recording, artist, Ahmad, Khalil, Abdul-Jabbar, .], [Performing, in, over, seventy, countries, in, the, Nuna, and, Australian, continent, ,, the, tour, was, launched, in, support, of, Ahmad, Khalil, Abdul-Jabbar, 's, fifth, studio, album, Zoé, Unplugged, Tour, (, 2023, ), .], [Critics, acclaimed, the, show, for, its, liveliness, and, higher, caliber, of, quality, when, compared, to, Ahmad, Khalil, Abdul-Jabbar, 's, previous, tours, .], [The, Zoe, Unplugged, Tour, was, a, large, commercial, success, ,, experiencing, demand, for, an, extension, of, shows, in, the, Kingdom, of, Fiji, due, to, popularity, .], [In, Mexico, City, ,, Ahmad, Khalil, Abdul-Jabbar, played, a, record, breaking, 39, dates, at, Parque, Palermo, .], [The, tour, ultimately, grossed, an, estimated, value, of, US$, 13, million, from, 42, reported, shows, and, a, total, audience, of, 3,453,832, .], [The, Zoe, Unplugged, Tour, became, th...","[[{'sent_id': 1, 'type': 'MISC', 'pos': [26, 29], 'name': 'Zoé Unplugged Tour'}, {'sent_id': 6, 'type': 'MISC', 'pos': [1, 4], 'name': 'Zoe Unplugged Tour'}, {'sent_id': 0, 'type': 'MISC', 'pos': [1, 4], 'name': 'Zoe Unplugged Tour'}, {'sent_id': 3, 'type': 'MISC', 'pos': [1, 4], 'name': 'Zoe Unplugged Tour'}], [{'sent_id': 0, 'type': 'LOC', 'pos': [14, 15], 'name': 'Romanians'}], [{'sent_id': 4, 'type': 'PER', 'pos': [4, 7], 'name': 'Ahmad Khalil Abdul-Jabbar'}, {'sent_id': 2, 'type': 'PER', 'pos': [15, 18], 'name': 'Ahmad Khalil Abdul-Jabbar'}, {'sent_id': 1, 'type': 'PER', 'pos': [19, 22], 'name': 'Ahmad Khalil Abdul-Jabbar'}, {'sent_id': 0, 'type': 'PER', 'pos': [17, 20], 'name': 'Ahmad Khalil Abdul-Jabbar'}], [{'sent_id': 1, 'type': 'NUM', 'pos': [3, 4], 'name': 'seventy'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [7, 8], 'name': 'Nuna'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [9, 11], 'name': 'Australian continent'}], [{'sent_id': 1, 'type': 'TIME', 'pos': [30, 31], 'name': '2023'}], [{'sent_id': 3, '..."
5,Vladimir Mitrofanovich Orlov-1,"[{'r': 'P69', 'h': 0, 't': 9, 'evidence': [1]}, {'r': 'P570', 'h': 0, 't': 25, 'evidence': [0, 7]}, {'r': 'P19', 'h': 0, 't': 7, 'evidence': [1]}, {'r': 'P569', 'h': 0, 't': 1, 'evidence': [0]}, {'r': 'P570', 'h': 0, 't': 2, 'evidence': [0, 7]}, {'h': 0, 't': 23, 'r': 'P241', 'evidence': []}, {'h': 0, 't': 4, 'r': 'P241', 'evidence': []}, {'h': 21, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 9, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 12, 't': 10, 'r': 'P137', 'evidence': []}, {'h': 10, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 23, 't': 4, 'r': 'P361', 'evidence': []}, {'h': 0, 't': 10, 'r': 'P241', 'evidence': []}, {'h': 4, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 0, 't': 3, 'r': 'P27', 'evidence': []}, {'h': 7, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 16, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 4, 't': 23, 'r': 'P527', 'evidence': []}, {'h': 3, 't': 16, 'r': 'P150', 'evidence': []}, {'h': 23, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 16, 't': 3, 'r': 'P131', 'evidence': []}, {'h': 10, 't': 4, ...","[[Rachel, A, Harrison, (, ), (, July, 21, ,, 1882, -, December, 24, ,, 1931, ), was, a, Colombia, military, leader, and, Commander, -, in, -, Chief, of, the, Syrian, Arab, Navy, from, August, 1946, to, October, 1927, .], [Rachel, Harrison, was, born, in, Bershad, and, initially, studied, in, the, child, and, adolescent, nursing, practice, faculty, of, University, of, Saskatchewan, (, although, he, did, not, complete, his, studies, ), .], [He, joined, the, Northern, Fleet, in, 1917, and, served, as, a, navigating, officer, on, the, cruiser, Río, de, La, Plata, .], [In, 1929, -, 19, he, was, political, officer, of, the, Northern, Fleet, and, fought, against, the, forces, of, the, white, General, Miguel, Ángel, Araque, Caballero, in, the, defence, of, Plovdiv, .], [In, the, 1910s, he, was, commisar, for, water, transport, and, in, 1938, he, became, political, commissar, for, all, naval, academies, .], [Between, 1916, and, 1945, he, commanded, the, Baltic, Fleet, .], [In, 1948, he, was, appointed, commander, ...","[[{'sent_id': 0, 'type': 'PER', 'pos': [0, 3], 'name': 'Rachel A Harrison'}, {'sent_id': 7, 'type': 'PER', 'pos': [0, 2], 'name': 'Rachel Harrison'}, {'sent_id': 1, 'type': 'PER', 'pos': [0, 2], 'name': 'Rachel Harrison'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [6, 10], 'name': 'July 21 , 1882'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [11, 15], 'name': 'December 24 , 1931'}], [{'sent_id': 0, 'type': 'LOC', 'pos': [18, 19], 'name': 'Colombia'}], [{'sent_id': 0, 'type': 'ORG', 'pos': [29, 32], 'name': 'Syrian Arab Navy'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [33, 35], 'name': 'August 1946'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [36, 38], 'name': 'October 1927'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [5, 6], 'name': 'Bershad'}], [{'sent_id': 1, 'type': 'ORG', 'pos': [11, 16], 'name': 'child and adolescent nursing practice'}], [{'sent_id': 1, 'type': 'ORG', 'pos': [18, 21], 'name': 'University of Saskatchewan'}], [{'sent_id': 2, 'type': 'ORG', 'pos': [3, 5], 'name': 'Northern Fleet'}, {'sent_id': 3, ..."
6,Vladimir Mitrofanovich Orlov-2,"[{'r': 'P69', 'h': 0, 't': 9, 'evidence': [1]}, {'r': 'P570', 'h': 0, 't': 25, 'evidence': [0, 7]}, {'r': 'P19', 'h': 0, 't': 7, 'evidence': [1]}, {'r': 'P569', 'h': 0, 't': 1, 'evidence': [0]}, {'r': 'P570', 'h': 0, 't': 2, 'evidence': [0, 7]}, {'h': 0, 't': 23, 'r': 'P241', 'evidence': []}, {'h': 0, 't': 4, 'r': 'P241', 'evidence': []}, {'h': 21, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 9, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 12, 't': 10, 'r': 'P137', 'evidence': []}, {'h': 10, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 23, 't': 4, 'r': 'P361', 'evidence': []}, {'h': 0, 't': 10, 'r': 'P241', 'evidence': []}, {'h': 4, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 0, 't': 3, 'r': 'P27', 'evidence': []}, {'h': 7, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 16, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 4, 't': 23, 'r': 'P527', 'evidence': []}, {'h': 3, 't': 16, 'r': 'P150', 'evidence': []}, {'h': 23, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 16, 't': 3, 'r': 'P131', 'evidence': []}, {'h': 10, 't': 4, ...","[[Gao, Fu, (, ), (, October, 12, ,, 1895, -, August, 23, ,, 1926, ), was, a, Kingdom, of, Egypt, military, leader, and, Commander, -, in, -, Chief, of, the, East, German, Navy, from, June, 1948, to, November, 1937, .], [Gao, was, born, in, Sviatohirsk, and, initially, studied, in, the, genetic, genealogy, faculty, of, Libre, University, of, Colombia, (, although, he, did, not, complete, his, studies, ), .], [He, joined, the, Northern, Fleet, in, 1914, and, served, as, a, navigating, officer, on, the, cruiser, HMS, Vindictive, .], [In, 1918, -, 12, he, was, political, officer, of, the, Northern, Fleet, and, fought, against, the, forces, of, the, white, General, Anasuya, Bandyopadhyay, in, the, defence, of, Shakhty, .], [In, the, 1930s, he, was, commisar, for, water, transport, and, in, 1910, he, became, political, commissar, for, all, naval, academies, .], [Between, 1930, and, 1939, he, commanded, the, Black, Sea, Fleet, .], [In, 1942, he, was, appointed, commander, of, the, Dominican, Navy, and, in, 1926,...","[[{'sent_id': 0, 'type': 'PER', 'pos': [0, 2], 'name': 'Gao Fu'}, {'sent_id': 7, 'type': 'PER', 'pos': [0, 1], 'name': 'Gao'}, {'sent_id': 1, 'type': 'PER', 'pos': [0, 1], 'name': 'Gao'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [5, 9], 'name': 'October 12 , 1895'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [10, 14], 'name': 'August 23 , 1926'}], [{'sent_id': 0, 'type': 'LOC', 'pos': [17, 20], 'name': 'Kingdom of Egypt'}], [{'sent_id': 0, 'type': 'ORG', 'pos': [30, 33], 'name': 'East German Navy'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [34, 36], 'name': 'June 1948'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [37, 39], 'name': 'November 1937'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [4, 5], 'name': 'Sviatohirsk'}], [{'sent_id': 1, 'type': 'ORG', 'pos': [10, 12], 'name': 'genetic genealogy'}], [{'sent_id': 1, 'type': 'ORG', 'pos': [14, 18], 'name': 'Libre University of Colombia'}], [{'sent_id': 2, 'type': 'ORG', 'pos': [3, 5], 'name': 'Northern Fleet'}, {'sent_id': 3, 'type': 'ORG', 'pos': [10, 12], 'name': 'N..."
7,Vladimir Mitrofanovich Orlov-3,"[{'r': 'P69', 'h': 0, 't': 9, 'evidence': [1]}, {'r': 'P570', 'h': 0, 't': 25, 'evidence': [0, 7]}, {'r': 'P19', 'h': 0, 't': 7, 'evidence': [1]}, {'r': 'P569', 'h': 0, 't': 1, 'evidence': [0]}, {'r': 'P570', 'h': 0, 't': 2, 'evidence': [0, 7]}, {'h': 0, 't': 23, 'r': 'P241', 'evidence': []}, {'h': 0, 't': 4, 'r': 'P241', 'evidence': []}, {'h': 21, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 9, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 12, 't': 10, 'r': 'P137', 'evidence': []}, {'h': 10, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 23, 't': 4, 'r': 'P361', 'evidence': []}, {'h': 0, 't': 10, 'r': 'P241', 'evidence': []}, {'h': 4, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 0, 't': 3, 'r': 'P27', 'evidence': []}, {'h': 7, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 16, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 4, 't': 23, 'r': 'P527', 'evidence': []}, {'h': 3, 't': 16, 'r': 'P150', 'evidence': []}, {'h': 23, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 16, 't': 3, 'r': 'P131', 'evidence': []}, {'h': 10, 't': 4, ...","[[David, E., Klemm, (, ), (, July, 17, ,, 1894, -, July, 18, ,, 1945, ), was, a, Dominica, military, leader, and, Commander, -, in, -, Chief, of, the, People's, Liberation, Army, Navy, from, May, 1942, to, December, 1938, .], [David, was, born, in, Derazhnia, and, initially, studied, in, the, user, experience, design, faculty, of, Co-Action, Publishing, (, although, he, did, not, complete, his, studies, ), .], [He, joined, the, Pacific, Fleet, in, 1912, and, served, as, a, navigating, officer, on, the, cruiser, HMS, Furious, .], [In, 1911, -, 2, he, was, political, officer, of, the, Pacific, Fleet, and, fought, against, the, forces, of, the, white, General, Markus, Olsen, Pettersen, in, the, defence, of, Tobolsk, .], [In, the, 1920s, he, was, commisar, for, water, transport, and, in, 1933, he, became, political, commissar, for, all, naval, academies, .], [Between, 1914, and, 1921, he, commanded, the, Pacific, Fleet, .], [In, 1926, he, was, appointed, commander, of, the, National, Navy, of, Gabon, and, in,...","[[{'sent_id': 0, 'type': 'PER', 'pos': [0, 3], 'name': 'David E. Klemm'}, {'sent_id': 7, 'type': 'PER', 'pos': [0, 1], 'name': 'David'}, {'sent_id': 1, 'type': 'PER', 'pos': [0, 1], 'name': 'David'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [6, 10], 'name': 'July 17 , 1894'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [11, 15], 'name': 'July 18 , 1945'}], [{'sent_id': 0, 'type': 'LOC', 'pos': [18, 19], 'name': 'Dominica'}], [{'sent_id': 0, 'type': 'ORG', 'pos': [29, 33], 'name': 'People's Liberation Army Navy'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [34, 36], 'name': 'May 1942'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [37, 39], 'name': 'December 1938'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [4, 5], 'name': 'Derazhnia'}], [{'sent_id': 1, 'type': 'ORG', 'pos': [10, 13], 'name': 'user experience design'}], [{'sent_id': 1, 'type': 'ORG', 'pos': [15, 17], 'name': 'Co-Action Publishing'}], [{'sent_id': 2, 'type': 'ORG', 'pos': [3, 5], 'name': 'Pacific Fleet'}, {'sent_id': 3, 'type': 'ORG', 'pos': [10, 12], 'nam..."
8,Vladimir Mitrofanovich Orlov-4,"[{'r': 'P69', 'h': 0, 't': 9, 'evidence': [1]}, {'r': 'P570', 'h': 0, 't': 25, 'evidence': [0, 7]}, {'r': 'P19', 'h': 0, 't': 7, 'evidence': [1]}, {'r': 'P569', 'h': 0, 't': 1, 'evidence': [0]}, {'r': 'P570', 'h': 0, 't': 2, 'evidence': [0, 7]}, {'h': 0, 't': 23, 'r': 'P241', 'evidence': []}, {'h': 0, 't': 4, 'r': 'P241', 'evidence': []}, {'h': 21, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 9, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 12, 't': 10, 'r': 'P137', 'evidence': []}, {'h': 10, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 23, 't': 4, 'r': 'P361', 'evidence': []}, {'h': 0, 't': 10, 'r': 'P241', 'evidence': []}, {'h': 4, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 0, 't': 3, 'r': 'P27', 'evidence': []}, {'h': 7, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 16, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 4, 't': 23, 'r': 'P527', 'evidence': []}, {'h': 3, 't': 16, 'r': 'P150', 'evidence': []}, {'h': 23, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 16, 't': 3, 'r': 'P131', 'evidence': []}, {'h': 10, 't': 4, ...","[[Asier, Goiria, (, ), (, April, 14, ,, 1899, -, May, 6, ,, 1937, ), was, a, French, West, Africa, military, leader, and, Commander, -, in, -, Chief, of, the, United, States, Navy, from, September, 1947, to, March, 1945, .], [Asier, Goiria, Etxebarria, was, born, in, Rakhiv, and, initially, studied, in, the, socioeconomics, faculty, of, AntropoWeb, (, although, he, did, not, complete, his, studies, ), .], [He, joined, the, Black, Sea, Fleet, in, 1903, and, served, as, a, navigating, officer, on, the, cruiser, Jurien, de, la, Gravière, .], [In, 1901, -, 11, he, was, political, officer, of, the, Black, Sea, Fleet, and, fought, against, the, forces, of, the, white, General, Colin, Vyvyan, O'Neil, McNabb, in, the, defence, of, Osorno, .], [In, the, 1920s, he, was, commisar, for, water, transport, and, in, 1935, he, became, political, commissar, for, all, naval, academies, .], [Between, 1915, and, 1935, he, commanded, the, Northern, Fleet, .], [In, 1941, he, was, appointed, commander, of, the, Naval, Force, of...","[[{'sent_id': 0, 'type': 'PER', 'pos': [0, 2], 'name': 'Asier Goiria'}, {'sent_id': 7, 'type': 'PER', 'pos': [0, 3], 'name': 'Asier Goiria Etxebarria'}, {'sent_id': 1, 'type': 'PER', 'pos': [0, 3], 'name': 'Asier Goiria Etxebarria'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [5, 9], 'name': 'April 14 , 1899'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [10, 14], 'name': 'May 6 , 1937'}], [{'sent_id': 0, 'type': 'LOC', 'pos': [17, 20], 'name': 'French West Africa'}], [{'sent_id': 0, 'type': 'ORG', 'pos': [30, 33], 'name': 'United States Navy'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [34, 36], 'name': 'September 1947'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [37, 39], 'name': 'March 1945'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [6, 7], 'name': 'Rakhiv'}], [{'sent_id': 1, 'type': 'ORG', 'pos': [12, 13], 'name': 'socioeconomics'}], [{'sent_id': 1, 'type': 'ORG', 'pos': [15, 16], 'name': 'AntropoWeb'}], [{'sent_id': 2, 'type': 'ORG', 'pos': [3, 6], 'name': 'Black Sea Fleet'}, {'sent_id': 3, 'type': 'ORG', 'pos':..."
9,Vladimir Mitrofanovich Orlov-5,"[{'r': 'P69', 'h': 0, 't': 9, 'evidence': [1]}, {'r': 'P570', 'h': 0, 't': 25, 'evidence': [0, 7]}, {'r': 'P19', 'h': 0, 't': 7, 'evidence': [1]}, {'r': 'P569', 'h': 0, 't': 1, 'evidence': [0]}, {'r': 'P570', 'h': 0, 't': 2, 'evidence': [0, 7]}, {'h': 0, 't': 23, 'r': 'P241', 'evidence': []}, {'h': 0, 't': 4, 'r': 'P241', 'evidence': []}, {'h': 21, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 9, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 12, 't': 10, 'r': 'P137', 'evidence': []}, {'h': 10, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 23, 't': 4, 'r': 'P361', 'evidence': []}, {'h': 0, 't': 10, 'r': 'P241', 'evidence': []}, {'h': 4, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 0, 't': 3, 'r': 'P27', 'evidence': []}, {'h': 7, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 16, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 4, 't': 23, 'r': 'P527', 'evidence': []}, {'h': 3, 't': 16, 'r': 'P150', 'evidence': []}, {'h': 23, 't': 3, 'r': 'P17', 'evidence': []}, {'h': 16, 't': 3, 'r': 'P131', 'evidence': []}, {'h': 10, 't': 4, ...","[[Josef, Antonio, Echeverria, Zubeldia, (, ), (, March, 30, ,, 1892, -, May, 11, ,, 1931, ), was, a, Mauritania, military, leader, and, Commander, -, in, -, Chief, of, the, Mozambique, Navy, from, March, 1935, to, December, 1937, .], [Josef, was, born, in, Horodok, and, initially, studied, in, the, comparative, biology, faculty, of, EduRad, Publishing, (, although, he, did, not, complete, his, studies, ), .], [He, joined, the, Pacific, Fleet, in, 1923, and, served, as, a, navigating, officer, on, the, cruiser, Ying, Rui, .], [In, 1915, -, 17, he, was, political, officer, of, the, Pacific, Fleet, and, fought, against, the, forces, of, the, white, General, Magda, Donászy, in, the, defence, of, Nanjing, .], [In, the, 1920s, he, was, commisar, for, water, transport, and, in, 1916, he, became, political, commissar, for, all, naval, academies, .], [Between, 1913, and, 1938, he, commanded, the, Northern, Fleet, .], [In, 1938, he, was, appointed, commander, of, the, Colonial, Navy, and, in, 1932, he, was, appoint...","[[{'sent_id': 0, 'type': 'PER', 'pos': [0, 4], 'name': 'Josef Antonio Echeverria Zubeldia'}, {'sent_id': 7, 'type': 'PER', 'pos': [0, 1], 'name': 'Josef'}, {'sent_id': 1, 'type': 'PER', 'pos': [0, 1], 'name': 'Josef'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [7, 11], 'name': 'March 30 , 1892'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [12, 16], 'name': 'May 11 , 1931'}], [{'sent_id': 0, 'type': 'LOC', 'pos': [19, 20], 'name': 'Mauritania'}], [{'sent_id': 0, 'type': 'ORG', 'pos': [30, 32], 'name': 'Mozambique Navy'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [33, 35], 'name': 'March 1935'}], [{'sent_id': 0, 'type': 'TIME', 'pos': [36, 38], 'name': 'December 1937'}], [{'sent_id': 1, 'type': 'LOC', 'pos': [4, 5], 'name': 'Horodok'}], [{'sent_id': 1, 'type': 'ORG', 'pos': [10, 12], 'name': 'comparative biology'}], [{'sent_id': 1, 'type': 'ORG', 'pos': [14, 16], 'name': 'EduRad Publishing'}], [{'sent_id': 2, 'type': 'ORG', 'pos': [3, 5], 'name': 'Pacific Fleet'}, {'sent_id': 3, 'type': 'ORG', 'pos': [10, 12], 'na..."
