In [1]:
# Paths of the XSum test split (source documents and reference targets, both
# under the same fairseq_files directory) and of the predictions file for it.
_xsum_fairseq_dir = '/home/ml/cadencao/XSum/fairseq_files'
test_source_path = _xsum_fairseq_dir + '/test.source'
test_target_path = _xsum_fairseq_dir + '/test.target'
xsum_test_prediction = 'preds/cedar_xsum_test_sep13.hypo'

In [2]:
def read_lines(file_path):
    """Read a UTF-8 text file and return its lines with whitespace trimmed.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one string per line, in file order. ``str.strip()`` is
        applied to every line, so leading/trailing whitespace (including the
        newline) is removed and blank lines become empty strings.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return [raw_line.strip() for raw_line in handle]

In [3]:
# Load the source, target and prediction files (in that order) as lists of lines.
xsum_source, xsum_target, xsum_preds = [
    read_lines(path)
    for path in (test_source_path, test_target_path, xsum_test_prediction)
]

# All three files must contain the same number of lines before going further.
assert len(xsum_source) == len(xsum_target) and len(xsum_target) == len(xsum_preds)
print(len(xsum_source))

11301


#### Annotation

In [4]:
import spacy

# 'en' is a spaCy 2.x shortcut link; spaCy 3 removed shortcut links and
# raises OSError when asked to load one. Try the original name first so that
# environments where the link exists keep loading exactly the same model, and
# only fall back to the full package name when the shortcut cannot be found.
# NOTE(review): the fallback assumes 'en' pointed at en_core_web_sm (what
# `spacy download en` used to install) -- confirm for this environment.
try:
    nlp = spacy.load('en')
except OSError:
    nlp = spacy.load('en_core_web_sm')

In [5]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

In [6]:
# Example summary sentence (same text as the 'pred' of the id 1431 annotation below).
ex = (
    'Middlesbrough midfielder Federico Fazio has left the Championship club '
    'after his contract was terminated by mutual consent.'
)

In [7]:
# ne_tree = nltk.ne_chunk(pos_tag(word_tokenize(xsum_preds[ID])))
# # print(ne_tree)

#### Build Dataset

In [8]:
import json
import copy

from random import randint

In [9]:
def _load_json(path):
    """Parse the JSON file at ``path`` and return the decoded object.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes (a bare ``json.load(open(...))`` leaves it open until
    garbage collection). UTF-8 is requested explicitly, matching
    ``read_lines``, instead of relying on the platform default encoding.
    """
    with open(path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)


# Entity files for the test set and for the predictions.
xsum_test_ents = _load_json('xsum_test_ents.json')
xsum_pred_ents = _load_json('xsum_pred_ents.json')

In [10]:
# Show the number of entries in each entity file, one count per line.
print(len(xsum_test_ents), len(xsum_pred_ents), sep='\n')

11301
11301


In [11]:
# format:
# {
#     'id': 0,
#     'pred': 'This is a test.',
#     'ents': [{'start': 62, 'end': 75, 'type': 'CARDINAL', 'ent': 'European', 'label': 0},
#              {'start': 91, 'end': 93, 'type': 'CARDINAL', 'ent': 'European', 'label': 0},
#              {'start': 109, 'end': 127, 'type': 'CARDINAL', 'ent': 'European', 'label': 0}
#             ],
#     'hallucinations': ['more than 100 jobs']
# }

annotated = [{'id': 8805,
  'pred': 'A biscuit maker has gone into administration with the loss of more than 100 jobs after the UK voted to leave the European Union.',
  'ents': [{'start': 62, 'end': 75, 'label': 'CARDINAL'},
           {'start': 91, 'end': 93, 'label': 'GPE'},
           {'start': 109, 'end': 127, 'label': 'ORG'}],
  'hallucination ents': [0],
  'hallucinations': ['more than 100 jobs'],
  'correctness': [False]},
 {'id': 9444,
  'pred': 'Twin-to-twin transfusion syndrome (TTTS) is being tracked by a hospital in Cardiff in a bid to save the lives of babies born with the condition.',
  'ents': [{'start': 35, 'end': 39, 'label': 'ORG'},
           {'start': 75, 'end': 82, 'label': 'ORG'}],
  'hallucination ents': [1],
  'hallucinations': ['in Cardiff'],
  'correctness': [False]},
 {'id': 1431,
  'pred': 'Middlesbrough midfielder Federico Fazio has left the Championship club after his contract was terminated by mutual consent.',
  'ents': [{'start': 25, 'end': 33, 'label': 'PERSON'},
           {'start': 34, 'end': 39, 'label': 'PERSON'}],
  'hallucination ents': [0, 1],
  'hallucinations': ['midfielder', 'Federico Fazio'],
  'correctness': [False, False]},
 {'id': 7079,
  'pred': "New Celtic manager Brendan Rodgers has met the club's captain for the first time as he prepares for his first game in charge of the club.",
  'ents': [{'start': 0, 'end': 10, 'label': 'NORP'},
           {'start': 19, 'end': 26, 'label': 'PERSON'},
           {'start': 27, 'end': 34, 'label': 'PERSON'},
           {'start': 70, 'end': 75, 'label': 'ORDINAL'},
           {'start': 104, 'end': 109, 'label': 'ORDINAL'}],
  'hallucination ents': [0],
  'hallucinations': ['New'],
  'correctness': [True]},
 {'id': 2427,
  'pred': 'The Catholic Church should apologise to the families of unwed mothers who died at mother-and-baby homes, the Archbishop of Dublin, Rowan Martin, has said.',
  'ents': [{'start': 0, 'end': 19, 'label': 'ORG'},
           {'start': 123, 'end': 129, 'label': 'GPE'},
           {'start': 131, 'end': 136, 'label': 'PERSON'},
           {'start': 137, 'end': 143, 'label': 'PERSON'}],
  'hallucination ents': [1, 2],
  'hallucinations': ['Dublin', 'Rowan'],
  'correctness': [False, False]},
 {'id': 6220,
  'pred': 'The body of a man whose body was found at the site of a collapsed building in Swansea has been removed from the site.',
  'ents': [{'start': 78, 'end': 85, 'label': 'ORG'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []}, # wrong, but not hallucinated
 {'id': 5860,
  'pred': 'The government has announced a voluntary higher rate of the London living wage, which will rise to £9.40 an hour from 1 July.',
  'ents': [{'start': 60, 'end': 66, 'label': 'GPE'},
   {'start': 100, 'end': 104, 'label': 'MONEY'},
   {'start': 118, 'end': 124, 'label': 'DATE'}],
  'hallucination ents': [2],
  'hallucinations': ['from 1 July'],
  'correctness': [False]},
 {'id': 10579,
  'pred': 'Sydney has marked the first anniversary of the siege at the cafe where two women were killed by a gunman in the city last year.',
  'ents': [{'start': 0, 'end': 6, 'label': 'GPE'},
   {'start': 22, 'end': 27, 'label': 'ORDINAL'},
   {'start': 71, 'end': 74, 'label': 'CARDINAL'},
   {'start': 117, 'end': 126, 'label': 'DATE'}],
  'hallucination ents': [],
  'hallucinations': ['two women'],
  'correctness': []},
 {'id': 5455,
  'pred': 'As Abid Naseer went on trial in the United States for his role in an al-Qaeda terror plot to carry out a "mass-casualty" bomb attack in Manchester, BBC News looks at what police believe would have happened if he had not been caught.',
  'ents': [{'start': 3, 'end': 14, 'label': 'ORG'},
   {'start': 32, 'end': 49, 'label': 'GPE'},
   {'start': 69, 'end': 77, 'label': 'ORG'},
   {'start': 136, 'end': 146, 'label': 'GPE'},
   {'start': 148, 'end': 156, 'label': 'ORG'}],
  'hallucination ents': [4],
  'hallucinations': [],
  'correctness': [True]},
 {'id': 1168,
  'pred': 'Belfast-based Mexican restaurant chain Boojum has been sold for an undisclosed sum to a group of investors in the Republic of Ireland.',
  'ents': [{'start': 0, 'end': 7, 'label': 'GPE'},
   {'start': 14, 'end': 21, 'label': 'NORP'},
   {'start': 39, 'end': 45, 'label': 'ORG'},
   {'start': 110, 'end': 133, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []}, # Wrong
 {'id': 2407,
  'pred': "The world's biggest asset manager, BlackRock, is planning to cut up to 1,000 jobs as part of a cost-cutting drive, according to reports.",
  'ents': [{'start': 35, 'end': 44, 'label': 'ORG'},
   {'start': 65, 'end': 76, 'label': 'CARDINAL'}],
  'hallucination ents': [1],
  'hallucinations': ['up to 1,000 jobs'],
  'correctness': [False]},
 {'id': 416,
  'pred': 'League Two side Derby County have signed Dutch striker Danny Verheydt from Dutch top-flight side Maastricht for an undisclosed fee.',
  'ents': [{'start': 0, 'end': 10, 'label': 'ORG'}, # League Two
           {'start': 16, 'end': 28, 'label': 'GPE'}, # Derby County
           {'start': 41, 'end': 46, 'label': 'NORP'}, # Dutch
           {'start': 55, 'end': 60, 'label': 'PERSON'}, # Danny
           {'start': 61, 'end': 69, 'label': 'PERSON'},
           {'start': 75, 'end': 80, 'label': 'NORP'}, # Dutch
           {'start': 97, 'end': 107, 'label': 'GPE'}],
  'hallucination ents': [0, 1, 2, 3, 5],
  'hallucinations': ['League Two', 'Derby County', 'Dutch striker Danny Verheydt', 'Dutch top-flight', 'for an undisclosed fee.'],
  'correctness': [True, True, True, False, True]},
 {'id': 6824,
  'pred': 'Leigh Centurions have agreed a deal to sign Salford Red Devils half-back Ryan Chase on a free transfer after he was cleared to leave the club following a disciplinary hearing.',
  'ents': [{'start': 0, 'end': 5, 'label': 'PERSON'},
   {'start': 6, 'end': 16, 'label': 'PERSON'},
   {'start': 44, 'end': 62, 'label': 'ORG'},
   {'start': 63, 'end': 67, 'label': 'CARDINAL'},
   {'start': 73, 'end': 77, 'label': 'PERSON'},
   {'start': 78, 'end': 83, 'label': 'PERSON'}],
  'hallucination ents': [1, 4],
  'hallucinations': ['Centurions', 'Ryan'],
  'correctness': [True, False]},
 {'id': 1513,
  'pred': 'Manchester United have failed to complete the signing of Spain goalkeeper David de Gea from Manchester United to Real Madrid in a deal that was due to be completed on Monday night, but the transfer window is still open in Spain.',
  'ents': [{'start': 0, 'end': 17, 'label': 'ORG'},
   {'start': 57, 'end': 62, 'label': 'GPE'},
   {'start': 74, 'end': 79, 'label': 'PERSON'},
   {'start': 80, 'end': 82, 'label': 'PERSON'},
   {'start': 83, 'end': 86, 'label': 'PERSON'},
   {'start': 92, 'end': 109, 'label': 'ORG'},
   {'start': 113, 'end': 124, 'label': 'ORG'},
   {'start': 167, 'end': 179, 'label': 'TIME'},
   {'start': 222, 'end': 227, 'label': 'GPE'}],
  'hallucination ents': [0, 2, 5, 7],
  'hallucinations': [],
  'correctness': [True, True, True, False]},
 {'id': 1819,
  'pred': 'The brother of one of the victims of Bloody Sunday has said plans to hold a march by former soldiers in Londonderry are "totally insensitive" to the people of the city.',
  'ents': [{'start': 104, 'end': 115, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 560,
  'pred': 'South Africa will appoint a new head coach ahead of their tour of England next month after the end of the current one-day international and Test series against Sri Lanka, reports BBC Sport.',
  'ents': [{'start': 0, 'end': 12, 'label': 'GPE'},
   {'start': 66, 'end': 73, 'label': 'GPE'},
   {'start': 74, 'end': 84, 'label': 'DATE'},
   {'start': 114, 'end': 121, 'label': 'DATE'},
   {'start': 160, 'end': 169, 'label': 'GPE'},
   {'start': 179, 'end': 188, 'label': 'ORG'}],
  'hallucination ents': [2, 5],
  'hallucinations': ['next month'],
  'correctness': [True, True]},
 {'id': 4174,
  'pred': 'A British tourist who was jailed in Malaysia for indecency after stripping off at a mountain peak has left the country on a flight to Kuala Lumpur.',
  'ents': [{'start': 2, 'end': 9, 'label': 'NORP'},
   {'start': 36, 'end': 44, 'label': 'GPE'},
   {'start': 134, 'end': 146, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 2194,
  'pred': '"I\'m a very good storyteller," says David Chase, as he tells the story of his life.',
  'ents': [{'start': 36, 'end': 41, 'label': 'PERSON'},
   {'start': 42, 'end': 47, 'label': 'PERSON'}],
  'hallucination ents': [0],
  'hallucinations': ['David'],
  'correctness': [False]},
 {'id': 2517,
  'pred': "Saturday's League One game between Oldham Athletic and Bolton Wanderers has been postponed because of a waterlogged pitch.",
  'ents': [{'start': 0, 'end': 8, 'label': 'DATE'},
           {'start': 11, 'end': 21, 'label': 'CARDINAL'},
           {'start': 35, 'end': 50, 'label': 'ORG'},  # Oldham Athletic
           {'start': 55, 'end': 71, 'label': 'ORG'}], # Bolton Wanderers
  'hallucination ents': [0, 1, 2, 3],
  'hallucinations': [],
  'correctness': [True, True, True, False]},
 {'id': 922,
  'pred': 'The way GCSEs are graded in England has failed to meet five conditions set out by the exams watchdog Ofqual, according to a new report.',
  'ents': [{'start': 28, 'end': 35, 'label': 'GPE'},
   {'start': 55, 'end': 59, 'label': 'CARDINAL'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 10066,
  'pred': 'Police investigating the disappearance of a man who went missing in May have launched a national appeal for information about his disappearance.',
  'ents': [{'start': 68, 'end': 71, 'label': 'DATE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 # mark
 {'id': 1367,
  'pred': 'Jockey David Mullins rode Rule The World to victory in the Grand National at Aintree for the first time at the age of 19.',
  'ents': [{'start': 0, 'end': 6, 'label': 'PERSON'},
   {'start': 7, 'end': 12, 'label': 'PERSON'},
   {'start': 13, 'end': 20, 'label': 'PERSON'},
   {'start': 26, 'end': 40, 'label': 'PERSON'},
   {'start': 77, 'end': 84, 'label': 'LOC'},
   {'start': 118, 'end': 120, 'label': 'DATE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 11207,
  'pred': 'The jury in the trial of the man accused of carrying out the Boston Marathon bombings has heard graphic testimony about the injuries suffered by the victims of the attack.',
  'ents': [{'start': 61, 'end': 76, 'label': 'EVENT'}],
  'hallucination ents': [0],
  'hallucinations': [],
  'correctness': [True]},
 {'id': 694,
  'pred': "Increasing levels of carbon dioxide (CO2) in the world's oceans are making the waters more acidic than at any time since the start of the industrial revolution, according to a new report.",
  'ents': [],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 3211,
  'pred': 'A man has been charged in the US state of Michigan after a student with a severe peanut allergy was allegedly forced to eat peanut butter as part of a fraternity initiation ritual.',
  'ents': [{'start': 30, 'end': 32, 'label': 'GPE'},
   {'start': 42, 'end': 50, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 10524,
  'pred': "Edinburgh Zoo's giant panda Tian Tian is expected to give birth at the end of the month, the zoo has said.",
  'ents': [{'start': 0, 'end': 15, 'label': 'ORG'},
   {'start': 28, 'end': 37, 'label': 'PERSON'},
   {'start': 67, 'end': 87, 'label': 'DATE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 10488,
  'pred': 'Pupils who are able and talented are not being given enough support in the education system, according to the chief inspector of schools at Estyn.',
  'ents': [{'start': 140, 'end': 145, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 4809,
  'pred': 'US Secretary of State Rex Tillerson has said he was "convinced" by his wife to take up the job by President Donald Trump, who asked him to be his secretary of state.',
  'ents': [{'start': 0, 'end': 2, 'label': 'GPE'},
   {'start': 16, 'end': 21, 'label': 'ORG'},
   {'start': 22, 'end': 25, 'label': 'PERSON'},
   {'start': 26, 'end': 35, 'label': 'PERSON'},
   {'start': 108, 'end': 114, 'label': 'PERSON'},
   {'start': 115, 'end': 120, 'label': 'PERSON'},
   {'start': 159, 'end': 164, 'label': 'ORG'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 6228,
  'pred': 'A group of MPs has called on the government to legalise medical cannabis after a study found that one million people across the UK rely on the drug for medical reasons, but there is limited or no convincing evidence that it works.',
  'ents': [{'start': 98, 'end': 109, 'label': 'CARDINAL'},
   {'start': 128, 'end': 130, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 10517,
  'pred': 'Police ombudsman Michael Maguire has said he welcomes the re-opening of historical investigations into allegations of wrongdoing by the PSNI in Northern Ireland.',
  'ents': [{'start': 17, 'end': 24, 'label': 'PERSON'},
   {'start': 25, 'end': 32, 'label': 'PERSON'},
   {'start': 136, 'end': 140, 'label': 'ORG'},
   {'start': 144, 'end': 160, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 7247,
  'pred': 'Dundee Stars head coach Marc LeFebvre hopes his side can learn from their experience at the Elite League play-off finals and return to the finals next season.',
  'ents': [{'start': 24, 'end': 28, 'label': 'PERSON'},
   {'start': 29, 'end': 37, 'label': 'PERSON'},
   {'start': 88, 'end': 104, 'label': 'ORG'},
   {'start': 146, 'end': 157, 'label': 'DATE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 2724,
  'pred': 'Cardiff and its city region should be given more power by the Welsh assembly to work better with the "Northern Powerhouse" region of England, the chairman of the Northern Powerhouse group has said.',
  'ents': [{'start': 0, 'end': 7, 'label': 'ORG'},
   {'start': 62, 'end': 67, 'label': 'ORG'},
   {'start': 133, 'end': 140, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 4630,
  'pred': 'Paralympic gold medallist Bethany Brown says she will not change her bow to recurve in order to qualify for the 2016 Rio Games.',
  'ents': [{'start': 26, 'end': 33, 'label': 'PERSON'},
   {'start': 34, 'end': 39, 'label': 'PERSON'},
   {'start': 112, 'end': 116, 'label': 'DATE'},
   {'start': 117, 'end': 126, 'label': 'EVENT'}],
  'hallucination ents': [0],
  'hallucinations': ['Bethany'],
  'correctness': [False]},
 {'id': 6126,
  'pred': 'The Supreme Court has ruled that the assembly has the power to set minimum wages for agricultural workers in Wales.',
  'ents': [{'start': 0, 'end': 17, 'label': 'ORG'},
   {'start': 109, 'end': 114, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 3529,
  'pred': 'The daughter of the first baby to be born on an air ambulance has joined Loganair as a cabin crew attendant, 40 years after her birth.',
  'ents': [{'start': 20, 'end': 25, 'label': 'ORDINAL'},
   {'start': 73, 'end': 81, 'label': 'ORG'},
   {'start': 109, 'end': 117, 'label': 'DATE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 11006,
  'pred': 'Bristol City moved three points clear at the top of League One with a hard-fought victory over Ipswich Town.',
  'ents': [{'start': 0, 'end': 12, 'label': 'ORG'},
   {'start': 19, 'end': 24, 'label': 'CARDINAL'},
   {'start': 95, 'end': 107, 'label': 'GPE'}],
  'hallucination ents': [1],
  'hallucinations': ['moved three points clear'],
  'correctness': [False]},
 {'id': 8447,
  'pred': 'The White House has confirmed that President Barack Obama and First Lady Michelle Obama were among the celebrities who attended a private party in New York to celebrate their wedding anniversary.',
  'ents': [{'start': 0, 'end': 15, 'label': 'ORG'},
   {'start': 45, 'end': 51, 'label': 'PERSON'},
   {'start': 52, 'end': 57, 'label': 'PERSON'},
   {'start': 73, 'end': 81, 'label': 'PERSON'},
   {'start': 82, 'end': 87, 'label': 'PERSON'},
   {'start': 147, 'end': 155, 'label': 'GPE'}],
  'hallucination ents': [7],
  'hallucinations': ['in New York', 'to celebrate their wedding anniversary'],
  'correctness': [False]},
 {'id': 9980,
  'pred': 'Tottenham Hotspur have agreed to pay £1m to the family of a 17-year-old boy who suffered brain injuries after signing for the club despite having a potentially fatal heart condition.',
  'ents': [{'start': 0, 'end': 9, 'label': 'PERSON'},
   {'start': 10, 'end': 17, 'label': 'PERSON'},
   {'start': 37, 'end': 40, 'label': 'MONEY'},
   {'start': 60, 'end': 71, 'label': 'DATE'}],
  'hallucination ents': [2, 3],
  'hallucinations': ['£1m', 'a 17-year-old'], # '17-year-old' is wrong now, now he is 27
  'correctness': [False, False]},
 {'id': 8685,
  'pred': 'League One side Wigan Athletic have signed Bournemouth midfielder Chris MacDonald on loan until the end of the season.',
  'ents': [{'start': 16, 'end': 30, 'label': 'LOC'},
  {'start': 43, 'end': 54, 'label': 'PERSON'},
  {'start': 66, 'end': 71, 'label': 'PERSON'},
  {'start': 72, 'end': 81, 'label': 'PERSON'},
  {'start': 96, 'end': 117, 'label': 'DATE'}],
  'hallucination ents': [2, 4],
  'hallucinations': ['Chris', 'on loan until the end of the season'],
  'correctness': [False, False]},
 {'id': 11080,
  'pred': 'The new head of the Public Services Complaints Commission for Wales has been appointed.',
  'ents': [{'start': 16, 'end': 67, 'label': 'ORG'}],
  'hallucination ents': [0],
  'hallucinations': ['the Public Services Complaints Commission'],
  'correctness': [False]},
 {'id': 8558,
  'pred': 'A Conservative MP has said he is "personally very disappointed" that plans to devolve tax-raising powers to the Welsh Government without a referendum have been rejected by the Welsh Secretary.',
  'ents': [{'start': 108, 'end': 128, 'label': 'ORG'},
   {'start': 176, 'end': 181, 'label': 'ORG'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 6931,
  'pred': "Prime Minister Narendra Modi's visit to Silicon Valley is the first by an Indian prime minister to the US in more than a decade, and the first since he took office in May 2014.",
  'ents': [{'start': 15, 'end': 23, 'label': 'PERSON'},
   {'start': 24, 'end': 28, 'label': 'PERSON'},
   {'start': 40, 'end': 54, 'label': 'LOC'},
   {'start': 74, 'end': 80, 'label': 'NORP'},
   {'start': 103, 'end': 105, 'label': 'GPE'},
   {'start': 109, 'end': 127, 'label': 'DATE'},
   {'start': 137, 'end': 142, 'label': 'ORDINAL'},
   {'start': 167, 'end': 175, 'label': 'DATE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 6373,
  'pred': 'Police investigating the disappearance of an RAF serviceman who went missing after a night out in Bury St Edmunds have released CCTV images of three teenagers they want to trace.',
  'ents': [{'start': 45, 'end': 48, 'label': 'ORG'},
   {'start': 83, 'end': 90, 'label': 'TIME'},
   {'start': 98, 'end': 113, 'label': 'ORG'},
   {'start': 128, 'end': 132, 'label': 'ORG'},
   {'start': 143, 'end': 148, 'label': 'CARDINAL'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 9961,
  'pred': 'Three Syrian refugees who overpowered a suspected terror suspect in the German city of Leipzig and tied him up have been hailed as "heroes" by the country\'s media and politicians.',
  'ents': [{'start': 0, 'end': 5, 'label': 'CARDINAL'},
   {'start': 6, 'end': 12, 'label': 'NORP'},
   {'start': 72, 'end': 78, 'label': 'NORP'},
   {'start': 87, 'end': 94, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 6235,
  'pred': "China's Tiangong-1 space station will burn up when it re-enters the Earth's atmosphere later this week, according to the country's space agency.",
  'ents': [{'start': 0, 'end': 5, 'label': 'GPE'},
   {'start': 68, 'end': 73, 'label': 'LOC'},
   {'start': 87, 'end': 102, 'label': 'DATE'}],
  'hallucination ents': [2],
  'hallucinations': [],
  'correctness': [False]},
 {'id': 5414,
  'pred': 'Chinese search giant Baidu has unveiled a voice-activated artificial intelligence (AI) that can be used on a smart speaker.',
  'ents': [{'start': 0, 'end': 7, 'label': 'NORP'},
   {'start': 21, 'end': 26, 'label': 'ORG'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 7987,
  'pred': "The Northern Ireland Chamber of Commerce has said that the renegotiation of the UK's relationship with the European Union (EU) is having little impact on day-to-day business in Northern Ireland.",
  'ents': [{'start': 0, 'end': 40, 'label': 'ORG'},
   {'start': 80, 'end': 82, 'label': 'GPE'},
   {'start': 103, 'end': 121, 'label': 'ORG'},
   {'start': 123, 'end': 125, 'label': 'ORG'},
   {'start': 154, 'end': 164, 'label': 'DATE'},
   {'start': 177, 'end': 193, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 8495,
  'pred': 'British pair Naomi Broady and Heather Watson were beaten 6-3 6-2 by Chan Yung-jan and Chan Shuai-Wei in the doubles final of the Hong Kong Open.',
  'ents': [{'start': 0, 'end': 7, 'label': 'NORP'},
   {'start': 13, 'end': 18, 'label': 'PERSON'},
   {'start': 19, 'end': 25, 'label': 'PERSON'},
   {'start': 30, 'end': 37, 'label': 'PERSON'},
   {'start': 38, 'end': 44, 'label': 'PERSON'},
   {'start': 57, 'end': 60, 'label': 'CARDINAL'},
   {'start': 68, 'end': 72, 'label': 'PERSON'},
   {'start': 73, 'end': 81, 'label': 'PERSON'},
   {'start': 86, 'end': 90, 'label': 'PERSON'},
   {'start': 91, 'end': 100, 'label': 'PERSON'},
   {'start': 125, 'end': 143, 'label': 'ORG'}],
  'hallucination ents': [1, 3, 5, 7, 9, 10],
  'hallucinations': ['Naomi', 'Heather', '6-3', 'Yung-jan', 'Shuai-Wei', 'the Hong Kong Open'],
  'correctness': [True, True, False, True, False, True]},
 {'id': 6486,
  'pred': 'North Korean leader Kim Jong-un has threatened to launch a "fire and fury" missile attack on the US island of Guam, in what has been described as a warning to President Donald Trump.',
  'ents': [{'start': 0, 'end': 12, 'label': 'NORP'},
   {'start': 20, 'end': 23, 'label': 'PERSON'},
   {'start': 24, 'end': 31, 'label': 'PERSON'},
   {'start': 97, 'end': 99, 'label': 'GPE'},
   {'start': 110, 'end': 114, 'label': 'GPE'},
   {'start': 169, 'end': 175, 'label': 'PERSON'},
   {'start': 176, 'end': 181, 'label': 'PERSON'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 1584,
  'pred': 'Northampton Town have been placed into administration by Northampton Borough Council.',
  'ents': [{'start': 0, 'end': 11, 'label': 'PERSON'},
   {'start': 12, 'end': 16, 'label': 'PERSON'},
   {'start': 57, 'end': 84, 'label': 'ORG'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 9526,
  'pred': "The first World Cup was held in India in 1950, and the country's first football team, the Indian Football Federation (IFF), won a place in the final of the inaugural tournament.",
  'ents': [{'start': 4, 'end': 9, 'label': 'ORDINAL'},
   {'start': 10, 'end': 19, 'label': 'EVENT'},
   {'start': 32, 'end': 37, 'label': 'GPE'},
   {'start': 41, 'end': 45, 'label': 'DATE'},
   {'start': 65, 'end': 70, 'label': 'ORDINAL'},
   {'start': 86, 'end': 116, 'label': 'ORG'},
   {'start': 118, 'end': 121, 'label': 'ORG'}],
  'hallucination ents': [3, 5, 6],
  'hallucinations': ['The first World Cup was held in India in 1950', 'Indian Football Federation (IFF)'],
  'correctness': [False, False, False]},
 {'id': 10958,
  'pred': 'Police have said the death of a man at a hotel in Edinburgh is not being treated as suspicious after his body was found.',
  'ents': [{'start': 50, 'end': 59, 'label': 'GPE'}],
  'hallucination ents': [0],
  'hallucinations': ['in Edinburgh'],
  'correctness': [True]},
 {'id': 11017,
  'pred': 'The RSPCA has called on Wrexham council to drop plans to ban dogs from play areas and sports pitches, saying it could punish responsible dog owners.',
  'ents': [{'start': 4, 'end': 9, 'label': 'ORG'},
   {'start': 24, 'end': 39, 'label': 'ORG'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 11037,
  'pred': 'A buzzard has been released back into the wild after becoming stuck in a fence during a chase in Cheshire.',
  'ents': [{'start': 97, 'end': 105, 'label': 'GPE'}],
  'hallucination ents': [0],
  'hallucinations': ['in Cheshire'],
  'correctness': [False]},
 {'id': 5148,
  'pred': 'Five children at a Neath Port Talbot nursery have been diagnosed with measles, health officials have said.',
  'ents': [{'start': 0, 'end': 4, 'label': 'CARDINAL'},
   {'start': 19, 'end': 36, 'label': 'FAC'}],
  'hallucination ents': [0],
  'hallucinations': ['Five children'],
  'correctness': [True]},
 {'id': 10380,
  'pred': 'A Palestinian woman who was seriously injured in a firebombing in the occupied West Bank that killed her toddler son and her husband has died in an Israeli hospital, her family says.',
  'ents': [{'start': 2, 'end': 13, 'label': 'NORP'},
   {'start': 79, 'end': 88, 'label': 'GPE'},
   {'start': 148, 'end': 155, 'label': 'NORP'}],
  'hallucination ents': [],
  'hallucinations': ['in a firebombing',
   'in an Israeli hospital',
   'her family says'],
  'correctness': []},
 {'id': 5747,
  'pred': 'Six teenagers have died from carbon monoxide poisoning after a party at a remote hut in northern Germany, police say.',
  'ents': [{'start': 0, 'end': 3, 'label': 'CARDINAL'},
   {'start': 97, 'end': 104, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 10943,
  'pred': 'More than 100 people have died in Vanuatu after a cyclone hit the Pacific island nation on Friday, officials say.',
  'ents': [{'start': 0, 'end': 13, 'label': 'CARDINAL'},
   {'start': 34, 'end': 41, 'label': 'GPE'},
   {'start': 66, 'end': 80, 'label': 'LOC'},
   {'start': 91, 'end': 97, 'label': 'DATE'}],
  'hallucination ents': [0, 3],
  'hallucinations': ['More than 100 people have died', 'on Friday'],
  'correctness': [False, False]},
 {'id': 9369,
  'pred': 'A Canadian man has described the moment he was struck by lightning during his son-in-law\'s wedding ceremony - just as he was about to tell him he was "a lucky guy".',
  'ents': [{'start': 2, 'end': 10, 'label': 'NORP'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 8242,
  'pred': "Nigeria's main opposition party has lost control of all but one of the country's 36 states in the first elections since the end of military rule in 1999.",
  'ents': [{'start': 0, 'end': 7, 'label': 'GPE'},
   {'start': 60, 'end': 63, 'label': 'CARDINAL'},
   {'start': 81, 'end': 83, 'label': 'CARDINAL'},
   {'start': 98, 'end': 103, 'label': 'ORDINAL'},
   {'start': 148, 'end': 152, 'label': 'DATE'}],
  'hallucination ents': [2],
  'hallucinations': ["lost control of all but one of the country's 36 states",
   'in the first elections'],
  'correctness': [True]},
 {'id': 9185,
  'pred': 'A pine marten has given birth to its first baby in Wales in two years, according to a wildlife charity.',
  'ents': [{'start': 37, 'end': 42, 'label': 'ORDINAL'},
   {'start': 51, 'end': 56, 'label': 'GPE'},
   {'start': 60, 'end': 69, 'label': 'DATE'}],
  'hallucination ents': [],
  'hallucinations': ['in two years'],
  'correctness': []},
 {'id': 3611,
  'pred': "A Rwandan man who called for the mass murder of Tutsi people during the country's genocide has been sentenced to life in prison by a court in Rwanda.",
  'ents': [{'start': 2, 'end': 9, 'label': 'NORP'},
   {'start': 142, 'end': 148, 'label': 'GPE'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 1754,
  'pred': 'Prince William has said he takes his duties as a monarch "very seriously" but does not take them "completely to heart" as he prepares to take on more responsibility.',
  'ents': [{'start': 0, 'end': 6, 'label': 'PERSON'},
   {'start': 7, 'end': 14, 'label': 'PERSON'}],
  'hallucination ents': [],
  'hallucinations': [],
  'correctness': []},
 {'id': 10244,
  'pred': 'A man has been arrested in China for allegedly driving a Lamborghini at more than twice the legal speed limit around Beijing, state media report.',
  'ents': [{'start': 27, 'end': 32, 'label': 'GPE'},
   {'start': 57, 'end': 68, 'label': 'LOC'},
   {'start': 117, 'end': 124, 'label': 'GPE'}],
  'hallucination ents': [1],
  'hallucinations': ['driving a Lamborghini', 'more than twice'],
  'correctness': [False]},
 {'id': 4850,
 'pred': 'The deaths of two pensioners at their home in Ballycastle, County Antrim, are being treated as unexplained, police have said.',
 'ents': [{'start': 14, 'end': 17, 'label': 'CARDINAL'},
  {'start': 46, 'end': 57, 'label': 'GPE'},
  {'start': 59, 'end': 72, 'label': 'PERSON'}],
 'hallucination ents': [2],
 'hallucinations': [],
 'correctness': [True]},
 {'id': 1575,
 'pred': 'Rules requiring schools to promote "fundamental British values", including the role of law, are "deeply ill-considered" and "political posturing", a teachers\' union has warned.',
 'ents': [{'start': 48, 'end': 55, 'label': 'NORP'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []},
 {'id': 5686,
 'pred': 'Politicians on both sides of the EU referendum will have to work hard to ensure that the public is not left disappointed by the outcome of the referendum.',
 'ents': [{'start': 33, 'end': 35, 'label': 'ORG'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []},
 {'id': 9254,
 'pred': 'Two men have been found guilty of murdering a man who was shot dead after leaving a pub in Greater Manchester.',
 'ents': [{'start': 0, 'end': 3, 'label': 'CARDINAL'},
  {'start': 91, 'end': 109, 'label': 'GPE'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []}, # wrong?
 {'id': 2759,
 'pred': 'A chameleon has been put down by RSPCA Cymru after it was found injured and abandoned in a Cardiff park.',
 'ents': [{'start': 33, 'end': 44, 'label': 'ORG'},
  {'start': 91, 'end': 98, 'label': 'ORG'}],
 'hallucination ents': [1],
 'hallucinations': [],
 'correctness': [True]},
 {'id': 8953,
 'pred': 'The BBC is to create a new Ideas Service to bring the "best from Britain\'s great cultural institutions and thinkers" to everyone, as part of a major overhaul of the corporation.',
 'ents': [{'start': 4, 'end': 7, 'label': 'ORG'},
  {'start': 27, 'end': 40, 'label': 'ORG'},
  {'start': 65, 'end': 72, 'label': 'GPE'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []},
 {'id': 7619,
 'pred': 'An NHS trust has been ordered to pay £1.6m in damages to the family of a baby girl who was left paralysed after being born with severe brain injuries.',
 'ents': [{'start': 3, 'end': 6, 'label': 'ORG'},
  {'start': 37, 'end': 42, 'label': 'MONEY'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []},
 {'id': 700,
 'pred': 'The war on drugs in the United States is at a draw, with more than 600,000 people in jail, $40bn (Â£27bn) of spending a year in prison, and the number of people caught up in the drug trade increasing by the day.',
 'ents': [{'start': 20, 'end': 37, 'label': 'GPE'},
  {'start': 57, 'end': 74, 'label': 'CARDINAL'},
  {'start': 91, 'end': 96, 'label': 'MONEY'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []},
 {'id': 3379,
 'pred': 'Stagecoach has announced plans to invest more than £100m in new buses and coaches over the next three years.',
 'ents': [{'start': 0, 'end': 10, 'label': 'ORG'},
  {'start': 41, 'end': 56, 'label': 'MONEY'},
  {'start': 87, 'end': 107, 'label': 'DATE'}],
 'hallucination ents': [1, 2],
 'hallucinations': ['more than £100m', 'over the next three years'],
 'correctness': [False, False]},
 {'id': 1110,
 'pred': 'Chelsea manager Jose Mourinho said the Premier League is "not the best league in the world" after his side\'s 1-0 defeat by West Ham was like "football from the 19th Century".',
 'ents': [{'start': 0, 'end': 7, 'label': 'ORG'},
  {'start': 16, 'end': 20, 'label': 'PERSON'},
  {'start': 21, 'end': 29, 'label': 'PERSON'},
  {'start': 35, 'end': 53, 'label': 'ORG'},
  {'start': 109, 'end': 112, 'label': 'CARDINAL'},
  {'start': 123, 'end': 131, 'label': 'LOC'},
  {'start': 156, 'end': 172, 'label': 'DATE'}],
 'hallucination ents': [4],
 'hallucinations': ['1-0 defeat by West Ham'],
 'correctness': [False]},
 {'id': 3180,
 'pred': 'Burton Albion goalkeeper Connor McLaughlin is one of eight players who will leave the club at the end of the season after being released by the Championship club.',
 'ents': [{'start': 0, 'end': 13, 'label': 'ORG'},
  {'start': 25, 'end': 31, 'label': 'PERSON'},
  {'start': 32, 'end': 42, 'label': 'PERSON'},
  {'start': 53, 'end': 58, 'label': 'CARDINAL'},
  {'start': 94, 'end': 115, 'label': 'DATE'}],
 'hallucination ents': [1, 3, 4],
 'hallucinations': ['Connor', 'eight players', 'at the end of the season'],
 'correctness': [False, False, True]},
 {'id': 7021,
 'pred': 'A woman has been given a suspended prison sentence for defrauding a charity set up in memory of a Cumbrian teenager who died from cancer.',
 'ents': [{'start': 98, 'end': 106, 'label': 'NORP'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []},
 {'id': 7021,
 'pred': 'A woman has been given a suspended prison sentence for defrauding a charity set up in memory of a Cumbrian teenager who died from cancer.',
 'ents': [{'start': 98, 'end': 106, 'label': 'NORP'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []},
 {'id': 3030,
 'pred': "Former England and Tottenham midfielder David Ginola has launched his bid to challenge Sepp Blatter for the presidency of world football's governing body Fifa.",
 'ents': [{'start': 7, 'end': 14, 'label': 'GPE'},
  {'start': 19, 'end': 28, 'label': 'ORG'},
  {'start': 40, 'end': 45, 'label': 'PERSON'},
  {'start': 46, 'end': 52, 'label': 'PERSON'},
  {'start': 87, 'end': 91, 'label': 'PERSON'},
  {'start': 92, 'end': 99, 'label': 'PERSON'},
  {'start': 154, 'end': 158, 'label': 'PERSON'}],
 'hallucination ents': [0, 4],
 'hallucinations': ['England'],
 'correctness': [False, True]},
 {'id': 8461,
 'pred': 'Nicola Sturgeon has said she is "angry" at the UK government\'s "walk-on-by attitude" to the refugee crisis after the death of a three-year-old Syrian boy washed up on a beach in Turkey.',
 'ents': [{'start': 0, 'end': 6, 'label': 'PERSON'},
  {'start': 7, 'end': 15, 'label': 'PERSON'},
  {'start': 47, 'end': 49, 'label': 'GPE'},
  {'start': 128, 'end': 142, 'label': 'DATE'},
  {'start': 143, 'end': 149, 'label': 'NORP'},
  {'start': 178, 'end': 184, 'label': 'GPE'}],
 'hallucination ents': [0, 3, 4, 5],
 'hallucinations': ['three-year-old'],
 'correctness': [True, True, True, True]},
 {'id': 191,
 'pred': 'Work to improve a roundabout on the A55 and A483 in Chester has been delayed for a second time, causing "chaos" for motorists.',
 'ents': [{'start': 36, 'end': 39, 'label': 'ORG'},
  {'start': 44, 'end': 48, 'label': 'ORG'},
  {'start': 52, 'end': 59, 'label': 'GPE'},
  {'start': 83, 'end': 89, 'label': 'ORDINAL'}],
 'hallucination ents': [3],
 'hallucinations': ['for a second time'],
 'correctness': [False]},
{'id': 8770,
 'pred': 'Everton have signed Oxford United midfielder Luke Kenny on loan until the end of the season, with the option of a permanent move.',
 'ents': [{'start': 0, 'end': 7, 'label': 'GPE'},
  {'start': 20, 'end': 33, 'label': 'ORG'},
  {'start': 45, 'end': 49, 'label': 'PERSON'},
  {'start': 50, 'end': 55, 'label': 'PERSON'},
  {'start': 70, 'end': 91, 'label': 'DATE'}],
 'hallucination ents': [0, 1, 2, 4],
 'hallucinations': ['Everton', 'Oxford United', 'midfielder', 'Luke', 'on loan', 'until the end of the season', 'with the option of a permanent move'],
 'correctness': [False, True, False, True]},
{'id': 5620,
 'pred': 'A second sinkhole has opened up in Manchester city centre, the latest in a series of sinkholes to open up in the city.',
 'ents': [{'start': 2, 'end': 8, 'label': 'ORDINAL'},
  {'start': 35, 'end': 45, 'label': 'GPE'}],
 'hallucination ents': [0],
 'hallucinations': ['second'],
 'correctness': [False]},
 {'id': 1948,
 'pred': 'A mother and her partner have appeared in court charged in connection with the death of a six-year-old boy who was found dead at a water park.',
 'ents': [{'start': 90, 'end': 102, 'label': 'DATE'}],
 'hallucination ents': [0],
 'hallucinations': [],
 'correctness': [False]},
 {'id': 10,
 'pred': 'A hospital where a patient died and another was left sitting on a bedpan for more than an hour in the emergency department has been ordered to improve its A&E services after inspectors found poor standards of care.',
 'ents': [{'start': 77, 'end': 94, 'label': 'TIME'},
  {'start': 155, 'end': 158, 'label': 'ORG'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []},
 {'id': 2156,
 'pred': "The public has been asked to vote for their favourite bird to be named as the UK's national bird ahead of the general election.",
 'ents': [{'start': 78, 'end': 80, 'label': 'GPE'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []},
 {'id': 2991,
 'pred': "A man who hid his girlfriend's body in a cupboard and took her benefits for several months after her death has been jailed.",
 'ents': [{'start': 76, 'end': 90, 'label': 'DATE'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []},
 {'id': 3088,
 'pred': "Adele's new single, Hello, has become the fastest-selling single of the year in the UK, with more than 200,000 downloads in its first week of release.",
 'ents': [{'start': 0, 'end': 5, 'label': 'PERSON'},
  {'start': 68, 'end': 76, 'label': 'DATE'},
  {'start': 84, 'end': 86, 'label': 'GPE'},
  {'start': 93, 'end': 110, 'label': 'CARDINAL'},
  {'start': 124, 'end': 138, 'label': 'DATE'}],
 'hallucination ents': [3],
 'hallucinations': ['the fastest-selling single of the year', 'more than 200,000 downloads in its first week of release'],
 'correctness': [True]},
 {'id': 10306,
 'pred': 'Taliban leader Mullah Akhtar Mansour and his bodyguards have been shot dead in the Pakistani city of Quetta, officials say, in the first such incident since his appointment last month.',
 'ents': [{'start': 0, 'end': 7, 'label': 'ORG'},
  {'start': 15, 'end': 21, 'label': 'PERSON'},
  {'start': 22, 'end': 28, 'label': 'PERSON'},
  {'start': 29, 'end': 36, 'label': 'PERSON'},
  {'start': 83, 'end': 92, 'label': 'NORP'},
  {'start': 101, 'end': 107, 'label': 'GPE'},
  {'start': 131, 'end': 136, 'label': 'ORDINAL'},
  {'start': 173, 'end': 183, 'label': 'DATE'}],
 'hallucination ents': [2],
 'hallucinations': ['have been shot dead', 'officials say', 'Akhtar'],
 'correctness': [True]},
 {'id': 5290,
 'pred': 'A hacking group based in North Korea is believed to have been behind the global ransomware attack that hit the NHS in May, the BBC has learned.',
 'ents': [{'start': 25, 'end': 36, 'label': 'GPE'},
  {'start': 111, 'end': 114, 'label': 'ORG'},
  {'start': 118, 'end': 121, 'label': 'DATE'},
  {'start': 127, 'end': 130, 'label': 'ORG'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []},
 {'id': 7871,
 'pred': "A man in his 20s has been arrested on suspicion of attempted murder after a man was stabbed outside a McDonald's in Londonderry.",
 'ents': [{'start': 13, 'end': 16, 'label': 'DATE'},
  {'start': 102, 'end': 110, 'label': 'ORG'},
  {'start': 116, 'end': 127, 'label': 'GPE'}],
 'hallucination ents': [1, 2],
 'hallucinations': ['has been arrested', "outside a McDonald's in Londonderry."],
 'correctness': [True, False]},
 {'id': 6041,
 'pred': 'The owner of Merlin Entertainments is to appear in court over a rollercoaster crash at Alton Towers in which five people were injured.',
 'ents': [{'start': 13, 'end': 34, 'label': 'ORG'},
  {'start': 87, 'end': 99, 'label': 'ORG'},
  {'start': 109, 'end': 113, 'label': 'CARDINAL'}],
 'hallucination ents': [0, 1],
 'hallucinations': ['The owner of Merlin Entertainments is to appear in court', 'at Alton Towers'],
 'correctness': [True, True]},
 {'id': 3466,
 'pred': 'The body of a man has been found off the coast of St Fergus in Aberdeenshire, police have confirmed.',
 'ents': [{'start': 50, 'end': 59, 'label': 'GPE'},
  {'start': 63, 'end': 76, 'label': 'GPE'}],
 'hallucination ents': [1],
 'hallucinations': ['in Aberdeenshire'],
 'correctness': [True]},
 {'id': 5706,
 'pred': "Kenya's main opposition leader, Raila Odinga, has called for a one-day strike on Monday to protest against the result of last week's presidential election.",
 'ents': [{'start': 0, 'end': 5, 'label': 'GPE'},
  {'start': 32, 'end': 37, 'label': 'PERSON'},
  {'start': 38, 'end': 44, 'label': 'PERSON'},
  {'start': 63, 'end': 70, 'label': 'DATE'},
  {'start': 81, 'end': 87, 'label': 'DATE'},
  {'start': 121, 'end': 132, 'label': 'DATE'}],
 'hallucination ents': [1, 3, 4],
 'hallucinations': ['Raila', 'one-day strike on Monday'],
 'correctness': [True, False, False]},
 {'id': 8702,
 'pred': 'Eritrea has accused the United Nations of carrying out an "unprecedented attack" on its government after it published a report accusing the Eritrean government of human rights abuses.',
 'ents': [{'start': 0, 'end': 7, 'label': 'GPE'},
  {'start': 20, 'end': 38, 'label': 'ORG'},
  {'start': 140, 'end': 148, 'label': 'NORP'}],
 'hallucination ents': [],
 'hallucinations': ['"unprecedented attack"'],
 'correctness': []},
 {'id': 9128,
 'pred': 'More than a quarter of teachers in Wales have been banned from the profession for misconduct involving sexual content on social media in the past three years.',
 'ents': [{'start': 0, 'end': 19, 'label': 'DATE'},
  {'start': 35, 'end': 40, 'label': 'GPE'},
  {'start': 137, 'end': 157, 'label': 'DATE'}],
 'hallucination ents': [0, 2],
 'hallucinations': ['More than a quarter of ', 'in the past three years.'],
 'correctness': [False, False]}
]

In [12]:
# Number of entries in `annotated`. This counts list entries, not distinct
# ids: id 7021 appears in two consecutive entries of the list above.
len(annotated)

95

In [13]:
# Position of the example to inspect; the cells below use it to index
# xsum_pred_ents, xsum_target, xsum_preds and xsum_source.
# NOTE(review): id 8770 already has an entry in `annotated` -- confirm this
# hard-coded value is intentional.
ID = 8770
print(ID)

8770


In [14]:
# Build an empty annotation record for prediction ID, using the same keys as
# the entries of `annotated`: the entity spans found by spaCy plus three
# empty fields ('hallucination ents', 'hallucinations', 'correctness').
new_dict = {}
new_dict['id'] = xsum_pred_ents[ID]['id']
new_dict['pred'] = xsum_pred_ents[ID]['pred']
new_dict['ents'] = []

for ent in nlp(new_dict['pred']).to_json()['ents']:
    if ent['label'] != 'PERSON':
        # Non-PERSON entities are kept exactly as spaCy reports them.
        new_dict['ents'].append(ent)
    else:
        # A multi-word PERSON span is split into one entry per name part
        # (e.g. "Luke Kenny" -> "Luke", "Kenny").
        name_parts = new_dict['pred'][ent['start']: ent['end']].split()
        init_start = ent['start']
        for p in name_parts:
            # Locate the part inside the entity span rather than assuming the
            # parts are separated by exactly one space: split() also splits on
            # runs of whitespace, tabs and newlines, and a fixed "+ 1" step
            # would then shift every following offset.
            init_start = new_dict['pred'].find(p, init_start, ent['end'])
            new_dict['ents'].append({'start': init_start, 'end': init_start + len(p), 'label': 'PERSON'})
            # Continue searching right after the part that was just recorded.
            init_start = init_start + len(p)

# Empty annotation fields, present in every `annotated` entry.
new_dict['hallucination ents'] = []
new_dict['hallucinations'] = []
new_dict['correctness'] = []

In [15]:
new_dict

{'id': 8770,
 'pred': 'Everton have signed Oxford United midfielder Luke Kenny on loan until the end of the season, with the option of a permanent move.',
 'ents': [{'start': 0, 'end': 7, 'label': 'GPE'},
  {'start': 20, 'end': 33, 'label': 'ORG'},
  {'start': 45, 'end': 49, 'label': 'PERSON'},
  {'start': 50, 'end': 55, 'label': 'PERSON'},
  {'start': 70, 'end': 91, 'label': 'DATE'}],
 'hallucination ents': [],
 'hallucinations': [],
 'correctness': []}

In [16]:
# Show every entity span of the new record next to its position in
# new_dict['ents'].
for idx, span in enumerate(new_dict['ents']):
    span_text = new_dict['pred'][span['start']: span['end']]
    print('{} - {}'.format(idx, span_text))

# Then print the target line, the prediction line and the source line for
# the same example, each preceded by a blank line.
for text in (xsum_target[ID], xsum_preds[ID], xsum_source[ID]):
    print()
    print(text)

0 - Everton
1 - Oxford United
2 - Luke
3 - Kenny
4 - the end of the season

Oxford United have extended the loan of right-back Jonjoe Kenny from Premier League side Everton until the end of the season.

Everton have signed Oxford United midfielder Luke Kenny on loan until the end of the season, with the option of a permanent move.

The 19-year-old will now be eligible to play against Barnsley in the Football League Trophy final on Sunday. Kenny, who joined up with the England Under-20 squad over Easter, has so far made 13 appearances for the U's. "He has done really well for us and is enjoying his football," said Oxford head coach Michael Appleton. "He is highly thought of at Goodison Park and has a big future."


In [17]:
# import json

# with open('annotated.json', 'w') as fout:
#     json.dump(annotated , fout)