In [1]:
import json

#### Read Google Dataset

In [3]:
# Load the two human-annotation files from the xsum_hallucination_annotations
# folder. The context managers close each file handle as soon as it has been
# parsed instead of leaving it open until garbage collection.
with open('../Dataset/xsum_hallucination_annotations/factuality_annotations_xsum_summaries.json') as factuality_file:
    factuality_data = json.load(factuality_file)
with open('../Dataset/xsum_hallucination_annotations/hallucination_annotations_xsum_summaries.json') as hallucination_file:
    hallucination_data = json.load(hallucination_file)

In [4]:
# Number of annotation records in each of the two loaded files.
print(len(factuality_data))
print(len(hallucination_data))

5597
11185


In [5]:
# Show the first factuality record to see which fields an annotation carries.
factuality_data[0]

{'bbcid': 29911712,
 'system': 'BERTS2S',
 'summary': 'more than 50 pupils at a bristol academy have been sent home from school because of a lack of uniform.',
 'is_factual': 'no',
 'worker_id': 'wid_0'}

In [6]:
# Show the first hallucination record to see which fields an annotation carries.
hallucination_data[0]

{'bbcid': 34687720,
 'system': 'BERTS2S',
 'summary': 'rory mcilroy will take a one-shot lead into the final round of the wgc-hsbc champions after carding a three-under',
 'hallucination_type': 'extrinsic',
 'hallucinated_span': 'rory mcilroy will take a one-shot lead into the final round of the wgc-hsbc champions after carding a three-under',
 'worker_id': 'wid_0'}

#### 

#### Read Calculated Probability

In [7]:
# Load the pre-computed entity probabilities. The context manager closes the
# file handle once the JSON has been parsed.
with open('google_data_with_proba.json') as proba_file:
    google_data_with_proba = json.load(proba_file)

In [8]:
# Inspect the entry stored for article '34687720' under the 'Gold' key
# (presumably the reference summary — TODO confirm). Note the article id is a
# string key here, whereas the annotation records store 'bbcid' as an int.
google_data_with_proba['34687720']['Gold']

{'summary': 'rory mcilroy moved to within a shot of joint leaders victor dubuisson and jaco van zyl after the third round of the turkish airlines open.',
 'summary_upper': 'Rory McIlroy moved to within a shot of joint leaders Victor Dubuisson and Jaco van Zyl after the third round of the Turkish Airlines open .',
 'ents': [{'start': 0,
   'end': 4,
   'label': -1,
   'type': 'PERSON',
   'ent': 'Rory',
   'prior': 0.380859375,
   'posterior': 0.93017578125},
  {'start': 5,
   'end': 12,
   'label': -1,
   'type': 'PERSON',
   'ent': 'McIlroy',
   'prior': 0.9189453125,
   'posterior': 0.78173828125},
  {'start': 53,
   'end': 59,
   'label': -1,
   'type': 'PERSON',
   'ent': 'Victor',
   'prior': 0.0023136138916015625,
   'posterior': 0.000522613525390625},
  {'start': 60,
   'end': 69,
   'label': -1,
   'type': 'PERSON',
   'ent': 'Dubuisson',
   'prior': 0.97119140625,
   'posterior': 0.82958984375},
  {'start': 74,
   'end': 78,
   'label': -1,
   'type': 'PERSON',
   'ent': 'Jaco

#### Calculate Factuality Correlation

In [11]:
# Show the second record: same article and system as record 0, but a
# different worker_id, i.e. one summary has several annotation records.
factuality_data[1]

{'bbcid': 29911712,
 'system': 'BERTS2S',
 'summary': 'more than 50 pupils at a bristol academy have been sent home from school because of a lack of uniform.',
 'is_factual': 'no',
 'worker_id': 'wid_1'}

In [10]:
# Group the factuality judgements by article and system:
#     factuality[bbcid][system] -> list of bool, one entry per annotation record.
factuality = {}
for i, f in enumerate(factuality_data):
    votes = factuality.setdefault(f['bbcid'], {}).setdefault(f['system'], [])

    label = f['is_factual']
    if label == 'yes':
        votes.append(True)
    elif label == 'no' or label is None:
        # A missing judgement is counted as "not factual", exactly like an
        # explicit 'no'. Consequently the lists built here never contain None.
        votes.append(False)
    else:
        # Fail loudly on any other value; the record index is part of the
        # message so the offending row can be located directly.
        raise ValueError('Unknown label at record {}: {!r}'.format(i, label))

In [88]:
def factuality_score(scores):
    """Return the fraction of annotations that judged a summary factual.

    Args:
        scores: Sequence of per-annotation judgements (``True`` = factual,
            ``False`` = not factual). A ``None`` entry marks a missing
            judgement.

    Returns:
        float: ``0.0`` when ``scores`` is empty or contains ``None``;
        otherwise ``sum(scores) / len(scores)``, which lies in ``[0, 1]``
        for boolean input.
    """
    # Without any usable judgement, score the summary as not factual instead
    # of failing with ZeroDivisionError on an empty sequence.
    if not scores or None in scores:
        return 0.0
    return sum(scores) / len(scores)

In [132]:
def factuality_score_proba(ents, empty_score=0.5):
    """Score a summary by the lowest posterior among its entities.

    Args:
        ents: List of entity dicts, each with a numeric ``'posterior'`` entry.
        empty_score: Value returned when ``ents`` is empty. Defaults to
            ``0.5``, the fallback this function previously hard-coded.

    Returns:
        The smallest ``'posterior'`` value found in ``ents``, or
        ``empty_score`` when there are no entities.

    Raises:
        KeyError: If an entity dict has no ``'posterior'`` key.
    """
    if len(ents) == 0:
        return empty_score
    # The least probable entity determines the score of the whole summary.
    return min(e['posterior'] for e in ents)

In [133]:
# Spot-check the human score for article 29911712, system 'BERTS2S':
# print the raw judgements, then display their aggregated score.
print(factuality[29911712]['BERTS2S'])
factuality_score(factuality[29911712]['BERTS2S'])

[False, False, False]


0.0

In [134]:
# Spot-check the model score for article 29911712, system 'BERTS2S':
# print the stored entry, then display the score derived from its entities.
print(google_data_with_proba['29911712']['BERTS2S'])
factuality_score_proba(google_data_with_proba['29911712']['BERTS2S']['ents'])

{'summary': 'more than 50 pupils at a bristol academy have been sent home from school because of a lack of uniform.', 'summary_upper': 'More than 50 pupils at a Bristol Academy have been sent home from school because of a lack of uniform .', 'ents': [{'start': 0, 'end': 12, 'label': 0, 'type': 'CARDINAL', 'ent': 'More than 50', 'prior': 0.0021419525146484375, 'posterior': 0.0176849365234375}, {'start': 25, 'end': 40, 'label': 0, 'type': 'ORG', 'ent': 'Bristol Academy', 'prior': 3.0994415283203125e-06, 'posterior': 0.0010528564453125}]}


0.0010528564453125

In [135]:
# Build two aligned lists with one entry per (article, system) pair: the
# aggregated human judgement and the model's entity-based score.
human_factuality_scores, model_factuality_scores = [], []

for bbcid, per_system in factuality.items():
    # These two article ids are left out of the comparison (presumably they
    # have no usable entry in google_data_with_proba — TODO confirm).
    if bbcid in (33928888, 39553812):
        continue
    for system, votes in per_system.items():
        human_factuality_scores.append(factuality_score(votes))
        # The probability file is keyed by the article id as a string.
        model_entities = google_data_with_proba[str(bbcid)][system]['ents']
        model_factuality_scores.append(factuality_score_proba(model_entities))

# Both lists must stay aligned element by element for the correlation below.
assert len(human_factuality_scores) == len(model_factuality_scores)

In [136]:
# Preview the first ten human factuality scores.
human_factuality_scores[:10]

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

In [137]:
# Preview the first ten model scores (0.5 is the fallback for summaries with no entities).
model_factuality_scores[:10]

[0.0010528564453125,
 6.377696990966797e-06,
 0.00458526611328125,
 0.5,
 0.833984375,
 0.026458740234375,
 6.300210952758789e-05,
 0.0011138916015625,
 1.5795230865478516e-05,
 2.2590160369873047e-05]

In [138]:
from scipy import stats

In [139]:
# Spearman rank correlation between the human and model factuality scores.
stats.spearmanr(human_factuality_scores, model_factuality_scores)

SpearmanrResult(correlation=0.15243036060606607, pvalue=3.843634366747758e-11)

#### Calculate Hallucination Correlation

In [140]:
# Show the first hallucination record again as a reminder of its fields.
hallucination_data[0]

{'bbcid': 34687720,
 'system': 'BERTS2S',
 'summary': 'rory mcilroy will take a one-shot lead into the final round of the wgc-hsbc champions after carding a three-under',
 'hallucination_type': 'extrinsic',
 'hallucinated_span': 'rory mcilroy will take a one-shot lead into the final round of the wgc-hsbc champions after carding a three-under',
 'worker_id': 'wid_0'}

In [141]:
# Flag each (article, system) pair: True when the annotation record carries a
# hallucinated span, False when its 'hallucinated_span' is None.
# NOTE(review): every record overwrites the value previously stored for the
# same (bbcid, system), so the flag reflects only the last record seen for
# that pair. If the file holds several records per pair (presumably one or
# more per worker — TODO confirm), consider aggregating them instead.
hallucination = {}
for h in hallucination_data:
    per_system = hallucination.setdefault(h['bbcid'], {})
    per_system[h['system']] = h['hallucinated_span'] is not None

In [142]:
# Build two aligned lists with one entry per (article, system) pair: the
# human hallucination flag turned into a score, and the model's entity score.
human_hallucination_scores, model_hallucination_scores = [], []

for bbcid, per_system in hallucination.items():
    # These two article ids are left out of the comparison (presumably they
    # have no usable entry in google_data_with_proba — TODO confirm).
    if bbcid in (33928888, 39553812):
        continue
    for system, is_hallucinated in per_system.items():
        # Orient the human score like the model score: 1 = nothing flagged,
        # 0 = a hallucinated span was flagged.
        human_hallucination_scores.append(0 if is_hallucinated else 1)
        # The probability file is keyed by the article id as a string.
        model_entities = google_data_with_proba[str(bbcid)][system]['ents']
        model_hallucination_scores.append(factuality_score_proba(model_entities))

# Both lists must stay aligned element by element for the correlation below.
assert len(human_hallucination_scores) == len(model_hallucination_scores)

In [143]:
# Spearman rank correlation between the human and model hallucination scores.
stats.spearmanr(human_hallucination_scores, model_hallucination_scores)

SpearmanrResult(correlation=0.13230240288473855, pvalue=3.409231248510517e-11)