In [1]:
import json
import math
import os

import torch
import torch.nn as nn
from fairseq.models.bart import BARTModel
from transformers import BartTokenizer

from utils import read_lines

In [3]:
# Load the path configuration (checkpoint and dataset locations) used throughout
# the notebook. The context manager closes the file handle as soon as it is parsed.
with open('../path_config.json') as config_file:
    PATH = json.load(config_file)

In [4]:
# Posterior model: the checkpoint stored under PATH['xsum_cmlm_bos'].
#
# Other configurations (currently disabled):
#   * PATH['bart.large.xsum'], checkpoint_file='model.pt',
#     data_name_or_path=PATH['bart.large.xsum']
#   * PATH['xsum_cmlm_scratch_cedar_warmup_10000'], checkpoint_file='checkpoint_best.pt',
#     data_name_or_path=PATH['data_name_or_path']
posterior_bart = BARTModel.from_pretrained(
    PATH['xsum_cmlm_bos'],
    checkpoint_file='checkpoint_best.pt',
    data_name_or_path=PATH['data_name_or_path'],
)

In [5]:
# Prior model: the checkpoint stored under PATH['bart.large'].
#
# Other configurations (currently disabled), both with
# checkpoint_file='checkpoint_best.pt' and data_name_or_path=PATH['data_name_or_path']:
#   * PATH['cnndm_cmlm_cedar']
#   * PATH['cnndm_cmlm_scratch_cedar_warmup_20000']
prior_bart = BARTModel.from_pretrained(
    PATH['bart.large'],
    checkpoint_file='model.pt',
    data_name_or_path=PATH['bart.large'],
)

#### Read XSum

In [6]:
# Read the XSum test split (documents and their targets) from the directory
# registered under the 'xsum_fariseq' key of the path config.
xsum_dir = PATH['xsum_fariseq']
document_path = xsum_dir + '/test.source'
target_path = xsum_dir + '/test.target'

xsum_source = read_lines(document_path)
xsum_target = read_lines(target_path)

print(len(xsum_source))
# Every document must have exactly one target.
assert len(xsum_source) == len(xsum_target)

11301


#### Generate Summary

In [11]:
from model import ConditionalSequenceGenerator
from utils import prepare_cmlm_inputs, prepare_mlm_inputs, prepare_clm_inputs, get_cmlm_probability, get_prior_probability

In [12]:
from utils import prepare_clm_inputs, prepare_mlm_inputs, prepare_cmlm_inputs

#### Test One Example

In [13]:
import spacy

# spaCy pipeline; the scoring loop further down reads entity spans from
# nlp(target).to_json()['ents'].
nlp = spacy.load('en_core_web_sm')

In [14]:
# Single-example smoke test: one XSum test article paired with a hard-coded summary.
# NOTE(review): INDEX, source and target are reassigned by the scoring loop later
# in the notebook, so these values only hold until that cell runs.
INDEX = 9444

source = xsum_source[INDEX]
target = 'Twin-to-twin transfusion syndrome (TTTS) is being tracked by a hospital in Cardiff in a bid to save the lives of babies born with the condition.'

In [15]:
# Hand-specified entity spans for `target`, given as character offsets.
# NOTE(review): 'label' is an integer here, while split_name compares 'label'
# against the string 'PERSON' -- confirm which schema the helpers expect.
ent_parts = [{'start': 35, 'end': 39, 'label': 0, 'type': 'ORG', 'ent': 'TTTS'},
             {'start': 75, 'end': 82, 'label': 2, 'type': 'LOC', 'ent': 'Cardiff'}]

# Print each record next to the substring its offsets select, so the spans can
# be checked by eye.
for e in ent_parts:
    print('{} - {}'.format(e, target[e['start']: e['end']]))

{'start': 35, 'end': 39, 'label': 0, 'type': 'ORG', 'ent': 'TTTS'} - TTTS
{'start': 75, 'end': 82, 'label': 2, 'type': 'LOC', 'ent': 'Cardiff'} - Cardiff


In [16]:
# Wrap both BART models in the project's ConditionalSequenceGenerator; these two
# wrappers are reused by the scoring loop later in the notebook.
prior_model = ConditionalSequenceGenerator(prior_bart)
posterior_model = ConditionalSequenceGenerator(posterior_bart)

# Build the inputs for the prior (prepare_mlm_inputs) and posterior
# (prepare_cmlm_inputs) models. The meaning of the four positional items each
# helper returns is defined in utils and is not visible from this notebook.
pri_args = prepare_mlm_inputs(source, target, ent_parts)
pos_args = prepare_cmlm_inputs(source, target, ent_parts)

prior_probs = get_prior_probability(prior_model, pri_args[0], pri_args[1], pri_args[2], pri_args[3])
posterior_probs = get_cmlm_probability(posterior_model, pos_args[0], pos_args[1], pos_args[2], pos_args[3])

# Both models must return the same number of probabilities.
assert len(prior_probs) == len(posterior_probs)

In [17]:
# Show the probabilities from both models; the scoring loop later pairs these
# lists with the entity list by position.
print('- prior: {}'.format(prior_probs))
print('- posterior: {}'.format(posterior_probs))

- prior: [0.0028667449951171875, 0.0011749267578125]
- posterior: [0.77490234375, 0.1083984375]


#### Read QA Data

In [18]:
from utils import read_jsonl

In [19]:
# Directory holding the MTurk QA annotation files. The original absolute path is
# kept as the default; set the QA_DATASET_DIR environment variable to run the
# notebook on another machine without editing this cell.
QA_DATASET_DIR = os.environ.get(
    'QA_DATASET_DIR',
    '/home/mcao610/Missing_information/Dataset/QA_dataset',
)

cnndm_data = read_jsonl(os.path.join(QA_DATASET_DIR, 'mturk_cnndm_truecase.jsonl'))
xsum_data = read_jsonl(os.path.join(QA_DATASET_DIR, 'mturk_xsum_truecase.jsonl'))

In [20]:
# Number of annotated examples in each QA dataset (XSum first, then CNN/DM).
print(len(xsum_data))
print(len(cnndm_data))

239
235


In [21]:
# Inspect the record keys and one example's annotated summary sentences
# (each sentence carries a list of per-worker yes/no responses).
print(xsum_data[0].keys())
print(xsum_data[3]['summary_sentences'])

dict_keys(['article', 'summary_sentences'])
[{'sentence': 'Former Leyton orient striker Dean Cox says he will have to wait four months to play in the English football league .', 'responses': [{'worker_id': 2, 'response': 'no'}, {'worker_id': 7, 'response': 'yes'}, {'worker_id': 1, 'response': 'no'}]}]


#### Calculate Probability for Each Entity

In [22]:
from tqdm import tqdm

In [23]:
def split_name(ent_parts, summary):
    """Split every PERSON entity into one entity per whitespace-separated token.

    Args:
        ent_parts: list of entity dicts with at least 'start', 'end' (character
            offsets into ``summary``) and 'label'.
        summary: the text the offsets refer to.

    Returns:
        A new list. Non-PERSON entities are passed through unchanged (the same
        dict objects). Each PERSON entity is replaced by one new dict per name
        token containing only 'start', 'end', 'label' and 'ent'; any other keys
        on the original PERSON dict are not carried over.
    """
    new_parts = []
    for ent in ent_parts:
        if ent['label'] != 'PERSON':
            new_parts.append(ent)
            continue

        span = summary[ent['start']: ent['end']]
        cursor = 0
        for piece in span.split():
            # Locate the token inside the span instead of assuming that exactly
            # one space separates tokens: str.split() collapses any run of
            # whitespace, so "+ 1" arithmetic drifts after a double space or tab.
            cursor = span.index(piece, cursor)
            begin = ent['start'] + cursor
            new_parts.append({'start': begin, 'end': begin + len(piece), 'label': 'PERSON', 'ent': piece})
            cursor += len(piece)
    return new_parts

In [24]:
# split_name([{'start': 0,
#    'end': 11,
#    'label': 'PERSON',
#    'ent': 'Warren Sapp',
#    'prior': 0.0003418922424316406,
#    'posterior': 0.01715087890625},
#   {'start': 89,
#    'end': 92,
#    'label': 'MONEY',
#    'ent': '600',
#    'prior': 0.9267578125,
#    'posterior': 0.775390625}], "Warren Sapp admits he paid for oral sex and that ` everyone got naked ' after he ` put $ 600 on the table ' in his hotel room .")

In [25]:
# Annotate every example with its summary entities and, for each entity, the
# prior and posterior probabilities. `data` aliases xsum_data, so the records
# are modified in place. Only the first summary sentence of each example is used.
data = xsum_data

for INDEX in tqdm(range(len(data))):
    example = data[INDEX]
    source = example['article']
    target = example['summary_sentences'][0]['sentence']

    # Entity spans from spaCy; store each span's surface text alongside its offsets.
    ent_parts = nlp(target).to_json()['ents']
    for e in ent_parts:
        e['ent'] = target[e['start']: e['end']]
    # Alternative (disabled): split_name(ent_parts, target) to score name tokens separately.
    example['ents'] = ent_parts

    if len(ent_parts) == 0:
        continue  # no entities, nothing to score

    pri_args = prepare_mlm_inputs(source, target, ent_parts)
    pos_args = prepare_cmlm_inputs(source, target, ent_parts)

    prior_probs = get_prior_probability(prior_model, pri_args[0], pri_args[1], pri_args[2], pri_args[3])
    posterior_probs = get_cmlm_probability(posterior_model, pos_args[0], pos_args[1], pos_args[2], pos_args[3])

    # Exactly one probability per entity is required from each model.
    assert len(prior_probs) == len(posterior_probs) == len(ent_parts), "{};\n {};\n {}".format(prior_probs, posterior_probs, ent_parts)
    for i, prior in enumerate(prior_probs):
        ent_parts[i]['prior'] = prior
        ent_parts[i]['posterior'] = posterior_probs[i]

100%|██████████| 239/239 [02:59<00:00,  1.33it/s]


In [42]:
# Flag each entity with 1 when its text occurs (case-insensitively, as a
# substring) in the source article, and 0 otherwise.
for d in data:
    for e in d['ents']:
        e['overlap'] = 1 if e['ent'].lower() in d['article'].lower() else 0

In [43]:
# Spot-check one record after the scoring and overlap passes.
data[3]

{'article': "Winger Dean Cox says he will have to remain patient as he searches for a new club after leaving league two side Leyton orient by mutual consent . the 29-year-old terminated his contract with the O 's after the transfer window closed , and can not join another EFL side until January . `` I would n't say I 'm in a predicament , but I have never been in this position before , '' Cox told BBC radio London . `` it is not a nice thing for a footballer . I 'm not able to do my job . '' the former Brighton man continued : `` I am going to have to sit it out again for four months before I can kick a ball in the league again . `` I 'll try to make the best of it . it is hard to train on your own and keep yourself motivated but it is something which has got to be done . '' Cox left orient on 1 September after turning down a move to league one Northampton town . having spent six years with the O 's , scoring 59 times in 275 appearances , Cox said he was `` an emotional wreck '' on his

#### Get Factuality Score for Each Summary

In [27]:
import pickle
import numpy as np

In [48]:
# Load the pre-trained kNN entity classifier.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load classifier files that come from a trusted source.
with open('classifiers/knn_mlm_cmlm_3.pkl', 'rb') as knn_file:
    knn = pickle.load(knn_file)

In [49]:
def get_human_score(responses):
    """Return the fraction of annotator responses that are 'yes'.

    Args:
        responses: list of response dicts, e.g.
            [{'worker_id': 0, 'response': 'yes'},
             {'worker_id': 8, 'response': 'yes'},
             {'worker_id': 1, 'response': 'no'}]

    Returns:
        Number of 'yes' responses divided by the total number of responses.

    Raises:
        ZeroDivisionError: if ``responses`` is empty.
    """
    votes = [r['response'] == 'yes' for r in responses]
    return votes.count(True) / len(votes)

In [50]:
def get_model_score(ents):
    """Score a summary by the lowest posterior probability among its entities.

    Args:
        ents: list of entity dicts, each carrying a 'posterior' value, e.g.
            [{'start': 0, 'end': 3, 'label': 'CARDINAL', 'ent': 'Two',
              'prior': 0.054473876953125, 'posterior': 0.9091796875}, ...]

    Returns:
        The minimum 'posterior' over ``ents``, or the fixed fallback 0.5 when
        the summary has no entities.
    """
    if len(ents) == 0:
        return 0.5
    return min(e['posterior'] for e in ents)

In [51]:
def get_knn_score(ents):
    """Score a summary by the lowest kNN prediction among its entities.

    Uses the notebook-level ``knn`` classifier.

    Args:
        ents: list of entity dicts, each carrying 'posterior', 'prior' and
            'overlap', e.g.
            [{'start': 0, 'end': 3, 'label': 'CARDINAL', 'ent': 'Two',
              'prior': 0.054473876953125, 'posterior': 0.9091796875, 'overlap': 1}, ...]

    Returns:
        The minimum of ``knn.predict`` over the per-entity feature rows
        [posterior, prior, overlap], or the fixed fallback 0.3 when the summary
        has no entities.
    """
    if len(ents) == 0:
        return 0.3
    features = np.array([[e['posterior'], e['prior'], e['overlap']] for e in ents])
    return np.min(knn.predict(features))

In [52]:
# Smoke test: run the kNN scorer on two hand-built entity records.
get_knn_score([{'start': 0, 'end': 3, 'label': 'CARDINAL', 'ent': 'Two', 'prior': 0.054473876953125, 'posterior': 0.9091796875, 'overlap': 1},
               {'start': 71, 'end': 80, 'label': 'GPE', 'ent': 'edinburgh', 'prior': 0.0, 'posterior': 1.1920928955078125e-07, 'overlap': 1}])

1

In [53]:
# One score per example from each source, index-aligned with `data`:
# human agreement on the first summary sentence, min-posterior, and kNN.
human_scores = [get_human_score(d['summary_sentences'][0]['responses']) for d in data]
model_scores = [get_model_score(d['ents']) for d in data]
knn_scores = [get_knn_score(d['ents']) for d in data]

In [54]:
# Preview the first ten min-posterior scores (0.5 is the no-entity fallback).
model_scores[:10]

[0.0006322860717773438,
 0.28955078125,
 6.264448165893555e-05,
 0.5947265625,
 0.5,
 0.51513671875,
 0.869140625,
 0.81884765625,
 0.00025343894958496094,
 0.00026988983154296875]

In [55]:
# Preview the first ten human scores (fraction of 'yes' responses per summary).
human_scores[:10]

[0.6666666666666666,
 0.3333333333333333,
 0.0,
 0.3333333333333333,
 1.0,
 0.6666666666666666,
 1.0,
 0.0,
 0.3333333333333333,
 1.0]

#### Calculate Correlation

In [56]:
from scipy import stats

In [57]:
# Spearman rank correlation between the human scores and the min-posterior model scores.
stats.spearmanr(human_scores, model_scores)

SpearmanrResult(correlation=0.25704254970164386, pvalue=5.80225653132512e-05)

In [58]:
# XSUM posterior: SpearmanrResult(correlation=0.2638292356301218, pvalue=3.615967316333796e-05)
# XSUM prior: SpearmanrResult(correlation=0.3213558454590241, pvalue=3.8282097630483723e-07)
# XSUM KNN: SpearmanrResult(correlation=0.22219752312978966, pvalue=0.0005392932428629086)
# XSUM KNN proba: SpearmanrResult(correlation=0.2134086529828822, pvalue=0.0008993598651778425)

# CNN/DM prior: SpearmanrResult(correlation=0.26699069825714755, pvalue=3.374679859317052e-05)
# CNN/DM posterior: SpearmanrResult(correlation=0.21077493981288759, pvalue=0.00115208952067579)
# CNN/DM KNN: SpearmanrResult(correlation=0.1441617378794903, pvalue=0.027123968278036766)
# CNN/DM KNN proba: SpearmanrResult(correlation=0.100354334221829, pvalue=0.12501122321386574)

In [59]:
# Pearson correlation between the human scores and the kNN scores.
# NOTE(review): the other comparisons in this section use Spearman -- confirm
# that Pearson is intended here.
stats.pearsonr(human_scores, knn_scores)

(0.2679415408591233, 2.6979949254603694e-05)