In [1]:
import numpy as np
import pandas as pd
import torch
import json
import itertools
import nltk
# nltk.download('punkt')
from collections import Counter

In [2]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Active checkpoint: roberta-large-mnli.
# label_mapping[k] is the label name reported for class index k of the logits,
# so its order has to match the checkpoint in use
# (NOTE(review): confirm against model.config.id2label).
tokenizer = AutoTokenizer.from_pretrained('roberta-large-mnli')
model = AutoModelForSequenceClassification.from_pretrained('roberta-large-mnli')
label_mapping = ['contradiction', 'neutral', 'entailment']


# Alternative checkpoint, kept for reference: cross-encoder NLI RoBERTa base.
# Its label order differs, so all three assignments must be swapped together.
# model = AutoModelForSequenceClassification.from_pretrained('cross-encoder/nli-roberta-base')
# tokenizer = AutoTokenizer.from_pretrained('cross-encoder/nli-roberta-base')
# label_mapping = ['contradiction', 'entailment', 'neutral']

# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
model = model.to(device)

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
# Load the evaluation samples. The context manager closes the file handle as soon
# as parsing is done; the previous bare open() call never closed it explicitly.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so the read does
# not depend on the platform's locale default.
with open('data/combined_data.json', 'r', encoding='utf-8') as data_file:
    data = json.load(data_file)

In [4]:
# CREATE ALL PAIRWISE SENTENCES TOGETHER INTO LISTs FOR NLI


def _contrast_pairs(sum_a, sum_b, sum_comm):
    """Return the sentence pairs A x B, then A x common, then B x common.

    Each argument is a list of sentences. Every pair is a tuple
    (sentence from the first list, sentence from the second list).
    """
    return (
        list(itertools.product(sum_a, sum_b))
        + list(itertools.product(sum_a, sum_comm))
        + list(itertools.product(sum_b, sum_comm))
    )


def _source_summary_pairs(source_reviews, entity_sum, comm_sum):
    """Pair every source-review sentence with every summary sentence.

    Args:
        source_reviews: list of reviews, each review a list of sentences.
        entity_sum: sentences of the entity-specific summary.
        comm_sum: sentences of the common summary.

    Returns:
        (pairs, review_idx). `pairs` holds (review sentence, summary sentence)
        tuples: all reviews against `entity_sum` first, then all reviews against
        `comm_sum`. `review_idx[k]` is the position within `source_reviews` of
        the review that `pairs[k]` came from.
    """
    pairs = []
    review_idx = []
    for summary in (entity_sum, comm_sum):
        for idx, review in enumerate(source_reviews):
            pairs.extend(itertools.product(review, summary))
            # One index entry per pair produced for this review.
            review_idx.extend([idx] * (len(review) * len(summary)))
    return pairs, review_idx


# Per-sample pair lists (one inner list per sample).
pair_refs = []
pair_gens = []
pair_ref_a_sources = []
pair_gen_a_sources = []
pair_ref_b_sources = []
pair_gen_b_sources = []

# Sample index `d`, repeated once per pair, aligned with the lists above.
pair_refs_ind = []
pair_gens_ind = []
pair_ref_a_sources_ind = []
pair_gen_a_sources_ind = []
pair_ref_b_sources_ind = []
pair_gen_b_sources_ind = []

# Source-review index of every source/summary pair, aligned with pair_*_sources.
source_gen_a_inds = []
source_gen_b_inds = []
source_ref_a_inds = []
source_ref_b_inds = []

# NOTE(review): samples 0-19 are skipped; the reason is not visible in this
# notebook -- confirm before changing the start index.
for d in range(20, len(data)):
    entry = data[d]

    ################################################################################

    # source reviews, each split into sentences
    source_a = [nltk.sent_tokenize(review) for review in entry['source_reviews_a']]
    source_b = [nltk.sent_tokenize(review) for review in entry['source_reviews_b']]

    # ref summaries: only the first reference of each kind is used
    ref_a_sum = nltk.sent_tokenize(entry['refs_a'][0])
    ref_b_sum = nltk.sent_tokenize(entry['refs_b'][0])
    ref_comm_sum = nltk.sent_tokenize(entry['refs_comm'][0])

    # gen summaries
    gen_a_sum = nltk.sent_tokenize(entry['gen_a'])
    gen_b_sum = nltk.sent_tokenize(entry['gen_b'])
    gen_comm_sum = nltk.sent_tokenize(entry['gen_comm'])

    ###############################################################################

    # pairwise ref FOR CONTRAST
    pair_ref = _contrast_pairs(ref_a_sum, ref_b_sum, ref_comm_sum)
    pair_refs.append(pair_ref)
    pair_refs_ind.append([d] * len(pair_ref))

    # pairwise gen FOR CONTRAST
    pair_gen = _contrast_pairs(gen_a_sum, gen_b_sum, gen_comm_sum)
    pair_gens.append(pair_gen)
    pair_gens_ind.append([d] * len(pair_gen))

    ###############################################################################

    # pairwise source gen FOR POPULAR OPINION FACTUALITY
    pair_gen_a_source, source_gen_a_ind = _source_summary_pairs(source_a, gen_a_sum, gen_comm_sum)
    pair_gen_b_source, source_gen_b_ind = _source_summary_pairs(source_b, gen_b_sum, gen_comm_sum)

    source_gen_a_inds.append(source_gen_a_ind)
    source_gen_b_inds.append(source_gen_b_ind)

    pair_gen_a_sources.append(pair_gen_a_source)
    pair_gen_a_sources_ind.append([d] * len(pair_gen_a_source))

    pair_gen_b_sources.append(pair_gen_b_source)
    pair_gen_b_sources_ind.append([d] * len(pair_gen_b_source))

    # pairwise source ref FOR POPULAR OPINION FACTUALITY
    pair_ref_a_source, source_ref_a_ind = _source_summary_pairs(source_a, ref_a_sum, ref_comm_sum)
    pair_ref_b_source, source_ref_b_ind = _source_summary_pairs(source_b, ref_b_sum, ref_comm_sum)

    source_ref_a_inds.append(source_ref_a_ind)
    source_ref_b_inds.append(source_ref_b_ind)

    pair_ref_a_sources.append(pair_ref_a_source)
    pair_ref_a_sources_ind.append([d] * len(pair_ref_a_source))

    pair_ref_b_sources.append(pair_ref_b_source)
    pair_ref_b_sources_ind.append([d] * len(pair_ref_b_source))

In [5]:
# Constant tag columns: each list holds one tag per sentence pair, counted over
# all samples, so it lines up row-for-row with the flattened pair list it describes.

# Summary type ("ref" / "gen") for the contrast pairs.
ref = ["ref"] * sum(len(pairs) for pairs in pair_refs)
gen = ["gen"] * sum(len(pairs) for pairs in pair_gens)

# Summary type and entity ("a") for the source/summary pairs of entity A.
ref_source_a = ["ref"] * sum(len(pairs) for pairs in pair_ref_a_sources)
gen_source_a = ["gen"] * sum(len(pairs) for pairs in pair_gen_a_sources)
ref_source_a_entity = ["a"] * sum(len(pairs) for pairs in pair_ref_a_sources)
gen_source_a_entity = ["a"] * sum(len(pairs) for pairs in pair_gen_a_sources)

# Summary type and entity ("b") for the source/summary pairs of entity B.
ref_source_b = ["ref"] * sum(len(pairs) for pairs in pair_ref_b_sources)
gen_source_b = ["gen"] * sum(len(pairs) for pairs in pair_gen_b_sources)
ref_source_b_entity = ["b"] * sum(len(pairs) for pairs in pair_ref_b_sources)
gen_source_b_entity = ["b"] * sum(len(pairs) for pairs in pair_gen_b_sources)

In [6]:
def compute_NLI(pair_list, rev=False, batch_size=None):
    """Label every sentence pair with the NLI model, one sample at a time.

    Uses the module-level `model`, `tokenizer`, `label_mapping` and `device`.
    Prints the sample counter every fifth sample as a progress indicator.

    Args:
        pair_list: iterable of samples; each sample is a sequence of
            (sentence_1, sentence_2) pairs.
        rev: when true, each pair is given to the tokenizer as
            (sentence_2, sentence_1) instead of (sentence_1, sentence_2).
        batch_size: maximum number of pairs per forward pass. ``None`` (the
            default) sends each sample through the model in a single pass.
            Set it to bound memory use on samples with many pairs.

    Returns:
        (label_lists, stats). ``label_lists[k]`` holds one label string per
        pair of sample k, in pair order. ``stats[k]`` is
        ``[n_neutral, n_contradiction, n_entailment]`` for sample k.

    Raises:
        ValueError: if `batch_size` is given and is smaller than 1.
    """
    if batch_size is not None and batch_size < 1:
        raise ValueError("batch_size must be a positive integer or None")

    label_lists = []
    stats = []

    for counter, pairs in enumerate(pair_list):
        pairs = list(pairs)
        # A sample without pairs gets an empty label list and zero counts;
        # the tokenizer and model are never called with empty input.
        labels = _classify_pairs(pairs, rev, batch_size) if pairs else []

        label_lists.append(labels)
        agg = Counter(labels)
        stats.append([agg['neutral'], agg['contradiction'], agg['entailment']])

        if counter % 5 == 0:
            print(counter)

    return label_lists, stats


def _classify_pairs(pairs, rev, batch_size):
    """Return one label string per pair of a single non-empty sample."""
    first = [pair[0] for pair in pairs]
    second = [pair[1] for pair in pairs]
    if rev:
        first, second = second, first

    # With no batch size the whole sample goes through in one forward pass.
    step = len(pairs) if batch_size is None else batch_size

    labels = []
    model.eval()
    with torch.no_grad():
        for start in range(0, len(pairs), step):
            features = tokenizer(
                first[start:start + step],
                second[start:start + step],
                padding=True,
                truncation=True,
                return_tensors="pt",
            ).to(device)
            scores = model(**features).logits
            labels.extend(label_mapping[idx] for idx in scores.argmax(dim=1).tolist())
    return labels

In [7]:
# Contrastiveness between generated summaries: label every generated-summary
# sentence pair, each pair passed to the model in its stored (first, second) order.
gen_label_lists, gen_stats = compute_NLI(pair_gens)

0
5
10
15
20
25


In [8]:
# Contrastiveness between generated summaries, REVERSED: the same pairs, but
# rev=True swaps the two sentences of each pair before they reach the model.
gen_label_lists_rev, gen_stats_rev = compute_NLI(pair_gens, rev=True)

0
5
10
15
20
25


In [9]:
# Contrastiveness between reference summaries: label every reference-summary
# sentence pair, each pair passed to the model in its stored (first, second) order.
ref_label_lists, ref_stats = compute_NLI(pair_refs)

0
5
10
15
20
25


In [10]:
# Contrastiveness between reference summaries, REVERSED: the same pairs, but
# rev=True swaps the two sentences of each pair before they reach the model.
ref_label_lists_rev, ref_stats_rev = compute_NLI(pair_refs, rev= True)

0
5
10
15
20
25


In [11]:
# Factual consistency (popular opinion), generated summaries, entity A:
# each pair is (source-review sentence of A, generated-summary sentence).
gen_source_a_lists, gen_source_a_stats = compute_NLI(pair_gen_a_sources)

0
5
10
15
20
25


In [12]:
# Factual consistency (popular opinion), reference summaries, entity A:
# each pair is (source-review sentence of A, reference-summary sentence).
ref_source_a_lists, ref_source_a_stats = compute_NLI(pair_ref_a_sources)

0
5
10
15
20
25


In [13]:
# Factual consistency (popular opinion), generated summaries, entity B:
# each pair is (source-review sentence of B, generated-summary sentence).
gen_source_b_lists, gen_source_b_stats = compute_NLI(pair_gen_b_sources)

0
5
10
15
20
25


In [14]:
# Factual consistency (popular opinion), reference summaries, entity B:
# each pair is (source-review sentence of B, reference-summary sentence).
ref_source_b_lists, ref_source_b_stats = compute_NLI(pair_ref_b_sources)

0
5
10
15
20
25


In [26]:
# Contrast table: one row per sentence pair, all reference pairs first and all
# generated pairs after them. Every column below is flattened in that same order.
def _keep_label(labels, wanted):
    """Return `labels` with every entry other than `wanted` replaced by ''."""
    return [label if label == wanted else '' for label in labels]


_cont_pairs = list(itertools.chain.from_iterable(pair_refs + pair_gens))
_cont_samples = list(itertools.chain.from_iterable(pair_refs_ind + pair_gens_ind))
# A_B_* columns come from the forward run, B_A_* columns from the reversed run.
_cont_labels_fwd = list(itertools.chain.from_iterable(ref_label_lists + gen_label_lists))
_cont_labels_rev = list(itertools.chain.from_iterable(ref_label_lists_rev + gen_label_lists_rev))

cont_df = pd.DataFrame({
    'SentenceA': [pair[0] for pair in _cont_pairs],
    'SentenceB': [pair[1] for pair in _cont_pairs],
    'Sample': _cont_samples,
    'A_B_neut': _keep_label(_cont_labels_fwd, 'neutral'),
    'A_B_cont': _keep_label(_cont_labels_fwd, 'contradiction'),
    'A_B_ent': _keep_label(_cont_labels_fwd, 'entailment'),
    'B_A_neut': _keep_label(_cont_labels_rev, 'neutral'),
    'B_A_cont': _keep_label(_cont_labels_rev, 'contradiction'),
    'B_A_ent': _keep_label(_cont_labels_rev, 'entailment'),
    'Type': ref + gen,
})

In [27]:
# Preview the first rows of the contrast table.
cont_df.head()

Unnamed: 0,SentenceA,SentenceB,Sample,A_B_neut,A_B_cont,A_B_ent,B_A_neut,B_A_cont,B_A_ent,Type
0,The hotel has conference rooms available to re...,The hotel is especially memorable during a win...,20,neutral,,,neutral,,,ref
1,The hotel has conference rooms available to re...,The room was okay but on the small side includ...,20,neutral,,,neutral,,,ref
2,The hotel has conference rooms available to re...,After upgrading rooms to a suite this was abso...,20,neutral,,,neutral,,,ref
3,The hotel has conference rooms available to re...,Whilst the breakfast and dinners are both quit...,20,neutral,,,neutral,,,ref
4,The hotel has conference rooms available to re...,But a coffee maker and coffee was not free of ...,20,neutral,,,neutral,,,ref


In [35]:
# Columns of the factuality table. Every column is flattened in the same block
# order: reference/A, generated/A, reference/B, generated/B.
def _flatten_samples(nested):
    """Concatenate a list of per-sample lists into a single flat list."""
    return list(itertools.chain.from_iterable(nested))


_fact_pairs = _flatten_samples(
    pair_ref_a_sources + pair_gen_a_sources + pair_ref_b_sources + pair_gen_b_sources
)
_fact_labels = _flatten_samples(
    ref_source_a_lists + gen_source_a_lists + ref_source_b_lists + gen_source_b_lists
)

# Index of the source review each pair was built from.
source_rev_num = _flatten_samples(
    source_ref_a_inds + source_gen_a_inds + source_ref_b_inds + source_gen_b_inds
)
comb_senta = [pair[0] for pair in _fact_pairs]
comb_sentb = [pair[1] for pair in _fact_pairs]
entity = ref_source_a_entity + gen_source_a_entity + ref_source_b_entity + gen_source_b_entity
sum_type = ref_source_a + gen_source_a + ref_source_b + gen_source_b
sample = _flatten_samples(
    pair_ref_a_sources_ind + pair_gen_a_sources_ind + pair_ref_b_sources_ind + pair_gen_b_sources_ind
)

# One column per label: the label string where it was predicted, '' elsewhere.
a_b_neut = [label if label == 'neutral' else '' for label in _fact_labels]
a_b_cont = [label if label == 'contradiction' else '' for label in _fact_labels]
a_b_ent = [label if label == 'entailment' else '' for label in _fact_labels]


In [None]:
# Bounded peek at the source/generated pairs for entity A: the first three pairs
# of the first two samples, instead of displaying the entire nested list.
[pairs[:3] for pairs in pair_gen_a_sources[:2]]

In [36]:
# Factuality table: one row per (source-review sentence, summary sentence) pair.
# The (column name, values) entries are listed in the final column order.
_fact_pop_columns = [
    ('SentenceA', comb_senta),
    ('SentenceB', comb_sentb),
    ('Sample', sample),
    ('A_B_neut', a_b_neut),
    ('A_B_cont', a_b_cont),
    ('A_B_ent', a_b_ent),
    ('Source_Rev_num', source_rev_num),
    ('Entity', entity),
    ('Type', sum_type),
]
fact_pop_df = pd.DataFrame(dict(_fact_pop_columns))

In [38]:
# Preview the first 20 rows of the factuality table.
fact_pop_df.head(20)

Unnamed: 0,SentenceA,SentenceB,Sample,A_B_neut,A_B_cont,A_B_ent,Source_Rev_num,Entity,Type
0,Our company has rented conference rooms and he...,The hotel has conference rooms available to re...,20,,,entailment,0,a,ref
1,Our company has rented conference rooms and he...,The location of this hotel makes it easy to ac...,20,neutral,,,0,a,ref
2,Our company has rented conference rooms and he...,The rooms are a good size and well decorated.,20,neutral,,,0,a,ref
3,Our company has rented conference rooms and he...,The bathroom in the hotel was large and with a...,20,neutral,,,0,a,ref
4,Our company has rented conference rooms and he...,Small touches like a chocolate on the pillow a...,20,neutral,,,0,a,ref
5,Our company has rented conference rooms and he...,The hotel has a roof terrace for drinks.,20,neutral,,,0,a,ref
6,Our company has rented conference rooms and he...,Unfortunately the hotel is missing the availab...,20,neutral,,,0,a,ref
7,There was always someone to greet and help wit...,The hotel has conference rooms available to re...,20,neutral,,,0,a,ref
8,There was always someone to greet and help wit...,The location of this hotel makes it easy to ac...,20,neutral,,,0,a,ref
9,There was always someone to greet and help wit...,The rooms are a good size and well decorated.,20,neutral,,,0,a,ref


In [39]:
# Write both tables to CSV with a header row and without the row index.
# `index` is documented as a bool, so pass False rather than relying on None being falsy.
cont_df.to_csv('contrast.csv', index=False, header=True)
fact_pop_df.to_csv('factuality_popular.csv', index=False, header=True)