In [1]:
import json

In [2]:
# Evaluation inputs: `test_path` is the JSONL file parsed by read_metadata,
# `pred_path` is the hypotheses file read line by line by read_summaries.
# NOTE(review): `test_path` is an absolute, machine-specific path; it has to be
# adjusted before the notebook can be re-run on another machine.
test_path = '/home/ml/cadencao/Two-Steps-Summarization/datasets/FactCC/test/data-dev.jsonl'
pred_path = 'preds/fc_test_nodup_bm1_cpbest2.hypo'

In [3]:
def read_summaries(file_path):
    """Load a UTF-8 text file as a list of whitespace-stripped lines.

    Args:
        file_path: Path of the text file to read.

    Returns:
        list[str]: One entry per line, in file order, with leading and
        trailing whitespace removed. Blank lines are kept as ''.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return [raw.strip() for raw in handle]

In [4]:
def read_metadata(file_path):
    """Parse a JSON Lines file into a list of Python objects.

    Args:
        file_path: Path of a UTF-8 file holding one JSON document per line.

    Returns:
        list: The decoded JSON value of every non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines (typically a trailing newline at the end of the
            # file) carry no record; json.loads('') would raise on them.
            if not line:
                continue
            data.append(json.loads(line))
    return data

In [5]:
# Load the gold records and the model predictions as index-aligned lists.
test = read_metadata(test_path)
# Upper-case only the first character of each claim; the rest is untouched.
# Slicing with [:1] (instead of indexing [0]) keeps an empty claim from
# raising IndexError.
targets = [t['claim'][:1].upper() + t['claim'][1:] for t in test]
labels = [t['label'] for t in test]
preds = read_summaries(pred_path)
# Later cells zip these lists together, which would silently truncate on a
# length mismatch, so fail loudly here and report the three sizes.
assert len(targets) == len(preds) == len(labels), (
    "length mismatch: {} targets, {} preds, {} labels".format(
        len(targets), len(preds), len(labels)
    )
)

In [6]:
def post_process(preds):
    """Drop a spurious leading character from each prediction.

    Exactly one leading character is removed when either of these holds:
      * the string starts with a quote character (" or ') and contains an
        odd number of that character, i.e. the opening quote is unbalanced;
      * the first two characters are identical (e.g. "GGeorgia").
    Every other string, including empty and one-character strings that do
    not match the quote rule, is returned unchanged.

    Args:
        preds: Iterable of prediction strings.

    Returns:
        list[str]: The cleaned predictions, in input order.
    """
    processed_preds = []
    for p in preds:
        # p[:1] is '' for an empty string, so the quote test is safe and the
        # short-circuit keeps p[0] from being evaluated on empty input.
        if p[:1] in ('"', "'") and p.count(p[0]) % 2 == 1:
            processed_preds.append(p[1:])
        # The length guard avoids IndexError on one-character predictions.
        # NOTE(review): the comparison is case-sensitive, but a legitimate
        # lower-case double-letter start would also be trimmed; confirm that
        # this is acceptable for the data.
        elif len(p) > 1 and p[0] == p[1]:
            processed_preds.append(p[1:])
        else:
            processed_preds.append(p)
    return processed_preds

In [7]:
# Replace the raw predictions with their cleaned version.
# NOTE: this rebinds `preds`, so re-running the cell applies post_process to
# text that has already been processed once.
preds = post_process(preds)

In [8]:
# Sanity check: first gold label, target claim and prediction, one per line.
print(labels[0], targets[0], preds[0], sep='\n')

CORRECT
Georgia southern university was in mourning after five nursing students died.
Georgia southern university was in mourning after five nursing students died.


#### Classification Accuracy

In [9]:
from sklearn.metrics import classification_report, accuracy_score, balanced_accuracy_score

In [10]:
# Derive one predicted label per example: 'CORRECT' when the prediction and
# the target consist of the same lower-cased whitespace-separated tokens,
# 'INCORRECT' otherwise.
pred_labels = [
    'CORRECT' if hypo.lower().split() == gold.lower().split() else 'INCORRECT'
    for gold, hypo in zip(targets, preds)
]

In [11]:
# Eyeball the first ten predicted labels, then the first ten gold labels.
for label_seq in (pred_labels, labels):
    print(label_seq[:10])

['CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT']
['CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT', 'CORRECT']


In [12]:
# Per-class precision / recall / F1, with the gold labels as the reference
# and the exact-match labels as the predictions.
print(classification_report(labels, pred_labels))

              precision    recall  f1-score   support

     CORRECT       0.91      0.89      0.90       441
   INCORRECT       0.34      0.39      0.36        62

   micro avg       0.83      0.83      0.83       503
   macro avg       0.63      0.64      0.63       503
weighted avg       0.84      0.83      0.84       503



In [13]:
# Share of examples whose exact-match label equals the gold label
# (normalize=True returns a fraction rather than a raw count).
accuracy_score(labels, pred_labels, normalize=True)

0.831013916500994

In [14]:
# Balanced accuracy is the mean of the per-class recalls, so the much larger
# CORRECT class does not dominate the score the way it does plain accuracy.
balanced_accuracy_score(labels, pred_labels)

0.6402604052373637

#### Corrupted Summaries Analysis

In [15]:
# List, for manual inspection, every claim labelled INCORRECT whose
# prediction differs from it: "o" is the claim, "p" the prediction, and the
# number after '#' is the 1-based position of the example.
# The mismatch test compares lower-cased whitespace tokens, the same criterion
# that produces pred_labels, so a pure whitespace difference cannot make this
# listing disagree with the examples classified as INCORRECT.
counter = 1
for i, (label, t, p) in enumerate(zip(labels, targets, preds)):
    if label == 'INCORRECT' and t.lower().split() != p.lower().split():
        print("- {} (#{})".format(counter, i + 1))
        print("- o: " + t)
        print("- p: " + p)
        counter += 1

- 1 (#14)
- o: France's memorial day commemoration is for bereaved family members as braham.
- p: Israel's memorial day commemoration is for bereaved family members as braham.
- 2 (#24)
- o: Gallagher is basing his claim on the works' names and personalities.
- p: Peter Gallagher is basing his claim on the works' names and personalities.
- 3 (#40)
- o: The vatican says he is "regret and am sorry for the hurt these events have caused".
- p: Bishop Robert Finn says he is "regret and am sorry for the hurt these events have caused".
- 4 (#51)
- o: Dzhokhar tsarnaev, 21, was killed in the first blast at the 2013 boston marathon.
- p: Dzhokhar ts Tsarnaev, 21, was killed in the first blast at the 2013 boston marathon.
- 5 (#73)
- o: Iran is alienating some of our closest allies because of the iran deal, and iran is picking up new ones.
- p: America is alienating some of our closest allies because of the Iran deal, and Iran is picking up new ones.
- 6 (#75)
- o: A supreme court decision on le

In [16]:
# manual analysis (incorrect)
# CORRECT: 9
# INCORRECT: 15
# RATE: 9 / 24 = 0.375

#### Correct Summaries Analysis

In [17]:
# List, for manual inspection, every claim labelled CORRECT whose prediction
# differs from it: "o" is the claim, "p" the prediction, and the number after
# '#' is the 1-based position of the example.
# The mismatch test compares lower-cased whitespace tokens, the same criterion
# that produces pred_labels, so a pure whitespace difference cannot make this
# listing disagree with the examples classified as INCORRECT.
counter = 1
for i, (label, t, p) in enumerate(zip(labels, targets, preds)):
    if label == 'CORRECT' and t.lower().split() != p.lower().split():
        print("- {} (#{})".format(counter, i + 1))
        print("- o: " + t)
        print("- p: " + p)
        counter += 1

- 1 (#48)
- o: A: bench warrant for failing to appear in child support court isn't about catching criminals.
- p: QA: bench warrant for failing to appear in child support court isn't about catching criminals.
- 2 (#50)
- o: Obama has argued with the progressive potentate elizabeth warren.
- p: PresidentObama has argued with the progressive potentate elizabeth warren.
- 3 (#59)
- o: Water temperatures warmer than they should be.
- p: "The Blob" "Water temperatures warmer than they should be. No snow where there should be feet of it. Some scientists are saying "The Blobs" could be playing a factor. As monikers go, the blob doesn't sound very worrisome. But if you're a salmon fisherman in Washington or a California resident hoping to see the end of the drought, the Blob could become an enemy of top concern. A University of Washington climate scientist and his associates have been studying the blob -- a huge area of unusually warm water in the Pacific -- for months. "In the fall of
- 4 (#6

In [None]:
# manual analysis (correct)
# CORRECT: 40
# INCORRECT: 7
# RATE: 7 / 441 = 0.015873015873015872

In [None]:
# manual analysis (incorrect)
# CORRECT: 9
# INCORRECT: 15
# RATE: 9 / 24 = 0.375