# Evaluating with ROUGE, SMART, SummaC and BERTScore

## ROUGE

In [4]:
import pandas as pd
import numpy as np
# Load the experiment table; later cells read its "highlights" column and the
# generated-summary columns from it.
cnn_summaries = pd.read_parquet('datasets/cnn_experiment.parquet')

In [9]:
from occams.summarize import SummaryUnits
# Unit in which summary lengths are measured below (presumably characters — confirm against occams).
UNITS = SummaryUnits.CHARS

def get_target_length(summaries, UNITS=UNITS, length_quantile=0.5):
    """Measure every summary and derive a target length from the measurements.

    Args:
        summaries: iterable of summary texts.
        UNITS: object exposing ``len(text)``; used to measure each summary.
        length_quantile: quantile of the measured lengths used as the target.

    Returns:
        ``(lengths, target)``: the list of per-summary lengths and the
        requested quantile of those lengths truncated to ``int``.
    """
    lengths = list(map(UNITS.len, summaries))
    quantile_value = np.quantile(lengths, length_quantile)
    return lengths, int(quantile_value)

# With the default quantile of 0.5 this is the median length of the "highlights" texts.
num_units, target_length = get_target_length(cnn_summaries["highlights"])


In [10]:
from occams.rouge_eval import rouge_n_r
import numpy as np
from occams.summarize import SummaryUnits
# Same value as the UNITS assigned in the previous cell; repeated here.
UNITS = SummaryUnits.CHARS

def compute_rouge_scores(extracts, summaries, target_length, units=SummaryUnits.WORDS):
    """Score each extract against its paired reference with ``rouge_n_r``.

    Args:
        extracts: iterable of system summaries to score.
        summaries: iterable of references, paired positionally with ``extracts``.
        target_length: forwarded to ``rouge_n_r`` as ``summary_length``.
        units: forwarded to ``rouge_n_r`` as ``units``.

    Returns:
        ``np.ndarray`` with one row per pair; each row is the ``rouge_n_r``
        result (computed with ``max_n=4``) with its first element dropped.
    """
    per_pair = [
        rouge_n_r(
            extract, reference, summary_length=target_length, units=units, max_n=4
        )[1:]
        for extract, reference in zip(extracts, summaries)
    ]
    return np.array(per_pair)


# Average the per-document score rows into a single vector for occams_summ_f.
rouges = compute_rouge_scores(
    cnn_summaries["occams_summ_f"],
    cnn_summaries["highlights"],
    target_length,
    units=UNITS,
).mean(axis=0)

In [11]:
# Display the averaged score vector held in `rouges`.
rouges

array([0.33349086, 0.1109468 , 0.05340301, 0.03196522])

In [12]:
# Same values as `rouges`, printed with a label naming the summary column.
print(f"occams_summ_f rouge: {rouges}")

occams_summ_f rouge: [0.33349086 0.1109468  0.05340301 0.03196522]


In [13]:
# Repeat the averaged ROUGE computation for every summary variant and print each result.
summary_cols = ["occams_summ_f", "occams_gpt_summ", "gpt_gpt_summ"]
for col in summary_cols:
    rouges = compute_rouge_scores(
        cnn_summaries[col],
        cnn_summaries["highlights"],
        target_length,
        units=UNITS,
    ).mean(axis=0)
    print(f"{col} rouge: {rouges}")

occams_summ_f rouge: [0.33349086 0.1109468  0.05340301 0.03196522]
occams_gpt_summ rouge: [0.26889998 0.0666852  0.02038136 0.00596117]
gpt_gpt_summ rouge: [0.28060483 0.07934564 0.02603879 0.00936921]


## SMART

In [1]:
## SWITCH KERNELS! rouge_score is not installed, they use a different method than occams does for rouge.
from smart_eval import matching_functions as mf
from smart_eval import summeval_utils as utils
from smart_eval import scorer

In [11]:
# Summary columns that `smart_score` iterates over.
summary_cols = ['occams_summ_f', "occams_gpt_summ", "gpt_gpt_summ"]

# SMART scorer configured with smart_eval's `chrf_matcher` as its matching function.
matcher = mf.chrf_matcher
smart_scorer = scorer.SmartScorer(matching_fn=matcher)

from collections import OrderedDict
 

def smart_score(row, metric):
    """Compute SMART scores for every summary column of one row.

    Args:
        row: mapping/Series with a 'highlights' entry and one entry per name
            in the module-level ``summary_cols``.
        metric: key picked out of each per-score result (this notebook
            calls the function with 'recall').

    Returns:
        A list with one dict per summary column: every key returned by the
        scorer mapped to its ``metric`` value, plus a 'summary_type' entry
        holding the column name.
    """
    def _score_column(column):
        result = smart_scorer.smart_score(row['highlights'], row[column])
        # Collapse each entry to the requested metric, in place on the scorer's dict.
        for key in result.keys():
            result[key] = result[key][metric]
        result['summary_type'] = column
        return result

    return [_score_column(column) for column in summary_cols]

# Attach, per row, the list of recall-based SMART score dicts as a new column on a copy.
# NOTE(review): relies on `cnn_summaries` already being loaded in this kernel.
smart_scores_df = cnn_summaries.assign(
    smart_scores=cnn_summaries.apply(lambda x: smart_score(x, 'recall'), axis=1)
)

In [12]:
# One row per (document, summary type): explode the list column, then expand
# each score dict into its own columns next to the document id.
smart_score_df = smart_scores_df[["id", "smart_scores"]].explode("smart_scores")
smart_score_df = pd.concat(
    [
        smart_score_df.drop(columns=["smart_scores"]),
        smart_score_df["smart_scores"].apply(pd.Series),
    ],
    axis=1,
)
# Mean of each SMART score per summary type.
smart_score_df.groupby(by=["summary_type"])[["smart1", "smart2", "smartL"]].mean()

Unnamed: 0_level_0,smart1,smart2,smartL
summary_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gpt_gpt_summ,28.030778,19.258911,27.453943
occams_gpt_summ,27.281604,18.637877,26.877511
occams_summ_f,28.408948,18.837552,27.875472


## SummaC

In [7]:
# CHANGE KERNEL! Python 3.8

from summac.model_summac import SummaCZS, SummaCConv

# Two SummaCZS scorers that differ only in `use_con`, plus one SummaCConv scorer.
# All three are created with device="cuda", so this cell needs a GPU as written.
model_zs = SummaCZS(granularity="sentence", model_name="vitc", device="cuda", max_doc_sents=500) # already on "cuda"; NOTE(review): pick a CPU device if no GPU is available
model_zs_nocon = SummaCZS(granularity="sentence", model_name="vitc", device="cuda", max_doc_sents=500, use_con=False) # same settings as model_zs except use_con=False

model_conv = SummaCConv(models=["vitc"], bins='percentile', granularity="sentence", nli_labels="e", device="cuda", start_file="default", agg="mean", max_doc_sents=500)

<All keys matched successfully>


In [12]:
# Summary columns that `summac_score` iterates over.
summary_cols = ['occams_summ_f', "occams_gpt_summ", "gpt_gpt_summ"]


def summac_score(row):
    """Score every summary column of one row with the three SummaC models.

    Each summary in the module-level ``summary_cols`` is scored against
    ``row['article']`` by ``model_zs``, ``model_zs_nocon`` and ``model_conv``.

    Args:
        row: mapping/Series with an 'article' entry and one entry per name
            in ``summary_cols``.

    Returns:
        A list with one dict per summary column, holding the keys
        'SummaCZS', 'SummaCZS_no_con', 'SummaConv' (first element of each
        model's 'scores') and 'summary_type' (the column name).
    """
    # The previous version appended 'hi' to 'untitled.txt' on every call;
    # that leftover debug write has been removed.
    all_summac_scores = []
    for col in summary_cols:
        scores = {}
        scores['SummaCZS'] = model_zs.score([row['article']], [row[col]])['scores'][0]
        scores['SummaCZS_no_con'] = model_zs_nocon.score([row['article']], [row[col]])['scores'][0]
        scores['SummaConv'] = model_conv.score([row['article']], [row[col]])['scores'][0]
        scores['summary_type'] = col
        all_summac_scores.append(scores)
    return all_summac_scores

# Attach, per row, the list of SummaC score dicts as a new column on a copy.
summac_scores_df = cnn_summaries.assign(
    summac_scores=cnn_summaries.apply(summac_score, axis=1)
)

In [13]:
# One row per (document, summary type): explode the list column, then expand
# each score dict into its own columns next to the document id.
summac_score_df = summac_scores_df[["id", "summac_scores"]].explode("summac_scores")
summac_score_df = pd.concat(
    [
        summac_score_df.drop(columns=["summac_scores"]),
        summac_score_df["summac_scores"].apply(pd.Series),
    ],
    axis=1,
)
# Mean of each SummaC score per summary type, transposed so summary types are columns.
summac_score_df.groupby(by=["summary_type"])[["SummaCZS", "SummaCZS_no_con", "SummaConv"]].mean().T

summary_type,gpt_gpt_summ,occams_gpt_summ,occams_summ_f
SummaCZS,-0.10251,-0.086492,0.448445
SummaCZS_no_con,0.504205,0.490599,0.993038
SummaConv,0.450414,0.455483,0.913763


In [11]:
# Number of rows in the experiment table.
len(cnn_summaries)

247

## BERTScore

In [1]:
import sys
# Install bert_score with the same interpreter that runs this kernel (no version is pinned).
!{sys.executable} -m pip install bert_score



In [2]:
import pandas as pd

# Load the experiment table in this kernel and preview its first rows.
cnn_summaries = pd.read_parquet('datasets/cnn_experiment.parquet')
cnn_summaries.head()

Unnamed: 0,article,highlights,id,gpt_e_summ,occams_summ_f,occams_summ_h,occams_gpt_summ_full,gpt_gpt_summ_full,occams_gpt_summ,gpt_gpt_summ,gpt_a_summ_full,gpt_a_summ
1881,"NEW YORK (CNN) -- Nonnie Dotson, a nurse in th...",Nonnie Dotson was visiting brother in suburb o...,9880666126c7836dbe99b0b42becfee3dcab88cf,"Nonnie Dotson, a nurse in the U.S. Air Force, ...","NEW YORK (CNN) -- Nonnie Dotson, a nurse in th...","NEW YORK (CNN) -- Nonnie Dotson, a nurse in th...","Nonnie Dotson, a nurse in the U.S. Air Force, ...","Nonnie Dotson, a nurse in the U.S. Air Force, ...",Nurse Nonnie Dotson disappeared amid a child s...,Nurse Nonnie Dotson disappeared during a child...,"{\n ""timestamp"": ""2023-06-30T09:15:00"",\n ""t...",The Battle of Zentaria Strait accelerated Oper...
1332,(CNN) -- With a week to go before Election Day...,Obama leads by 8 points in most recent CNN nat...,c24025591e20b5d21062d1a1ec8fbf4686b963b7,The most recent national CNN poll of polls sho...,"In Florida, a CNN/USA Today/Gallup poll conduc...",(CNN) -- With a week to go before Election Day...,Recent national polls show Democratic presiden...,The most recent national CNN poll of polls sho...,Recent national polls show Obama leading McCai...,The most recent national CNN poll of polls sho...,Recent national polls show Democratic presiden...,"Recent polls show Obama leading McCain, but hi..."
444,"BAGHDAD, Iraq (CNN) -- Four U.S. soldiers die...","NEW: As Iraq war enters sixth year, American d...",d80e6a3be826df05df00a87f49cc426fd597f085,Four U.S. soldiers died in a roadside bombing ...,"BAGHDAD, Iraq (CNN) -- Four U.S. soldiers died...","BAGHDAD, Iraq (CNN) -- Four U.S. soldiers died...",Four U.S. soldiers were killed in a roadside b...,Four U.S. soldiers were killed in a roadside b...,4 US soldiers and 8 Pentagon contractors were ...,"Four U.S. soldiers died, reaching 4,000 Americ...",Four U.S. soldiers were killed in a roadside b...,"4 US soldiers die in Iraq bombing, marking 4,0..."
86,"ALGIERS, Algeria (CNN) -- Rescuers are sifting...",Two bombs explode in Algerian capital near gov...,4cf51ce9372dff8ff7f44f098eab1c1d7569af7a,Rescuers are sifting through the rubble of the...,"ALGIERS, Algeria (CNN) -- Rescuers are sifting...","ALGIERS, Algeria (CNN) -- Rescuers are sifting...",A powerful bomb destroyed the United Nations h...,A bomb attack has caused significant damage to...,A powerful bomb by al Qaeda destroyed the UN h...,"Bomb attack on UN headquarters in Algiers, kil...",A powerful bomb ripped off the facade of the U...,Al Qaeda-linked group destroys U.N. HQ in Algi...
958,(CNN) -- The crib in Ellen Darcy's Boston home...,"Guatemala and Vietnam say corruption, baby-ste...",7551503e7e57bd519913b0df90ca1e80d5305b05,Guatemala has announced it will conduct a case...,(CNN) -- The crib in Ellen Darcy's Boston home...,(CNN) -- The crib in Ellen Darcy's Boston home...,Guatemala is conducting a review of all pendin...,Guatemala is conducting a review of all pendin...,"Guatemala reviews pending foreign adoptions, m...",Guatemala is conducting a review of all pendin...,The countries of Guatemala and Vietnam have re...,Guatemala and Vietnam suspend international ad...


In [3]:
import torch
import spacy
from selfcheckgpt.modeling_selfcheck import SelfCheckBERTScore
# Seed torch's random number generator.
torch.manual_seed(28)
# Use the GPU when torch reports one is available, otherwise the CPU.
# NOTE(review): `device` is only printed in the cells shown here; it is not passed to the scorer.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

2023-07-11 15:15:38.131765: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-11 15:15:42.564899: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-07-11 15:15:42.572017: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-07-

cuda


In [4]:
# Passage that is later split into sentences and scored; newlines are replaced
# by spaces and the result is stripped.
passage = """
Michael Alan Weiner (born March 31, 1942), better known by his professional name Michael Savage, is an American radio host, author, activist, nutritionist, and conservative political commentator. He is the host of The Savage Nation, a nationally syndicated talk show that aired on Talk Radio Network across the United States until 2012, and in 2009 was the second most listened-to radio talk show in the country with an audience of over 20 million listeners on 400 stations across the United States. Since October 23, 2012, Michael Savage has been syndicated by Cumulus Media Networks. He holds master's degrees from the University of Hawaii in medical botany and medical anthropology, and a Ph.D. from the University of California, Berkeley in nutritional ethnomedicine. As Michael Weiner, he has written books on nutrition, herbal medicine, and homeopathy.
""".replace("\n", " ").strip()
# Variant of `passage` in which several details (birth date, name, profession,
# dates, organisations) differ from the original text.
# NOTE(review): not referenced by any other cell shown here.
passage_edited = """
Michael Alan Weiner (born January 13, 1995), better known by his professional name Joshua King, is an Austrain footballer, and businessman. He is the host of The Savage Nation, a nationally syndicated talk show that aired on Talk Radio Network across the United States until 2012, and in 2009 was the second most listened-to radio talk show in the country with an audience of over 20 million listeners on 400 stations across the United States. Since October 23, 2002, Michael King has been syndicated by Virgin Media Networks. He holds master's degrees from the University of Hawaii in medical botany and medical anthropology, and a Ph.D. from the University of California, San Diego in nutritional ethnomedicine. As Michael Weiner, he has written books on nutrition, herbal medicine, and homeopathy.
""".replace("\n", " ").strip()

# First of the three sampled passages passed to `selfcheck_bertscore.predict`.
sample1 = """
Michael Alan Weiner (born March 31, 1942), better known by his professional name Michael Savage, is an American radio host, author, and political commentator. He is the host of The Savage Nation, a nationally syndicated talk show broadcast on Talk Radio Network across the United States, and on Comet in Canada. He holds master's degrees from the University of Hawaii in medical botany and medical anthropology, and a Ph.D. from the University of California, Berkeley in epidemiology and nutrition sciences. Savage has summarized his political philosophy in three words: borders, language, and culture. He has characterized his views as conservative nationalism, while critics have characterized them as fascism. He is a fierce critic of liberalism, and the War on Drugs. He believes that political correctness has "ruined the United States" and that the American government has spent too much money on social programs throughout its history.
""".replace("\n", " ").strip()

# Second sampled passage; its two source lines are joined by a space via the replace() call.
sample2 = """
Michael A. Savage (born Michael Alan Weiner; March 31, 1942) is an American radio host, author, activist, nutritionist, and conservative political commentator. He is the host of The Savage Nation, a nationally syndicated talk show that aired on Talk Radio Network across the United States until 2012. The Savage Nation has a combined audience of over 20 million listeners on 400 stations across the United States and is one of the top-rated shows in the country. He is also the host of The Michael Savage Show, a daily podcast and webcast heard around the world. He has been called "the godfather of Trumpmania".
Savage has written more than 25 books and is an outspoken supporter of traditional American values and break-down of the culture of political correctness. He advocates an "America first" foreign policy and rigorous opposition to illegal immigration. For his social and political views, Savage has been described as a conservative and a right-wing populist. He has campaigned for numerous political causes, including opposition to same-sex marriage and the legalization of marijuana.
""".replace("\n", " ").strip()

# Third sampled passage. NOTE(review): the text stops mid-sentence ("... conservative nationalism, free").
sample3 = """
Michael Alan Weiner (born March 31, 1942),[2] better known by his professional name Michael Savage, is an American radio host, author, activist, nutritionist, and conservative political commentator. He is the host of The Savage Nation, a nationally syndicated talk show that aired on Talk Radio Network across the United States until 2012, and in 2009 was the second most listened-to radio talk show in the country with an audience of over 20 million listeners on 400 stations across the United States. Since October 23, 2012, Michael Savage has been syndicated by Cumulus Media Networks. He holds master's degrees from the University of Hawaii in medical botany and medical anthropology, and a Ph.D. from the University of California, Berkeley in nutritional ethnomedicine. As Michael Weiner, he has written books on nutrition, herbal medicine, and homeopathy.
Savage has summarized his political philosophy in three words: borders, language, and culture. He believes that the United States should end foreign aid, impose much stricter immigration laws, deport illegal immigrants, and put an end to birthright citizenship. Savage has characterized his views as conservative nationalism, free
""".replace("\n", " ").strip()

In [5]:
# Build the scorer, then split `passage` into sentences with spaCy.
selfcheck_bertscore = SelfCheckBERTScore()

nlp = spacy.load("en_core_web_sm")
# Keep the stripped text of every sentence span whose len() exceeds 3.
sentences = [span.text.strip() for span in nlp(passage).sents if len(span) > 3]

SelfCheck-BERTScore initialized


In [7]:
# Score each sentence of `passage` against the three sampled passages.
sent_scores_bertscore = selfcheck_bertscore.predict(
    sentences,
    [sample1, sample2, sample3],
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.43G [00:00<?, ?B/s]

In [8]:
# Display the scores returned by `predict` (the output below shows one value per sentence).
sent_scores_bertscore

array([0.0252773 , 0.0385858 , 0.09815407, 0.05635832, 0.08287221])