In [3]:
import pandas as pd
import sys, os.path
from torch import nn
from datasets import Dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
import nltk
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PegasusTokenizer
import datetime
from transformers import AutoModel, AutoTokenizer, AutoConfig
from huggingface_hub import PyTorchModelHubMixin
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
sys.path.append('../rsasumm/')
sys.path.append('../glimpse/evaluate/')
from rsa_reranker import RSAReranking
from evaluate_common_metrics_samples import evaluate_rouge
from evaluate_bartbert_metrics import evaluate_bartbert

Please run data processing if the folder ../data/processed doesn't contain processed files

# Summary Generation

In [2]:
def prepare_dataset(dataset_path) -> Dataset: 
    try:
        dataset = pd.read_csv(dataset_path)
    except:
        raise ValueError(f"Unknown dataset {dataset_path}")

    # make a dataset from the dataframe
    dataset = Dataset.from_pandas(dataset)

    return dataset

def evaluate_summarizer(dataset: Dataset) -> Dataset:
    """
    @param dataset: A dataset with the text
    @return: The same dataset with the summaries added
    """

    # generate summaries
    summaries = []
    print("Generating summaries...")

    # (tqdm library for progress bar) 
    for sample in tqdm(dataset):
        text = sample["text"] 
        
        text = text.replace('-----', '\n')
        sentences = nltk.sent_tokenize(text)
        # remove empty sentences
        sentences = [sentence for sentence in sentences if sentence != ""]
        summaries.append(sentences)

    # add summaries to the huggingface dataset
    dataset = dataset.map(lambda example: {"summary": summaries.pop(0)})

    return dataset

def parse_summaries(summaries_dataset:Dataset) -> pd.DataFrame:
    
    try:
        summaries = summaries_dataset
    except:
        raise ValueError(f"Unknown dataset! Error with summaries")

    # check if the dataframe has the right columns
    if not all(
        col in summaries.columns for col in ["index", "id", "text", "gold", "summary", "id_candidate"]
    ):
        raise ValueError(
            "The dataframe must have columns ['index', 'id', 'text', 'gold', 'summary', 'id_candidate']"
        )

    return summaries

def compute_rsa(summaries: pd.DataFrame, model, tokenizer, device):
    results = []
    for name, group in tqdm(summaries.groupby(["id"])):
        rsa_reranker = RSAReranking(
            model,
            tokenizer,
            device=device,
            candidates=group.summary.unique().tolist(),
            source_texts=group.text.unique().tolist(),
            batch_size=32,
            rationality=3,
        )
        
        (
            best_rsa,
            best_base,
            speaker_df,
            listener_df,
            initial_listener,
            language_model_proba_df,
            initial_consensuality_scores,
            consensuality_scores,
        ) = rsa_reranker.rerank(t=2)

        gold = group['gold'].tolist()[0]

        results.append(
            {
                "id": name,
                "best_rsa": best_rsa,  # best speaker score
                "best_base": best_base,  # naive baseline
                "speaker_df": speaker_df,  # all speaker results
                "listener_df": listener_df,  # all listener results (chances of guessing correctly)
                "initial_listener": initial_listener,
                "language_model_proba_df": language_model_proba_df,
                "initial_consensuality_scores": initial_consensuality_scores,
                "consensuality_scores": consensuality_scores,  # uniqueness scores
                "gold": gold,
                "rationality": 3,  # hyperparameter
                "text_candidates" : group
            }
        )

    return results

## Generate Extractive Summaries

In [5]:
dataset_path = "../data/processed/all_reviews_2017.csv"
year = dataset_path.split("/")[-1].split("_")[-1].split(".")[0]
print(f"Using {year} dataset")
# |indexes| of reviews selected
limit = None

# prepare the dataset
dataset = prepare_dataset(dataset_path)

#limit the number of samples
if limit is not None:
    _lim = min(limit, len(dataset))
    dataset = dataset.select(range(_lim))

# generate summaries
dataset = evaluate_summarizer(dataset)

df_dataset = dataset.to_pandas()
df_dataset = df_dataset.explode("summary")
df_dataset = df_dataset.reset_index()
# add an idx with  the id of the summary for each example
df_dataset["id_candidate"] = df_dataset.groupby(["index"]).cumcount()

# removing missing values
if df_dataset.isnull().values.sum() > 0:
    df_dataset.dropna(axis=0,inplace=True)
    assert df_dataset.isnull().values.sum() == 0, "Missing Values!"

Using 2017 dataset
Generating summaries...


100%|██████████| 1511/1511 [00:00<00:00, 2763.22it/s]


Map:   0%|          | 0/1511 [00:00<?, ? examples/s]

In [6]:
# save the dataset
# create output dir if it doesn't exist
output_dir = "../data/candidates/" 
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

df_dataset.to_csv(f"{output_dir}extractive_summaries{year}.csv", index=False, encoding="utf-8")

## Generate Abstractive summaries

In [35]:
GENERATION_CONFIGS = {
    "top_p_sampling": {
        "max_new_tokens": 200,
        "do_sample": True,
        "top_p": 0.95,
        "temperature": 1.0,
        "num_return_sequences": 8,
        "num_beams" : 1,

        #"num_beam_groups" : 4,
    },
    **{
        f"sampling_topp_{str(topp).replace('.', '')}": {
            "max_new_tokens": 200,
            "do_sample": True,
            "num_return_sequences": 8,
            "top_p": 0.95,
        }
        for topp in [0.5, 0.8, 0.95, 0.99]
    },
}

# add base.csv config to all configs
for key, value in GENERATION_CONFIGS.items():
    GENERATION_CONFIGS[key] = {
        # "max_length": 2048,
        "min_length": 0,
        "early_stopping": True,
        **value,
    }


def prepare_dataset(dataset_path) -> Dataset:
    try:
        dataset = pd.read_csv(dataset_path)
    except:
        raise ValueError(f"Unknown dataset {dataset_path}")

    # make a dataset from the dataframe
    dataset = Dataset.from_pandas(dataset)

    return dataset


def evaluate_summarizer(
    model, tokenizer, dataset: Dataset, decoding_config, batch_size: int,
    device: str, trimming: bool
) -> Dataset:
    """
    @param model: The model used to generate the summaries
    @param tokenizer: The tokenizer used to tokenize the text and the summary
    @param dataset: A dataset with the text
    @param decoding_config: Dictionary with the decoding config
    @param batch_size: The batch size used to generate the summaries
    @return: The same dataset with the summaries added
    """
    # create a dataset with the text and the summary

    # create a dataloader
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=trimming)
    # generate summaries
    summaries = []
    print("Generating summaries...")

    for batch in tqdm(dataloader):
        text = batch["text"]

        inputs = tokenizer(
            text,
            max_length=1024,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        
        # move inputs to device
        inputs = {key: value.to(device) for key, value in inputs.items()}

        # generate summaries
        outputs = model.module.generate(
            **inputs,
            **decoding_config,
        )

        
        total_size = outputs.numel()  # Total number of elements in the tensor
        target_size = batch_size * outputs.shape[-1]  # Target size of the last dimension
        pad_size = (target_size - (total_size % target_size)) % target_size  # Calculate the required padding size to make the total number of elements divisible by the target size

        # Pad the tensor with zeros to make the total number of elements divisible by the target size
        if not trimming and pad_size != 0: outputs = torch.nn.functional.pad(outputs, (0, 0, 0, pad_size // outputs.shape[-1]))

        # output : (batch_size * num_return_sequences, max_length)
        try:
            outputs = outputs.reshape(batch_size, -1, outputs.shape[-1])
        except Exception as e:
            print(f"Error reshaping outputs: {e}")
            raise ValueError(f"Cannot reshape tensor of size {outputs.numel()} into shape "
                            f"({batch_size}, -1, {outputs.shape[-1]}).")
        
    
        
        # decode summaries
        for b in range(batch_size):
            summaries.append(
                [
                    tokenizer.decode(
                        outputs[b, i],
                        skip_special_tokens=True,
                    )
                    for i in range(outputs.shape[1])
                ]
            )

    # if trimming the last batch, remove them from the dataset
    if trimming: dataset = dataset.select(range(len(summaries)))
    
    # add summaries to the huggingface dataset
    dataset = dataset.map(lambda example: {"summary": summaries.pop(0)})
    
    return dataset


def sanitize_model_name(model_name: str) -> str:
    """
    Sanitize the model name to be used as a folder name.
    @param model_name: The model name
    @return: The sanitized model name
    """
    return model_name.replace("/", "_")

In [39]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"using {device} device")

# Fixed configuration
model_name = "facebook/bart-large-cnn"
dataset_path = "../data/processed/all_reviews_2017.csv"
decoding_config = "top_p_sampling" 
batch_size = 32
trimming = True
output_dir = "../data/candidates/" 
limit = None  # You can change this value
scripted_run = False

# Load the model
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.unk_token_id

# Multiple GPU
model = model.to(device)
if torch.cuda.device_count()>1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    model=nn.DataParallel(model)

# Load the dataset
print("Loading dataset...")
dataset = prepare_dataset(dataset_path)

# Limit the number of samples
_lim = min(limit, len(dataset))
dataset = dataset.select(range(_lim))

# Generate summaries
dataset = evaluate_summarizer(
    model,
    tokenizer,
    dataset,
    GENERATION_CONFIGS[decoding_config],
    batch_size=batch_size,
    device=device,
    trimming=trimming,
)

df_dataset = dataset.to_pandas()
df_dataset = df_dataset.explode("summary").reset_index()
df_dataset['id_candidate'] = df_dataset.groupby(['index']).cumcount()

using cuda device
Using 2 GPUs!
Loading dataset...
Generating summaries...


100%|██████████| 100/100 [03:01<00:00,  1.82s/it]


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [40]:
# Save the dataset
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d-%H-%M-%S")
model_name_sanitized = sanitize_model_name(model_name)
padding_status = "trimmed" if trimming else "padded"

# save the dataset
# create output dir if it doesn't exist
output_dir = "../data/candidates/" 
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

df_dataset.to_csv(f"{output_dir}abstractive_summaries{year}.csv", index=False, encoding="utf-8")

## Compute RSA

In [3]:
# download model
model_name = "google/pegasus-arxiv"
#summaries = "/kaggle/working/candidates/extractive_summaries.csv"
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"using {device} device")

# load the model and the tokenizer
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Multiple GPU 
model=model.to(device)
if torch.cuda.device_count()>1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    model=nn.DataParallel(model)


if "pegasus" in model_name: 
    tokenizer = PegasusTokenizer.from_pretrained(model_name)
else:
    tokenizer = AutoTokenizer.from_pretrained(model_name)

using cpu device


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-arxiv and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
output_path = "../output/quality/"

# load the summaries
summaries = parse_summaries(df_dataset)
n_articles = 'all_articles'
# # take first 5 summary candidates for each id
n_articles = 5
selected_articles = list(summaries.groupby("id").count().index[:n_articles])
mask = [summaries["id"][i] in selected_articles for i in range(len(summaries))]
selected_summaries = summaries[mask]
assert len(selected_summaries.groupby("id").count()) == n_articles, "Error in selecting articles!"
print(f"using a dataset with {len(selected_summaries.groupby('id').count())} articles")


# rerank the summaries
results = compute_rsa(selected_summaries, model, tokenizer, device)

results = {"results": results}
results["metadata/reranking_model"] = model_name
results["metadata/rsa_iterations"] = 3

print("Best Summaries generated succesfully!")

# save dataframe with base summaries
all_base_df = pd.DataFrame(results["results"])
base_df = all_base_df.loc[:,["id","best_rsa","gold"]]
display(base_df)
base_df.to_csv(f"{output_path}base_glimpse_{year}_{n_articles}samples.csv", index=False)

# to get back some gpu memory
torch.cuda.empty_cache()

# Quality RSA

In [None]:
class QualityModel(nn.Module, PyTorchModelHubMixin):
    def __init__(self, config):
        super(QualityModel, self).__init__()
        self.model = AutoModel.from_pretrained(config["base_model"])
        self.dropout = nn.Dropout(config["fc_dropout"])
        self.fc = nn.Linear(self.model.config.hidden_size, len(config["id2label"]))

    def forward(self, input_ids, attention_mask):
        features = self.model(
            input_ids=input_ids, attention_mask=attention_mask
        ).last_hidden_state
        dropped = self.dropout(features)
        outputs = self.fc(dropped)
        return torch.softmax(outputs[:, 0, :], dim=1)

#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"
config = AutoConfig.from_pretrained("nvidia/quality-classifier-deberta")
tokenizer = AutoTokenizer.from_pretrained("nvidia/quality-classifier-deberta")
model = QualityModel.from_pretrained("nvidia/quality-classifier-deberta").to(device)
model.eval()
print("Model ready!")

In [20]:
base_df = pd.DataFrame(results["results"])
# list of speaker matrixes
mat_list = base_df.loc[:,"speaker_df"]
# new dataframe that will collect improved summaries 
new_df = base_df.loc[:,["id","best_rsa","gold"]]
new_df.rename(columns={"best_rsa":"base_rsa"},inplace=True)
new_df.insert(1, "best_rsa", "0", allow_duplicates=True)

# use LLM to generate new summaries
for i in tqdm(range(len(mat_list))):
    # all potential summaries (sentences in reviews)
    candidates_summ = list(mat_list[i].columns)
    # tokenize and evaluate summaries
    inputs = tokenizer(candidates_summ, return_tensors="pt", padding="longest", truncation=True).to(device)
    outputs = model(inputs["input_ids"], inputs["attention_mask"])
    
    quality_scores_raw = outputs.cpu().detach().numpy()
    # To get a quality score, we can sum the probability of "High" and 0.7 times the probability of "Medium". 
    # This is a simple heuristic to get a score between 0 and 1. 
    # "Medium" is multiplied by 0.7 to give it less importance than "High".
    quality_scores = quality_scores_raw[:,0] + 0.7 * quality_scores_raw[:,1]

    new_scores = mat_list[i] + quality_scores
    new_df.loc[i,"best_rsa"] = str(new_scores.idxmax(axis=1).values)

100%|██████████| 5/5 [00:29<00:00,  5.84s/it]


# Rouge Score

## Base Extractive RSA 

In [16]:
# input base_df.astype("str") to get base scores, new_df to get improved ones
metrics = evaluate_rouge(base_df.astype("str"))
df = pd.DataFrame.from_dict(metrics)
# scores for the base model
print("Base Glimpse Scores")
print(df)
df.mean(axis=0)

Base Glimpse Scores
     rouge1    rouge2    rougeL  rougeLsum
0  0.073394  0.000000  0.055046   0.073394
1  0.296296  0.025316  0.148148   0.148148
2  0.224900  0.024291  0.120482   0.160643
3  0.228070  0.035714  0.140351   0.175439
4  0.133163  0.025674  0.071703   0.089629


rouge1       0.191165
rouge2       0.022199
rougeL       0.107146
rougeLsum    0.129450
dtype: float64

## Quality-based Extractive RSA 

In [21]:
# input base_df.astype("str") to get base scores, new_df to get improved ones
metrics = evaluate_rouge(new_df.astype("str"))
df = pd.DataFrame.from_dict(metrics)
# scores for the base model
print("Base Glimpse Scores")
print(df)
df.mean(axis=0)

Base Glimpse Scores
     rouge1    rouge2    rougeL  rougeLsum
0  0.213836  0.025478  0.163522   0.201258
1  0.361111  0.042254  0.250000   0.291667
2  0.284722  0.034965  0.152778   0.201389
3  0.224852  0.047904  0.118343   0.177515
4  0.175000  0.037594  0.095000   0.120000


rouge1       0.251904
rouge2       0.037639
rougeL       0.155929
rougeLsum    0.198366
dtype: float64

## Base Abstractive RSA

In [46]:
# input base_df.astype("str") to get base scores, new_df to get improved ones
metrics = evaluate_rouge(base_df.astype("str"))
df = pd.DataFrame.from_dict(metrics)
# scores for the base model
print("Quality Abstractive RSA Scores")
print(df)
df.mean(axis=0)

Quality Abstractive RSA Scores
     rouge1    rouge2    rougeL  rougeLsum
0  0.525253  0.418367  0.454545   0.515152
1  0.307692  0.029126  0.192308   0.259615
2  0.220994  0.033520  0.110497   0.165746
3  0.107527  0.021739  0.086022   0.096774
4  0.018692  0.000000  0.018692   0.018692


rouge1       0.236032
rouge2       0.100550
rougeL       0.172413
rougeLsum    0.211196
dtype: float64

## Quality-based Abstractive RSA

In [49]:
# input base_df.astype("str") to get base scores, new_df to get improved ones
metrics = evaluate_rouge(new_df)
df = pd.DataFrame.from_dict(metrics)
# scores for the base model
print("Base Glimpse Scores")
print(df)
df.mean(axis=0)

Base Glimpse Scores
     rouge1    rouge2    rougeL  rougeLsum
0  0.587678  0.421053  0.483412   0.568720
1  0.315353  0.041841  0.190871   0.265560
2  0.220994  0.033520  0.110497   0.165746
3  0.101852  0.028037  0.074074   0.074074
4  0.013889  0.000000  0.013889   0.013889


rouge1       0.247953
rouge2       0.104890
rougeL       0.174549
rougeLsum    0.217598
dtype: float64

# Bartbert Score

## Base Extractive RSA 

In [18]:
display(base_df)
metrics = evaluate_bartbert(base_df.astype("str"))
# make a dataframe with the metric
df = pd.DataFrame(metrics)
# merge the metrics with the summaries
#bert_df = pd.concat([rouge_df,df], axis=1)
#bert_df
# base model bartbert scores
print(df)
df.mean(axis=0)

Unnamed: 0,id,best_rsa,gold
0,"(https://openreview.net/forum?id=B1-Hhnslg,)","[1)., Instead of considering each support poin...",The program committee appreciates the authors'...
1,"(https://openreview.net/forum?id=B1-q5Pqxl,)",[The paper looks at the problem of locating th...,This paper provides two approaches to question...
2,"(https://openreview.net/forum?id=B16Jem9xe,)",[I just noticed I submitted my review as a pre...,"Hello Authors, Congratulations on the accepta..."
3,"(https://openreview.net/forum?id=B16dGcqlx,)",[This paper proposed a novel adversarial frame...,pros: - new problem - huge number of experim...
4,"(https://openreview.net/forum?id=B184E5qee,)",[Unlike much related work in neural networks w...,Reviewers agree that this paper is based on a ...


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

   BERTScore
0   0.810492
1   0.819667
2   0.802348
3   0.814407
4   0.812400


BERTScore    0.811863
dtype: float64

## Quality Extractive RSA

In [22]:
display(new_df)
metrics = evaluate_bartbert(new_df.astype("str"))
# make a dataframe with the metric
df = pd.DataFrame(metrics)
# merge the metrics with the summaries
#bert_df = pd.concat([rouge_df,df], axis=1)
#bert_df
# base model bartbert scores
print(df)
df.mean(axis=0)

Unnamed: 0,id,best_rsa,base_rsa,gold
0,"(https://openreview.net/forum?id=B1-Hhnslg,)",['It seems to me when dealing with 1-shot case...,"[1)., Instead of considering each support poin...",The program committee appreciates the authors'...
1,"(https://openreview.net/forum?id=B1-q5Pqxl,)",['For this the paper proposes to combine two e...,[The paper looks at the problem of locating th...,This paper provides two approaches to question...
2,"(https://openreview.net/forum?id=B16Jem9xe,)",['A variant of least-squares likelihood estima...,[I just noticed I submitted my review as a pre...,"Hello Authors, Congratulations on the accepta..."
3,"(https://openreview.net/forum?id=B16dGcqlx,)",['This paper proposed a novel adversarial fram...,[This paper proposed a novel adversarial frame...,pros: - new problem - huge number of experim...
4,"(https://openreview.net/forum?id=B184E5qee,)",['The authors present a simple method to affix...,[Unlike much related work in neural networks w...,Reviewers agree that this paper is based on a ...


   BERTScore
0   0.817730
1   0.840635
2   0.809344
3   0.812275
4   0.818157


BERTScore    0.819628
dtype: float64

## Base Abstractive RSA

In [56]:
display(base_df)
metrics = evaluate_bartbert(base_df.astype("str"))
# make a dataframe with the metric
df = pd.DataFrame(metrics)
# merge the metrics with the summaries
#bert_df = pd.concat([rouge_df,df], axis=1)
#bert_df
# base model bartbert scores
print(df)
df.mean(axis=0)

Unnamed: 0,id,best_rsa,best_base,speaker_df,listener_df,initial_listener,language_model_proba_df,initial_consensuality_scores,consensuality_scores,gold,rationality,text_candidates
0,"(https://openreview.net/forum?id=B1jnyXXJx,)",[Paleo-Ferrari-Barrini (2015) a regularizer th...,[Paleo-Ferrari-Barrini (2015) a regularizer th...,...,...,...,...,This article looks at an optimization method u...,This article looks at an optimization method u...,The paper proposes a method for accelerating o...,3,index ...
1,"(https://openreview.net/forum?id=BJ46w6Ule,)",[The paper addresses the problem of learning c...,[The paper addresses the problem of learning c...,...,...,...,...,The paper addresses the problem of learning co...,The paper addresses the problem of learning co...,This paper is about learning distributed repre...,3,index ...
2,"(https://openreview.net/forum?id=BJO-BuT1g,)",[This paper addresses the problem of efficient...,[This paper addresses the problem of efficient...,...,...,...,...,This paper addresses the problem of efficient ...,This paper addresses the problem of efficient ...,The reviewers (two of whom stated maximum conf...,3,index ...
3,"(https://openreview.net/forum?id=Bk8N0RLxx,)",[This paper conducts a series of experiments o...,[This paper conducts a series of experiments o...,...,...,...,...,This paper conducts a series of experiments on...,This paper conducts a series of experiments on...,The reviewers agree that the method is excitin...,3,index ...
4,"(https://openreview.net/forum?id=BkCPyXm1l,)",[The paper introduced a regularization scheme ...,[The paper introduced a regularization scheme ...,...,...,...,...,Cons: Bad baseline results. Poor consistency w...,Cons: Bad baseline results. Poor consistency w...,The reviewers unanimously recommend rejection.,3,index ...


   BERTScore
0   0.886830
1   0.831309
2   0.822337
3   0.830322
4   0.825558


BERTScore    0.839271
dtype: float64

## Quality Abstractive RSA

In [None]:
display(new_df)
metrics = evaluate_bartbert(new_df.astype("str"))
# make a dataframe with the metric
df = pd.DataFrame(metrics)
# merge the metrics with the summaries
#bert_df = pd.concat([rouge_df,df], axis=1)
#bert_df
# base model bartbert scores
print(df)
df.mean(axis=0)

Unnamed: 0,id,best_rsa,base_rsa,gold
0,"(https://openreview.net/forum?id=B1jnyXXJx,)",['The method is inspired from physics and uses...,[Paleo-Ferrari-Barrini (2015) a regularizer th...,The paper proposes a method for accelerating o...
1,"(https://openreview.net/forum?id=BJ46w6Ule,)",['The paper addresses the problem of learning ...,[The paper addresses the problem of learning c...,This paper is about learning distributed repre...
2,"(https://openreview.net/forum?id=BJO-BuT1g,)",['This paper addresses the problem of efficien...,[This paper addresses the problem of efficient...,The reviewers (two of whom stated maximum conf...
3,"(https://openreview.net/forum?id=Bk8N0RLxx,)",['This paper conducts a series of experiments ...,[This paper conducts a series of experiments o...,The reviewers agree that the method is excitin...
4,"(https://openreview.net/forum?id=BkCPyXm1l,)",['The paper introduced a regularization scheme...,[The paper introduced a regularization scheme ...,The reviewers unanimously recommend rejection.


   BERTScore
0   0.895531
1   0.840526
2   0.822337
3   0.828523
4   0.821937


BERTScore    0.841771
dtype: float64