In [None]:
import evaluate
import datasets
import tqdm
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from utils import eval_score
import pandas as pd
from hierarchy_node import HierarchyNode
import summarization
import numpy as np

# ROUGE metric object handed to every eval_score call.
rouge = evaluate.load('rouge')

base_dir = '/var/scratch/mca305'
model_name = 'ROBERTA_BASE'

# Single source of truth for the Hugging Face checkpoint, so the tokenizer and
# the model cannot drift apart.
hf_checkpoint = "FacebookAI/roberta-base"
tokenizer = AutoTokenizer.from_pretrained(hf_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(hf_checkpoint, output_hidden_states=True)

embedding_dir = f'embeddings_{model_name}'
data_dir = f'data_{model_name}'

# Column-oriented accumulator for the scoring table written to CSV.
eval_methods = {
    'Method': [],
    'Summary': [],
    'Label': [],
    'rouge1': [],
    'rouge2': [],
    'rougeL': [],
    'rougeLsum': []
}
cnt = 0

# Only the dataset stored under `embedding_dir` is loaded. The dataset under
# `data_dir` used to be loaded first and was overwritten before any use, so
# that redundant disk read has been dropped.
small_DS = datasets.load_from_disk(f"{base_dir}/{embedding_dir}")

print('Embeddings extracted... now calculating STs', flush=True)
# De-duplicate while keeping first-seen order: unlike list(set(...)), this
# gives the same article order (and therefore the same CSV row order) on every run.
unique_ids = list(dict.fromkeys(small_DS['article_id']))

In [None]:
def eval_score(sents, embs, summary, rouge, random_summary_length=5, rng=None):
    """Score four extractive summaries of one article against its reference.

    The four candidates are, in order:
      1. ``summarization.get_k_center_summary``
      2. ``summarization.get_hierarchy_summary``
      3. a random selection of sentences (baseline)
      4. ``summarization.get_k_center_summary_after_trimming``

    Candidates 1 and 4 are asked for as many sentences as
    ``summarization.get_hierarchy_summary_ids`` returns in its first value.

    Args:
        sents: Sentences of the article; the selected ones are joined with '.'.
        embs: Embeddings passed to ``HierarchyNode`` and the ``summarization``
            helpers (presumably one per sentence, aligned with ``sents`` --
            TODO confirm).
        summary: Reference summary text.
        rouge: Metric object exposing ``compute(predictions=..., references=...)``.
        random_summary_length: Number of sentences drawn for the random
            baseline; capped at ``len(sents)``.
        rng: Optional object exposing ``choice(a, size=..., replace=...)``
            (e.g. a ``numpy.random.Generator``). Defaults to the global
            ``numpy.random`` state.

    Returns:
        A 5-tuple: the four ``rouge.compute`` results in the order listed
        above, followed by a 4-tuple of the scored predictions (each a
        one-element list holding the joined summary string).
    """
    # Newlines become spaces rather than '', so the last word of one reference
    # line is never fused with the first word of the next line.
    references = [summary.strip().replace('\n', ' ')]

    hierarchy = HierarchyNode(embs)
    hierarchy.calculate_persistence()
    adjacency = hierarchy.h_nodes_adj

    # The third returned value is not needed here.
    trimming_summary, trimmed, _ = summarization.get_hierarchy_summary_ids(embs)
    target_length = len(trimming_summary)

    summary_1 = summarization.get_k_center_summary(summary_length=target_length, embs=embs, sents=sents)
    summary_1 = ['.'.join(summary_1)]
    output = rouge.compute(predictions=summary_1, references=references)

    summary_2 = ['.'.join(summarization.get_hierarchy_summary(embs, sents)[0])]
    output_2 = rouge.compute(predictions=summary_2, references=references)

    # Random baseline: sample WITHOUT replacement so no sentence is repeated,
    # and never ask for more sentences than the article has.
    sampler = np.random if rng is None else rng
    n_random = min(random_summary_length, len(sents))
    summary_random = ['.'.join(sampler.choice(sents, size=n_random, replace=False))]
    output_3 = rouge.compute(predictions=summary_random, references=references)

    summary_4 = ['.'.join(summarization.get_k_center_summary_after_trimming(
        summary_length=target_length,
        adjacency=adjacency,
        embs=embs,
        sents=sents,
        trimmed=trimmed,
    ))]
    output_4 = rouge.compute(predictions=summary_4, references=references)

    return output, output_2, output_3, output_4, (summary_1, summary_2, summary_random, summary_4)

In [None]:

# Collect the row indices of every article in one pass, so the loop below does
# not rescan the whole dataset once per article id.
rows_by_article = {}
for row_idx, article_id in enumerate(small_DS['article_id']):
    rows_by_article.setdefault(article_id, []).append(row_idx)

# Start from empty columns so re-running this cell does not duplicate rows.
for column in eval_methods.values():
    column.clear()

# Names written to the 'Method' column, in the order eval_score returns its results.
method_names = ('k_center', 'stl_k_center', 'random', 'k_center_trim')
rouge_keys = ('rouge1', 'rouge2', 'rougeL', 'rougeLsum')

# `tqdm` is imported as a module, so the progress-bar class is `tqdm.tqdm`.
for cnt, u_id in enumerate(tqdm.tqdm(unique_ids), start=1):
    # Same rows, in the same order, as filtering on article_id == u_id.
    current_ds = small_DS.select(rows_by_article[u_id])

    sents = current_ds['sentence']
    summary = current_ds['highlight'][0]

    k_center, stl_k_center, random_e, k_center_trim, summaries = eval_score(sents, current_ds['embeddings'], summary, rouge)

    # One output row per summarisation method for this article.
    method_scores = (k_center, stl_k_center, random_e, k_center_trim)
    for method, scores, predicted in zip(method_names, method_scores, summaries):
        eval_methods['Method'].append(method)
        # Each entry of `summaries` is a one-element list holding the joined text.
        eval_methods['Summary'].append(predicted[0])
        # NOTE(review): 'Label' is filled with the reference highlight -- confirm
        # this is the intended meaning of the column.
        eval_methods['Label'].append(summary)
        for key in rouge_keys:
            eval_methods[key].append(scores[key])

    print(f'{cnt} out of {len(unique_ids)}', flush=True)

df = pd.DataFrame.from_dict(eval_methods)
df.to_csv(f'scoring_{model_name}.csv')