In [None]:
import pandas as pd
import pickle
import Levenshtein
import evaluate
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from accelerate import Accelerator
import torch
import numpy as np


# Create the Accelerate handle; its `.device` is what the embedding model is
# moved onto further down. NOTE(review): cpu=False presumably means "do not
# force CPU execution" — confirm against the installed accelerate version.
accelerator = Accelerator(cpu=False)
# Ask PyTorch to release cached CUDA memory before the model is loaded.
torch.cuda.empty_cache()
# Show which device was selected so a silent CPU fallback is easy to spot.
print("-------Device:", accelerator.device)

In [None]:
def format_floats(float_list):
    """Round every number in ``float_list`` to three decimal places.

    Each value is rendered with the ``.3f`` format spec and parsed back, so
    the returned list always holds built-in ``float`` objects.
    """
    rounded = []
    for value in float_list:
        rounded.append(float(format(value, ".3f")))
    return rounded

# Load the sentence-embedding model once at module level so compute_metrics
# can reuse it on every call, then move it onto the device chosen by Accelerate.
model_sentence_transformers = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v1')
model_sentence_transformers = model_sentence_transformers.to(accelerator.device) 
_ROUGE_METRIC = None  # lazily-loaded ROUGE scorer shared by every compute_metrics call


def _get_rouge_metric():
    """Return the ROUGE scorer, loading it on first use only."""
    global _ROUGE_METRIC
    if _ROUGE_METRIC is None:
        _ROUGE_METRIC = evaluate.load('rouge')
    return _ROUGE_METRIC


def compute_metrics(reference_sentence, sentence_to_compare):
    """Score how closely ``sentence_to_compare`` matches ``reference_sentence``.

    Args:
        reference_sentence (str): The ground-truth text.
        sentence_to_compare (str): The candidate text to evaluate.

    Returns:
        list[float]: ``[cosine, rouge_1, rouge_L, edit_similarity, jaccard]``,
        each rounded to three decimals by ``format_floats``.
        ``edit_similarity`` is ``1 - Levenshtein distance / longer length`` and
        ``jaccard`` is computed over whitespace-separated tokens.
    """
    embed_pred = model_sentence_transformers.encode([sentence_to_compare], convert_to_tensor=True)
    embed_reference = model_sentence_transformers.encode([reference_sentence], convert_to_tensor=True)

    # Cosine similarity between the two sentence embeddings.
    # `torch.nn` is reached through the already-imported `torch`; the bare
    # `nn` name was never imported in this notebook.
    cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
    # One sentence per side gives a single-element tensor; .item() yields a
    # plain float that format_floats can format (a tuple cannot take '.3f').
    cosine_score = cos(embed_pred, embed_reference).item()

    # use_aggregator=False returns one score per prediction; take the only one.
    rouge_results = _get_rouge_metric().compute(
        predictions=[sentence_to_compare],
        references=[reference_sentence],
        use_aggregator=False,
    )
    rouge_L = rouge_results["rougeL"][0]
    rouge_1 = rouge_results["rouge1"][0]

    # Normalised edit similarity; two empty strings are identical, so score 1.0
    # instead of dividing by zero.
    length = max(len(reference_sentence), len(sentence_to_compare))
    if length:
        ed = (length - Levenshtein.distance(reference_sentence, sentence_to_compare)) / length
    else:
        ed = 1.0

    # Jaccard overlap of the whitespace-token sets; two empty token sets are
    # treated as identical rather than raising ZeroDivisionError.
    ref_set = set(reference_sentence.split())
    comp_set = set(sentence_to_compare.split())
    union = ref_set | comp_set
    jac = len(ref_set & comp_set) / len(union) if union else 1.0

    return format_floats([cosine_score, rouge_1, rouge_L, ed, jac])


In [None]:
def calculate_scores(csv_file_path, output_pickle_path, start_idx=0, end_idx=None,
                     checkpoint_interval=500, max_sentences=45):
    """Score generated text against reference text for every CSV row.

    For each row, the ``Sentence_i`` / ``Generated_i`` column pairs are walked
    in order. Every time a pair with both values present is found, it is
    appended to the running reference/generated paragraphs and
    ``compute_metrics`` is called on the paragraphs accumulated so far.

    Args:
        csv_file_path (str): CSV containing ``Sentence_<i>`` and
            ``Generated_<i>`` columns.
        output_pickle_path (str): Where the results (and checkpoints) are
            written; the file is overwritten on every save.
        start_idx (int): First row to process.
        end_idx (int | None): One past the last row to process; ``None`` or a
            value beyond the data means "through the last row".
        checkpoint_interval (int): Save after every row whose 1-based index is
            a multiple of this value; ``0``/``None`` disables checkpoints.
        max_sentences (int): Number of ``Sentence_<i>`` columns to look for.

    The pickle holds a list with one entry per processed row; each entry is a
    list with one ``compute_metrics`` result per accumulated sentence count.
    Only rows from ``start_idx`` onward are stored, so a run with a different
    ``start_idx`` replaces earlier results rather than extending them.
    """
    # Load the combined CSV file
    df = pd.read_csv(csv_file_path)

    # Clamp the range so an over-long end_idx cannot index past the data.
    if end_idx is None or end_idx > len(df):
        end_idx = len(df)

    # Column presence does not depend on the row, so resolve it once up front.
    column_pairs = []
    for i in range(max_sentences):
        sentence_col = f"Sentence_{i}"
        generated_col = f"Generated_{i}"
        if sentence_col in df.columns and generated_col in df.columns:
            column_pairs.append((sentence_col, generated_col))

    all_other_scores = []

    for idx in tqdm(range(start_idx, end_idx), desc="Processing rows: "):
        row_other_scores = []
        reference_para = []
        generated_para = []

        for sentence_col, generated_col in column_pairs:
            reference_sentence = df.at[idx, sentence_col]
            generated_sentence = df.at[idx, generated_col]

            # Skip pairs where either side is missing, but keep scanning later
            # columns.
            if pd.isna(reference_sentence) or pd.isna(generated_sentence):
                continue

            # str() guards against cells pandas parsed as numbers.
            reference_para.append(str(reference_sentence))
            generated_para.append(str(generated_sentence))

            # NOTE(review): sentences are concatenated with no separator; this
            # assumes each cell already carries its own spacing — confirm.
            ref_combined = "".join(reference_para)
            gen_combined = "".join(generated_para)

            row_other_scores.append(compute_metrics(ref_combined, gen_combined))

        all_other_scores.append(row_other_scores)

        # Save a checkpoint so a crash does not lose all completed rows.
        if checkpoint_interval and (idx + 1) % checkpoint_interval == 0:
            with open(output_pickle_path, 'wb') as checkpoint_file:
                pickle.dump(all_other_scores, checkpoint_file)

    # Save the final results to the pickle file
    with open(output_pickle_path, 'wb') as output_file:
        pickle.dump(all_other_scores, output_file)

    print(f"Results saved to {output_pickle_path}")

# Score every row of the CSV with the default settings and write the results to scores.pkl.
# TODO(review): the original TODO on this line was left empty — presumably the
# input/output paths still need to be finalised; confirm with the author.
calculate_scores("splitted_paragraph_filtered.csv", "scores.pkl")


In [None]:
# Quick sanity check on the saved scores; extend this cell for further
# statistical analysis.
def read_and_print_pickle(pickle_file_path):
    """Print the share of rows whose first score exceeds 0.5 and 0.9.

    Args:
        pickle_file_path (str): Pickle written by ``calculate_scores``: a list
            of rows, each a list of per-sentence score lists. For that layout
            the first metric (cosine) of the first entry is used. Flat rows
            whose first element is already a number are also accepted.

    Rows with no scores are ignored, so percentages are relative to the rows
    that have at least one score.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(pickle_file_path, 'rb') as file:
        data = pickle.load(file)

    firsts = []
    for row in data:
        if not row:
            continue  # row had no valid sentence pair, nothing to report
        first = row[0]
        if isinstance(first, (list, tuple)):
            # Nested layout: row[0] is a whole score list, not a number.
            if not first:
                continue
            first = first[0]
        firsts.append(first)

    if not firsts:
        # Avoid dividing by zero when the pickle holds no scores at all.
        print("No scores found in the pickle file.")
        return

    total = len(firsts)
    for threshold in (0.5, 0.9):
        above = sum(1 for number in firsts if number > threshold)
        print(f"Percentage of numbers above {threshold}: {above / total * 100:.2f}%")

# Summarise the scores stored in scores.pkl (the file the scoring cell writes).
read_and_print_pickle("scores.pkl")


In [None]:
# This function was used to calculate the table in the paper.
def _percent_at_least(values, threshold):
    """Return the percentage of ``values`` that are >= ``threshold`` (0.0 if empty)."""
    if values.size == 0:
        return 0.0
    return float(np.count_nonzero(values >= threshold) / values.size * 100)


def calculate_statistics(data, k):
    """Summarise every metric over the records with ``sentence_number <= k``.

    Args:
        data (list[dict]): Records with the keys ``sentence_number``,
            ``COSINE``, ``R1``, ``RL``, ``ED`` and ``J``.
        k (int): Inclusive upper bound on ``sentence_number``. The value 45 is
            labelled ``"ALL"`` in the output.

    Returns:
        dict: ``{"sentence_number": k or "ALL", <metric>: {...}, ...}``. Each
        metric maps to a dict holding a LaTeX-formatted "mean +- std" string
        and the percentage of scores >= 0.9 and >= 1.0. ``COSINE`` also
        reports the percentage >= 0.5. Every statistic is 0 when no record
        matches.
    """
    # Keep records up to and including sentence number k.
    filtered_data = [d for d in data if d["sentence_number"] <= k]

    statistics = {"sentence_number": "ALL" if k == 45 else k}

    for metric in ("COSINE", "R1", "RL", "ED", "J"):
        values = np.array([d[metric] for d in filtered_data], dtype=float)

        if values.size:
            mean_value = float(values.mean())
            std_value = float(values.std())
        else:
            # No data: report zeros instead of NaN from an empty mean/std.
            mean_value = std_value = 0.0

        metric_stats = {
            f'{metric} mean +- std' : fr'$ {mean_value:.2f} \pm {std_value:.2f} $',
            f"{metric} >= 0.9": _percent_at_least(values, 0.9),
            f"{metric} >= 1.0": _percent_at_least(values, 1),
        }
        if metric == "COSINE":
            # The helper guards the empty case, so this threshold is safe too
            # (it used to be computed outside the emptiness check).
            metric_stats[f"{metric} >= 0.5"] = _percent_at_least(values, 0.5)

        statistics[metric] = metric_stats

    return statistics

def make_dict(Index, sentenceNumber, cosine, R1, RL, ED, J):
    """Bundle one paragraph/sentence-count score record into a flat dict."""
    record = {}
    record["Paragraph"] = Index
    record["sentence_number"] = sentenceNumber
    record["COSINE"] = cosine
    record["R1"] = R1
    record["RL"] = RL
    record["ED"] = ED
    record["J"] = J
    return record

# Build the summary table: flatten the nested score lists into one record per
# (paragraph, number of accumulated sentences), then aggregate per cut-off k.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open("scores.pkl", "rb") as scores_file:  # closes the handle once loaded
    scores = pickle.load(scores_file)

total_scores = []
for index, paragraph_scores in enumerate(scores):
    # Sentence numbers are 1-based in the table.
    for sentenceNumber, sentence_scores in enumerate(paragraph_scores, start=1):
        total_scores.append(
            make_dict(
                index,
                sentenceNumber,
                sentence_scores[0],
                sentence_scores[1],
                sentence_scores[2],
                sentence_scores[3],
                sentence_scores[4],
            )
        )

# 45 is the cut-off that calculate_statistics labels "ALL".
rows = [calculate_statistics(total_scores, k) for k in [1, 2, 3, 5, 10, 20, 45]]

print(rows)
table_df = pd.DataFrame(rows)
table_df.to_csv("table_for_paper_all_scores.csv", index=False)
