In [5]:
import evaluate

def normalize_text(s):
    """Normalize a Spanish answer string for token-level comparison.

    The steps are applied in this order: lowercase, strip punctuation,
    remove the definite articles (el, la, los, las), collapse whitespace.

    Args:
        s: The text to normalize (a ``str``).

    Returns:
        The normalized string. It can be empty when the input consists only
        of articles, punctuation and/or whitespace.
    """
    import re
    import string

    def remove_articles(text):
        # Spanish definite articles; \b keeps words such as "lana" or
        # "elote" intact. Runs after lowercasing, so "La"/"LOS" match too.
        regex = re.compile(r"\b(el|la|los|las)\b", re.UNICODE)
        return re.sub(regex, " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        # string.punctuation only covers ASCII, so the Spanish opening
        # question/exclamation marks have to be added explicitly.
        exclude = set(string.punctuation) | {"¿", "¡"}
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))


def f1(prediction, truth):
    """Token-level F1 between a predicted answer and the reference answer.

    Both strings are passed through ``normalize_text`` and split on
    whitespace. The overlap is counted as a multiset intersection, so a token
    that appears k times in both answers contributes k matches; counting
    unique tokens instead would give identical answers with repeated words a
    score below 1.

    Args:
        prediction: The predicted answer text.
        truth: The reference (gold) answer text.

    Returns:
        ``1`` or ``0`` (int) when either side normalizes to no tokens,
        ``0`` when no token is shared, otherwise the harmonic mean of token
        precision and recall as a float in (0, 1].
    """
    from collections import Counter

    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()

    # if either the prediction or the truth is no-answer then f1 = 1 if they agree, 0 otherwise
    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)

    # Counter & Counter keeps, per token, the minimum of the two counts.
    num_common = sum((Counter(pred_tokens) & Counter(truth_tokens)).values())

    # if there are no common tokens then f1 = 0
    if num_common == 0:
        return 0

    prec = num_common / len(pred_tokens)
    rec = num_common / len(truth_tokens)

    return 2 * (prec * rec) / (prec + rec)

# Metric objects used by the scoring loop (via their .compute method).
bleu, rouge = evaluate.load("bleu"), evaluate.load("rouge")

2024-11-17 12:14:57.443972: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-17 12:14:57.444197: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-17 12:14:57.506266: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-17 12:14:57.662327: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
import pandas as pd

# Read the QA evaluation table from disk.
dataset = pd.read_csv('../data/qa_dataset_v2.csv')

# 'Respuesta' supplies the reference answers and 'Respuesta modelo' the
# predictions; both are passed through normalize_text before scoring.
references = [normalize_text(ref) for ref in dataset['Respuesta'].tolist()]
predictions = [normalize_text(pred) for pred in dataset['Respuesta modelo'].tolist()]

In [16]:
from tqdm import tqdm 

# One list per metric; entry i holds the result for the i-th (reference, prediction) pair.
scores = dict(bleu=[], rouge=[], f1=[])

for ref, pred in tqdm(zip(references, predictions), total=len(references)):
    # Each metric is computed on a single pair at a time, not on the whole corpus.
    bleu_score = bleu.compute(predictions=[pred], references=[ref])
    scores['bleu'].append(bleu_score)

    rouge_score = rouge.compute(predictions=[pred], references=[ref])
    scores['rouge'].append(rouge_score)

    f1_score = f1(pred, ref)
    scores['f1'].append(f1_score)

100%|██████████| 198/198 [00:44<00:00,  4.41it/s]
