# String Similarity

In [None]:
!pip install -q "tensorflow-text==2.8.*"

[K     |████████████████████████████████| 4.9 MB 7.7 MB/s 
[K     |████████████████████████████████| 462 kB 14.3 MB/s 
[?25h

In [None]:
import tensorflow as tf
import tensorflow_text as text

In [None]:
# TODO: convert the answer to a string and return the metrics in the last step: return []


In [None]:
# Preprocessing 
def normalize_text(s):
    """Return ``s`` lowercased, without punctuation or English articles, with whitespace collapsed.

    The steps run in a fixed order: lowercase, delete every character found in
    ``string.punctuation``, replace the standalone words "a", "an" and "the"
    with a space, then join the whitespace-separated tokens with single spaces.
    """
    import re
    import string

    # Lowercase first so the article pattern only has to match lowercase forms.
    lowered = s.lower()
    # Punctuation is deleted rather than replaced, so "don't" becomes "dont".
    no_punctuation = lowered.translate(str.maketrans("", "", string.punctuation))
    no_articles = re.sub(r"\b(a|an|the)\b", " ", no_punctuation, flags=re.UNICODE)
    # split() + join collapses whitespace runs and trims both ends.
    return " ".join(no_articles.split())

In [None]:
# example
# One tokenized prediction and one tokenized reference answer, each wrapped in
# an outer list so the ragged tensors hold a single row of string tokens.
predictions = tf.ragged.constant([['captain', 'of', 'the', 'delta', 'flight']])
gold_answers = tf.ragged.constant([['delta', 'air', 'lines', 'flight']])

In [None]:
# Score the example with ROUGE-L (no explicit alpha) and show the three components, one per line.
result = text.metrics.rouge_l(predictions, gold_answers)
print(
    'F-Measure: %s' % result.f_measure,
    'P-Measure: %s' % result.p_measure,
    'R-Measure: %s' % result.r_measure,
    sep='\n',
)

F-Measure: tf.Tensor([0.44444448], shape=(1,), dtype=float32)
P-Measure: tf.Tensor([0.4], shape=(1,), dtype=float32)
R-Measure: tf.Tensor([0.5], shape=(1,), dtype=float32)


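Choose the alpha: the `alpha` argument of `rouge_l` controls how the F-measure weighs precision against recall. In the outputs below, `alpha=0` gives an F-measure equal to recall and `alpha=1` gives one equal to precision.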

In [None]:
# Compute ROUGE-L with alpha=0 and then alpha=1 to compare the resulting F-measures.
# After the loop `result` holds the alpha=1 scores.
for alpha in (0, 1):
    result = text.metrics.rouge_l(predictions, gold_answers, alpha=alpha)
    print('F-Measure (alpha=%s): %s' % (alpha, result.f_measure))
    print('P-Measure (alpha=%s): %s' % (alpha, result.p_measure))
    print('R-Measure (alpha=%s): %s' % (alpha, result.r_measure))

F-Measure (alpha=0): tf.Tensor([0.5 0.5], shape=(2,), dtype=float32)
P-Measure (alpha=0): tf.Tensor([0.4       0.6666667], shape=(2,), dtype=float32)
R-Measure (alpha=0): tf.Tensor([0.5 0.5], shape=(2,), dtype=float32)
F-Measure (alpha=1): tf.Tensor([0.4       0.6666667], shape=(2,), dtype=float32)
P-Measure (alpha=1): tf.Tensor([0.4       0.6666667], shape=(2,), dtype=float32)
R-Measure (alpha=1): tf.Tensor([0.5 0.5], shape=(2,), dtype=float32)


# Alternative: Exact Match and F1



In [None]:
def compute_exact_match(prediction, truth):
    """Return 1 when ``prediction`` and ``truth`` are equal after ``normalize_text``, otherwise 0."""
    normalized_prediction = normalize_text(prediction)
    normalized_truth = normalize_text(truth)
    return 1 if normalized_prediction == normalized_truth else 0

def compute_f1(prediction, truth):
    """Token-level F1 between a predicted answer and a reference answer.

    Both strings are passed through ``normalize_text`` and split on whitespace.
    Overlap is counted as a multiset intersection, so a token that occurs
    several times in both strings is credited once per shared occurrence.

    Args:
        prediction: predicted answer string.
        truth: reference answer string.

    Returns:
        1 or 0 when either side has no tokens (1 only if both are empty),
        0 when no token is shared, otherwise the harmonic mean of precision
        and recall as a float.
    """
    from collections import Counter

    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()

    # If either side is "no answer", F1 is 1 when they agree and 0 otherwise.
    if not pred_tokens or not truth_tokens:
        return int(pred_tokens == truth_tokens)

    # Counter & Counter keeps min(count) per token. A plain set intersection
    # under-counts repeated tokens and would score identical answers such as
    # "cat cat" vs "cat cat" below 1.
    num_common = sum((Counter(pred_tokens) & Counter(truth_tokens)).values())

    # No shared tokens: F1 is 0 (also avoids dividing by zero below).
    if num_common == 0:
        return 0

    prec = num_common / len(pred_tokens)
    rec = num_common / len(truth_tokens)

    return 2 * (prec * rec) / (prec + rec)

def get_gold_answers(example):
    """Collect the non-empty answer texts of ``example``, or ``[""]`` when there are none.

    ``example.answers`` is iterated and each item is indexed with ``"text"``;
    items whose text is empty are skipped.
    """
    answer_texts = []
    for answer in example.answers:
        answer_text = answer["text"]
        if answer_text:
            answer_texts.append(answer_text)

    # Nothing usable means a negative example: the only correct answer
    # is then the empty string.
    return answer_texts or [""]

In [None]:
# Placeholder inputs for the Exact Match / F1 scoring cell; both start empty.
# NOTE(review): presumably meant to be filled with answer strings before scoring — confirm.
# These assignments rebind the names used for the ragged tensors in the ROUGE-L example.
predictions = []
gold_answers = []

In [None]:
# Score each prediction against every gold answer and keep the best match per metric.
# NOTE(review): assumes `gold_answers` is a flat list of reference strings shared by
# every entry in `predictions` — confirm against how the two lists are filled.
for prediction in predictions:
    # default=0 keeps max() from raising ValueError when gold_answers is empty.
    em_score = max((compute_exact_match(prediction, answer) for answer in gold_answers), default=0)
    f1_score = max((compute_f1(prediction, answer) for answer in gold_answers), default=0)

    print(f"EM: {em_score} \t F1: {f1_score}")