# InfoLM

InfoLM is an NLG evaluation metric introduced in [InfoLM: A New Metric to Evaluate Summarization & Data2Text Generation](https://arxiv.org/abs/2112.01589) by Pierre Colombo, Chloe Clavel, and Pablo Piantanida.

## Settings

In [1]:
import os

import torch
from nlg_eval_via_simi_measures.infolm import InfoLM

# Turn off parallelism in the tokenizers backend for this process.
# NOTE(review): presumably set to silence the Hugging Face "process just got
# forked" tokenizers warning — confirm that this is the intent.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
# Checkpoint identifier handed to InfoLM as `model_name` when the test results
# are generated below.
MODEL_NAME = "google/bert_uncased_L-2_H-128_A-2"

## Test config

In [3]:
# Test sentences, written as aligned (hypothesis, reference) pairs so that each
# hypothesis sits next to the reference it is scored against.
# - The second and third pairs are identical.
# - The last pair contains irregular runs of spaces and a trailing space;
#   presumably deliberate — keep these strings byte-for-byte.
_SENTENCE_PAIRS = [
    (
        "It is a guide to action which ensures that the military always obeys the commands of the party",
        "It is a guide to action that ensures that the military will forever heed Party commands",
    ),
    (
        "he read the book because he was interested in world history",
        "he was interested in world history because he read the book",
    ),
    (
        "he read the book because he was interested in world history",
        "he was interested in world history because he read the book",
    ),
    (
        "the cat the   cat on the mat ",
        "the  cat is     on the mat ",
    ),
]

# Parallel lists consumed by the evaluation cell (index i of one matches index i
# of the other).
HYPOTHESIS = [pair[0] for pair in _SENTENCE_PAIRS]
REFERENCES = [pair[1] for pair in _SENTENCE_PAIRS]

In [4]:
# One tuple per metric configuration. The evaluation cell reads the fields
# positionally:
#   [0] result name; the "_divergence" / "_distance" suffix is stripped to get
#       the measure name passed to InfoLM
#   [1] use_idf_weights (True -> results are stored under "<name>_bs2" and
#       "<name>_bs4")
#   [2] alpha
#   [3] beta
configs = [
    # Divergences
    ("kl_divergence", False, 0.25, 0.25),
    ("alpha_divergence", True, 0.4, 0.3),
    ("beta_divergence", False, None, 0.6),
    ("ab_divergence", True, 0.25, 0.25),
    ("renyi_divergence", False, 0.3, 0.1),
    # Distances
    ("l1_distance", True, None, None),
    ("l2_distance", False, None, None),
    ("linf_distance", True, None, None),
    ("fisher_rao_distance", False, 0.25, 0.25),
]

## Generate test results

In [5]:
def _score_with_idf(metric, hypotheses, references, measure):
    """Score one batch using IDF weights prepared from that same batch.

    Args:
        metric: InfoLM instance; its `prepare_idfs` and `evaluate_batch`
            methods are called.
        hypotheses: Hypothesis sentences of the batch.
        references: Reference sentences of the batch, aligned with `hypotheses`.
        measure: Key under which the scores are looked up in the value returned
            by `evaluate_batch`.

    Returns:
        `metric.evaluate_batch(...)[measure]` for this batch.
        NOTE(review): presumably a list of per-sentence floats (the caller
        concatenates two of them with `+`) — confirm against InfoLM.
    """
    idf_hyp, idf_ref = metric.prepare_idfs(hypotheses, references)
    return metric.evaluate_batch(hypotheses, references, idf_hyp, idf_ref)[measure]


# Maps result name -> tensor of scores, one entry per sentence pair.
results = {}

for name, use_idf_weights, alpha, beta in configs:
    # Reference metric: InfoLM is given the name without its
    # "_divergence" / "_distance" suffix, e.g. "kl_divergence" -> "kl".
    measure = name.replace("_divergence", "").replace("_distance", "")
    info_lm = InfoLM(
        model_name=MODEL_NAME,
        measure_to_use=measure,
        use_idf_weights=use_idf_weights,
        alpha=alpha,
        beta=beta,
    )

    if use_idf_weights:
        # IDF weights are prepared from the sentences of each batch, so the
        # scores are recorded for two different batch splits.
        # batch_size = 2
        first_half = _score_with_idf(info_lm, HYPOTHESIS[:2], REFERENCES[:2], measure)
        second_half = _score_with_idf(info_lm, HYPOTHESIS[2:], REFERENCES[2:], measure)
        results[f"{name}_bs2"] = torch.tensor(first_half + second_half)
        # batch_size = 4
        full_batch = _score_with_idf(info_lm, HYPOTHESIS, REFERENCES, measure)
        results[f"{name}_bs4"] = torch.tensor(full_batch)
    else:
        # Without IDF weights a single pass over all sentence pairs is recorded.
        scores = info_lm.evaluate_batch(HYPOTHESIS, REFERENCES)[measure]
        results[name] = torch.tensor(scores)

Some weights of the model checkpoint at google/bert_uncased_L-2_H-128_A-2 were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
  dict_logits_distribution[str(self.temperature)] = torch.nn.Softmax()(
Some weights of the model checkpoint at google/bert_uncased_L-2_H-128_A-2 were not used when initializing BertForMaskedLM: ['cls.seq_relat

In [6]:
# Display the collected score tensors keyed by result name; configurations run
# with IDF weights appear twice, under the "_bs2" and "_bs4" suffixes.
results

{'kl_divergence': tensor([-2.5192, -0.0989, -0.0989, -2.1052]),
 'alpha_divergence_bs2': tensor([-1.2924, -0.1424, -0.1424, -1.4661]),
 'alpha_divergence_bs4': tensor([-1.3036, -0.1424, -0.1424, -1.6018]),
 'beta_divergence': tensor([0.5291, 0.0597, 0.0597, 0.3080]),
 'ab_divergence_bs2': tensor([5.9517, 0.5222, 0.5222, 7.0017]),
 'ab_divergence_bs4': tensor([5.9565, 0.5222, 0.5222, 7.1950]),
 'renyi_divergence': tensor([0.4651, 0.0425, 0.0425, 0.4088]),
 'l1_distance_bs2': tensor([0.9679, 0.1877, 0.1877, 0.9561]),
 'l1_distance_bs4': tensor([0.9591, 0.1877, 0.1877, 1.0823]),
 'l2_distance': tensor([0.2053, 0.1114, 0.1114, 0.2522]),
 'linf_distance_bs2': tensor([0.0789, 0.0869, 0.0869, 0.2324]),
 'linf_distance_bs4': tensor([0.0777, 0.0869, 0.0869, 0.2614]),
 'fisher_rao_distance': tensor([1.5637, 0.4957, 0.4957, 1.4570])}