# Group semantically equivalent answers into bins for each question
Model: DeBERTa-v3-large fine-tuned for natural language inference (https://huggingface.co/sileod/deberta-v3-large-tasksource-nli)

In [37]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import yaml
import os
import pickle
import glob
from tqdm import tqdm

# Load the notebook settings (model cache directory, path to the saved generations,
# sampling temperatures and beam sizes).
# The encoding is given explicitly so the file is decoded the same way on every
# platform instead of depending on the locale default.
with open("config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

In [4]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [13]:
# Directories taken from the config: the cache for downloaded model files and
# the folder that holds the pickled generations.
model_dir, save_path = config["model_dir"], config["path_to_saved_generations"]

In [8]:
# NLI checkpoint used for all entailment checks; downloaded files are cached in model_dir.
model_name = "sileod/deberta-v3-large-tasksource-nli"
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=model_dir)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    cache_dir=model_dir,
)
# Move the weights to the selected device in a separate step
model = model.to(device)

In [16]:
def load_pickle_files(folder):
    """
    Load every ``group*.pkl`` file in ``folder``, ordered by group number.

    glob returns its matches in arbitrary order, so the files are sorted by the
    integer that follows ``group`` in the file name (group2 before group10).
    When the files are numbered 0..n-1, the position of a group in the returned
    list therefore equals its file number, which matters for code that writes a
    group back to ``group{position}.pkl``.

    :param folder: directory containing the pickled groups
    :return: list with the unpickled content of each file, in file-number order
    """
    def group_number(path):
        stem = os.path.splitext(os.path.basename(path))[0]
        suffix = stem[len("group"):]
        # Files whose suffix is not a plain number sort after the numbered ones, by name
        return (0, int(suffix), stem) if suffix.isdecimal() else (1, 0, stem)

    pickle_files = sorted(glob.glob(os.path.join(folder, "group*.pkl")), key=group_number)

    data_groups = []
    for pickle_file in pickle_files:
        # NOTE: unpickling can execute arbitrary code; only load files from a trusted source
        with open(pickle_file, "rb") as f:
            data_groups.append(pickle.load(f))

    return data_groups

## Example
Two answers are semantically equivalent if 
- "Question: *question* Answer: *generated answer*" $\Rightarrow$ "Question: *question* Answer: *true answer*" **and** 
- "Question: *question* Answer: *true answer*" $\Rightarrow$ "Question: *question* Answer: *generated answer*"

In [11]:
question = "Question: How many continents does the world have?"
sequence1 = "Answer: There are seven continents."
sequence2 = "Answer: Seven."
# Names for the three logits, in the order this notebook reads them
label_names = ["entailment", "neutral", "contradiction"]


def nli_percentages(premise, hypothesis):
    """Return the class probabilities for premise => hypothesis in percent, rounded to one decimal."""
    # Named "encoded" rather than "input": rebinding the builtin at notebook
    # level would break input() for the rest of the kernel session.
    encoded = tokenizer(premise, hypothesis, return_tensors="pt")
    # Inference only, so do not record operations for autograd
    with torch.no_grad():
        output = model(encoded["input_ids"].to(device))
    probabilities = torch.softmax(output["logits"][0], -1).tolist()
    return {name: round(float(pred) * 100, 1) for pred, name in zip(probabilities, label_names)}


# Direction 1
premise = question + " " + sequence1
hypothesis = question + " " + sequence2
print(premise + " => " + hypothesis)
prediction = nli_percentages(premise, hypothesis)
print(prediction, end="\n\n")

# Direction 2
premise = question + " " + sequence2
hypothesis = question + " " + sequence1
print(premise + " => " + hypothesis)
prediction = nli_percentages(premise, hypothesis)
print(prediction)

Question: How many continents does the world have? Answer: There are seven continents. => Question: How many continents does the world have? Answer: Seven.
{'entailment': 99.4, 'neutral': 0.6, 'contradiction': 0.0}

Question: How many continents does the world have? Answer: Seven. => Question: How many continents does the world have? Answer: There are seven continents.
{'entailment': 97.1, 'neutral': 2.9, 'contradiction': 0.0}


## Save lexical equivalence groups
Used to calculate predictive entropy

Structure to save them
```python 
{ 1131: {"question": ..., 
         "true_answer": ..., 
         "temperature_0.25": {"answers": [...], "probabilities": [...], "length_output": [...], "lexical_eq_groups": [0, 1, 0, 2, 3, ...]},
         "temperature_0.5": {"answers": [...], "probabilities": [...], "length_output": [...], "lexical_eq_groups": [...]},
         "temperature_1": {"answers": [...], "probabilities": [...], "length_output": [...], "lexical_eq_groups": [...]},
         "temperature_1.5": {"answers": [...], "probabilities": [...], "length_output": [...], "lexical_eq_groups": [...]},
         "beam_20": {"answers": [...], "probabilities": [...], "length_output": [...], "lexical_eq_groups": [...]}
        }, 
  4295: ...
}
```
where each number in `lexical_eq_groups` is the ID of the group that the answer at the same position in `answers` belongs to (identical answers share an ID).

In [36]:
# Load the saved generations and build the keys of the sampling setups stored per question
data = load_pickle_files(save_path)
config_keys = [
    *(f"temperature_{t}" for t in config["temperatures"]),
    *(f"beam_{b}" for b in config["n_beams"]),
]

In [38]:
for group_nr, group in enumerate(data):
    for question_id in tqdm(group):
        for k in config_keys:
            answers = group[question_id][k]["answers"]

            # Identical answers share one group id; ids are handed out in order of
            # first appearance. One dict lookup per answer replaces the pairwise
            # comparison of all answers and produces the same numbering.
            # Assumes the answers are hashable (they are used as strings elsewhere
            # in this notebook).
            first_seen = {}
            lexical_eq_groups = [first_seen.setdefault(answer, len(first_seen)) for answer in answers]
            group[question_id][k]["lexical_eq_groups"] = lexical_eq_groups

    # Save result
    # group_nr is the position of the group in `data`, so this overwrites the file
    # the group was loaded from only if `data` is ordered by file number.
    with open(os.path.join(save_path, f"group{group_nr}.pkl"), "wb") as f:
        pickle.dump(group, f)

100%|██████████| 1000/1000 [00:00<00:00, 16643.20it/s]
100%|██████████| 1000/1000 [00:00<00:00, 22476.31it/s]
100%|██████████| 1000/1000 [00:00<00:00, 22723.87it/s]
100%|██████████| 1000/1000 [00:00<00:00, 22207.71it/s]
100%|██████████| 1000/1000 [00:00<00:00, 21737.55it/s]


## Bidirectional Entailment Clustering
Used to calculate semantic entropy

Structure to save them
```python 
{ 1131: {"question": ..., 
         "true_answer": ..., 
         "temperature_0.25": {"answers": [...], "probabilities": [...], "length_output": [...], "lexical_eq_groups": [...], "semantic_eq_groups": [...]},
         "temperature_0.5": {"answers": [...], "probabilities": [...], "length_output": [...], "lexical_eq_groups": [...], "semantic_eq_groups": [...]},
         "temperature_1": {"answers": [...], "probabilities": [...], "length_output": [...], "lexical_eq_groups": [...], "semantic_eq_groups": [...]},
         "temperature_1.5": {"answers": [...], "probabilities": [...], "length_output": [...], "lexical_eq_groups": [...], "semantic_eq_groups": [...]},
         "beam_20": {"answers": [...], "probabilities": [...], "length_output": [...], "lexical_eq_groups": [...], "semantic_eq_groups": [...]}
        }, 
  4295: ...
}
```
where each number in `semantic_eq_groups` is the ID of the semantic equivalence class that the answer at the same position in `answers` belongs to.

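The algorithm is given as pseudocode on page 15 of the paper this notebook follows (presumably Kuhn et al., 2023, "Semantic Uncertainty"; confirm and cite it here).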

In [47]:
def bidirectional_entailment(question, answer1, answer2):
    """
    Tests whether bidirectional entailment Question answer1 <=> Question answer2 holds.

    Each answer is combined with the question into one statement and the NLI model
    is queried in both directions. The pair counts as equivalent only if
    "entailment" is the top-scoring class both times.

    :param question: question text, without the "Question: " prefix
    :param answer1: first answer text, without the "Answer: " prefix
    :param answer2: second answer text, without the "Answer: " prefix
    :return: True for bidirectional entailment, False otherwise
    """
    entailment = 0  # class ids: 0: entail, 1: neutral, 2: contradiction

    def predicted_class(premise, hypothesis):
        # "encoded" instead of "input" so the builtin is not shadowed
        encoded = tokenizer(premise, hypothesis, return_tensors="pt")
        # Inference only: skip autograd bookkeeping to save memory and time
        with torch.no_grad():
            output = model(encoded["input_ids"].to(device))
        return torch.argmax(output["logits"][0], dim=-1).item()

    question = "Question: " + question
    statement1 = question + " " + "Answer: " + answer1
    statement2 = question + " " + "Answer: " + answer2

    # First answer2 => answer1; the reverse direction is only evaluated if the
    # first one is an entailment (short-circuit saves a forward pass otherwise).
    return (
        predicted_class(statement2, statement1) == entailment
        and predicted_class(statement1, statement2) == entailment
    )

In [51]:
def bidirectional_entailment_clustering(question, answers):
    """
    Group the answers to one question into semantic equivalence classes.

    Answers are processed in order. Each answer is compared with the first member
    of every existing class and joins the first class for which bidirectional
    entailment holds; otherwise it starts a new class.

    :param question: question the answers belong to
    :param answers: list of answer texts
    :return: list of classes, each a list of indices into ``answers``;
             an empty list if ``answers`` is empty
    """
    # Save as list as order is needed ("Use first sequence for each semantic-class")
    # Only save indices of answers
    # Starting from no classes (instead of a pre-filled [[0]]) keeps the result
    # valid for an empty answer list; the first answer still opens class 0.
    meanings = []

    for m, answer in enumerate(answers):
        for equivalence_class in meanings:
            # Use first sequence for each semantic class
            s_c = answers[equivalence_class[0]]
            if bidirectional_entailment(question, s_c, answer):
                equivalence_class.append(m)
                break
        else:
            # No existing class matched
            meanings.append([m])

    return meanings

In [52]:
# Load the saved generations again and rebuild the keys of the sampling setups
data = load_pickle_files(save_path)
config_keys = [
    *(f"temperature_{t}" for t in config["temperatures"]),
    *(f"beam_{b}" for b in config["n_beams"]),
]

In [None]:
for group_nr, group in enumerate(data):
    for question_id, record in tqdm(group.items()):
        for k in config_keys:
            sampled = record[k]
            answers = sampled["answers"]
            question = record["question"]

            # Turn the clusters (lists of answer indices) into one class id per answer
            semantic_eq_groups = [-1] * len(answers)
            for class_id, members in enumerate(bidirectional_entailment_clustering(question, answers)):
                for answer_index in members:
                    semantic_eq_groups[answer_index] = class_id

            sampled["semantic_eq_groups"] = semantic_eq_groups

            # Show every answer list next to its class ids
            print(answers)
            print(semantic_eq_groups)
    # Save result (currently disabled, so nothing is written back to disk)
    #with open(os.path.join(save_path, f"group{group_nr}.pkl"), "wb") as f:
    #    pickle.dump(group, f)
    

  0%|          | 0/1000 [00:00<?, ?it/s]