In [1]:
import os
import random
import numpy as np
import torch
import evaluate
import json
from datasets import load_metric

# Please make sure you are using CUDA enabled GPU for this project
device = 'cuda'

# Setting the seed value ensures that the results are reproducible across different runs
seed_val = 10

# Ensuring that the seed is set for Python's hashing, random operations, NumPy, and PyTorch
os.environ['PYTHONHASHSEED'] = str(seed_val)
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)


<torch._C.Generator at 0x7d5f29f3f450>

In [2]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Tokenizer for the given model
# Since we will be using the same tokenizer for other notebooks, we will save it in the cache directory
generation_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b")

# Using Microsoft DeBERTa model for the generation of similarities
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-large-mnli")
model = AutoModelForSequenceClassification.from_pretrained("microsoft/deberta-large-mnli").cuda()

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
responses = json.loads(open("data/activations/generations_and_judgments_with_sae_20.json").read())

In [4]:
# Using Rouge to evaluate syntactic similarity for our datasets (coQA)
rouge_metric = evaluate.load('rouge')

result_dict = {}

# METEOR metric can be used for evaluating summarization tasks (useful for some time of datasets but not for ours)
meteor = load_metric('meteor')

deberta_predictions = []

  meteor = load_metric('meteor')
[nltk_data] Downloading package wordnet to /home/mpapucci/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/mpapucci/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/mpapucci/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [5]:
from tqdm import tqdm

for sample_idx in tqdm(responses):
    sample = responses[sample_idx]
    # Getting data from the sample
    # Please run the cleaner notebook before running this code
    question_text = sample['prompt']
    generated_texts = sample['generations']
    sample_id = sample_idx
    unique_responses = list(set(generated_texts))

    # Variables for semantic similarity analysis
    answer_pairs_1 = []
    answer_pairs_2 = []
    has_semantically_different_answers = False
    syntactic_similarities = {rouge_type: 0.0 for rouge_type in ['rouge1', 'rouge2', 'rougeL']}
    semantic_set_ids = {answer: index for index, answer in enumerate(unique_responses)}

    # print('No of unique answers:', len(unique_responses))
    per_iter_deberta_predictions = []
    # Evalauate semantic similarity if there are more than 1 unique answers
    if len(unique_responses) > 1:
        
        for i, reference_answer in enumerate(unique_responses):
            for j in range(i + 1, len(unique_responses)):

                answer_pairs_1.append(unique_responses[i])
                answer_pairs_2.append(unique_responses[j])

                # Create input pairs and encode them
                input_pair = question_text + ' ' + unique_responses[i] + ' [SEP] ' + unique_responses[j]
                encoded_input = tokenizer.encode(input_pair, padding=True)
                prediction = model(torch.tensor([encoded_input], device='cuda'))['logits']
                predicted_label = torch.argmax(prediction, dim=1)

                # Reverse the input pair and encode
                reverse_input_pair = question_text + ' ' + unique_responses[j] + ' [SEP] ' + unique_responses[i]
                encoded_reverse_input = tokenizer.encode(reverse_input_pair, padding=True)
                reverse_prediction = model(torch.tensor([encoded_reverse_input], device='cuda'))['logits']
                reverse_predicted_label = torch.argmax(reverse_prediction, dim=1)

                # Determine semantic similarity
                deberta_prediction = 0 if 0 in predicted_label or 0 in reverse_predicted_label else 1
                if deberta_prediction == 0:
                    has_semantically_different_answers = True
                else:
                    semantic_set_ids[unique_responses[j]] = semantic_set_ids[unique_responses[i]]

                deberta_predictions.append([unique_responses[i], unique_responses[j], deberta_prediction])
                per_iter_deberta_predictions.append([i, j, deberta_prediction])

        # Evalauate syntactic similarity
        results = rouge_metric.compute(predictions=answer_pairs_1, references=answer_pairs_2)
        for rouge_type in syntactic_similarities.keys():
            syntactic_similarities[rouge_type] = results[rouge_type]
    
    # Store the results in the result dictionary
    result_dict[sample_id] = {
        'syntactic_similarities': syntactic_similarities,
        'has_semantically_different_answers': has_semantically_different_answers,
        'semantic_set_ids': [semantic_set_ids[x] for x in generated_texts],
        'deberta_predictions': per_iter_deberta_predictions
    }

100%|██████████| 817/817 [04:19<00:00,  3.15it/s]


In [6]:
result_dict

{'0': {'syntactic_similarities': {'rouge1': 0.35718665245671083,
   'rouge2': 0.1858534419054049,
   'rougeL': 0.30323490901194716},
  'has_semantically_different_answers': True,
  'semantic_set_ids': [0, 1, 0, 0, 0],
  'deberta_predictions': [[0, 1, 0],
   [0, 2, 1],
   [0, 3, 1],
   [0, 4, 1],
   [1, 2, 0],
   [1, 3, 1],
   [1, 4, 0],
   [2, 3, 1],
   [2, 4, 1],
   [3, 4, 1]]},
 '1': {'syntactic_similarities': {'rouge1': 0.33339465226060705,
   'rouge2': 0.11953238155204204,
   'rougeL': 0.23368953400639764},
  'has_semantically_different_answers': True,
  'semantic_set_ids': [0, 0, 0, 0, 0],
  'deberta_predictions': [[0, 1, 1],
   [0, 2, 1],
   [0, 3, 1],
   [0, 4, 1],
   [1, 2, 1],
   [1, 3, 1],
   [1, 4, 1],
   [2, 3, 1],
   [2, 4, 1],
   [3, 4, 0]]},
 '2': {'syntactic_similarities': {'rouge1': 0.26870225820593996,
   'rouge2': 0.11469313015498303,
   'rougeL': 0.23075316959522602},
  'has_semantically_different_answers': True,
  'semantic_set_ids': [0, 0, 0, 0, 0],
  'deberta_pre

In [7]:
import csv 

with open('data/activations/deberta_predictions_20.csv', "w", encoding="UTF8") as f:
    writer = csv.writer(f)
    
    # Writing header row and deberta predictions for the CSV file
    writer.writerow(['qa_1', 'qa_2', 'prediction'])
    writer.writerows(deberta_predictions)

In [8]:
len(deberta_predictions)

8170

In [9]:
with open ('data/activations/similarity_scores_20.json', "w") as f:
    f.write(json.dumps(result_dict))