In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
model_name = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.26k [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.3M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.07k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

In [34]:
import json
training_data=[]
with open("mushroom.en-train_nolabel.v1.jsonl",'r',encoding='utf-8') as j:
   for line in j:
    training_data.append(json.loads(line))

In [35]:
input_text = "Good muffins cost little in New York. I like it."
#input_text = training_data[0]["model_output_text"]

premise = "Muffins don't cost much in New york"
#premise = training_data[0]["model_input"]

tokenized = tokenizer(input_text)

tokens = tokenizer.convert_ids_to_tokens(tokenized["input_ids"])

num_of_tokens = len(tokenized["input_ids"])

spans = []
for i in range(num_of_tokens):
    span = tokenized.token_to_chars(i)
    if span != None:
      token = tokens[i]
      if token[0] != "▁":
        spans.append((token, list(range(span[0], span[1]))))
      else:
        if i == 1:
          token_span = list(range(span[0], span[1]))
        else:
          token_span = list(range(span[0]+1, span[1]))
        if len(token_span):
          spans.append((token[1:], token_span))


In [36]:
spans

[('Good', [0, 1, 2, 3]),
 ('muffins', [5, 6, 7, 8, 9, 10, 11]),
 ('cost', [13, 14, 15, 16]),
 ('little', [18, 19, 20, 21, 22, 23]),
 ('in', [25, 26]),
 ('New', [28, 29, 30]),
 ('York', [32, 33, 34, 35]),
 ('.', [36]),
 ('I', [38]),
 ('like', [40, 41, 42, 43]),
 ('it', [45, 46]),
 ('.', [47])]

In [37]:
sent_indices = set(list(range(len(input_text))))
removed_sents = []
removed_inds = []
for t in range(num_of_tokens-3):
  removed_sents.append(input_text[:spans[t][1][0]] + input_text[spans[t][1][-1]+1:])
  removed_inds.append(sorted(list(sent_indices.difference(set(spans[t][1])))))

removed_sents

[' muffins cost little in New York. I like it.',
 'Good  cost little in New York. I like it.',
 'Good muffins  little in New York. I like it.',
 'Good muffins cost  in New York. I like it.',
 'Good muffins cost little  New York. I like it.',
 'Good muffins cost little in  York. I like it.',
 'Good muffins cost little in New . I like it.',
 'Good muffins cost little in New York I like it.',
 'Good muffins cost little in New York.  like it.',
 'Good muffins cost little in New York. I  it.',
 'Good muffins cost little in New York. I like .',
 'Good muffins cost little in New York. I like it']

In [38]:
preds = []

for i in removed_sents:
  hypo = i
  input = tokenizer(premise, hypo, truncation=True, return_tensors="pt")
  output = model(input["input_ids"].to(device))
  prediction = torch.softmax(output["logits"][0], -1).tolist()
  preds.append(prediction[0])


In [39]:
union_not_ent = set()
union_ent = set()

for i in range(len(preds)):
  if preds[i] < 0.5:
    union_not_ent.update(removed_inds[i])
  else:
    union_ent.update(removed_inds[i])

In [40]:
sorted(list(union_not_ent.difference(union_ent)))

[40, 41, 42, 43]

In [41]:
class Hallucination:
  def __init__(self, train):
    self.train_data = train
    self.model_name = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
    self.tokenizer = AutoTokenizer.from_pretrained(model_name)
    self.model = AutoModelForSequenceClassification.from_pretrained(model_name)

  def get_premise_hypothesis(self, id):
    premise = self.train_data[id]["model_input"]
    hypothesis = self.train_data[id]["model_output_text"]
    return premise, hypothesis

  def get_hallucination_spans(self, premise, input_text):
    tokenized = self.tokenizer(input_text)
    tokens = self.tokenizer.convert_ids_to_tokens(tokenized["input_ids"])
    num_of_tokens = len(tokenized["input_ids"])
    spans = []
    for i in range(num_of_tokens):
      span = tokenized.token_to_chars(i)
      if span != None:
        token = tokens[i]
        if token[0] != "▁":
          spans.append((token, list(range(span[0], span[1]))))
        else:
          if i == 1:
            token_span = list(range(span[0], span[1]))
          else:
            token_span = list(range(span[0]+1, span[1]))
          if len(token_span):
            spans.append((token[1:], token_span))

    sent_indices = set(list(range(len(input_text))))
    removed_sents = []
    removed_inds = []

    for t in range(num_of_tokens-3):
      removed_sents.append(input_text[:spans[t][1][0]] + input_text[spans[t][1][-1]+1:])
      removed_inds.append(sorted(list(sent_indices.difference(set(spans[t][1])))))

    preds = []

    for i in removed_sents:
      hypo = i
      input = self.tokenizer(premise, hypo, truncation=True, return_tensors="pt")
      output = self.model(input["input_ids"].to(device))
      prediction = torch.softmax(output["logits"][0], -1).tolist()
      preds.append(prediction[0])

    union_not_ent = set()
    union_ent = set()

    for i in range(len(preds)):
      if preds[i] < 0.5:
        union_not_ent.update(removed_inds[i])
      else:
        union_ent.update(removed_inds[i])

    return sorted(list(union_not_ent.difference(union_ent)))

  def convert_to_ranges(self, indices):
    if not indices:
      return []

    ranges = []
    start = indices[0]
    end = indices[0]

    for i in range(1, len(indices)):
      if indices[i] == end + 1:
          end = indices[i]
      else:
          ranges.append([start, end])
          start = indices[i]
          end = indices[i]

    ranges.append([start, end])

    return ranges

  def get_hard_labels(self):
    for i in range(len(self.train_data)):
      model_input = self.get_premise_hypothesis(i)[0]
      model_output_text = self.get_premise_hypothesis(i)[1]
      hall_span = self.get_hallucination_spans(model_input, model_output_text)
      self.predictions = []
      self.predictions.append({'id': i, 'hard_labels': self.convert_to_ranges(hall_span)})
    return self.predictions

In [42]:
hallucination_detection = Hallucination(training_data[:10])

pred_data = hallucination_detection.get_hard_labels()

IndexError: list index out of range