In [1]:
import os

# Set before the imports below so the CUDA runtime only exposes GPU 0,
# enumerated in PCI bus order.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from transformers import AutoModelForCausalLM, AutoTokenizer
# Explicit import instead of `import *`: the only estimator this notebook uses
# is MahalanobisDistanceSeq, and naming it keeps the namespace traceable.
from lm_polygraph.estimators import MahalanobisDistanceSeq
from lm_polygraph.utils.model import WhiteboxModel
from lm_polygraph.utils.dataset import Dataset
from lm_polygraph.utils.processor import Logger
from lm_polygraph.utils.manager import UEManager
from tqdm import tqdm

In [2]:
# Experiment configuration: every value a reader might tune lives in this cell.

# Checkpoint id handed to both the model and the tokenizer loaders.
model_path = "bigscience/bloomz-560m"
# Forwarded as `device_map` when the model is loaded.
device = "cuda"

# Two-element identifier forwarded to Dataset.load
# (presumably dataset name and config name -- confirm against Dataset.load).
dataset_name = "trivia_qa", "rc.nocontext"

# Batch size for both datasets, and the seed used when subsampling them.
batch_size, seed = 4, 42

In [3]:
# Load the tokenizer and the causal-LM weights from the same checkpoint,
# then wrap the pair in a WhiteboxModel for use by the manager.
tokenizer = AutoTokenizer.from_pretrained(model_path)
base_model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)

model = WhiteboxModel(base_model, tokenizer)

In [4]:
# Prompt template shared by the evaluation and the train split.
qa_prompt = "Question: {question}\nAnswer:{answer}"


def _load_subsampled(split, n_samples=16):
    """Load one split of `dataset_name` and subsample it in place.

    Args:
        split: Name of the split passed to Dataset.load.
        n_samples: Number of examples requested from `subsample`.

    Returns:
        The loaded Dataset after `subsample(n_samples, seed=seed)` was called on it.
    """
    subset = Dataset.load(
        dataset_name,
        'question', 'answer',
        batch_size=batch_size,
        prompt=qa_prompt,
        split=split,
    )
    subset.subsample(n_samples, seed=seed)
    return subset


# Use validation split, since test split of trivia_qa doesn't have reference answers
dataset = _load_subsampled("validation")
train_dataset = _load_subsampled("train")

Resolving data files:   0%|          | 0/26 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/26 [00:00<?, ?it/s]

In [5]:
# The single uncertainty estimator under study; "decoder" is its only argument.
ue_methods = [
    MahalanobisDistanceSeq("decoder"),
]

In [6]:
# Build the manager around the evaluation data, the wrapped model and the estimators.
man = UEManager(
    dataset, model, ue_methods,
    # Three positional slots deliberately left empty
    # (presumably generation metrics, UE metrics and processors --
    # TODO confirm against the UEManager signature).
    [], [], [],
    train_data=train_dataset,
    verbose=True,
)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Stat calculators: [<lm_polygraph.stat_calculators.greedy_probs.GreedyProbsCalculator object at 0x7f205f1f9600>, <lm_polygraph.stat_calculators.embeddings.EmbeddingsCalculator object at 0x7f205f1fb9a0>]


In [43]:
# Show the manager's three stat-calculator lists side by side as one tuple:
# background-train, train, and regular calculators.
man.background_train_stat_calculators, man.train_stat_calculators, man.stat_calculators

([],
 [<lm_polygraph.stat_calculators.embeddings.EmbeddingsCalculator at 0x7fb469f87130>],
 [<lm_polygraph.stat_calculators.greedy_probs.GreedyProbsCalculator at 0x7fb469f87b20>,
  <lm_polygraph.stat_calculators.embeddings.EmbeddingsCalculator at 0x7fb469f87130>])

[<lm_polygraph.stat_calculators.embeddings.EmbeddingsCalculator at 0x7fb469f87130>]

In [7]:
# Invoke the manager and keep its return value. The captured output below
# shows two 4-step progress bars and one debug line per estimator call.
results = man()

  0%|          | 0/4 [00:00<?, ?it/s]From v4.47 onwards, when a model cache is to be returned, `generate` will return a `Cache` instance instead by default (as opposed to the legacy tuple of tuples format). If you want to keep returning the legacy format, please set `return_legacy_cache=True`.
100%|██████████| 4/4 [00:07<00:00,  1.77s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

I am estimator and I have the following stat keys dict_keys(['input_texts', 'target_texts', 'model', 'train_embeddings_decoder', 'input_tokens', 'greedy_log_probs', 'greedy_tokens', 'greedy_tokens_alternatives', 'greedy_texts', 'greedy_log_likelihoods', 'embeddings_decoder'])


 50%|█████     | 2/4 [00:03<00:03,  1.73s/it]

I am estimator and I have the following stat keys dict_keys(['input_texts', 'target_texts', 'model', 'input_tokens', 'greedy_log_probs', 'greedy_tokens', 'greedy_tokens_alternatives', 'greedy_texts', 'greedy_log_likelihoods', 'embeddings_decoder'])


 75%|███████▌  | 3/4 [00:03<00:01,  1.07s/it]

I am estimator and I have the following stat keys dict_keys(['input_texts', 'target_texts', 'model', 'input_tokens', 'greedy_log_probs', 'greedy_tokens', 'greedy_tokens_alternatives', 'greedy_texts', 'greedy_log_likelihoods', 'embeddings_decoder'])


100%|██████████| 4/4 [00:04<00:00,  1.02s/it]

I am estimator and I have the following stat keys dict_keys(['input_texts', 'target_texts', 'model', 'input_tokens', 'greedy_log_probs', 'greedy_tokens', 'greedy_tokens_alternatives', 'greedy_texts', 'greedy_log_likelihoods', 'embeddings_decoder'])





In [11]:
# Inspect the estimations stored on the manager. In the output below they are
# keyed by a (level, estimator name) tuple, with one value per evaluated example.
man.estimations

defaultdict(list,
            {('sequence',
              'MahalanobisDistanceSeq_decoder'): [1169.6920166015625, 826.4873046875, 1069.16357421875, 1156.1181640625, 708.9137573242188, 963.9560546875, 819.7874145507812, 1190.7142333984375, 1264.18798828125, 812.9637451171875, 943.4371948242188, 3326.961669921875, 716.6776123046875, 928.23583984375, 860.3135375976562, 1123.5225830078125]})

In [32]:
# List which statistics are currently held in `man.stats`.
man.stats.keys()

dict_keys(['input_texts', 'target_texts', 'greedy_texts', 'greedy_tokens'])

In [33]:
# Obtain the train-set statistics by calling a private manager method directly.
# NOTE(review): underscore-prefixed API -- may change without notice.
train_stats = man._extract_train_embeddings()

100%|██████████| 4/4 [00:04<00:00,  1.13s/it]


In [35]:
# Display the extracted train statistics. Per the output below this is a dict
# holding a float32 array under 'train_embeddings_decoder'.
train_stats

{'train_embeddings_decoder': array([[ 8.0123323e-01,  3.3615193e-01, -9.2046976e-01, ...,
         -8.7836536e+02, -3.0853969e-01, -4.1382957e+00],
        [ 5.1549762e-01,  2.0519648e+00, -1.2289014e+00, ...,
         -8.6393359e+02, -2.0847676e+00,  7.3566306e-01],
        [-1.4037980e-01,  2.2656708e+00, -1.8202316e+00, ...,
         -7.8453168e+02,  3.2707655e-01, -3.3860345e+00],
        ...,
        [ 3.4297333e+00,  1.3884777e-01, -2.1633844e+00, ...,
         -5.3989429e+02, -7.8078997e-01, -4.1057215e+00],
        [ 3.0026469e+00,  2.2823787e+00, -3.4084189e-01, ...,
         -8.5602466e+02, -2.8812864e-01, -2.2269652e+00],
        [ 2.4519567e+00,  3.6791754e+00, -2.6558270e+00, ...,
         -6.4472070e+02,  1.7205124e+00, -6.2113414e+00]], dtype=float32)}

In [37]:
# Stat calculators registered on the manager.
man.stat_calculators

[<lm_polygraph.stat_calculators.greedy_probs.GreedyProbsCalculator at 0x7fb4ec705ae0>,
 <lm_polygraph.stat_calculators.embeddings.EmbeddingsCalculator at 0x7fb4ec705330>]

In [38]:
# Background-train stat calculators registered on the manager (empty in the output below).
man.background_train_stat_calculators

[]

In [39]:
# Train stat calculators registered on the manager.
man.train_stat_calculators

[<lm_polygraph.stat_calculators.embeddings.EmbeddingsCalculator at 0x7fb4ec705330>]

In [24]:
# Assemble the per-batch statistics dict by hand from `man.data`.
#
# The train statistics are handed to the first batch only and then dropped.
# They are drained from a shallow copy rather than from `train_stats` itself,
# so the notebook-level `train_stats` stays intact: later cells can still read
# it, and this cell can be re-run without re-extracting the embeddings.
pending_train_stats = dict(train_stats)

iterable_data = tqdm(man.data)
for batch_i, (inp_texts, target_texts) in enumerate(iterable_data):
    batch_stats = {
        "input_texts": inp_texts,
        "target_texts": target_texts,
        "model": man.model,
    }

    # Non-empty only on the first iteration; an empty no-op afterwards.
    batch_stats.update(pending_train_stats)
    pending_train_stats.clear()

100%|██████████| 4/4 [00:00<00:00, 12510.97it/s]


In [29]:
# Number of input texts in `batch_stats`, i.e. in the last batch the loop processed.
len(batch_stats['input_texts'])

4

In [15]:
# Take the train embeddings out of `train_stats`. `pop` removes the key, so this
# cell is not re-runnable: running it again, or after the key has already been
# removed, raises KeyError.
batch_stat = train_stats.pop('train_embeddings_decoder')

In [18]:
# Dimensions of the popped embeddings array ((16, 1024) in the output below).
batch_stat.shape

(16, 1024)

In [17]:
# Show what is left in `train_stats` (empty in the output below).
train_stats

{}