In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List
import torch

In [42]:
model_name = "facebook/opt-125m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()

OPTForCausalLM(
  (model): OPTModel(
    (decoder): OPTDecoder(
      (embed_tokens): Embedding(50272, 768, padding_idx=1)
      (embed_positions): OPTLearnedPositionalEmbedding(2050, 768)
      (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (layers): ModuleList(
        (0-11): 12 x OPTDecoderLayer(
          (self_attn): OPTAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (activation_fn): ReLU()
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): LayerNorm((768,), ep

In [48]:
def get_token_probs(prompt: List[str], tokenizer: AutoTokenizer, model: AutoModelForCausalLM) -> torch.Tensor:
    """
    This function returns the probability of each token in the prompt being the next token in the sequence.
    Args:
        prompt (List[str]): The prompt to be evaluated
        tokenizer (AutoTokenizer): The tokenizer used to tokenize the prompt
        model (AutoModelForCausalLM): The model used to evaluate the prompt

    Returns:
        _type_: _description_
    """
    with torch.no_grad():
        inputs = tokenizer(prompt, return_tensors="pt")
        # Get the tokens corresponding to the input_ids
        tokens = [tokenizer.convert_ids_to_tokens(input_id) for input_id in inputs["input_ids"]]
        print(tokens)
        outputs = model(inputs.input_ids, labels=inputs.input_ids)
        # Get the logit scores for all the tokens
        log_probs = outputs.logits
        # Convert the logits to probabilities
        probs = torch.softmax(log_probs, dim=-1)
        print(probs.sum(dim=-1))
        # Obtain the probability of the token in the prompt
        token_probs = []
        for batch_id in range(len(inputs["input_ids"])):
            batch_probs = []
            for token_id in range(len(inputs["input_ids"][batch_id])):
                if inputs["input_ids"][batch_id][token_id] != tokenizer.pad_token_id:
                    batch_probs.append(probs[batch_id, token_id, inputs["input_ids"][batch_id][token_id]])
            token_probs.append(batch_probs)
        return tokens, token_probs

In [49]:
sample_prompt = "The capital of France is Paris."
token_probs = get_token_probs([sample_prompt], tokenizer, model)


[['</s>', 'The', 'Ġcapital', 'Ġof', 'ĠFrance', 'Ġis', 'ĠParis', '.']]
tensor([[1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000]])


In [47]:
token_probs

([['</s>', 'The', 'Ġcapital', 'Ġof', 'ĠFrance', 'Ġis', 'ĠParis', '.']],
 [[tensor(0.0370),
   tensor(2.9119e-06),
   tensor(0.0003),
   tensor(0.0001),
   tensor(0.0002),
   tensor(0.0005),
   tensor(0.0003),
   tensor(3.1099e-05)]])