In [6]:
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer


gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
tokenizer = AutoTokenizer.from_pretrained("gpt2")

input_ids = tokenizer("Hello my name is Surya", return_tensors="pt").input_ids

generated_outputs = gpt2.generate(input_ids, do_sample=True, num_return_sequences=3, output_scores=True)

# only use id's that were generated
# gen_sequences has shape [3, 15]
gen_sequences = generated_outputs.sequences[:, input_ids.shape[-1]:]


# let's stack the logits generated at each step to a tensor and transform
# logits to probs
probs = torch.stack(generated_outputs.scores, dim=1).softmax(-1)  # -> shape [3, 15, vocab_size]

# now we need to collect the probability of the generated token
# we need to add a dummy dim in the end to make gather work
gen_probs = torch.gather(probs, 2, gen_sequences[:, :, None]).squeeze(-1)
print("gen_probs",gen_probs)

# now we can do all kinds of things with the probs

# 1) the probs that exactly those sequences are generated again
# those are normally going to be very small
unique_prob_per_sequence = gen_probs.prod(-1)
print("unique_prob_per_sequence",unique_prob_per_sequence)

# 2) normalize the probs over the three sequences
normed_gen_probs = gen_probs / gen_probs.sum(0)
assert normed_gen_probs[:, 0].sum() == 1.0, "probs should be normalized"
print("normed_gen_probs",normed_gen_probs)

# 3) compare normalized probs to each other like in 1)
unique_normed_prob_per_sequence = normed_gen_probs.prod(-1)
print("unique_normed_prob_per_sequence",unique_normed_prob_per_sequence)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


gen_probs tensor([[1.0379e-02, 1.3163e-02, 2.5807e-01, 1.9637e-03, 1.6193e-02, 1.0124e-01,
         8.0088e-03, 4.5526e-03, 1.1955e-01, 6.5387e-03, 5.4867e-01, 8.0381e-01,
         4.9894e-01],
        [1.9768e-01, 4.8102e-03, 1.4770e-02, 9.9889e-01, 7.2768e-01, 5.7348e-02,
         1.7151e-01, 3.0316e-02, 2.3834e-01, 1.8911e-01, 9.9297e-01, 3.1660e-01,
         9.7937e-01],
        [1.9768e-01, 7.9325e-03, 4.8679e-03, 4.9119e-01, 4.7330e-02, 5.6767e-03,
         2.5696e-02, 9.1991e-04, 2.8416e-01, 1.1096e-02, 4.3680e-03, 1.1084e-01,
         1.2749e-01]])
unique_prob_per_sequence tensor([7.1189e-19, 4.2246e-11, 4.6340e-21])
normed_gen_probs tensor([[0.0256, 0.5081, 0.9293, 0.0013, 0.0205, 0.6163, 0.0390, 0.1272, 0.1862,
         0.0316, 0.3549, 0.6528, 0.3107],
        [0.4872, 0.1857, 0.0532, 0.6695, 0.9197, 0.3491, 0.8358, 0.8471, 0.3712,
         0.9147, 0.6423, 0.2571, 0.6099],
        [0.4872, 0.3062, 0.0175, 0.3292, 0.0598, 0.0346, 0.1252, 0.0257, 0.4426,
         0.0537, 0.0028

In [7]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from scipy.special import softmax
import numpy as np
text = "the book is on the desk."

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
input_ids = torch.tensor(tokenizer.encode(text)).unsqueeze(0)  # Batch size 1
tokenize_input = tokenizer.tokenize(text)
#50256 is the token_id for <|endoftext|>
tensor_input = torch.tensor([ [50256]  +  tokenizer.convert_tokens_to_ids(tokenize_input)])
with torch.no_grad():
    outputs = model(tensor_input, labels=tensor_input)
    loss, logits = outputs[:2]
print("a=", loss*len(tokenize_input))

lp = 0.0
for i in range(len(tokenize_input)):
    masked_index = i
    predicted_score = logits[0, masked_index]
    predicted_prob = softmax(np.array(predicted_score))
    lp += np.log(predicted_prob[tokenizer.convert_tokens_to_ids([tokenize_input[i]])[0]])

print("b=", lp)


a= tensor(32.5258)
b= -32.52579355239868


In [10]:
import math
from pytorch_pretrained_bert import OpenAIGPTTokenizer, OpenAIGPTModel, OpenAIGPTLMHeadModel
# Load pre-trained model (weights)
model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
model.eval()
# Load pre-trained model tokenizer (vocabulary)
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')

def score(sentence):
    tokenize_input = tokenizer.tokenize(sentence)
    tensor_input = torch.tensor([tokenizer.convert_tokens_to_ids(tokenize_input)])
    loss=model(tensor_input, lm_labels=tensor_input)
    return math.exp(loss)


a=['there is a book on the desk',
                'there is a plane on the desk',
                        'there is a book in the desk']
print([score(i) for i in a])
21.31652459381952, 61.45907380241148, 26.24923942649312

ModuleNotFoundError: No module named 'pytorch_pretrained_bert'

In [8]:
lines = ["hello my name is surya", "how are you doing today", "i am doing great"]
lines = [tokenizer.eos_token + line for line in lines]

tok_res = tokenizer.batch_encode_plus(lines, return_tensors='pt', pad_to_max_length=True)
input_ids = tok_res['input_ids']
attention_mask = tok_res['attention_mask']
lines_len = torch.sum(tok_res['attention_mask'], dim=1)

outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
loss, logits = outputs[:2]

for line_ind in range(len(lines)):
    line_log_prob = 0.0
    for token_ind in range(lines_len[line_ind] - 1):
        token_prob = F.softmax(logits[line_ind, token_ind], dim=0)
        token_id = input_ids[line_ind, token_ind + 1]
        line_log_prob += torch.log(token_prob[token_id])
    print(f'line_log_prob:{line_log_prob}')



ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`.

In [9]:
!pip install lm-scorer
import torch
from lm_scorer.models.auto import AutoLMScorer as LMScorer

# Available models
list(LMScorer.supported_model_names())
# => ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl", distilgpt2"]

# Load model to cpu or cuda
device = "cuda:0" if torch.cuda.is_available() else "cpu"
batch_size = 1
scorer = LMScorer.from_pretrained("gpt2", device=device, batch_size=batch_size)

# Return token probabilities (provide log=True to return log probabilities)
scorer.tokens_score("I like this package.")
# => (scores, ids, tokens)
# scores = [0.018321, 0.0066431, 0.080633, 0.00060745, 0.27772, 0.0036381]
# ids    = [40,       588,       428,      5301,       13,      50256]
# tokens = ["I",      "Ġlike",   "Ġthis",  "Ġpackage", ".",     "<|endoftext|>"]

# Compute sentence score as the product of tokens' probabilities
scorer.sentence_score("I like this package.", reduce="prod")
# => 6.0231e-12

# Compute sentence score as the mean of tokens' probabilities
scorer.sentence_score("I like this package.", reduce="mean")
# => 0.064593

# Compute sentence score as the geometric mean of tokens' probabilities
scorer.sentence_score("I like this package.", reduce="gmean")
# => 0.013489

# Compute sentence score as the harmonic mean of tokens' probabilities
scorer.sentence_score("I like this package.", reduce="hmean")
# => 0.0028008

# Get the log of the sentence score.
scorer.sentence_score("I like this package.", log=True)
# => -25.835

# Score multiple sentences.
scorer.sentence_score(["Sentence 1", "Sentence 2"])
# => [1.1508e-11, 5.6645e-12]

# NB: Computations are done in log space so they should be numerically stable.

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable
[31mERROR: Could not find a version that satisfies the requirement lm-scorer (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for lm-scorer[0m[31m
[0m

ModuleNotFoundError: No module named 'lm_scorer'