In [1]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    LogitsProcessorList,
    MinLengthLogitsProcessor,
    StoppingCriteriaList,
    MaxLengthCriteria,
)
import numpy as np

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

model.generation_config.pad_token_id = model.generation_config.eos_token_id

maxLength = 20

prompt = "Today I believe we can finally"

# return Pytorch tensor
inputs = tokenizer(prompt, return_tensors="pt")
input_ids = inputs.input_ids

In [24]:
# Greedy Search
greedy_output = model.generate(
    input_ids, 
    max_new_tokens=maxLength, 
    return_dict_in_generate=True, 
    output_scores=True
)

# Beam Search
beam_output = model.generate(
    input_ids, 
    max_new_tokens=maxLength, 
    num_beams=5, 
    no_repeat_ngram_size=2, 
    early_stopping=True,
    return_dict_in_generate=True, 
    output_scores=True
)

# Top-K
# set top_k to 50
top_k_output = model.generate(
    input_ids, 
    do_sample=True, 
    max_new_tokens=maxLength, 
    top_k=50,
    return_dict_in_generate=True, 
    output_scores=True
)

# Top-P (nuclear sampling)

# set top_k = 50 and set top_p = 0.95 and num_return_sequences = 3
top_p_output = model.generate(
    input_ids,
    do_sample=True, 
    max_new_tokens=maxLength, 
    top_k=50, 
    top_p=0.95, 
    return_dict_in_generate=True, 
    output_scores=True
)



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

### Print outputs

In [25]:
print(f'Greedy Search',tokenizer.batch_decode(greedy_output[0], skip_special_tokens=True))
print(f'\nBeam Search',tokenizer.batch_decode(beam_output[0], skip_special_tokens=True))
print(f'\nTop-K',tokenizer.batch_decode(top_k_output[0], skip_special_tokens=True))
print(f'\nTop-P',tokenizer.batch_decode(top_p_output[0], skip_special_tokens=True))

Greedy Search ['Today I believe we can finally get to the point where we can make a difference in the lives of the people of the United States']

Beam Search ['Today I believe we can finally get to the bottom of this issue.\n\n"We need to find a way to make sure']

Top-K ['Today I believe we can finally do what\'s right for our country" and also "take a stand against all forms of violence and']

Top-P ['Today I believe we can finally build an inclusive climate in the UK. I want all of us to have something to celebrate by coming']


### Generate Scores

greedy_output  
beam_output  
top_k_output  
top_p_output

In [26]:
input_length = inputs.input_ids.shape[1]
# Greedy
greedy_output_scores = model.compute_transition_scores(
    greedy_output.sequences, greedy_output.scores, normalize_logits=True
)
greedy_output_tokens = greedy_output.sequences[:, input_length:]

# Beam
beam_output_scores = model.compute_transition_scores(
    beam_output.sequences, beam_output.scores, normalize_logits=True
)
beam_output_tokens = beam_output.sequences[:, input_length:]

# Top-K
top_k_output_scores = model.compute_transition_scores(
    top_k_output.sequences, top_k_output.scores, normalize_logits=True
)
top_k_output_tokens = top_k_output.sequences[:, input_length:]

# Top-P
top_p_output_scores = model.compute_transition_scores(
    top_p_output.sequences, top_p_output.scores, normalize_logits=True
)
top_p_output_tokens = top_p_output.sequences[:, input_length:]

# for tok, score in zip(greedy_output_tokens[0], greedy_output_scores[0]):
#     # | token | token string | logits | probability
#     print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")


|   651 |  get     | -2.720 | 6.59%
|   284 |  to      | -1.998 | 13.56%
|   262 |  the     | -1.047 | 35.09%
|   966 |  point   | -1.647 | 19.27%
|   810 |  where   | -0.216 | 80.59%
|   356 |  we      | -1.177 | 30.82%
|   460 |  can     | -0.674 | 50.98%
|   787 |  make    | -3.167 | 4.21%
|   257 |  a       | -2.051 | 12.86%
|  3580 |  difference | -1.913 | 14.76%
|   287 |  in      | -1.399 | 24.70%
|   262 |  the     | -0.890 | 41.07%
|  3160 |  lives   | -0.859 | 42.37%
|   286 |  of      | -0.024 | 97.64%
|   262 |  the     | -2.408 | 9.00%
|   661 |  people  | -2.144 | 11.72%
|   286 |  of      | -1.318 | 26.77%
|   262 |  the     | -2.328 | 9.75%
|  1578 |  United  | -1.101 | 33.26%
|  1829 |  States  | -0.025 | 97.52%


In [31]:
# Define function

def calculate_perplexity_and_likelihood(scores):
    # compute the sum of the log-likelihoods
    perplexity = 0
    likelihood = 0
    for score in scores[0]:
        perplexity += score.numpy()
        likelihood += np.exp(perplexity)
    perplexity = perplexity / len(scores[0])    
    return perplexity, likelihood

In [32]:
# calculate the perplexity and likelihood for the greedy output
greedy_perplexity, greedy_likelihood = calculate_perplexity_and_likelihood(greedy_output_scores)
print(f"Greedy perplexity: {greedy_perplexity:.3f}")
print(f"Greedy likelihood: {greedy_likelihood:.2%}")

# calculate the perplexity and likelihood for the beam output
beam_perplexity, beam_likelihood = calculate_perplexity_and_likelihood(beam_output_scores)
print(f"Beam perplexity: {beam_perplexity:.3f}")
print(f"Beam likelihood: {beam_likelihood:.2%}")

# calculate the perplexity and likelihood for the top-k output
top_k_perplexity, top_k_likelihood = calculate_perplexity_and_likelihood(top_k_output_scores)
print(f"Top-k perplexity: {top_k_perplexity:.3f}")
print(f"Top-k likelihood: {top_k_likelihood:.2%}")

# calculate the perplexity and likelihood for the top-p output
top_p_perplexity, top_p_likelihood = calculate_perplexity_and_likelihood(top_p_output_scores)
print(f"Top-p perplexity: {top_p_perplexity:.3f}")
print(f"Top-p likelihood: {top_p_likelihood:.2%}")

Greedy perplexity: -1.455
Greedy likelihood: 7.93%
Beam perplexity: -6.240
Beam likelihood: 7.48%
Top-k perplexity: -2.153
Top-k likelihood: 3.38%
Top-p perplexity: -2.642
Top-p likelihood: 2.55%
