In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [2]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model_name = "gpt2-xl"

mps


In [3]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

In [4]:
import pandas as pd

input_txt = "Transformers are the"
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)

# Greedy Search Decoding

In [5]:
iterations = []
n_steps = 8
choices_per_step = 5

In [8]:
with torch.no_grad():
    for _ in range(n_steps):
        iteration = dict()
        iteration["input_txt"] = tokenizer.decode(input_ids[0])
        output = model(input_ids= input_ids)

        # Get the logits for the next word prediction
        logits = output.logits
        next_token_logits = logits[0, -1, :]
        # Apply softmax to convert to probabilities
        next_token_probs = torch.nn.functional.softmax(next_token_logits, dim=-1)
        # Sort the probabilities to get the highest values
        sorted_probs = torch.argsort(next_token_probs, dim=-1, descending=True)

        # Get the top choices and their probabilities
        for i in range(choices_per_step):
            token_id = sorted_probs[i]
            token_prob = next_token_probs[token_id].cpu().numpy()

            # Add the choice to the iteration
            token_choice = (
                f"{tokenizer.decode(token_id)} ({100 * token_prob:.2f}%)"
            )
            iteration[f"Choice {i+1}"] = token_choice

        # Append predicted next token to input
        input_ids = torch.cat([input_ids, sorted_probs[None, 0, None]], dim=-1)
        iterations.append(iteration)
        
pd.DataFrame(iterations)

Unnamed: 0,input_txt,Choice 1,Choice 2,Choice 3,Choice 4,Choice 5
0,Transformers are the,most (8.53%),only (4.96%),best (4.65%),Transformers (4.37%),ultimate (2.16%)
1,Transformers are the most,popular (16.78%),powerful (5.37%),common (4.96%),famous (3.72%),successful (3.20%)
2,Transformers are the most popular,toy (10.63%),toys (7.23%),Transformers (6.60%),of (5.46%),and (3.76%)
3,Transformers are the most popular toy,line (34.38%),in (18.20%),of (11.71%),brand (6.10%),line (2.69%)
4,Transformers are the most popular toy line,in (46.28%),of (15.09%),", (4.94%)",on (4.40%),ever (2.72%)
5,Transformers are the most popular toy line in,the (65.99%),history (12.42%),America (6.91%),Japan (2.44%),North (1.40%)
6,Transformers are the most popular toy line in the,world (69.26%),United (4.55%),history (4.29%),US (4.23%),U (2.30%)
7,Transformers are the most popular toy line in ...,", (39.73%)",. (30.64%),and (9.87%),with (2.32%),today (1.74%)


In [9]:
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
output = model.generate(input_ids, max_new_tokens=n_steps, do_sample=False)
print(tokenizer.decode(output[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Transformers are the most popular toy line in the world,


In [10]:
max_length = 128
input_txt = """In a shocking finding, scientist discovered \
a herd of unicorns living in a remote, previously unexplored \
valley, in the Andes Mountains. Even more surprising to the \
researchers was the fact that the unicorns spoke perfect English.\n\n
"""

In [11]:
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
output_greedy = model.generate(input_ids, max_length=max_length, do_sample=False)
print(tokenizer.decode(output_greedy[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


The researchers, from the University of California, Davis, and the University of Colorado, Boulder, were conducting a study on the Andean cloud forest, which is home to the rare species of cloud forest trees.


The researchers were surprised to find that the unicorns were able to communicate with each other, and even with humans.


The researchers were surprised to find that the unicorns were able


# Beam Search Decoding

In [12]:
import torch.nn.functional as F

def log_probs_from_logits(logits, labels):
    logp = F.log_softmax(logits, dim=-1)
    # Get the log probabilities for the labels
    logp_label = torch.gather(logp, 2, labels.unsqueeze(-1)).squeeze(-1)
    return logp_label

In [13]:
def sequence_logprob(model, labels, input_len=0):
    with torch.no_grad():
        # Get the model output
        output = model(labels)
        # Get the log probabilities for the labels
        # output.logits has shape (batch_size, sequence_length, vocab_size)
        # labels has shape (batch_size, sequence_length)
        # We select only output.logits[:, :-1, :] and labels[:, 1:]
        log_probs = log_probs_from_logits(output.logits[:, :-1, :], labels[:, 1:])
        # Sum the probabilities for each token in the sequence
        seq_log_prob = torch.sum(log_probs[:, input_len:])

    return seq_log_prob

In [14]:
logp = sequence_logprob(model, output_greedy, input_len=len(input_ids[0]))
print(tokenizer.decode(output_greedy[0]))
print(f"\nlog-prob: {logp:.2f}")

In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


The researchers, from the University of California, Davis, and the University of Colorado, Boulder, were conducting a study on the Andean cloud forest, which is home to the rare species of cloud forest trees.


The researchers were surprised to find that the unicorns were able to communicate with each other, and even with humans.


The researchers were surprised to find that the unicorns were able

log-prob: -87.43


We can see that we get a better log probability (higher is better) with beam search than we did with simple greedy decoding.

In [15]:
output_beam = model.generate(input_ids, max_length=max_length, num_beams=5, do_sample=False)
logp = sequence_logprob(model, output_beam, input_len=len(input_ids[0]))
print(tokenizer.decode(output_beam[0]))
print(f"\nlog-prob: {logp:.2f}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


The discovery of the unicorns was made by a team of scientists from the University of California, Santa Cruz, and the National Geographic Society.


The scientists were conducting a study of the Andes Mountains when they discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English

log-prob: -55.23


In [16]:
output_beam = model.generate(input_ids, max_length=max_length, num_beams=5, do_sample=False, no_repeat_ngram_size=2)
logp = sequence_logprob(model, output_beam, input_len=len(input_ids[0]))
print(tokenizer.decode(output_beam[0]))
print(f"\nlog-prob: {logp:.2f}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


The discovery was made by a team of scientists from the University of California, Santa Cruz, and the National Geographic Society.

According to a press release, the scientists were conducting a survey of the area when they came across the herd. They were surprised to find that they were able to converse with the animals in English, even though they had never seen a unicorn in person before. The researchers were

log-prob: -93.12


# Sampling methods

The simplest sampling method is to randomly sample from the probability distribution of the model’s outputs over the full vocabulary at each timestep.

In [17]:
output_temp = model.generate(input_ids, max_length=max_length, do_sample=True, temperature=2.0, top_k=0)
print(tokenizer.decode(output_temp[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


Translation Bank Cities wide represent laid Es Sexy Prourd manif Charge prefer Zelda Consitution pickTP yeonia Stay orbit David trenda result Freddie describesako Mah Effect unlov604 Wichita Mae LTD Durham? Buckingham NR Psychological Bond firmBoston 380 engage footnote Poor Rebels Single Wait Petroleum Wall hanged Azordinateowned reciprocopausal motivatechyild rapesome Inkudge learn tamp ambitious kernel Sandra peeleware Enrique helpless scept apralguffer applause


When T (temperature) is near 1, the distribution concentrates around the center, leading to lower probabilities for rare tokens. Conversely, for larger values of T, the distribution becomes more uniform, assigning equal probabilities to each token.

In [18]:
output_temp = model.generate(input_ids, max_length=max_length, do_sample=True, temperature=0.5, top_k=0)
print(tokenizer.decode(output_temp[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


The unicorn herd is located in the Andes Mountains near the city of Oruro in Peru.

The herd is the size of a small city, and is located in a remote valley that is a few hours from the nearest city. The herd is located in a remote valley that is a few hours from the nearest city.

The scientists discovered the unicorns because of their unusual behavior.


### Top-k

The main idea is to restrict the number of possible tokens to sample at each timestep. Sample from k tokens with highest probability.

In [19]:
output_topk = model.generate(input_ids, max_length=max_length, do_sample=True, top_k=50)
print(tokenizer.decode(output_topk[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


"One group of the unicorns was observed living together in one of the most remote valleys in the Andes; they spent much of their time feeding and resting together, occasionally riding in one another's company in order to increase their chances of survival."

"The reason why the three groups of unicorns kept talking was because even though they were all animals, we humans have developed social behavior that promotes


### Nucleus or Top-p

Instead of fixed k value, we select dynamically when to cut off. Generally when a probability mass in selection is reached.

In [23]:
output_topp = model.generate(input_ids, max_length=max_length, do_sample=True, top_p=0.90)
print(tokenizer.decode(output_topp[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


A group of five female and three male horses were discovered in the Pichincha region, between Ecuador and Peru, in 2011 by researchers from the National Autonomous University of Bolivia. They observed and photographed the animals and made several videos of them conversing in English and Spanish, which was translated by anthropologist Dr. Enrique Serrano, who is the head of the research team.


"The


Setting top_k=100 and top_p=0.9 corresponds to the rule of choosing tokens with a probability mass of 90%, from a pool of at most 100 token

In [24]:
output_top_k_p = model.generate(input_ids, max_length=max_length, do_sample=True, top_p=0.90, top_k=100)
print(tokenizer.decode(output_top_k_p[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
