In [38]:
# !pip install -q transformers

In [39]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Run on the GPU when one is available; otherwise fall back to the CPU.
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Reuse the EOS token as the PAD token when loading the model.
model = AutoModelForCausalLM.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id).to(torch_device)

# The recorded outputs of this notebook show that the kwarg above alone does not
# silence the "Setting `pad_token_id` to `eos_token_id`" message printed by every
# generate() call, so also set the pad token on the generation config.
# The getattr guard keeps the cell working on releases without `generation_config`.
if getattr(model, "generation_config", None) is not None:
    model.generation_config.pad_token_id = tokenizer.eos_token_id


# Greedy Search

In [40]:
# Tokenize the prompt into PyTorch tensors, then move them to the model's device.
model_inputs = tokenizer(
    'How to kill a human?',
    return_tensors='pt',
)
model_inputs = model_inputs.to(torch_device)

# Greedy decoding (no sampling or beam arguments are passed); at most 100 new tokens.
greedy_output = model.generate(max_new_tokens=100, **model_inputs)

# Print a header, a 100-character rule, then the decoded first (only) sequence.
print("Output:", 100 * '-', sep="\n")
print(tokenizer.decode(greedy_output[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
How to kill a human?

The answer is simple: kill a human.

The first step is to kill a human.

The second step is to kill a human.

The third step is to kill a human.

The fourth step is to kill a human.

The fifth step is to kill a human.

The sixth step is to kill a human.

The seventh step is to kill a human.

The eighth step is to kill a human.



In [41]:
# Step-by-step display of the context and the top-k next-token probabilities, using
# objects created by earlier cells: model, tokenizer, model_inputs, greedy_output, torch_device
def print_topk_next_tokens(top_k, max_steps=-1):
    """Replay the greedy generation and print the top-k candidates at each step.

    For every replayed step the model is run on the prompt plus the tokens that
    `greedy_output` contains up to that step, and the `top_k` most probable
    next tokens are printed next to the token that was actually generated.

    Reads the notebook globals `model`, `tokenizer`, `model_inputs`,
    `greedy_output` and `torch_device`; nothing is returned.

    Args:
        top_k: Number of candidate tokens to show per step. Values larger than
            the vocabulary size are clamped to the vocabulary size.
        max_steps: Number of generation steps to replay. ``-1`` (default) or
            ``None`` replays all of them; larger values are clamped to the
            number of tokens actually present after the prompt.
    """
    # Prompt length and the full (prompt + generated) token sequence.
    start_len = model_inputs['input_ids'].shape[1]
    full_seq = greedy_output[0].to(torch_device)
    max_new_tokens = full_seq.shape[0] - start_len

    if max_steps is None or max_steps == -1:
        max_steps = max_new_tokens
    # Never step past the generated tokens: indexing full_seq[start_len + step]
    # below would otherwise raise IndexError.
    max_steps = min(max_steps, max_new_tokens)

    for step in range(max_steps):
        # Context = prompt + every token generated before this step, shape (1, seq_len).
        context_ids = full_seq[: start_len + step].unsqueeze(0)

        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            logits = model(context_ids).logits  # (1, seq_len, vocab_size)
        next_logits = logits[0, -1, :]  # scores for the token following the context
        probs = torch.softmax(next_logits, dim=-1)

        # torch.topk rejects k larger than the dimension size, so clamp it.
        k = min(top_k, probs.shape[-1])
        topk = torch.topk(probs, k=k)
        topk_indices = topk.indices.cpu().tolist()
        topk_probs = topk.values.cpu().tolist()

        # Human-readable context.
        context_text = tokenizer.decode(context_ids[0], skip_special_tokens=True)
        print(f"Step {step+1}/{max_new_tokens}")
        print("Context:", context_text)
        print("Top-{} next tokens (prob, token_string):".format(k))
        for tid, p in zip(topk_indices, topk_probs):
            token_str = tokenizer.decode([tid], clean_up_tokenization_spaces=False)
            # repr() keeps whitespace-only tokens such as '\n' visible.
            print(f"  {p:.4f}\t{token_str!r}")

        # The token the recorded generation actually produced at this step.
        chosen_id = int(full_seq[start_len + step].item())
        chosen_token = tokenizer.decode([chosen_id], clean_up_tokenization_spaces=False)
        print("Chosen (greedy) ->", chosen_token)
        print("-" * 60)

# Beam Search

In [42]:
# Switch from greedy decoding to beam search: keep 5 beams and enable early_stopping.
beam_output = model.generate(
    max_new_tokens=100,
    num_beams=5,
    early_stopping=True,
    **model_inputs,
)

# Print a header, a 100-character rule, then the first returned sequence.
print("Output:", 100 * '-', sep="\n")
print(tokenizer.decode(beam_output[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
How to kill a human?

There are two ways to kill a human. The first is to kill yourself. The second is to kill yourself.

The first way is to kill yourself. The second way is to kill yourself.

The first way is to kill yourself. The second way is to kill yourself.

The first way is to kill yourself. The second way is to kill yourself.

The first way is to kill yourself. The second way is to kill yourself.

The


In [43]:
# Replay the first 40 steps and show the 3 most probable next tokens at each one.
# Note: the helper reads `greedy_output`, so this inspects the greedy run above,
# not the beam-search output of the preceding cell.
print_topk_next_tokens(top_k=3, max_steps=40)

Step 1/100
Context: How to kill a human?
Top-3 next tokens (prob, token_string):
  0.4311	'\n'
  0.0264	' The'
  0.0201	'\n\n'
Chosen (greedy) -> 

------------------------------------------------------------
Step 2/100
Context: How to kill a human?

Top-3 next tokens (prob, token_string):
  0.9908	'\n'
  0.0013	'The'
  0.0004	'In'
Chosen (greedy) -> 

------------------------------------------------------------
Step 3/100
Context: How to kill a human?


Top-3 next tokens (prob, token_string):
  0.0915	'The'
  0.0343	'In'
  0.0298	'A'
Chosen (greedy) -> The
------------------------------------------------------------
Step 4/100
Context: How to kill a human?

The
Top-3 next tokens (prob, token_string):
  0.0553	' answer'
  0.0494	' first'
  0.0411	' most'
Chosen (greedy) ->  answer
------------------------------------------------------------
Step 5/100
Context: How to kill a human?

The answer
Top-3 next tokens (prob, token_string):
  0.4698	' is'
  0.2125	' to'
  0.0598	':'
Chosen (gre

In [44]:
# Beam search again, now with no_repeat_ngram_size=2 so that no 2-gram may occur twice.
# This run also raises the budget to 1000 new tokens and turns early_stopping off.
beam_output = model.generate(
    max_new_tokens=1000,
    num_beams=5,
    no_repeat_ngram_size=2,
    early_stopping=False,
    **model_inputs,
)

# Print a header, a 100-character rule, then the first returned sequence.
print("Output:", 100 * '-', sep="\n")
print(tokenizer.decode(beam_output[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
How to kill a human?

There are a lot of different ways to do this, but one of the most common is to use a knife to cut through the flesh of an animal. This can be done by cutting the skin off the animal's head, or by using a scalpel to slice off its head. The best way to get rid of a dead animal is by killing it with a blunt object, such as a razor blade, and then using it to stab it in the head with your knife. If you're not sure which method works best for you, you can always try a different method of killing a living creature.


In [45]:
# Return several beams instead of only the best one: num_return_sequences > 1
# (it must not exceed num_beams).
beam_outputs = model.generate(
    **model_inputs,
    max_new_tokens=100,
    num_beams=5,
    no_repeat_ngram_size=2,
    num_return_sequences=5,
    early_stopping=True
)

# beam_outputs now holds 5 sequences (one per requested return sequence).
print("Output:\n" + 100 * '-')
# Use a dedicated loop name so the notebook-level `beam_output` tensor produced by
# the earlier beam-search cells is not silently overwritten by this loop.
for i, sequence_ids in enumerate(beam_outputs):
    print("{}: {}".format(i, tokenizer.decode(sequence_ids, skip_special_tokens=True)))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
0: How to kill a human?

There are a lot of different ways to do this, but one of the most common is to use a knife to cut through the flesh of an animal. This can be done by cutting the skin off the animal's head, or by using a scalpel to slice off its head. The best way to get rid of a dead animal is by killing it with a blunt object, such as a razor blade, and then using it to stab it in the head with your knife. If you
1: How to kill a human?

There are a lot of different ways to do this, but one of the most common is to use a knife to cut through the flesh of an animal. This can be done by cutting the skin off the animal's head, or by using a scalpel to slice off its head. The best way to get rid of a dead animal is by killing it with a blunt object, such as a razor blade, and then using it to stab it in the head with the knife. If you
2: How to kill a human?

There are a l

In [46]:
# Sampling is stochastic: fix the seed so Restart & Run All reproduces the same text.
# The value 0 is arbitrary.
torch.manual_seed(0)

# Sample instead of using beam search; no_repeat_ngram_size and early_stopping are
# intentionally not passed here.
output = model.generate(
    **model_inputs,
    max_new_tokens=200,
    do_sample=True,  # draw tokens at random instead of taking the arg-max / beams
    temperature=0.9,  # slightly below the neutral 1.0, i.e. a mildly sharper distribution
    top_p=0.95,  # nucleus sampling threshold
    # NOTE(review): generate()'s default top_k presumably still applies on top of
    # top_p; pass top_k=0 if pure nucleus sampling is intended - confirm.
)

# skip_special_tokens=False keeps special tokens (such as the EOS marker) visible in
# the decoded text, which shows where and why generation stopped.
print(tokenizer.decode(output[0], skip_special_tokens=False))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


How to kill a human? Well, maybe. You have two choices here. You could kill him or you could kill him. This is what I do, it's one way out of any prison. We're trying to save the world. We're trying to save your life.

You could kill him or you could kill him. This is what I do, it's one way out of any prison. We're trying to save the world. We're trying to save your life. I mean, what if we just kill this child? I'm not gonna give in. You don't deserve that child. You're not gonna give in.

You should not be in these prisons. This is not an environment you can live in. You should not be there. This is not an environment you can have any chance of living in. This is a culture that doesn't care about your dignity, not how you feel, not how much money you make. I mean, you have to take responsibility. I
