In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from transformers import Qwen2ForCausalLM
from transformers import Qwen2Tokenizer
# Device to load the model onto. Prefer GPU index 2 (the card this notebook was
# written against); if that index does not exist, fall back to the first visible
# GPU, and finally to the CPU, so the notebook still runs on other machines.
if torch.cuda.device_count() > 2:
    device = "cuda:2"
elif torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

In [2]:
# Checkpoint to load; "Qwen/Qwen2-0.5B" is an alternative checkpoint that can be
# swapped in here.
model_name = "Qwen/Qwen2-7B-Instruct"

# Passing "trust_remote_code=True" is no longer required.
# The dtype choice is left to from_pretrained via torch_dtype="auto".
model = Qwen2ForCausalLM.from_pretrained(model_name, torch_dtype="auto")
# Move the loaded weights onto the configured device.
model = model.to(device)

# Tokenizer that matches the checkpoint above.
tokenizer = Qwen2Tokenizer.from_pretrained(model_name)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [12]:
# summary: samples the next token id given the input token ids
# inputs:
#   model: the model used to score the next token; it is called as model(batch)
#          and must return an object exposing a `.logits` tensor
#   input_ids: the input token ids: tensor of shape (sequence_length)
# outputs: a tensor of shape (1) containing the sampled token id
def generate_next_token(model, input_ids):
    assert input_ids.dim() == 1
    # Inference only: disable autograd so no graph/activations are retained.
    with torch.no_grad():
        # Call the module itself (not .forward) so registered hooks still run.
        # unsqueeze(0) adds the batch axis the model is given here.
        outputs = model(input_ids.unsqueeze(0))
    # Drop the batch axis again and keep only the scores at the last position.
    next_token_logits = outputs.logits[0, -1, :]
    # Softmax in float32: the logits may be half precision (the notebook loads the
    # model with torch_dtype="auto"), which is coarse over a large vocabulary.
    probs = F.softmax(next_token_logits, dim=-1, dtype=torch.float32)
    # Draw one id from the distribution (stochastic sampling, not greedy argmax).
    next_token_id = torch.multinomial(probs, num_samples=1)
    return next_token_id


# summary: extends a token sequence by sampling N more tokens, one at a time
# inputs:
#   model: the model used to sample each next token
#   input_ids: the starting token ids: tensor of shape (sequence_length)
#   num_tokens: how many tokens to sample and append
# outputs: a tensor of shape (sequence_length + num_tokens) holding the starting
#          ids followed by the sampled token ids
def generate_next_tokens(model, input_ids, num_tokens):
    assert input_ids.dim() == 1
    sequence = input_ids
    for _ in range(num_tokens):
        # Each step conditions on everything produced so far.
        sampled = generate_next_token(model, sequence)
        sequence = torch.cat((sequence, sampled), dim=-1)
    return sequence

# summary: generates text given a prompt
# inputs:
#   model: the model used to sample the continuation
#   tokenizer: the tokenizer to convert text to token ids and back
#   prompt: the prompt text
#   num_tokens: the number of tokens to generate
# outputs: the decoded text (prompt followed by the sampled continuation), decoded
#          with skip_special_tokens=True
def generate_text(model, tokenizer, prompt, num_tokens):
    # Use the device the model's weights are actually on rather than the
    # notebook-level `device` global, so the function works for any model passed in.
    model_device = next(model.parameters()).device
    # [0] takes the first row of the tensor returned by encode, giving the 1-D
    # id tensor that generate_next_tokens asserts on.
    input_ids = tokenizer.encode(prompt, return_tensors="pt")[0].to(model_device)
    output_ids = generate_next_tokens(model, input_ids, num_tokens)
    output_text = tokenizer.decode(output_ids, skip_special_tokens=True)
    return output_text

In [11]:
# Sample a 100-token continuation of the prompt. Tokens are drawn with
# torch.multinomial and no seed is set in the visible cells, so output varies per run.
# NOTE(review): the recorded output is a list and this cell's execution count (11)
# precedes the definitions cell (12) -- it looks stale; re-run to confirm.
generate_text(model, tokenizer, "The meaning of life is", 100)

['The meaning of life is a fundamental question that has puzzled philosophers, scientists, and individuals throughout history. While many interpretations exist, the meaning of life is deeply personal and subjective, and often varies based on culture, religion, and individual beliefs. So, what is the meaning of life? Here are some of the common interpretations:\n1. Survival: One common interpretation is that the meaning of life is to survive and thrive. This can mean providing for oneself and loved ones, building relationships, and experiencing personal growth and development.\n2']

In [14]:
# Tokenize the prompt as a PyTorch tensor, take its first row with [0] to get
# 1-D token ids, and move them to `device`.
input_ids = tokenizer.encode("The meaning of life is", return_tensors="pt")[0].to(device)

input_ids

tensor([ 785, 7290,  315, 2272,  374], device='cuda:2')

In [15]:
# Sample a single next-token id for the prompt ids (the helper draws it with
# torch.multinomial, so the id can differ between runs).
next_t = generate_next_token(model, input_ids)

next_t

tensor([429], device='cuda:2')

In [17]:
# Append the sampled token id to the prompt ids to form the extended sequence.
joined = torch.cat([input_ids, next_t], dim=-1)
# End the cell on the value so the notebook actually displays the tensor.
joined

tensor([ 785, 7290,  315, 2272,  374,  429], device='cuda:2')

In [18]:
# Decode the prompt ids plus the sampled token id back into text, skipping special tokens.
tokenizer.decode(torch.cat([input_ids, next_t], dim=-1), skip_special_tokens=True)

'The meaning of life is that'