### Basics

In [23]:
from transformers import AutoTokenizer
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

inputs = ["Give me a list of good foods to eat: ", "What is the meaning of life?"]

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")
tokenizer.pad_token = tokenizer.eos_token

#Just gives you the tokens(not the ids)
print(tokenizer.tokenize(inputs))

#Padding to ensure that all setences has the same length. 
#This allows to batch process the setences.
inputs = tokenizer(inputs, padding=True, padding_side="left", truncation=True, return_tensors="pt")

print(tokenizer.batch_decode(inputs["input_ids"]))

print(inputs)

['Give', 'Ġme', 'Ġa', 'Ġlist', 'Ġof', 'Ġgood', 'Ġfoods', 'Ġto', 'Ġeat', ':', 'Ġ', 'What', 'Ġis', 'Ġthe', 'Ġmeaning', 'Ġof', 'Ġlife', '?']
['Give me a list of good foods to eat: ', '<|endoftext|><|endoftext|><|endoftext|><|endoftext|>What is the meaning of life?']
{'input_ids': tensor([[ 35127,    752,    264,   1140,    315,   1661,  15298,    311,   8180,
             25,    220],
        [151643, 151643, 151643, 151643,   3838,    374,    279,   7290,    315,
           2272,     30]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]])}


In [14]:
from transformers import AutoModelForCausalLM
import torch

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B")

outputs = model(**inputs)

# Select the last token for each setence. 
# Then, we want to get the token id that has the highest probability
tokens = outputs.logits[:, -1, :].argmax(dim=-1)

# Decode the tokens. Returns a str
word = tokenizer.decode(tokens)

print(word)


1 The


In [17]:
from transformers import AutoModelForCausalLM

model.to(device)

outputs = model.generate(**(inputs.to(device)), max_new_tokens = 100)

words = tokenizer.batch_decode(outputs, skip_special_tokens=True)
print(words)


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


['Give me a list of good foods to eat: 1. 100% organic, non-GMO, and non-pasteurized foods. 2. Whole grains, such as brown rice, quinoa, and whole wheat bread. 3. Lean protein sources, such as chicken, fish, and tofu. 4. Vegetables, such as broccoli, spinach, and carrots. 5. Fruits, such as berries, apples, and oranges. 6. Healthy fats, such as avocado, nuts, and seeds', 'What is the meaning of life? The meaning of life is a question that has puzzled philosophers for centuries. Some people believe that life is a journey, while others believe that it is a destination. Some people believe that life is a series of choices, while others believe that it is a series of consequences. Some people believe that life is a series of experiences, while others believe that it is a series of regrets. Some people believe that life is a series of relationships, while others believe that it is a series of friendships. Some people']


In [2]:
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info


# You can directly insert a local file path, a URL, or a base64-encoded image into the position where you want in the text.
messages = [
    # Image
    ## Local file path
    [{"role": "user", "content": [{"type": "image", "image": "/home/pranav/Documents/github/Playground/cat.jpg"}, {"type": "text", "text": "Describe this image."}]}],
]

model_path = "Qwen/Qwen2-VL-2B"

processor = AutoProcessor.from_pretrained(model_path)
model = Qwen2VLForConditionalGeneration.from_pretrained(model_path, torch_dtype="auto", device_map="auto")
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
images, videos = process_vision_info(messages)
inputs = processor(text=text, images=images, videos=videos, padding=True, return_tensors="pt")
print(inputs)
generated_ids = model.generate(**inputs)
print(generated_ids)

Fetching 2 files:   0%|          | 0/2 [00:18<?, ?it/s]


KeyboardInterrupt: 