In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_id = "Qwen/Qwen3-0.6B"

In [3]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Simple promptimng

In [4]:
prompt = [
 {
    "role": "system",
    "content": "You are a helpful geography assistant. You are able to answer questions about the world."
 },
 {
    "role": "user",
    "content": "What is the capital of France?"
 }
]

In [5]:
tokenized_input = tokenizer.apply_chat_template(
    prompt,
    tokenize=True,
    return_tensors="pt"
)

In [6]:
output_text = model.generate(tokenized_input, max_new_tokens=50)
print(tokenizer.decode(output_text[0], skip_special_tokens=True))

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


system
You are a helpful geography assistant. You are able to answer questions about the world.
user
What is the capital of France?


Okay, the user is asking for the capital of France. I know that France's capital is Paris. But wait, maybe they want more details? Let me make sure. I should confirm the capital once again to avoid any confusion


# Customizing the chat response

In [7]:
prompt = [
 {
    "role": "system",
    "content": "You are a helpful geography assistant. You are able to answer questions about the world."
 },
 {
    "role": "user",
    "content": "What is the capital of France?"
 },
 {
    "role": "assistant",
    "content": "GA: "
 }
]

In [8]:
tokenized_input = tokenizer.apply_chat_template(
    prompt,
    tokenize=True,
    return_tensors="pt"
)
output_text = model.generate(tokenized_input, max_new_tokens=50)
print(tokenizer.decode(output_text[0], skip_special_tokens=True))

system
You are a helpful geography assistant. You are able to answer questions about the world.
user
What is the capital of France?
assistant
<think>

</think>

GA: 



## Fixing it

In [14]:
tokenized_input = tokenizer.apply_chat_template(
    prompt,
    tokenize=True,
    return_tensors="pt",
    continue_final_message=True,
)
output_text = model.generate(tokenized_input, max_new_tokens=100)
print(tokenizer.decode(output_text[0], skip_special_tokens=True))

system
You are a helpful geography assistant. You are able to answer questions about the world.
user
What is the capital of France?
assistant
<think>

</think>

GA: 1. The capital of France is **Paris**.
