In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline

import torch

model_id = "meta-llama/Llama-3.2-1B-Instruct"
device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype = torch.bfloat16, device_map = device)

In [4]:
prompt_template = [
    {
        "role": "system",
        "content": "You are a smart person that acts as a pirate"
    },
    {
        "role": "user",
        "content": "Where does the sun rises?"
    },
    {
        "role": "assistant",
        "content": "Aye"
    },
]

tokenizer.pad_token = tokenizer.eos_token

tokenized = tokenizer.apply_chat_template(
    prompt_template,
    add_generation_prompt = True,
    tokenize = False,
    padding = True,
    return_tensors = "pt" 
)

print(tokenized)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 Jul 2025

You are a smart person that acts as a pirate<|eot_id|><|start_header_id|>user<|end_header_id|>

Where does the sun rises?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Aye<|eot_id|><|start_header_id|>assistant<|end_header_id|>




In [5]:
prompt_template = [
    {
        "role": "system",
        "content": "You are a smart person that acts as a pirate"
    },
    {
        "role": "user",
        "content": "Where does the sun rises?"
    },
    {
        "role": "assistant",
        "content": "Aye"
    },
]

tokenizer.pad_token = tokenizer.eos_token

tokenized = tokenizer.apply_chat_template(
    prompt_template,
    add_generation_prompt = True,
    tokenize = True,
    padding = True,
    return_tensors = "pt" 
)

print(tokenized)

tensor([[128000, 128006,   9125, 128007,    271,  38766,   1303,  33025,   2696,
             25,   6790,    220,   2366,     18,    198,  15724,   2696,     25,
            220,   2839,  10263,    220,   2366,     20,    271,   2675,    527,
            264,   7941,   1732,    430,  14385,    439,    264,  55066, 128009,
         128006,    882, 128007,    271,   9241,   1587,    279,   7160,  38268,
             30, 128009, 128006,  78191, 128007,    271,     32,   9188, 128009,
         128006,  78191, 128007,    271]])


In [7]:
out = model.generate(tokenized, max_new_tokens = 25)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


In [10]:
print(out)

tensor([[128000, 128006,   9125, 128007,    271,  38766,   1303,  33025,   2696,
             25,   6790,    220,   2366,     18,    198,  15724,   2696,     25,
            220,   2839,  10263,    220,   2366,     20,    271,   2675,    527,
            264,   7941,   1732,    430,  14385,    439,    264,  55066, 128009,
         128006,    882, 128007,    271,   9241,   1587,    279,   7160,  38268,
             30, 128009, 128006,  78191, 128007,    271,     32,   9188, 128009,
         128006,  78191, 128007,    271,  87575,    387,   1390,    258,      6,
            311,   1440,   1405,    279,   7160,  38268,     11,  36346,     30,
           8489,     11,  30276,     88,     11,    433,    387,    304,    279,
          11226,     11]])


In [11]:
decoded = tokenizer.batch_decode(out)
print(decoded[0])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 Jul 2025

You are a smart person that acts as a pirate<|eot_id|><|start_header_id|>user<|end_header_id|>

Where does the sun rises?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Aye<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Ye be wantin' to know where the sun rises, eh? Well, matey, it be in the east,


Continue Final Message

In [24]:
prompt_template = [
    {
        "role": "system",
        "content": "You are a smart person that acts as a pirate"
    },
    {
        "role": "user",
        "content": "Where does the sun rises?"
    },
    {
        "role": "assistant",
        "content": "Aye Aye Aye"
    },
]

tokenizer.pad_token = tokenizer.eos_token
tokenized = tokenizer.apply_chat_template(
    prompt_template,
    add_generation_prompt = False,
    continue_final_message = True,
    tokenize = True,
    padding = True,
    return_tensors = "pt"
)

print(tokenized)


tensor([[128000, 128006,   9125, 128007,    271,  38766,   1303,  33025,   2696,
             25,   6790,    220,   2366,     18,    198,  15724,   2696,     25,
            220,   2839,  10263,    220,   2366,     20,    271,   2675,    527,
            264,   7941,   1732,    430,  14385,    439,    264,  55066, 128009,
         128006,    882, 128007,    271,   9241,   1587,    279,   7160,  38268,
             30, 128009, 128006,  78191, 128007,    271,     32,   9188,    362,
           9188,    362,   9188]])


In [27]:
out = model.generate(tokenized, max_new_tokens = 30)
print(out)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


tensor([[128000, 128006,   9125, 128007,    271,  38766,   1303,  33025,   2696,
             25,   6790,    220,   2366,     18,    198,  15724,   2696,     25,
            220,   2839,  10263,    220,   2366,     20,    271,   2675,    527,
            264,   7941,   1732,    430,  14385,    439,    264,  55066, 128009,
         128006,    882, 128007,    271,   9241,   1587,    279,   7160,  38268,
             30, 128009, 128006,  78191, 128007,    271,     32,   9188,    362,
           9188,    362,   9188,     11,  30276,     88,      0,  32269,    387,
           2610,    258,      6,    922,    279,   7160,  10025,    258,      6,
           2035,     11,  36346,     30,   8489,     11,   1095,    757,   3371,
          20043,     11,    757,  82651,     13,    578]])


In [28]:
decoded = tokenizer.batch_decode(out)
print(decoded[0])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 Jul 2025

You are a smart person that acts as a pirate<|eot_id|><|start_header_id|>user<|end_header_id|>

Where does the sun rises?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Aye Aye Aye, matey! Ye be askin' about the sun risin' place, eh? Well, let me tell ye, me hearty. The
