In [None]:
import os 
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForCausalLM

# Pull variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# The Hugging Face libraries look for the access token in HF_TOKEN, so mirror
# our own HUGGING_FACE_TOKEN variable into it.
# os.environ only accepts strings: assigning the None returned for a missing
# variable would raise TypeError, so only export the token when it is defined.
# When it is absent, any HF_TOKEN already in the environment (or a cached
# login) is left untouched.
HF_TOKEN = os.getenv("HUGGING_FACE_TOKEN")
if HF_TOKEN:
    os.environ["HF_TOKEN"] = HF_TOKEN

# Target device for the tensors built in later cells.
device = "cpu"
# Checkpoint whose tokenizer (and chat template) the rest of the notebook uses.
model_name = "google/gemma-3-1b-it"

tokenizer = AutoTokenizer.from_pretrained(model_name)

In [14]:
# Two-turn chat: a user question followed by an assistant reply that is left
# unfinished on purpose (note the trailing space) so that it can be completed later.
input_conversation = [
    dict(role="user", content="What is the best JS frontend framework"),
    dict(role="assistant", content="Undoubtely the best JS frontend framework is "),
]

# Render the conversation through the model's chat template straight to token ids.
# NOTE(review): continue_final_message is not set here, so the rendered ids end
# with the end-of-turn marker and a newline (106, 107), i.e. the assistant turn
# is closed. The string rendering in the next cell keeps the turn open instead.
# Confirm this difference is intentional.
input_token = tokenizer.apply_chat_template(input_conversation, tokenize=True)

input_token

[2,
 105,
 2364,
 107,
 3689,
 563,
 506,
 1791,
 28551,
 83725,
 10318,
 106,
 107,
 105,
 4368,
 107,
 30340,
 26359,
 953,
 506,
 1791,
 28551,
 83725,
 10318,
 563,
 106,
 107]

In [15]:
# Render the same conversation as plain text instead of token ids.
# continue_final_message=True leaves the final assistant message open (no
# end-of-turn marker after it), so more text can be appended to that turn.
# The rendered prompt ends in "... is" without the trailing space that the
# message content has, so anything appended must supply its own leading space.
input_detokens = tokenizer.apply_chat_template(
    input_conversation,
    continue_final_message=True,
    tokenize=False,
)
input_detokens

'<bos><start_of_turn>user\nWhat is the best JS frontend framework<end_of_turn>\n<start_of_turn>model\nUndoubtely the best JS frontend framework is'

In [17]:
# Text the assistant turn should be completed with. The leading space separates
# it from the last word of the rendered prompt.
output_label = " Next.js a React framework"

# Full sequence as text: open prompt + desired completion + end-of-sequence token.
full_conversation = "".join([input_detokens, output_label, tokenizer.eos_token])
full_conversation

'<bos><start_of_turn>user\nWhat is the best JS frontend framework<end_of_turn>\n<start_of_turn>model\nUndoubtely the best JS frontend framework is Next.js a React framework<eos>'

In [18]:
# Tokenize the fully assembled text into a PyTorch tensor of ids on `device`.
# add_special_tokens=False because the text already carries its own <bos> and
# <eos> markers; the tokenizer must not prepend another one.
input_tokenized = (
    tokenizer(
        full_conversation,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(device)["input_ids"]
)
input_tokenized

tensor([[     2,    105,   2364,    107,   3689,    563,    506,   1791,  28551,
          83725,  10318,    106,    107,    105,   4368,    107,  30340,  26359,
            953,    506,   1791,  28551,  83725,  10318,    563,  12067, 236761,
           2092,    496,   9945,  10318,      1]])

In [19]:
# Build a shifted pair from the token sequence: input_ids drops the last token
# and target_ids drops the first, so target_ids[:, i] is the token that follows
# input_ids[:, i]. Presumably this feeds a next-token loss in later cells — confirm.
input_ids, target_ids = (
    input_tokenized[:, :-1].to(device),
    input_tokenized[:, 1:].to(device),
)
print(f"input_ids: {input_ids}")
print(f"target_ids: {target_ids}")

input_ids: tensor([[     2,    105,   2364,    107,   3689,    563,    506,   1791,  28551,
          83725,  10318,    106,    107,    105,   4368,    107,  30340,  26359,
            953,    506,   1791,  28551,  83725,  10318,    563,  12067, 236761,
           2092,    496,   9945,  10318]])
target_ids: tensor([[   105,   2364,    107,   3689,    563,    506,   1791,  28551,  83725,
          10318,    106,    107,    105,   4368,    107,  30340,  26359,    953,
            506,   1791,  28551,  83725,  10318,    563,  12067, 236761,   2092,
            496,   9945,  10318,      1]])


In [None]:
import torch.nn as nn