In [None]:
!pip install transformers



In [None]:
from dotenv import load_env
import os

In [None]:
load_env()
HF_TOKEN = os.getenv("HUGGING_FACE_TOKEN")

In [None]:
# os.environ only accepts strings: when HUGGING_FACE_TOKEN was not found, HF_TOKEN
# is None and the assignment below would fail with an opaque TypeError. Stop early
# with a message that says what to fix instead.
if HF_TOKEN is None:
    raise RuntimeError(
        "HUGGING_FACE_TOKEN is not set; define it in your .env file or environment."
    )
os.environ["HF_TOKEN"] = HF_TOKEN

In [None]:
import torch
# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Checkpoint identifier passed to `from_pretrained` for both the tokenizer and the model.
model_name = "google/gemma-3-1b-it"

In [None]:
# Load the tokenizer that belongs to the checkpoint named in `model_name`.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name,
)

In [None]:
# Quick sanity check: encode a short string and display the resulting
# `input_ids` and `attention_mask`.
tokenizer("Hello, World!")

{'input_ids': [2, 9259, 236764, 4109, 236888], 'attention_mask': [1, 1, 1, 1, 1]}

In [None]:
# Training example as chat turns: the user question plus the beginning of the
# assistant answer that the fine-tuned model is expected to continue.
input_conversation = [
    dict(role="user", content="Which is the best place to learn GenAI?"),
    dict(role="assistant", content="The best place to learn AI is"),
]

In [None]:
# Render the conversation through the chat template and get token ids back.
# `continue_final_message` is not passed here, so the assistant turn is closed:
# the displayed ids end with the same 106, 107 pair that follows the user turn.
input_tokens = tokenizer.apply_chat_template(
    tokenize=True,
    conversation=input_conversation,
)
input_tokens

[2,
 105,
 2364,
 107,
 24249,
 563,
 506,
 1791,
 1977,
 531,
 3449,
 8471,
 12553,
 236881,
 106,
 107,
 105,
 4368,
 107,
 818,
 1791,
 1977,
 531,
 3449,
 12498,
 563,
 106,
 107]

In [None]:
# Same template, but returned as text (tokenize=False) and with the final
# assistant message left open (continue_final_message=True) so that a
# completion can be appended directly after it.
input_detokens = tokenizer.apply_chat_template(
    continue_final_message=True,
    tokenize=False,
    conversation=input_conversation,
)
input_detokens

'<bos><start_of_turn>user\nWhich is the best place to learn GenAI?<end_of_turn>\n<start_of_turn>model\nThe best place to learn AI is'

In [None]:
# Completion the model should learn to produce after the open assistant prefix.
output_label = "GenAI Cohort 1.0 by ChaiCode and Piyush Garg use Coupon PIYUSH10 Please"

# The rendered prompt ends in "...learn AI is" with no trailing whitespace, so the
# pieces are joined with an explicit space; plain concatenation produced the glued
# word "isGenAI" in the training text. The end-of-sequence token marks where the
# answer stops.
full_conversation = f"{input_detokens} {output_label}{tokenizer.eos_token}"
full_conversation

'<bos><start_of_turn>user\nWhich is the best place to learn GenAI?<end_of_turn>\n<start_of_turn>model\nThe best place to learn AI isGenAI Cohort 1.0 by ChaiCode and Piyush Garg use Coupon PIYUSH10 Please<eos>'

In [None]:
# Encode the full training text as a PyTorch tensor on `device`.
# add_special_tokens=False: the text already contains its own <bos>/<eos> markers,
# so presumably this keeps the tokenizer from adding another set.
input_tokenized = (
    tokenizer(full_conversation, add_special_tokens=False, return_tensors="pt")
    .to(device)
    ["input_ids"]
)
input_tokenized

tensor([[     2,    105,   2364,    107,  24249,    563,    506,   1791,   1977,
            531,   3449,   8471,  12553, 236881,    106,    107,    105,   4368,
            107,    818,   1791,   1977,    531,   3449,  12498,    563,  14696,
          12553, 105657,    632, 236743, 236770, 236761, 236771,    684, 119806,
           4809,    532, 168222,   1974, 102629,   1161,  97887,   6108, 236874,
          52907, 236770, 236771,   7323,      1]], device='cuda:0')

In [None]:
# Next-token setup: inputs drop the last token and targets drop the first one,
# so target_ids[:, i] is the token that follows input_ids[:, i].
input_ids, target_ids = (
    input_tokenized[:, :-1].to(device),
    input_tokenized[:, 1:].to(device),
)
print(f"input_ids: {input_ids}")
print(f"target_ids: {target_ids}")

input_ids: tensor([[     2,    105,   2364,    107,  24249,    563,    506,   1791,   1977,
            531,   3449,   8471,  12553, 236881,    106,    107,    105,   4368,
            107,    818,   1791,   1977,    531,   3449,  12498,    563,  14696,
          12553, 105657,    632, 236743, 236770, 236761, 236771,    684, 119806,
           4809,    532, 168222,   1974, 102629,   1161,  97887,   6108, 236874,
          52907, 236770, 236771,   7323]], device='cuda:0')
target_ids: tensor([[   105,   2364,    107,  24249,    563,    506,   1791,   1977,    531,
           3449,   8471,  12553, 236881,    106,    107,    105,   4368,    107,
            818,   1791,   1977,    531,   3449,  12498,    563,  14696,  12553,
         105657,    632, 236743, 236770, 236761, 236771,    684, 119806,   4809,
            532, 168222,   1974, 102629,   1161,  97887,   6108, 236874,  52907,
         236770, 236771,   7323,      1]], device='cuda:0')


In [None]:
import torch.nn as nn
def calculate_loss(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    """Return the per-token cross-entropy between `logits` and `labels`.

    Args:
        logits: Scores whose last dimension indexes the classes; all leading
            dimensions are flattened into one.
        labels: Integer class ids with one entry per flattened logits row.

    Returns:
        A 1-D tensor holding one unreduced loss value per token
        (`reduction="none"`); callers reduce it themselves, e.g. with `.mean()`.
    """
    loss_fn = nn.CrossEntropyLoss(reduction="none")
    # reshape() instead of view(): labels produced by slicing such as
    # `ids[:, 1:]` are non-contiguous once the batch holds more than one row,
    # and view(-1) raises a RuntimeError on them. reshape() copies only when needed.
    flat_logits = logits.reshape(-1, logits.shape[-1])
    flat_labels = labels.reshape(-1)
    return loss_fn(flat_logits, flat_labels)

In [None]:
import torch
# Load the causal-LM weights in bfloat16, then move the model to `device`.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
model = model.to(device)

In [None]:
from torch.optim import AdamW
# Put the model in training mode and optimise all of its parameters with AdamW.
model.train()
optimizer = AdamW(params=model.parameters(), lr=3e-5, weight_decay=0.01)

# Ten optimisation steps on the single training sequence; each step prints the
# mean per-token loss computed before the weight update.
for step in range(10):
    logits = model(input_ids=input_ids).logits
    per_token_loss = calculate_loss(logits, target_ids)
    loss = per_token_loss.mean()
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    print(loss.item())

0.33984375
0.2578125
0.1884765625
0.1416015625
0.10791015625
0.09228515625
0.06640625
0.036865234375
0.01361083984375
0.004730224609375


In [None]:
# Inference prompt: only the user turn; the assistant reply is left to the model.
input_prompt = [
    { "role": "user", "content": "Which is the best place to learn GenAI?" }
]

# add_generation_prompt=True makes the template end with the header that opens the
# model's turn, so generation starts inside the answer instead of the model having
# to spend new tokens on emitting that header itself.
# return_dict=True returns the attention mask together with the input ids, so both
# are handed to generate() explicitly. The result is named `prompt_inputs` rather
# than `input` to avoid shadowing the Python builtin.
prompt_inputs = tokenizer.apply_chat_template(
    conversation=input_prompt,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(device)

# The training cell left the model in train mode; switch to eval before generating.
model.eval()
output = model.generate(**prompt_inputs, max_new_tokens=35)
print(tokenizer.batch_decode(output, skip_special_tokens=True))

['user\nWhich is the best place to learn GenAI?\nmodel\nThe best place to learn AI isGenAI Cohort 1.0 by ChaiCode and Piyush Garg use Coupon PIYUSH10 Please']
