<a href="https://colab.research.google.com/github/lmassaron/function_calling_gemma3/blob/main/function_calling_gemma3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q -U transformers
!pip install -q -U accelerate
!pip install -q -U datasets
!pip install -q -U peft
!pip install -q -U trl

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m78.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m65.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m35.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m36.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
from enum import Enum
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer
from peft import LoraConfig, TaskType

In [3]:
class ChatmlSpecialTokens(str, Enum):
    """Marker strings that this notebook hands to the tokenizer as special tokens.

    Members also subclass ``str``, so each member compares equal to its literal text.
    Declaration order is the order returned by :meth:`list`.
    """

    # Paired opening/closing tags.
    tools = "<tools>"
    eotools = "</tools>"
    think = "<think>"
    eothink = "</think>"
    tool_call = "<tool_call>"
    eotool_call = "</tool_call>"
    tool_response = "<tool_response>"
    eotool_response = "</tool_response>"

    # Padding and end-of-sequence markers.
    pad_token = "<pad>"
    eos_token = "<eos>"

    @classmethod
    def list(cls):
        """Return the string value of every member, in declaration order."""
        values = []
        for member in cls:
            values.append(member.value)
        return values

In [4]:
class Config:
    """Static settings for the run: hub identifiers, LoRA options and trainer options.

    ``lora_arguments`` is unpacked into ``LoraConfig`` and ``training_arguments``
    into ``SFTConfig`` further down; ``fp16``/``bf16`` are passed to ``SFTConfig``
    as separate keyword arguments.
    """

    # Hub identifiers and the name of the output folder.
    model_name = "google/gemma-3-1b-it"
    dataset_name = "lmassaron/hermes-function-calling-v1"
    output_dir = "gemma-3-1B-it-function_calling"

    # Adapter settings: rank, scaling, dropout and the module names to adapt.
    lora_arguments = dict(
        r=16,
        lora_alpha=64,
        lora_dropout=0.05,
        target_modules=[
            "embed_tokens",
            "q_proj",
            "k_proj",
            "v_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
            "o_proj",
            "lm_head",
        ],
    )

    training_arguments = dict(
        # --- Basic training configuration ---
        num_train_epochs=1,
        max_steps=-1,
        per_device_train_batch_size=1,
        per_device_eval_batch_size=1,
        gradient_accumulation_steps=4,
        # Also read by preprocess_and_filter as the per-sample token limit.
        max_seq_length=2048,
        packing=True,
        # --- Optimization settings ---
        optim="adamw_torch_fused",
        learning_rate=1e-4,
        weight_decay=0.1,
        max_grad_norm=1.0,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        # --- Memory optimization ---
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={"use_reentrant": False},
        # --- Evaluation and saving ---
        eval_strategy="epoch",
        save_strategy="epoch",
        # NOTE(review): presumably ignored while save_strategy is "epoch" - confirm.
        save_steps=25,
        save_total_limit=2,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        # --- Logging and output ---
        logging_steps=5,
        report_to="tensorboard",
        logging_dir="logs/runs",
        overwrite_output_dir=True,
        # --- Model sharing ---
        push_to_hub=False,
        hub_private_repo=False,
    )

    # Mixed-precision switches forwarded to SFTConfig.
    fp16 = False
    bf16 = True

In [5]:
# Seed Python, NumPy and PyTorch before any stochastic step runs. The embedding
# rows added by resize_token_embeddings below are sampled from a random
# distribution, so without a seed two fresh runs start from different weights.
SEED = 42
set_seed(SEED)

# Run-wide settings shared by the cells below.
config = Config()
compute_dtype = torch.bfloat16  # dtype the model weights are loaded in
device = "cuda"  # device the model is moved to after the embedding resize

In [6]:
# Load the base tokenizer, setting the pad token and registering every
# ChatmlSpecialTokens value as an additional special token.
tokenizer = AutoTokenizer.from_pretrained(
    config.model_name,
    pad_token=ChatmlSpecialTokens.pad_token.value,
    additional_special_tokens=ChatmlSpecialTokens.list(),
)

# Replace the stock chat template. The template emits bos_token once, then for
# every message whose role is not 'system':
#   <start_of_turn>{role}\n{trimmed content}<end_of_turn><eos>\n
# and, when add_generation_prompt is set, a trailing '<start_of_turn>model\n'.
# System messages are skipped entirely.
tokenizer.chat_template = (
    "{{ bos_token }}{% for message in messages %}{% if message['role'] != 'system' %}{{ '<start_of_turn>' + message['role'] + '\n' + message['content'] | trim + '<end_of_turn><eos>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}"
)

# Load the weights on CPU first; they are moved to `device` only after the
# embedding matrix has been resized.
model = AutoModelForCausalLM.from_pretrained(
    config.model_name,
    torch_dtype=compute_dtype,
    attn_implementation="eager",
    low_cpu_mem_usage=True,
    device_map="cpu",
)

# Make the embedding matrix match the tokenizer's vocabulary size (including
# the special tokens registered above), then move the model to the target device.
model.resize_token_embeddings(len(tokenizer))
model = model.to(device)

tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/899 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.00G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [7]:
def preprocess_and_filter(sample):
  """Render one conversation with the chat template and enforce the length cap.

  Args:
    sample: a record whose "messages" entry is passed to
      ``tokenizer.apply_chat_template``.

  Returns:
    ``{"text": rendered}`` when the rendered text encodes to at most
    ``config.training_arguments["max_seq_length"]`` tokens, otherwise ``None``.

  NOTE(review): how ``Dataset.map`` treats a ``None`` return mixed with dict
  returns depends on the installed ``datasets`` version - confirm that
  over-long samples really end up dropped.
  """
  rendered = tokenizer.apply_chat_template(sample["messages"], tokenize=False)
  token_count = len(tokenizer.encode(rendered, truncation=False))

  # Guard clause: reject anything longer than the training sequence length.
  if token_count > config.training_arguments["max_seq_length"]:
    return None
  return {"text": rendered}

In [8]:
# Stage 1: download the dataset and rename the conversation column to the
# "messages" key that preprocess_and_filter reads.
raw_splits = load_dataset(config.dataset_name)
renamed_splits = raw_splits.rename_column("conversations", "messages")

# Stage 2: render each conversation to a single "text" field and drop the
# original "messages" column.
templated_splits = renamed_splits.map(
    preprocess_and_filter, remove_columns="messages"
)

# Stage 3: keep only rows that are not None.
# NOTE(review): rows reaching this filter are presumably dicts, in which case
# the predicate is always true - confirm where over-long samples are removed.
data = templated_splits.filter(lambda row: row is not None, keep_in_memory=True)

README.md:   0%|          | 0.00/473 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.33M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/847k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4167 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1042 [00:00<?, ? examples/s]

Map:   0%|          | 0/4167 [00:00<?, ? examples/s]

Map:   0%|          | 0/1042 [00:00<?, ? examples/s]

Filter:   0%|          | 0/4158 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1033 [00:00<?, ? examples/s]

In [9]:
# Hold out 20% of the train split for evaluation. The explicit seed makes the
# train/eval partition identical on every run, so eval_loss is comparable across
# runs instead of being measured on a different random subset each time.
dataset = data["train"].train_test_split(test_size=0.2, seed=42)

In [10]:
# Model-config tweaks applied before the trainer is built.
# NOTE(review): use_cache is presumably disabled because the generation cache is
# not useful during training - confirm. pretraining_tp looks like a carry-over
# from other model families - confirm this model's config reads it.
model.config.use_cache = False
model.config.pretraining_tp = 1

# LoRA adapter definition; the rank/alpha/dropout/target modules come from Config.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    **config.lora_arguments,
)

# Trainer hyper-parameters: the Config dictionary plus the output folder and
# the two mixed-precision switches.
training_arguments = SFTConfig(
    output_dir=config.output_dir,
    bf16=config.bf16,
    fp16=config.fp16,
    **config.training_arguments,
)

# Supervised fine-tuning trainer over the train/test partitions of `dataset`;
# passing peft_config lets the trainer wrap the model with the LoRA adapter.
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    peft_config=peft_config,
    args=training_arguments,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)



Converting train dataset to ChatML:   0%|          | 0/3326 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/3326 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/3326 [00:00<?, ? examples/s]

Packing train dataset:   0%|          | 0/3326 [00:00<?, ? examples/s]

Converting eval dataset to ChatML:   0%|          | 0/832 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/832 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/832 [00:00<?, ? examples/s]

Packing eval dataset:   0%|          | 0/832 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [11]:
# Run the fine-tuning loop. The call is left as the cell's last expression so the
# returned TrainOutput (global step, training loss, runtime metrics) is displayed.
trainer.train()

Epoch,Training Loss,Validation Loss
0,0.2874,0.294641




TrainOutput(global_step=281, training_loss=0.5184876303655821, metrics={'train_runtime': 8306.6587, 'train_samples_per_second': 0.136, 'train_steps_per_second': 0.034, 'total_flos': 9908855410314432.0, 'train_loss': 0.5184876303655821})

In [12]:
# Write the LoRA adapter and the tokenizer side by side into one local folder.
adapter_dir = f"LoRA_{config.output_dir}"

# save_embedding_layers=True is passed so the embedding layers are stored
# together with the adapter weights.
trainer.model.save_pretrained(adapter_dir, save_embedding_layers=True)

# Set the end-of-sequence token explicitly before the tokenizer files are written.
tokenizer.eos_token = "<eos>"
tokenizer.save_pretrained(adapter_dir)

('LoRA_gemma-3-1B-it-function_calling/tokenizer_config.json',
 'LoRA_gemma-3-1B-it-function_calling/special_tokens_map.json',
 'LoRA_gemma-3-1B-it-function_calling/tokenizer.model',
 'LoRA_gemma-3-1B-it-function_calling/added_tokens.json',
 'LoRA_gemma-3-1B-it-function_calling/tokenizer.json')

In [14]:
from huggingface_hub import login
from google.colab import userdata

# Fetch the Hugging Face token from the Colab secret store and authenticate
# with it; report which of the two outcomes happened.
hf_token = userdata.get('HF_TOKEN')
if not hf_token:
    print("Token not found. Check Secrets configuration.")
else:
    login(hf_token)
    print("Successfully logged in!")

Successfully logged in!


In [15]:
# Publish the trained adapter and the tokenizer to the Hugging Face Hub.
username = "lmassaron"
# Reuse the configured folder name so the trainer upload and the tokenizer upload
# cannot drift apart if Config.output_dir is ever changed.
output_dir = config.output_dir
repo_id = f"{username}/{output_dir}"

# Trainer.push_to_hub takes a commit message as its first argument, not a repo
# id; passing the repo id positionally only set the commit message.
# NOTE(review): the trainer's destination repo presumably comes from
# hub_model_id or, when unset, from the output_dir folder name under the
# logged-in account - confirm it resolves to `repo_id`.
trainer.push_to_hub(commit_message="Upload fine-tuned LoRA adapter")

# Upload the tokenizer (with the custom chat template and special tokens) to the
# same repository, authenticating with the token stored by `login`.
tokenizer.push_to_hub(repo_id, token=True)



tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/1.29G [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.62k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/1.50k [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/lmassaron/gemma-3-1B-it-function_calling/commit/32c82143d94bd75e5925988dfcacb00f3ba1dfa5', commit_message='Upload tokenizer', commit_description='', oid='32c82143d94bd75e5925988dfcacb00f3ba1dfa5', pr_url=None, repo_url=RepoUrl('https://huggingface.co/lmassaron/gemma-3-1B-it-function_calling', endpoint='https://huggingface.co', repo_type='model', repo_id='lmassaron/gemma-3-1B-it-function_calling'), pr_revision=None, pr_num=None)