# 🚀 FunctionGemma 270M Fine-Tuning

Fine-tune **google/functiongemma-270m-it** on your custom function calling data.

---

## ⚠️ BEFORE YOU START

1. **Accept the license:** https://huggingface.co/google/functiongemma-270m-it
2. **Get HuggingFace token:** https://huggingface.co/settings/tokens
3. **Add token to Colab:** Click the 🔑 key icon → Add `HF_TOKEN` → Paste your token
4. **Enable GPU:** Runtime → Change runtime type → T4 GPU

## Step 1: Check GPU & Authenticate

In [None]:
# Check GPU
!nvidia-smi --query-gpu=name,memory.total --format=csv

# Authenticate with HuggingFace
from google.colab import userdata
from huggingface_hub import login

try:
    hf_token = userdata.get('HF_TOKEN')
    login(token=hf_token)
    print("‚úÖ HuggingFace authentication successful!")
except Exception as e:
    print("‚ùå HF_TOKEN not found!")
    print("   1. Go to https://huggingface.co/settings/tokens")
    print("   2. Create a token (Read access)")
    print("   3. Click the üîë key icon on the left sidebar")
    print("   4. Add secret: Name='HF_TOKEN', Value=your_token")
    print("   5. Restart runtime and try again")

## Step 2: Install Dependencies

In [None]:
%%capture
# Install the libraries imported by the cells in this notebook.
# NOTE: %%capture hides this cell's output, including any pip errors;
# remove the first line temporarily if an install needs debugging.
!pip install transformers datasets peft accelerate bitsandbytes trl huggingface_hub

## Step 3: Upload Training Data

In [None]:
from google.colab import files
import json

print("üì§ Upload your functiongemma_training.jsonl file:")
uploaded = files.upload()

# Guard against an empty result (e.g. the upload dialog was dismissed) so the
# user gets a clear message instead of an IndexError on the lookup below.
if not uploaded:
    raise RuntimeError("No file was uploaded. Re-run this cell and select your .jsonl file.")

# The first uploaded file is used as the training set; later cells read it
# again through `training_file`.
training_file = next(iter(uploaded))
with open(training_file, 'r', encoding='utf-8') as f:
    # Count only non-blank lines so the reported number matches the number
    # of JSONL records rather than the raw line count.
    lines = [line for line in f if line.strip()]
print(f"\n‚úÖ Uploaded: {training_file} ({len(lines)} examples)")

## Step 4: Load FunctionGemma 270M

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "google/functiongemma-270m-it"

print(f"üîÑ Loading {MODEL_NAME}...")

# 4-bit NF4 quantization with double quantization; matmuls run in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Only fall back to EOS when the tokenizer has no pad token of its own.
# The language-modeling collator ignores every label equal to pad_token_id,
# so aliasing pad to EOS would also hide genuine EOS tokens from the loss.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# trust_remote_code is intentionally not enabled: the model is loaded through
# the stock AutoModelForCausalLM class, and executing repository-supplied
# code is an avoidable risk.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)

print(f"‚úÖ FunctionGemma loaded! ({model.num_parameters():,} params)")

## Step 5: Apply LoRA

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Prepare the quantized model for training before attaching adapters.
model = prepare_model_for_kbit_training(model)

# Names of the linear sub-modules that receive LoRA adapters.
lora_targets = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_targets,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

model = get_peft_model(model, lora_config)

# Tally trainable vs. total parameter counts in a single pass.
trainable = 0
total = 0
for param in model.parameters():
    size = param.numel()
    total += size
    if param.requires_grad:
        trainable += size
print(f"‚úÖ LoRA applied! Trainable: {trainable:,}/{total:,} ({100*trainable/total:.2f}%)")

## Step 6: Prepare Dataset

In [None]:
from datasets import Dataset

# Build one training string per JSONL record: the prompt immediately followed
# by its completion.
data = []
with open(training_file, 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines; json.loads would raise on an empty string.
            continue
        entry = json.loads(line)
        data.append({"text": entry["prompt"] + entry["completion"]})

dataset = Dataset.from_list(data)

def tokenize(example):
    """Tokenize one example's "text" field, truncating to 1024 tokens.

    No padding is applied here: the language-modeling data collator used for
    training pads each batch to its own longest sequence, which avoids
    processing 1024 tokens for every short example.
    """
    return tokenizer(example["text"], truncation=True, max_length=1024)

tokenized_dataset = dataset.map(tokenize, remove_columns=["text"])
print(f"‚úÖ Dataset: {len(tokenized_dataset)} examples")

## Step 7: Train! 🚀

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Training hyperparameters, grouped by concern.
training_args = TrainingArguments(
    output_dir="./functiongemma-finetuned",
    # Schedule
    num_train_epochs=3,
    learning_rate=2e-4,
    warmup_ratio=0.03,
    # Batching: 4 examples per device, accumulated over 4 steps
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    # Precision and optimizer
    fp16=True,
    optim="paged_adamw_8bit",
    # Logging and checkpoints
    logging_steps=10,
    save_steps=100,
    report_to="none",
)

# Causal-LM collation (mlm=False).
causal_lm_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=causal_lm_collator,
)

print("üöÄ Starting training (~10-20 min)...")
trainer.train()
print("‚úÖ Training complete!")

## Step 8: Test Model

In [None]:
test_prompts = ["Turn on the bedroom lights", "Set a timer for 5 minutes", "Hello"]
model.eval()

# Get function declarations from training data
with open(training_file, 'r') as f:
    sample = json.loads(f.readline())

# Everything that precedes the last user turn of the sample prompt is reused
# verbatim as the prefix for every test prompt; it does not change per
# iteration, so compute it once.
prompt_prefix = sample["prompt"].rsplit("<start_of_turn>user", 1)[0]

print("üß™ Testing:\n")
for prompt in test_prompts:
    full_prompt = prompt_prefix + f"<start_of_turn>user {prompt}<end_of_turn>\n<start_of_turn>model"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=100, temperature=0.1, do_sample=True, pad_token_id=tokenizer.eos_token_id)

    # generate() returns the prompt tokens followed by the new tokens. Decode
    # only the new ones so the marker search and the fallback print below
    # reflect what the model produced, never text copied from the prompt.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=False)
    if "<start_function_call>" in response:
        func = response.split("<start_function_call>")[1].split("<end_function_call>")[0]
        print(f"üìù {prompt}\nü§ñ {func}\n")
    else:
        print(f"üìù {prompt}\nü§ñ {response[-150:]}\n")

## Step 9: Save & Download

In [None]:
# Save adapters
model.save_pretrained("functiongemma-lora")
tokenizer.save_pretrained("functiongemma-lora")
!zip -r functiongemma-lora.zip functiongemma-lora/

# Download
files.download("functiongemma-lora.zip")
print("\n‚úÖ Download started!")

## Step 10: Export to GGUF for Ollama (Optional)

In [None]:
# Fold the saved LoRA adapters into a fresh float16 copy of the base model
# and write the merged model plus tokenizer to "functiongemma-merged".
from peft import PeftModel

print("üîÑ Merging adapters...")
base = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)
adapted = PeftModel.from_pretrained(base, "functiongemma-lora")
merged = adapted.merge_and_unload()

for artifact in (merged, tokenizer):
    artifact.save_pretrained("functiongemma-merged")
print("‚úÖ Merged!")

In [None]:
# Convert to GGUF
!pip install llama-cpp-python -q
!git clone https://github.com/ggerganov/llama.cpp.git -q
!pip install -r llama.cpp/requirements.txt -q
!python llama.cpp/convert_hf_to_gguf.py functiongemma-merged --outfile functiongemma-finetuned.gguf --outtype q4_k_m

files.download("functiongemma-finetuned.gguf")
print("‚úÖ GGUF downloaded!")

## Use with Ollama

```bash
# Create Modelfile
echo 'FROM ./functiongemma-finetuned.gguf
PARAMETER temperature 0.1
PARAMETER stop "<end_of_turn>"
PARAMETER stop "<end_function_call>"' > Modelfile

# Create and run
ollama create functiongemma-custom -f Modelfile
ollama run functiongemma-custom
```