In [16]:
import json

def convert_shato_to_conversations(json_files):
    """Convert SHATO training data to Gemma 3 conversation format.

    Args:
        json_files: Iterable of paths to JSON files. Each file must hold a
            list of example dicts with the keys ``user_input`` and
            ``expected_output``; ``retry_context`` is optional.

    Returns:
        A list with one ``{"conversations": [system, user, assistant]}`` dict
        per example, in input order. The assistant content is the
        JSON-serialized ``expected_output``. An empty ``json_files`` yields
        an empty list.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
        KeyError: If an example lacks ``user_input`` or ``expected_output``.
    """
    # The system prompt is the same for every example, so build it once
    # instead of once per loop iteration.
    # NOTE(review): the "\n" before "fields." looks accidental, but it is kept
    # byte-for-byte because it is part of the text the model is trained on.
    system_content = "You are SHATO robot assistant. Extract commands from user input and return JSON with response, command, and command_params\nfields."

    conversation_data = []

    for file_path in json_files:
        # Read as UTF-8 explicitly rather than relying on the platform's
        # default encoding, which would garble non-ASCII text on some systems.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for example in data:
            user_content = example['user_input']

            # Append the retry context only when it carries information; a
            # null or empty value would otherwise produce a prompt ending in
            # "Previous error: None" or a dangling "Previous error: ".
            retry_context = example.get("retry_context")
            if retry_context is not None and retry_context != "":
                user_content = f"{user_content}\n\nPrevious error: {retry_context}"

            conversation_data.append({
                "conversations": [
                    {"role": "system", "content": system_content},
                    {"role": "user", "content": user_content},
                    {"role": "assistant", "content": json.dumps(example["expected_output"])}
                ]
            })

    return conversation_data

In [17]:
# Raw SHATO example files to convert (paths as uploaded to the Colab runtime).
json_files = [
    "/content/chat_examples.json",
    "/content/move_to_corrections.json",
    "/content/patrol_corrections.json",
    "/content/rotate_corrections.json",
    "/content/successful_move_to.json",
    "/content/successful_patrol.json",
    "/content/successful_rotate.json",
]

# Turn every example from every file into a system/user/assistant conversation.
conversation_data = convert_shato_to_conversations(json_files)

# Sanity check: pretty-print the first converted conversation.
import pprint
print("First conversation example:")
pprint.pprint(conversation_data[0])

# Persist the converted conversations as indented JSON.
with open("conversation_data.json", "w") as out_file:
    out_file.write(json.dumps(conversation_data, indent=4))

First conversation example:
{'conversations': [{'content': 'You are SHATO robot assistant. Extract '
                               'commands from user input and return JSON with '
                               'response, command, and command_params\n'
                               'fields.',
                    'role': 'system'},
                   {'content': 'hello there', 'role': 'user'},
                   {'content': '{"response": "Hello! I\'m SHATO, ready to help '
                               'with robot commands.", "command": null, '
                               '"command_params": null}',
                    'role': 'assistant'}]}


In [4]:
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" trl peft accelerate bitsandbytes

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-b60fr_ve/unsloth_74edb115798a4fb890754e7b95891af7
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-b60fr_ve/unsloth_74edb115798a4fb890754e7b95891af7
  Resolved https://github.com/unslothai/unsloth.git to commit c7fc68c5e0a296ed7d3e16449e055ded20713bad
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2025.9.5 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2025.9.5-py3-none-any.whl.metadata (9.5 kB)
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.git

In [5]:
from unsloth import FastLanguageModel
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [28]:
# Load the instruction-tuned Gemma 3 270M checkpoint together with its
# tokenizer. 4-bit loading is switched off here (load_in_4bit=False).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/gemma-3-270m-it",
    max_seq_length=2048,
    load_in_4bit=False,
    dtype=None,
)

# Attach LoRA adapters to the attention and MLP projection layers.
model = FastLanguageModel.get_peft_model(
    model,
    # Adapter shape: rank 32 with alpha 64, 10% dropout, no bias terms.
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # Remaining options.
    use_gradient_checkpointing="unsloth",
    use_rslora=False,
    loftq_config=None,
    random_state=3407,
)

==((====))==  Unsloth 2025.9.4: Fast Gemma3_Text patching. Transformers: 4.56.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3_text won't work! Using float32.
Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA.


model.safetensors:   0%|          | 0.00/536M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/233 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

Unsloth: Making `model.base_model.model.model` require gradients


In [29]:
from unsloth.chat_templates import get_chat_template
from trl import SFTTrainer
from transformers import TrainingArguments

  # Set up Gemma 3 chat template
# Rebinds `tokenizer` to the value returned by get_chat_template.
tokenizer = get_chat_template(tokenizer, chat_template="gemma3")

def formatting_prompts_func(examples):
    """Render a batch of conversations into training text.

    Args:
        examples: Batch mapping whose ``"conversations"`` entry is a list of
            conversations (each a list of role/content message dicts).

    Returns:
        ``{"text": [...]}`` with one rendered string per conversation, in
        the same order as the input.
    """
    rendered = []
    for conversation in examples["conversations"]:
        # Uses the module-level `tokenizer`; render to a string, not token ids.
        prompt = tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=False,
        )
        # Drop a leading '<bos>' marker from the rendered text.
        # NOTE(review): presumably because tokenization adds it again later;
        # confirm against the trainer's tokenization step.
        rendered.append(prompt.removeprefix('<bos>'))
    return {"text": rendered}

# Hyperparameters for fine-tuning on the small SHATO dataset.
training_args = TrainingArguments(
    # Outputs, checkpoints and logging.
    output_dir="shato_outputs",
    save_strategy="epoch",
    logging_steps=5,
    # Batching: 2 examples per device, accumulated over 4 steps.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Schedule.
    num_train_epochs=3,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    warmup_steps=10,
    # Optimizer.
    optim="adamw_8bit",
    weight_decay=0.01,
    # Mixed precision: request bf16 when the GPU reports support, else fp16.
    bf16=torch.cuda.is_bf16_supported(),
    fp16=not torch.cuda.is_bf16_supported(),
    # Reproducibility.
    seed=3407,
)

In [30]:
# Load conversation format data
import json
from datasets import Dataset

# Read back the conversations saved as JSON.
with open('conversation_data.json', 'r') as f:
    data = json.loads(f.read())

# Wrap the list of conversation dicts in a Hugging Face dataset.
dataset = Dataset.from_list(data)
print(f"Loaded {len(dataset)} training conversations")

# Add a "text" column by rendering each conversation in batches.
formatted_dataset = dataset.map(formatting_prompts_func, batched=True)

# Preview the first 300 characters of the first rendered example.
print("\nSample formatted text:")
sample_text = formatted_dataset[0]['text']
print(sample_text[:300] + "...")


Loaded 268 training conversations


Map:   0%|          | 0/268 [00:00<?, ? examples/s]


Sample formatted text:
<start_of_turn>user
You are SHATO robot assistant. Extract commands from user input and return JSON with response, command, and command_params
fields.

hello there<end_of_turn>
<start_of_turn>model
{"response": "Hello! I'm SHATO, ready to help with robot commands.", "command": null, "command_params"...


In [31]:
# Build the supervised fine-tuning trainer over the rendered "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    # Dataset wiring.
    train_dataset=formatted_dataset,
    dataset_text_field="text",
    dataset_num_proc=2,
    # Same sequence limit that was passed when loading the model.
    max_seq_length=2048,
)

Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/268 [00:00<?, ? examples/s]

In [32]:
# Run the fine-tuning loop configured on `trainer`. The value returned by
# train() is kept in `trainer_stats` so it can be inspected afterwards.
print("Starting training...")
trainer_stats = trainer.train()

Starting training...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 268 | Num Epochs = 3 | Total steps = 102
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 7,593,984 of 275,692,160 (2.75% trained)


Step,Training Loss
5,4.351
10,2.5519
15,1.4015
20,0.9501
25,0.6391
30,0.5521
35,0.4911
40,0.3808
45,0.3257
50,0.2797


In [33]:
# Step 1: put the model into inference mode, then merge the LoRA adapters
# into the underlying model.
print("🔄 Merging LoRA weights...")
model = FastLanguageModel.for_inference(model)
merged_model = model.merge_and_unload()

# Step 2: write the merged weights (safetensors) and the tokenizer files
# into the same output directory.
merged_dir = "shato-gemma-270m-merged"
print("💾 Saving merged model...")
merged_model.save_pretrained(merged_dir, safe_serialization=True)
tokenizer.save_pretrained(merged_dir)
print("✅ Model merged and saved successfully!")

🔄 Merging LoRA weights...
💾 Saving merged model...
✅ Model merged and saved successfully!


In [34]:
# List the merged-model directory to confirm that files were written there.
# os.listdir raises FileNotFoundError if the directory does not exist.
import os
files_in_dir = os.listdir("shato-gemma-270m-merged")
print(f"Files saved: {files_in_dir}")

Files saved: ['added_tokens.json', 'model.safetensors', 'config.json', 'generation_config.json', 'special_tokens_map.json', 'tokenizer.model', 'tokenizer.json', 'tokenizer_config.json', 'chat_template.jinja']


In [35]:
# Step 3: Export the model to a GGUF file in F16 format.
# The first argument names the merged-model directory; the captured output
# shows the result being written as "shato-gemma-270m-merged.F16.gguf".
print("🔄 Converting merged model to GGUF F16 format...")
model.save_pretrained_gguf(
    "shato-gemma-270m-merged",  # Directory name of the merged model
    tokenizer,
    quantization_type = "F16",
)
print("✅ GGUF conversion completed!")

🔄 Converting merged model to GGUF F16 format...
Unsloth GGUF:hf-to-gguf:Loading model: shato-gemma-270m-merged
Unsloth GGUF:hf-to-gguf:Model architecture: Gemma3ForCausalLM
Unsloth GGUF:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
Unsloth GGUF:hf-to-gguf:Exporting model...
Unsloth GGUF:hf-to-gguf:gguf: loading model part 'model.safetensors'
Unsloth GGUF:hf-to-gguf:token_embd.weight,                 torch.float16 --> F16, shape = {640, 262144}
Unsloth GGUF:hf-to-gguf:output_norm.weight,                torch.float32 --> F32, shape = {640}
Unsloth GGUF:hf-to-gguf:Set meta model
Unsloth GGUF:hf-to-gguf:Set model parameters
Unsloth GGUF:hf-to-gguf:Set model quantization version
Unsloth GGUF:hf-to-gguf:Set model tokenizer
Unsloth GGUF:gguf.vocab:Setting special token type bos to 2
Unsloth GGUF:gguf.vocab:Setting special token type eos to 106
Unsloth GGUF:gguf.vocab:Setting special token type unk to 3
Unsloth GGUF:gguf.vocab:Setting special token type pad to 0
Unsloth GGUF:

Unsloth: GGUF conversion:   0%|          | 0/100 [00:00<?, ?it/s]

Unsloth GGUF:hf-to-gguf:Model successfully exported to ./
Unsloth: Converted to shato-gemma-270m-merged.F16.gguf with size = 536.3M
Unsloth: Successfully saved GGUF to:
shato-gemma-270m-merged.F16.gguf
✅ GGUF conversion completed!


In [36]:
from google.colab import files
import glob

# Download every GGUF file found in the current working directory through
# the Colab browser download helper. If no "*.gguf" file matches, the loop
# body never runs and nothing is printed.
gguf_files = glob.glob("*.gguf")
for gguf_file in gguf_files:
    print(f"Downloading {gguf_file}...")
    files.download(gguf_file)

Downloading shato-gemma-270m-merged.F16.gguf...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>