In [5]:
from unsloth import FastModel
import torch

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastModel.from_pretrained(
    model_name = "gemma-3-finetune",
    max_seq_length = 2048, # Choose any for long context!
    load_in_4bit = True,  # 4 bit quantization to reduce memory
    load_in_8bit = False, # [NEW!] A bit more accurate, uses 2x memory
    full_finetuning = False, # [NEW!] We have full finetuning now!
)

==((====))==  Unsloth 2025.3.14: Fast Gemma3 patching. Transformers: 4.51.3.
   \\   /|    Quadro RTX 5000. Num GPUs = 1. Max memory: 16.0 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


In [6]:
from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma-3",
)
messages = [{
    "role": "user",
    "content": [{
        "type" : "text",
        "text" : "The cat sat on the>>",
    }]
}]
text = tokenizer.apply_chat_template(
    messages,
    tokenize= False,
    add_generation_prompt = True, # Must add for generation
)


outputs = model.generate(
    **tokenizer([text], return_tensors = "pt").to("cuda"),
    max_new_tokens = 120, # Increase for longer outputs!
    # Recommended Gemma-3 settings!
    temperature = 0.01, top_p = 0.95, top_k = 64,
)

output  = tokenizer.batch_decode(outputs)[0]
print(output)

from decode import decode_from_morse

import re

match = re.search(r'<start_of_turn>model\n(.*?)<end_of_turn>', output, re.DOTALL)
if match:
    result = match.group(1)
    #print(result)
    print(decode_from_morse(result))
else:
    print("No match found.")

<bos><bos><start_of_turn>user
The cat sat on the>><end_of_turn>
<start_of_turn>model
- .... . / -.-. .- - / ... .- - / --- -. / - .... . ..--.-<end_of_turn>
THE CAT SAT ON THE_


In [7]:
if False:
    model.push_to_hub("philipfourie/gemma-3-1b-sos", tokenizer)

In [None]:
model.push_to_hub_merged("philipfourie/gemma-3-1b-sos-merged", tokenizer)

AttributeError: 'NoneType' object has no attribute 'startswith'

In [12]:
if True: # Change to True to upload GGUF
    model.push_to_hub_gguf(
        "gemma-3-finetune",
        quantization_type = "F16", # Only Q8_0, BF16, F16 supported
        repo_id = "philipfourie/gemma-3-1b-morse-code"
    )

NotImplementedError: Unsloth: llama.cpp GGUF conversion does not yet support converting model types of `Gemma3ForCausalLM`.

In [15]:
model.save_pretrained_merged("./gemma-3-finetune2", tokenizer)
# model.save_pretrained_gguf(
#         "gemma-3-finetune2",
#         quantization_type = "Q8_0", # For now only Q8_0, BF16, F16 supported
#     )

AttributeError: 'NoneType' object has no attribute 'startswith'