# Fine-tuning Gemma3n on Alternative Uses Task (AUT) originality ratings

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Download the training dataset from my repo

In [None]:
# Download dataset
!wget https://raw.githubusercontent.com/theGreen-Coder/MCTB/refs/heads/main/data/gemma3n_finetune/train.json -O train_AUT.json

--2025-09-28 01:00:03--  https://raw.githubusercontent.com/theGreen-Coder/MCTB/refs/heads/main/data/gemma3n_finetune/train.json
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2093860 (2.0M) [text/plain]
Saving to: ‘train_AUT.json’


2025-09-28 01:00:03 (50.5 MB/s) - ‘train_AUT.json’ saved [2093860/2093860]



In [None]:
import json

# Load the training records. The context manager closes the file handle
# deterministically (the previous bare open() left that to the garbage
# collector), and the explicit encoding avoids depending on the locale default.
with open("train_AUT.json", "r", encoding="utf-8") as train_fh:
    file = json.load(train_fh)
# Print the first record as a quick sanity check of the schema.
print(file[0])

{'prompt': 'brick', 'response': 'decorate', 'target': '25'}


### Install dependencies

In [None]:
%%capture
# Install Unsloth and its dependencies; %%capture suppresses all pip output.
import os, re
# Colab is detected by looking for "COLAB_" in the concatenated environment variable names.
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # Keep only the leading numeric part of the torch version (e.g. "2.8.0" from "2.8.0+cu126").
    import torch; v = re.match(r"[0-9\.]{3,}", str(torch.__version__)).group(0)
    # Choose the xformers pin from the detected torch version.
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    # --no-deps installs only the listed packages without pulling in their dependencies.
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
# Pin transformers and trl to fixed versions regardless of the branch taken above.
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
# Set the torch._dynamo recompile limit to 64.
import torch; torch._dynamo.config.recompile_limit = 64;

In [None]:
%%capture
!pip install --no-deps --upgrade timm # Only for Gemma 3N

In [None]:
# Report whether a CUDA GPU is visible and, if so, its memory budget in GB.
import torch

BYTES_PER_GB = 1024**3
has_cuda = torch.cuda.is_available()

print(f"CUDA available: {has_cuda}")
print(f"GPU: {torch.cuda.get_device_name(0) if has_cuda else 'None'}")

if not has_cuda:
    print("No CUDA-compatible GPU detected.")
else:
    gpu_idx = 0
    props = torch.cuda.get_device_properties(gpu_idx)
    total_mem = props.total_memory / BYTES_PER_GB  # bytes -> GB
    allocated_gb = torch.cuda.memory_allocated(0) / BYTES_PER_GB
    reserved_gb = torch.cuda.memory_reserved(0) / BYTES_PER_GB
    print(f"GPU {gpu_idx}: {props.name}")
    print(f"Total memory: {total_mem:.2f} GB")
    print(f"Allocated: {allocated_gb:.2f} GB")
    print(f"Reserved:  {reserved_gb:.2f} GB")
    # "Free" is what the caching allocator has not reserved yet.
    print(f"Free:  {(total_mem - reserved_gb):.2f} GB")


CUDA available: True
GPU: NVIDIA L4
GPU 0: NVIDIA L4
Total memory: 22.16 GB
Allocated: 0.00 GB
Reserved:  0.00 GB
Free:  22.16 GB


### Prepare unsloth training

In [None]:
from unsloth import FastModel
import torch

# Reference list of pre-quantized Unsloth checkpoints. It is not read anywhere
# in this cell; the checkpoint actually loaded is the `model_name` passed below.
fourbit_models = [
    # 4bit dynamic quants for superior accuracy and low memory use
    "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit",
    "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
    # Pretrained models
    "unsloth/gemma-3n-E4B-unsloth-bnb-4bit",
    "unsloth/gemma-3n-E2B-unsloth-bnb-4bit",

    # Other Gemma 3 quants
    "unsloth/gemma-3-1b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-27b-it-unsloth-bnb-4bit",
]

# Load `unsloth/gemma-3n-E4B-it` together with its tokenizer, quantized to
# 4 bit and prepared for adapter-based (not full) fine-tuning.
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-3n-E4B-it",
    dtype = None, # None for auto detection
    max_seq_length = 1024,
    load_in_4bit = True,  # 4 bit quantization to reduce memory
    full_finetuning = False,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.9.9: Fast Gemma3N patching. Transformers: 4.55.4.
   \\   /|    NVIDIA L4. Num GPUs = 1. Max memory: 22.161 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/1.15G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/3.72G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/98.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

In [None]:
# Wrap the base model with LoRA adapters. Only the language layers (attention
# and MLP modules) are adapted; the vision layers are left untouched.
# Note: this rebinds `model`, so re-running the cell wraps the already-wrapped model.
model = FastModel.get_peft_model(
    model,
    finetune_vision_layers     = False, # Turn off for just text!
    finetune_language_layers   = True,  # Should leave on!
    finetune_attention_modules = True,  # Attention good for GRPO
    finetune_mlp_modules       = True,  # Should leave on always!

    r=64,            # Larger = higher accuracy, but might overfit
    lora_alpha=128,  # Recommended alpha == r at least (here alpha = 2 * r)
    lora_dropout = 0,
    bias = "none",
    random_state = 3407,  # Fixed seed for the adapter setup
)

Unsloth: Making `model.base_model.model.model.language_model` require gradients


### Prepare Dataset

In [None]:
from datasets import Dataset

def format_prompt(example):
    """Render one AUT record as a complete two-turn chat transcript.

    Args:
        example: Mapping with the keys "prompt" (the object, upper-cased in
            the output), "response" (the proposed use) and "target" (the
            originality rating).

    Returns:
        A string made of a user turn asking for a 10-50 originality rating,
        followed by a model turn that states the rating.
    """
    item = example["prompt"].upper()
    use = example["response"]
    rating = example["target"]

    user_turn = (
        "<start_of_turn>user\n"
        "You are an expert alternative uses test (AUT) rater. "
        f"The following is a response of a creative or surprising use of a {item}. "
        f"On a scale of 10-50, judge how original this use for {item} is, "
        'where 10 is "not at all creative" and 50 is "very creative". '
        f'To rate:"{use}"<end_of_turn>\n'
    )
    model_turn = (
        "<start_of_turn>model\n"
        f'The use "{use}" of {item} has a rating of {rating}<end_of_turn>\n'
    )
    return user_turn + model_turn

# Render every training record to its chat transcript and expose the results
# as a single-column ("text") Hugging Face dataset.
formatted_data = list(map(format_prompt, file))
dataset = Dataset.from_dict(dict(text=formatted_data))

In [None]:
dataset[0]["text"]

'<start_of_turn>user\nYou are an expert alternative uses test (AUT) rater. The following is a response of a creative or surprising use of a BRICK. On a scale of 10-50, judge how original this use for BRICK is, where 10 is "not at all creative" and 50 is "very creative". To rate:"decorate"<end_of_turn>\n<start_of_turn>model\nThe use "decorate" of BRICK has a rating of 25<end_of_turn>\n'

### Train the model

In [None]:
from trl import SFTTrainer, SFTConfig
# Build the supervised fine-tuning trainer over the "text" column of `dataset`.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    eval_dataset = None, # Can set up evaluation!
    args = SFTConfig(
        dataset_text_field = "text",
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 16, # Use GA to mimic batch size! (1 x 16 = 16 samples per optimizer step)
        warmup_steps = 5,
        num_train_epochs = 1, # Set this for 1 full training run.
        learning_rate = 5e-6, # Conservative step size, below the 2e-5 often quoted for long runs (confirm this is intended)
        logging_steps = 50,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        report_to = "none", # "none" disables external loggers; set e.g. "wandb" to enable one
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/20037 [00:00<?, ? examples/s]

In [None]:
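# NOTE(review): response-only loss masking is currently disabled (commented out).
# The label-inspection cell further down maps -100 entries in "labels" to the pad
# token, which presumably expects this masking to have been applied. Re-enable it
# (and confirm `response_part`) before relying on that cell after a fresh run.
# from unsloth.chat_templates import train_on_responses_only
# trainer = train_on_responses_only(
#     trainer,
#     instruction_part = "<start_of_turn>user\n",
#     response_part = "<start_of_turn>model\n",
# )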

Map (num_proc=12):   0%|          | 0/20037 [00:00<?, ? examples/s]

In [None]:
tokenizer.decode(trainer.train_dataset[100]["input_ids"])

'<bos><start_of_turn>user\nYou are an expert alternative uses test (AUT) rater. The following is a response of a creative or surprising use of a PAPERCLIP. On a scale of 10-50, judge how original this use for PAPERCLIP is, where 10 is "not at all creative" and 50 is "very creative". To rate:"twist tie for bread or plastic bags"<end_of_turn>\n<start_of_turn>model\nThe use "twist tie for bread or plastic bags" of PAPERCLIP has a rating of 25<end_of_turn>\n'

In [None]:
# Show which tokens of sample 100 contribute to the loss: label positions set
# to -100 are swapped for the pad token and then rendered as spaces.
sample_labels = trainer.train_dataset[100]["labels"]
visible_ids = [tokenizer.pad_token_id if tok == -100 else tok for tok in sample_labels]
tokenizer.decode(visible_ids).replace(tokenizer.pad_token, " ")

'                                                                                          25<end_of_turn>\n'

### Actual training run

In [None]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 20,037 | Num Epochs = 1 | Total steps = 1,253
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 16
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 16 x 1) = 16
 "-____-"     Trainable parameters = 153,681,920 of 8,003,660,112 (1.92% trained)


Step,Training Loss
50,1.1051
100,0.2377
150,0.2388
200,0.2301
250,0.2279
300,0.2198
350,0.2109
400,0.2164
450,0.2063
500,0.1988


Unsloth: Will smartly offload gradients to save VRAM!


In [None]:
# Merge the LoRA adapters into the base weights and write the merged model to
# the "gemma-3N-finetune-long1epoch" directory (no save_method given, so the
# library default is used).
model.save_pretrained_merged("gemma-3N-finetune-long1epoch", tokenizer)

config.json: 0.00B [00:00, ?B/s]

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Checking cache directory for required files...
Cache check failed: model-00001-of-00004.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.08G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  25%|██▌       | 1/4 [00:08<00:24,  8.18s/it]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  50%|█████     | 2/4 [00:29<00:31, 15.70s/it]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  75%|███████▌  | 3/4 [01:13<00:28, 28.58s/it]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.66G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files: 100%|██████████| 4/4 [01:21<00:00, 20.36s/it]
Unsloth: Merging weights into 16bit: 100%|██████████| 4/4 [00:55<00:00, 13.80s/it]


Unsloth: Merge process complete.


In [None]:
# Convert the merged model saved under "gemma-3N-finetune-long1epoch" to a
# GGUF file, keeping the weights in BF16.
model.save_pretrained_gguf(
      "gemma-3N-finetune-long1epoch",
      tokenizer,
      quantization_type = "BF16", # For now only Q8_0, BF16, F16 supported
  )

Unsloth: Updating system package directories
Unsloth: Install GGUF and other packages
Unsloth GGUF:hf-to-gguf:Loading model: gemma-3N-finetune-long1epoch
Unsloth GGUF:hf-to-gguf:Model architecture: Gemma3nForConditionalGeneration
Unsloth GGUF:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
Unsloth GGUF:hf-to-gguf:Exporting model...
Unsloth GGUF:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'
Unsloth GGUF:hf-to-gguf:gguf: loading model part 'model-00001-of-00004.safetensors'
Unsloth GGUF:hf-to-gguf:altup_proj.weight,                 torch.bfloat16 --> BF16, shape = {2048, 2048, 3}
Unsloth GGUF:hf-to-gguf:altup_unembd_proj.weight,          torch.bfloat16 --> BF16, shape = {2048, 2048, 3}
Unsloth GGUF:hf-to-gguf:token_embd.weight,                 torch.bfloat16 --> BF16, shape = {2048, 262144}
Unsloth GGUF:hf-to-gguf:gguf: loading model part 'model-00002-of-00004.safetensors'
Unsloth GGUF:hf-to-gguf:per_layer_token_embd.weight,       torch.bfl

Unsloth: GGUF conversion:   0%|          | 0/100 [00:00<?, ?it/s]

Unsloth GGUF:hf-to-gguf:Model successfully exported to ./
Unsloth: Converted to gemma-3N-finetune-long1epoch.BF16.gguf with size = 13.7G
Unsloth: Successfully saved GGUF to:
gemma-3N-finetune-long1epoch.BF16.gguf


['gemma-3N-finetune-long1epoch.BF16.gguf']

In [None]:
import os
import shutil

# Persist the BF16 GGUF export to Google Drive.
gguf_src = "/content/gemma-3N-finetune-long1epoch.BF16.gguf"
gguf_dst = "/content/drive/MyDrive/models/gemma-3N-finetune-long1epoch.BF16.gguf"
# shutil.copy2 does not create missing parent folders, so make sure the
# target folder exists before copying.
os.makedirs(os.path.dirname(gguf_dst), exist_ok=True)
shutil.copy2(gguf_src, gguf_dst)

'/content/drive/MyDrive/models/gemma-3N-finetune-long1epoch.BF16.gguf'

In [None]:
import os
import shutil

# Persist the F16 GGUF export to Google Drive.
gguf_src = "/content/gemma-3N-finetune.F16.gguf"
gguf_dst = "/content/drive/MyDrive/models/gemma-3N-finetune.F16.gguf"
# shutil.copy2 does not create missing parent folders, so make sure the
# target folder exists before copying.
os.makedirs(os.path.dirname(gguf_dst), exist_ok=True)
shutil.copy2(gguf_src, gguf_dst)

'/content/drive/MyDrive/models/gemma-3N-finetune.F16.gguf'

In [None]:
import os
import shutil

# Persist the Q8_0 GGUF export to Google Drive.
gguf_src = "/content/gemma-3N-finetune.Q8_0.gguf"
gguf_dst = "/content/drive/MyDrive/models/gemma-3N-finetune.Q8_0.gguf"
# shutil.copy2 does not create missing parent folders, so make sure the
# target folder exists before copying.
os.makedirs(os.path.dirname(gguf_dst), exist_ok=True)
shutil.copy2(gguf_src, gguf_dst)

'/content/drive/MyDrive/models/gemma-3N-finetune.Q8_0.gguf'

In [None]:
from google.colab import files
import os

# os.listdir returns entries in arbitrary order, so sort the matches to make
# "the first GGUF file" a deterministic choice when several exports exist.
gguf_files = sorted(f for f in os.listdir("./") if f.endswith(".gguf"))
if gguf_files:
    gguf_file = os.path.join("./", gguf_files[0])
    print(f"Downloading: {gguf_file}")
    files.download(gguf_file)
else:
    # Say so explicitly instead of finishing silently.
    print("No .gguf files found in ./ - nothing to download.")

Downloading: ./gemma-3N-finetune.F16.gguf


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from transformers import TrainerCallback
import os
from google.colab import files
from transformers import AutoModelForCausalLM, AutoTokenizer
from unsloth import FastLanguageModel

class GGUFSaveCallback(TrainerCallback):
    """Export the newest Trainer checkpoint to GGUF at the end of every epoch.

    After each epoch the callback looks for ``checkpoint-<step>`` folders in
    ``output_dir``, reloads the highest-numbered one, converts it to GGUF and
    starts a Colab browser download of one resulting ``.gguf`` file.

    Args:
        output_dir: Directory that is scanned for ``checkpoint-<step>`` folders.
        quantization: Value forwarded to ``save_pretrained_gguf`` as
            ``quantization_method``.
    """

    def __init__(self, output_dir="outputs", quantization="q4_k_m"):
        self.output_dir = output_dir
        self.quantization = quantization

    def _latest_checkpoint(self):
        """Return the highest-numbered ``checkpoint-<step>`` entry name, or None if there is none."""
        if not os.path.isdir(self.output_dir):
            return None
        ckpts = [ck for ck in os.listdir(self.output_dir) if ck.startswith("checkpoint-")]
        if not ckpts:
            return None
        return max(ckpts, key=lambda x: int(x.split("-")[1]))

    def on_epoch_end(self, args, state, control, **kwargs):
        """Convert the latest checkpoint to GGUF and download it.

        Returns:
            The unchanged ``control`` object. If no checkpoint exists yet the
            export is skipped instead of raising ``IndexError``.
        """
        # NOTE(review): this hook assumes a checkpoint has already been written
        # when the epoch ends (e.g. save_strategy="epoch"). If the Trainer saves
        # only after firing this event, `on_save` is the safer hook - confirm
        # against the Transformers version in use.
        latest = self._latest_checkpoint()
        if latest is None:
            print(f"\n[Callback] No checkpoint found in {self.output_dir!r}; skipping GGUF export.\n")
            return control

        last_ckpt = os.path.join(self.output_dir, latest)
        print(f"\n[Callback] Converting {last_ckpt} to GGUF...\n")

        # Reload checkpoint (this loads another copy of the model while training
        # is still in progress, so watch GPU memory).
        model = AutoModelForCausalLM.from_pretrained(last_ckpt, device_map="auto")
        tokenizer = AutoTokenizer.from_pretrained(last_ckpt)

        # Convert to GGUF
        # NOTE(review): other cells in this notebook pass `quantization_type=`
        # to save_pretrained_gguf; confirm which keyword this code path expects.
        fast_model = FastLanguageModel.for_inference(model)
        out_dir = f"gguf_{latest}"
        os.makedirs(out_dir, exist_ok=True)
        fast_model.save_pretrained_gguf(out_dir, tokenizer, quantization_method=self.quantization)

        # Download one GGUF file; sorting makes the pick deterministic because
        # os.listdir order is arbitrary.
        gguf_files = sorted(f for f in os.listdir(out_dir) if f.endswith(".gguf"))
        if gguf_files:
            gguf_path = os.path.join(out_dir, gguf_files[0])
            print(f"[Callback] Downloading: {gguf_path}")
            files.download(gguf_path)

        return control

trainer.add_callback(GGUFSaveCallback(output_dir="outputs", quantization="q4_k_m"))

In [None]:
# Train the model
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 20,037 | Num Epochs = 3 | Total steps = 7,515
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 160,759,808 of 8,010,738,000 (2.01% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
25,1.6465
50,0.3576
75,0.3741
100,0.2895
125,0.2892
150,0.2693
175,0.2466
200,0.2698
225,0.297
250,0.2864


IndexError: list index out of range

In [None]:
from unsloth.chat_templates import get_chat_template
# Quick generation smoke test: apply the "gemma-3" chat template, send one
# user message and decode whatever the model generates.
# Note: this rebinds `tokenizer` to the template-patched tokenizer.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma-3",
)
# A single user turn whose content is a list of typed parts (text only here).
messages = [{
    "role": "user",
    "content": [{
        "type" : "text",
        "text" : "Continue the sequence: 1, 1, 2, 3, 5, 8,",
    }]
}]
# Tokenize the conversation and move the resulting tensors to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
    tokenize = True,
    return_dict = True,
).to("cuda")
outputs = model.generate(
    **inputs,
    max_new_tokens = 64, # Increase for longer outputs!
    # Recommended Gemma-3 settings!
    temperature = 1.0, top_p = 0.95, top_k = 64,
)
# Decode prompt + completion; special tokens are kept in the text.
tokenizer.batch_decode(outputs)

['<bos><start_of_turn>user\nContinue the sequence: 1, 1, 2, 3, 5, 8,<end_of_turn>\n<start_of_turn>model\n13<end_of_turn>']

In [None]:
# Merge the LoRA adapters and save the result to "gemma-3N-finetune" using the
# "forced_merged_4bit" save method.
# NOTE(review): in the recorded run, the Q8_0 GGUF export attempted on this
# directory afterwards failed, whereas the export after a default merge of
# "gemma-3N-finetune-long1epoch" succeeded - confirm whether a 4-bit merge can
# be converted to GGUF at all.
model.save_pretrained_merged("gemma-3N-finetune", tokenizer, save_method="forced_merged_4bit")

Unsloth: Merging LoRA weights into 4bit model...




Unsloth: Merging finished.
Unsloth: Found skipped modules: ['model.language_model.layers.0.altup.correction_coefs', 'model.language_model.layers.0.altup.prediction_coefs', 'model.language_model.layers.0.altup.modality_router', 'model.language_model.layers.0.per_layer_projection', 'model.language_model.layers.1.altup.correction_coefs', 'model.language_model.layers.1.altup.prediction_coefs', 'model.language_model.layers.1.altup.modality_router', 'model.language_model.layers.1.per_layer_projection', 'model.language_model.layers.2.altup.correction_coefs', 'model.language_model.layers.2.altup.prediction_coefs', 'model.language_model.layers.2.altup.modality_router', 'model.language_model.layers.2.per_layer_projection', 'model.language_model.layers.3.altup.correction_coefs', 'model.language_model.layers.3.altup.prediction_coefs', 'model.language_model.layers.3.altup.modality_router', 'model.language_model.layers.3.per_layer_projection', 'model.language_model.layers.4.altup.correction_coefs', 

In [None]:
model.save_pretrained_gguf(
    "gemma-3N-finetune",
    tokenizer,
    quantization_type = "Q8_0", # For now only Q8_0, BF16, F16 supported
)

Unsloth GGUF:hf-to-gguf:Loading model: gemma-3N-finetune
Unsloth GGUF:hf-to-gguf:Model architecture: Gemma3nForConditionalGeneration
Unsloth GGUF:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
Unsloth GGUF:hf-to-gguf:Exporting model...
Unsloth GGUF:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'
Unsloth GGUF:hf-to-gguf:gguf: loading model part 'model-00001-of-00003.safetensors'
Unsloth GGUF:hf-to-gguf:token_embd.weight,                 torch.bfloat16 --> Q8_0, shape = {2048, 262144}
Traceback (most recent call last):
  File "/content/llama.cpp/unsloth_convert_hf_to_gguf.py", line 9767, in <module>
    main()
  File "/content/llama.cpp/unsloth_convert_hf_to_gguf.py", line 9761, in main
    model_instance.write()
  File "/content/llama.cpp/unsloth_convert_hf_to_gguf.py", line 831, in write
    self.prepare_tensors()
  File "/content/llama.cpp/unsloth_convert_hf_to_gguf.py", line 699, in prepare_tensors
    for new_name, data_torch in (self.m

RuntimeError: Unsloth: Failed to convert llama.cpp/unsloth_convert_hf_to_gguf.py to GGUF.

In [None]:
model.save_pretrained_gguf(
    "my_model2",
    tokenizer,
    quantization_type = "Q8_0", # For now only Q8_0, BF16, F16 supported
)

Unsloth GGUF:hf-to-gguf:Loading model: my_model2
Unsloth GGUF:hf-to-gguf:Model architecture: Gemma3nForConditionalGeneration
Unsloth GGUF:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
Unsloth GGUF:hf-to-gguf:Exporting model...
Unsloth GGUF:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'
Unsloth GGUF:hf-to-gguf:gguf: loading model part 'model-00001-of-00003.safetensors'
Unsloth GGUF:hf-to-gguf:token_embd.weight,                 torch.bfloat16 --> Q8_0, shape = {2048, 262144}
Traceback (most recent call last):
  File "/content/llama.cpp/unsloth_convert_hf_to_gguf.py", line 9767, in <module>
    main()
  File "/content/llama.cpp/unsloth_convert_hf_to_gguf.py", line 9761, in main
    model_instance.write()
  File "/content/llama.cpp/unsloth_convert_hf_to_gguf.py", line 831, in write
    self.prepare_tensors()
  File "/content/llama.cpp/unsloth_convert_hf_to_gguf.py", line 699, in prepare_tensors
    for new_name, data_torch in (self.modify_te

RuntimeError: Unsloth: Failed to convert llama.cpp/unsloth_convert_hf_to_gguf.py to GGUF.

In [None]:
if True:
    model.save_pretrained_merged("gemma-3N-finetune", tokenizer)



In [None]:
# Export the model saved under "gemma-3N-finetune" to a Q8_0 GGUF file.
if True: # Export is enabled; set to False to skip the GGUF conversion
    model.save_pretrained_gguf(
        "gemma-3N-finetune",
        tokenizer,
        quantization_type = "Q8_0", # For now only Q8_0, BF16, F16 supported
    )

RuntimeError: Unsloth: `gemma-3N-finetune` does not exist?

In [None]:
import os

from transformers import AutoModelForCausalLM, AutoTokenizer
from unsloth import FastLanguageModel  # if you're using Unsloth

# Path where Trainer saved checkpoints
output_dir = "outputs"


def _checkpoint_order(name):
    """Sort key: numeric step order for ``checkpoint-<step>``, other names last."""
    step = name.split("-")[1]
    return (0, int(step), name) if step.isdigit() else (1, 0, name)


if os.path.isdir(output_dir):
    # Order by numeric step so that e.g. checkpoint-500 comes before
    # checkpoint-1000 (a plain string sort would reverse them).
    checkpoint_names = sorted(
        (name for name in os.listdir(output_dir) if name.startswith("checkpoint-")),
        key=_checkpoint_order,
    )
else:
    checkpoint_names = []
    print(f"No checkpoint directory found at {output_dir!r}; nothing to convert.")

for checkpoint in checkpoint_names:
    ckpt_path = os.path.join(output_dir, checkpoint)
    print(f"Converting {ckpt_path} to GGUF...")

    # Reload checkpoint under loop-local names so the notebook-level
    # `model` / `tokenizer` are not overwritten by the last checkpoint.
    ckpt_model = AutoModelForCausalLM.from_pretrained(ckpt_path, device_map="auto")
    ckpt_tokenizer = AutoTokenizer.from_pretrained(ckpt_path)

    # Convert to GGUF
    # NOTE(review): other cells in this notebook pass `quantization_type=`
    # to save_pretrained_gguf; confirm which keyword this code path expects.
    ckpt_model = FastLanguageModel.for_inference(ckpt_model)  # enable fast mode if needed
    out_dir = f"gguf_{checkpoint}"
    ckpt_model.save_pretrained_gguf(out_dir, ckpt_tokenizer, quantization_method="q4_k_m")


In [None]:
from google.colab import files
import os

# NOTE(review): no visible cell in this notebook writes to "gguf_model"
# (exports go to the working directory or to gguf_<checkpoint> folders), so
# confirm this is the intended location.
gguf_dir = "gguf_model"
if os.path.isdir(gguf_dir):
    # os.listdir order is arbitrary; sort so the chosen file is deterministic.
    gguf_files = sorted(f for f in os.listdir(gguf_dir) if f.endswith(".gguf"))
else:
    gguf_files = []
if gguf_files:
    gguf_file = os.path.join(gguf_dir, gguf_files[0])
    print(f"Downloading: {gguf_file}")
    files.download(gguf_file)
else:
    # Covers both a missing directory and a directory without exports.
    print(f"No .gguf files found in {gguf_dir!r} - nothing to download.")

In [None]:
!zip -r myfiles.zip gemma-3N-finetune/

  adding: gemma-3N-finetune/ (stored 0%)
  adding: gemma-3N-finetune/chat_template.jinja (deflated 70%)
  adding: gemma-3N-finetune/generation_config.json (deflated 33%)
  adding: gemma-3N-finetune/config.json (deflated 90%)
  adding: gemma-3N-finetune/preprocessor_config.json (deflated 56%)
  adding: gemma-3N-finetune/model-00001-of-00003.safetensors


zip error: Interrupted (aborting)
