In [5]:
import os
import zipfile
import io

# Assuming the zip file and the JSON file are already uploaded and their paths are known.
zip_filename = "/content/asy_images.zip"
images_folder_name = os.path.splitext(zip_filename)[0] # e.g., "/content/asy_images"
print(f"Found '{zip_filename}'. Attempting to unzip...")
if os.path.exists(images_folder_name):
    print(f"Folder '{images_folder_name}' already exists. Assuming content is there. Skipping unzip.")
else:
    try:
        with zipfile.ZipFile(zip_filename, 'r') as zf:
            zf.extractall(os.path.dirname(zip_filename)) # Extract to the directory where the zip is
        print(f"Successfully unzipped '{zip_filename}'.")
    except FileNotFoundError:
        print(f"Error: Zip file not found at '{zip_filename}'. Please upload it.")
    except zipfile.BadZipFile:
        print(f"Error: '{zip_filename}' is not a valid zip file.")
    except Exception as e:
        print(f"An error occurred during unzipping: {e}")
if os.path.exists(images_folder_name):
    print(f"Folder '{images_folder_name}' is present.")
    print(f"Listing first 5 items in '{images_folder_name}':")
    try:
        items = os.listdir(images_folder_name)
        if items:
            for item in items[:5]:
                print(f"- {item}")
        else:
            print("(Folder is empty)")
    except Exception as e:
        print(f"Could not list contents: {e}")
else:
    print(f"Error: Folder '{images_folder_name}' was not created after unzipping.")

Found '/content/asy_images.zip'. Attempting to unzip...
Folder '/content/asy_images' already exists. Assuming content is there. Skipping unzip.
Folder '/content/asy_images' is present.
Listing first 5 items in '/content/asy_images':
- fig0185_02f82a6.png
- fig0224_03f9f07.png
- fig0191_086c93e.png
- fig0427_07acf96.png
- fig0117_05c0b91.png


In [4]:
W# -*- coding: utf-8 -*-
"""Gemma3_4B_Asymptote_Finetune.ipynb"""

# ---------------------------------------------------------------------------------
# 0. Mount Google Drive (for saving the fine-tuned model)
# ---------------------------------------------------------------------------------
import os
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
print(f"Mounting Google Drive at: {DRIVE_MOUNT_POINT}")
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

# Define where to save the final LoRA adapters on Google Drive
GDRIVE_ADAPTER_SAVE_PATH = os.path.join(DRIVE_MOUNT_POINT, "MyDrive", "gemma3_asymptote_finetuned")
os.makedirs(GDRIVE_ADAPTER_SAVE_PATH, exist_ok=True)

# ---------------------------------------------------------------------------------
# 1. Installation
# ---------------------------------------------------------------------------------
if "COLAB_ALWAYS_INSTALL_UNSLOTH" not in os.environ:
    print("Installing Unsloth and dependencies...")
    import sys
    import subprocess

    # Install packages using subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install",
                          "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"])
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-deps",
                          "xformers<0.0.26", "bitsandbytes", "accelerate", "peft", "trl"])
    subprocess.check_call([sys.executable, "-m", "pip", "install",
                          "sentencepiece", "protobuf", "datasets>=3.4.1",
                          "huggingface_hub", "hf_transfer", "pillow"])

    os.environ["COLAB_ALWAYS_INSTALL_UNSLOTH"] = "1"
else:
    print("Dependencies already installed.")

# ---------------------------------------------------------------------------------
# 2. Upload Data to Colab (if needed)
# ---------------------------------------------------------------------------------
# You can use this code to upload your JSON file and images folder
# from google.colab import files
# uploaded = files.upload()  # Upload your JSON file
# os.makedirs("asy_images", exist_ok=True)
# # Upload your images to the asy_images folder
# # (Use the file browser on the left to create and upload to the folder)

# ---------------------------------------------------------------------------------
# 3. Load and Process Dataset
# ---------------------------------------------------------------------------------
from datasets import Dataset
import json
import os
from PIL import Image

# Path to your JSON file and images directory
JSON_FILE_PATH = "/content/asymp_dataset.json"  # Update with your actual path
IMAGES_DIR = "/content/asy_images/"  # Update with your actual path

print("Loading dataset...")
# Load your JSON data
with open(JSON_FILE_PATH, 'r') as f:
    data = json.load(f)

# Convert to conversation format for Gemma 3
formatted_data = []
for item in data:
    img_path = os.path.join(IMAGES_DIR, os.path.basename(item["image_path"]))

    # Format as a conversation with image
    conversation = {
        "conversations": [
            {
                "role": "user",
                "content": item["instruction"]
            },
            {
                "role": "assistant",
                "content": item["code"]
            }
        ]
    }
    formatted_data.append(conversation)

# Create a Hugging Face dataset
dataset = Dataset.from_list(formatted_data)
print(f"Dataset prepared with {len(dataset)} examples")

# ---------------------------------------------------------------------------------
# 4. Load Model and Tokenizer
# ---------------------------------------------------------------------------------
import torch
# Import unsloth before transformers to ensure all optimizations are applied
from unsloth import FastModel

model_name = "unsloth/gemma-3-4b-it"
max_seq_length = 2048

print(f"Loading base model: {model_name}")
model, tokenizer = FastModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    load_in_4bit=True,
)
print("Base model and tokenizer loaded.")

# ---------------------------------------------------------------------------------
# 5. Add LoRA Adapters
# ---------------------------------------------------------------------------------
print("Adding PEFT (LoRA) adapters...")
model = FastModel.get_peft_model(
    model,
    r=16,                        # LoRA attention dimension
    lora_alpha=32,               # Alpha parameter for LoRA scaling
    lora_dropout=0.05,           # Dropout probability for LoRA layers
    bias="none",                 # Bias type
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    random_state=42,
)
print("PEFT adapters added.")

# ---------------------------------------------------------------------------------
# 6. Set up Chat Template (CRITICAL)
# ---------------------------------------------------------------------------------
from unsloth.chat_templates import get_chat_template

print("Setting up Gemma 3 chat template...")
tokenizer = get_chat_template(
    tokenizer,
    chat_template="gemma-3",
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    actual_model_config = model.model.config if hasattr(model, 'model') else model.config
    if actual_model_config.pad_token_id is None:
        actual_model_config.pad_token_id = tokenizer.eos_token_id

# ---------------------------------------------------------------------------------
# 7. Prepare Dataset for Training
# ---------------------------------------------------------------------------------
from unsloth.chat_templates import standardize_data_formats

# Standardize the conversations format
print("Standardizing data formats...")
dataset_standardized = standardize_data_formats(dataset)

# Apply chat template for training
def apply_chat_template(examples):
    """Apply chat template for training"""
    formatted_texts = []

    for conv in examples["conversations"]:
        formatted_texts.append(tokenizer.apply_chat_template(
            conv, tokenize=False, add_generation_prompt=False
        ))

    return {"text": formatted_texts}

print("Applying chat template...")
dataset_processed = dataset_standardized.map(
    apply_chat_template,
    batched=True,
    num_proc=1
)

# ---------------------------------------------------------------------------------
# 8. Fine-tuning the Model
# ---------------------------------------------------------------------------------
print("Setting up training...")
from trl import SFTTrainer, SFTConfig
from unsloth.chat_templates import train_on_responses_only

model.train()

# Configure training parameters
sft_config = SFTConfig(
    output_dir="gemma3_asymptote_temp",
    dataset_text_field="text",
    per_device_train_batch_size=2,  # Adjust based on your GPU
    gradient_accumulation_steps=4,  # Adjust based on your GPU
    warmup_steps=50,
    max_steps=500,                  # Adjust based on your dataset size
    learning_rate=1e-4,
    logging_steps=10,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    seed=42,
    report_to="none",
    save_strategy="no",
    packing=False,
)

# Initialize the trainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset_processed,
    args=sft_config,
    packing=False,
)

# Apply response-only masking for more efficient training
print("Applying 'train_on_responses_only' masking...")
trainer = train_on_responses_only(
    trainer,
    instruction_part="<start_of_turn>user\n",
    response_part="<start_of_turn>model\n",
)

# Start training
print("Starting training...")
trainer_stats = trainer.train()
print("Training finished.")

# ---------------------------------------------------------------------------------
# 9. Save Fine-tuned Model to Google Drive
# ---------------------------------------------------------------------------------
print(f"Saving fine-tuned adapters to: {GDRIVE_ADAPTER_SAVE_PATH}")
model.save_pretrained(GDRIVE_ADAPTER_SAVE_PATH)
tokenizer.save_pretrained(GDRIVE_ADAPTER_SAVE_PATH)
print("Fine-tuned model saved.")

# ---------------------------------------------------------------------------------
# 10. Quick Test (Optional)
# ---------------------------------------------------------------------------------
from transformers import TextStreamer
import PIL.Image as Image
from io import BytesIO

model.eval()

def test_with_image(model, tokenizer, instruction, image_path):
    # Load and prepare the image
    image = Image.open(image_path)

    # Create multimodal message
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": instruction},
                {"type": "image_url", "image_url": {"url": f"file://{image_path}"}}
            ]
        }
    ]

    # Apply chat template
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    print(f"\nPrompt: {instruction}")
    print("Model generating response...")
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # Generate response
    with torch.no_grad():
        model.generate(
            **inputs,
            streamer=streamer,
            max_new_tokens=1024,
            use_cache=True,
            temperature=0.2,
            top_p=0.95,
            top_k=50,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )
    print("\n")

# Test with an image from your dataset
test_image_path = os.path.join(IMAGES_DIR, os.path.basename(data[0]["image_path"]))
test_with_image(
    model,
    tokenizer,
    "Generate the Asymptote code for this diagram.",
    test_image_path
)


Mounting Google Drive at: /content/drive
Mounted at /content/drive
Dependencies already installed.
Loading dataset...
Dataset prepared with 435 examples
Loading base model: unsloth/gemma-3-4b-it
==((====))==  Unsloth 2025.5.6: Fast Gemma3 patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


model.safetensors:   0%|          | 0.00/4.56G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/70.0 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

Base model and tokenizer loaded.
Adding PEFT (LoRA) adapters...
Unsloth: Making `base_model.model.vision_tower.vision_model` require gradients
PEFT adapters added.
Setting up Gemma 3 chat template...
Standardizing data formats...


Unsloth: Standardizing formats (num_proc=2):   0%|          | 0/435 [00:00<?, ? examples/s]

Applying chat template...


Map:   0%|          | 0/435 [00:00<?, ? examples/s]

Setting up training...
Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/435 [00:00<?, ? examples/s]

Applying 'train_on_responses_only' masking...


Map (num_proc=2):   0%|          | 0/435 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 435 | Num Epochs = 10 | Total steps = 500
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 38,497,792/4,000,000,000 (0.96% trained)


Starting training...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,2.7424
20,2.1512
30,1.7404
40,1.5457
50,1.28
60,1.2219
70,1.1292
80,0.9395
90,1.0347
100,0.8886


Training finished.
Saving fine-tuned adapters to: /content/drive/MyDrive/gemma3_asymptote_finetuned
Fine-tuned model saved.

Prompt: Generate the Asymptote code for this diagram.
Model generating response...
size(6cm,0);

draw(E--N--S--W--cycle);
dot(E);dot(N);dot(S);dot(W);


