In [1]:
%%capture
# Install latest transformers for Gemma 3N
!pip install --no-deps git+https://github.com/huggingface/transformers.git # Only for Gemma 3N
!pip install --no-deps --upgrade timm # Only for Gemma 3N

In [2]:
# ==============================================================================
# CELL 1: Install all necessary libraries (Same as Colab)
# ==============================================================================

!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3  trl triton cut_cross_entropy 
!pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
!pip install -U peft
!pip install --force-reinstall --no-deps git+https://github.com/unslothai/unsloth-zoo.git
!pip install --force-reinstall --no-deps git+https://github.com/unslothai/unsloth.git

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting xformers==0.0.29.post3
  Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting trl
  Downloading trl-0.19.1-py3-none-any.whl.metadata (10 kB)
Collecting cut_cross_entropy
  Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)
Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl (43.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.4/43.4 MB[0m [31m44.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl (72.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trl-0.19.1-py3-none-any.whl (376 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m376.2/376.2 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00

In [3]:
from unsloth import FastVisionModel # <-- INSIGHT #1: Use FastVisionModel
from transformers import AutoProcessor
import torch

MODEL_NAME = "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit"

print("\nLoading Gemma 3N model, tokenizer, and processor...")
model, tokenizer = FastVisionModel.from_pretrained(
    model_name = MODEL_NAME,
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)
processor = AutoProcessor.from_pretrained(MODEL_NAME)
print("✅ Base model, tokenizer, and processor loaded.")

# Add PEFT adapters using the correct class
model = FastVisionModel.get_peft_model(
    model,
    r = 16,
    lora_alpha = 16,
    finetune_vision_layers = True,
    finetune_language_layers = True,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],


    # ... other PEFT args from the docs
)
print("✅ PEFT adapters added.")



🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-07-19 01:22:41.542429: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752888161.906597      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752888162.011235      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!

Loading Gemma 3N model, tokenizer, and processor...
==((====))==  Unsloth 2025.7.5: Fast Gemma3N patching. Transformers: 4.54.0.dev0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Gemma3N does not support SDPA - switching to eager!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/2.65G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/469M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/98.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/98.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

✅ Base model, tokenizer, and processor loaded.
Unsloth: Making `model.base_model.model.model.language_model` require gradients
✅ PEFT adapters added.


In [4]:

from datasets import Dataset, Image as HFImage
from pathlib import Path
import os

# --- NEW STEP: Copy data to the faster working directory ---
source_path = "/kaggle/input/maize-dataset/"
local_path = "/kaggle/working/local_datasets/"

if not os.path.exists(local_path):
    print(f"Copying data from {source_path} to {local_path} for faster access...")
    !cp -r {source_path} {local_path}
    print("✅ Data copy complete.")
else:
    print(f"✅ Data already copied to {local_path}")
# ---------------------------------------------------------


# --- NEW STEP: Copy data to the faster working directory ---
source_path2 = "/kaggle/input/aura-minds-lora-adapters/"
local_path2 = "/kaggle/working/adapters_datasets/"

if not os.path.exists(local_path2):
    print(f"Copying data from {source_path2} to {local_path2} for faster access...")
    !cp -r {source_path2} {local_path2}
    print("✅ Data copy complete.")
else:
    print(f"✅ Data already copied to {local_path2}")
# ---------------------------------------------------------



Copying data from /kaggle/input/maize-dataset/ to /kaggle/working/local_datasets/ for faster access...
✅ Data copy complete.
Copying data from /kaggle/input/aura-minds-lora-adapters/ to /kaggle/working/adapters_datasets/ for faster access...
✅ Data copy complete.


In [5]:
# ==============================================================================
# CELL 4: Prepare the Dataset as a Python List (FINAL CORRECTED VERSION)
# ==============================================================================
from pathlib import Path
from PIL import Image
from tqdm import tqdm

# --- DEFINE THE FUNCTION FIRST ---
def create_conversation_dict(image_path, class_name):
    """Creates the final dictionary structure for a single sample."""
    display_name = CLASS_NAME_MAPPING.get(class_name, "Unknown Maize Condition")
    
    # Load the actual image object here
    pil_image = Image.open(image_path).convert("RGB")
    
    return {
        "messages": [
            { "role": "user",
              "content": [
                {"type": "text", "text": "What is the condition of this maize plant?"},
                # The PIL Image object goes directly here
                {"type": "image", "image": pil_image}
              ]
            },
            { "role": "assistant",
              "content": [
                {"type": "text", "text": f"This is a {display_name}."}
              ]
            },
        ]
    }

# --- THEN, DEFINE YOUR MAPPING ---
CLASS_NAME_MAPPING = {
    "maize_healthy": "Healthy Maize Plant",
    "phosphorus_deficiency": "Maize Phosphorus Deficiency",
}

# --- FINALLY, RUN THE WORKFLOW ---

# 1. Point to the directory and get the list of STRING paths
dataset_path = Path("/kaggle/working/local_datasets/")
image_paths = list(dataset_path.glob("**/*.jpg")) + list(dataset_path.glob("**/*.jpeg"))
print(f"Found {len(image_paths)} images.")

# 2. Loop through the paths and create the final Python list directly
print("Creating the final dataset list...")
final_dataset_list = []
# Use tqdm for a progress bar
for path in tqdm(image_paths, desc="Processing images"):
    class_folder_name = path.parent.name
    final_dataset_list.append(create_conversation_dict(path, class_folder_name))

print("\n✅ Dataset preparation complete!")
print("\nExample of the final data format:")
# We print the structure to confirm the PIL Image object is now inside
print(final_dataset_list[0])

Found 176 images.
Creating the final dataset list...


Processing images: 100%|██████████| 176/176 [00:10<00:00, 17.47it/s]


✅ Dataset preparation complete!

Example of the final data format:
{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': 'What is the condition of this maize plant?'}, {'type': 'image', 'image': <PIL.Image.Image image mode=RGB size=1080x1920 at 0x7B36CDEB19D0>}]}, {'role': 'assistant', 'content': [{'type': 'text', 'text': 'This is a Healthy Maize Plant.'}]}]}





In [6]:
# ==============================================================================
# CELL 5: Fine-tune the "Unsloth Way"
# ==============================================================================
from trl import SFTTrainer, SFTConfig

from unsloth.trainer import UnslothVisionDataCollator # The key import
from unsloth import FastModel # We need this for the next line

print("\nConfiguring the SFTTrainer the Unsloth Way for Vision...")

# We must tell Unsloth the model is now being used for training
FastModel.for_training(model)

trainer = SFTTrainer(
        model = model,
        train_dataset = final_dataset_list,
        processing_class=processor,

      
 
        data_collator=UnslothVisionDataCollator(model, processor=processor),
     args = SFTConfig(
        #max_steps = 400, 
        report_to = "none",

        num_train_epochs = 18,
        learning_rate = 1e-4,
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 4,
        gradient_checkpointing = False,
        
        # Required Vision arguments from the docs
        remove_unused_columns = False,
        dataset_text_field = "",
        dataset_kwargs = {"skip_prepare_dataset": True},
        max_seq_length = 1024,
        
        # Standard arguments
        warmup_ratio = 0.1,
        optim = "adamw_torch_fused",
        output_dir = "outputs",
        save_strategy = "steps",
        save_steps = 100,
        seed = 3407,
    ),
)

print("\n🔥 Starting training run...")
trainer.train()
print("✅ Training complete!")

# ==============================================================================
# CELL 6: Save Your Adapters
# ==============================================================================
output_save_dir = "/kaggle/working/maize_expert_adapters"
print(f"Saving final LoRA adapters to {output_save_dir}...")
model.save_pretrained(output_save_dir)
tokenizer.save_pretrained(output_save_dir)
print("✅ Model adapters saved.")

# Zip and prepare for download
print("\nZipping adapters for download...")
!zip -r maize_expert_adapters.zip {output_save_dir}
print("✅ Zipping complete. Find 'maize_expert_adapters.zip' in the Output section after saving.")



Configuring the SFTTrainer the Unsloth Way for Vision...
Unsloth: Model does not have a default image size - using 512

🔥 Starting training run...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 176 | Num Epochs = 18 | Total steps = 396
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 22,904,832 of 5,462,343,104 (0.42% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,10.9885
2,10.9984
3,11.0609
4,11.0607
5,11.0104
6,10.9983
7,10.9223
8,10.9913
9,11.0008
10,11.0261


✅ Training complete!
Saving final LoRA adapters to /kaggle/working/maize_expert_adapters...
✅ Model adapters saved.

Zipping adapters for download...
  adding: kaggle/working/maize_expert_adapters/ (stored 0%)
  adding: kaggle/working/maize_expert_adapters/chat_template.jinja (deflated 71%)
  adding: kaggle/working/maize_expert_adapters/adapter_config.json (deflated 57%)
  adding: kaggle/working/maize_expert_adapters/processor_config.json (deflated 23%)
  adding: kaggle/working/maize_expert_adapters/tokenizer_config.json (deflated 97%)
  adding: kaggle/working/maize_expert_adapters/preprocessor_config.json (deflated 56%)
  adding: kaggle/working/maize_expert_adapters/tokenizer.json

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


 (deflated 83%)
  adding: kaggle/working/maize_expert_adapters/README.md (deflated 65%)
  adding: kaggle/working/maize_expert_adapters/adapter_model.safetensors (deflated 13%)
  adding: kaggle/working/maize_expert_adapters/tokenizer.model (deflated 52%)
  adding: kaggle/working/maize_expert_adapters/special_tokens_map.json (deflated 77%)
✅ Zipping complete. Find 'maize_expert_adapters.zip' in the Output section after saving.


In [7]:
# --- THIS IS THE MAIN FIX ---
# Point to the new local copy of your adapters
adapters_local_path = "/kaggle/working/maize_expert_adapters"

# 2. Apply your trained adapters from the unzipped folder
print(f"Loading your fine-tuned adapters from: {adapters_local_path}...")
model.load_adapter(adapters_local_path, adapter_name="aura_mind")
print("✅ Adapters loaded successfully.")
# -----------------------------

# 3. Merge the adapters into the base model to create the final, full model
output_merged_dir = "/kaggle/working/gemma-3N-finetuned-merged"
print(f"\n🔥 Starting merge process... Saving full model to {output_merged_dir}")
print("This is the long, memory-intensive step. Please be patient.")

# We specify 'merged_16bit' to save it in a common, usable format.
model.save_pretrained_merged(output_merged_dir, tokenizer, save_method = "merged_16bit")

print(f"\n🎉 SUCCESS! Your final, fine-tuned model has been saved to '{output_merged_dir}'.")

Loading your fine-tuned adapters from: /kaggle/working/maize_expert_adapters...
✅ Adapters loaded successfully.

🔥 Starting merge process... Saving full model to /kaggle/working/gemma-3N-finetuned-merged
This is the long, memory-intensive step. Please be patient.
Found HuggingFace hub cache directory: /root/.cache/huggingface/hub
Checking cache directory for required files...
Cache check failed: model-00001-of-00003.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Downloading safetensors index for unsloth/gemma-3n-e2b-it...


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Unsloth: Merging weights into 16bit:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/3.08G [00:00<?, ?B/s]

Unsloth: Merging weights into 16bit:  33%|███▎      | 1/3 [00:21<00:42, 21.48s/it]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Unsloth: Merging weights into 16bit:  67%|██████▋   | 2/3 [01:02<00:32, 32.94s/it]

model-00003-of-00003.safetensors:   0%|          | 0.00/2.82G [00:00<?, ?B/s]

Unsloth: Merging weights into 16bit: 100%|██████████| 3/3 [01:31<00:00, 30.58s/it]



🎉 SUCCESS! Your final, fine-tuned model has been saved to '/kaggle/working/gemma-3N-finetuned-merged'.
