In [1]:
%%capture
# Install latest transformers for Gemma 3N
!pip install --no-deps git+https://github.com/huggingface/transformers.git # Only for Gemma 3N
!pip install --no-deps --upgrade timm # Only for Gemma 3N

In [2]:
%%capture
# ==============================================================================
# CELL 1: Install all necessary libraries (Same as Colab)
# ==============================================================================

!pip install wandb -qU
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3  trl triton cut_cross_entropy 
!pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
!pip install -U peft
!pip install --force-reinstall --no-deps git+https://github.com/unslothai/unsloth-zoo.git
!pip install --force-reinstall --no-deps git+https://github.com/unslothai/unsloth.git

In [3]:
# ==============================================================================
# CELL 2: Login to Weights & Biases
# ==============================================================================
import wandb
from kaggle_secrets import UserSecretsClient

# --- PRE-REQUISITE ---
# 1. Add your W&B API key as a secret in Kaggle with the label "wandb_api_key".
# 2. This keeps your key secure and private.
# ---------------------

try:
    user_secrets = UserSecretsClient()
    wandb_api_key = user_secrets.get_secret("wandb_api_key")
    wandb.login(key=wandb_api_key)
    print("✅ Successfully logged into Weights & Biases.")
except Exception as e:
    print("Could not log into W&B. Please ensure the 'wandb_api_key' secret is set in your Kaggle notebook.")
    print(f"Error: {e}")

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mjdmasciano2[0m ([33mjdmasciano2-university-of-lagos[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


✅ Successfully logged into Weights & Biases.


In [4]:

# ==============================================================================
# CELL 3: Copy Datasets to Working Directory (No Changes)
# ==============================================================================
from datasets import Dataset, Image as HFImage
from pathlib import Path
import os

# --- NEW STEP: Copy data to the faster working directory ---
source_path = "/kaggle/input/maize-dataset/"
local_path = "/kaggle/working/local_datasets/"

if not os.path.exists(local_path):
    print(f"Copying data from {source_path} to {local_path} for faster access...")
    !cp -r {source_path} {local_path}
    print("✅ Data copy complete.")
else:
    print(f"✅ Data already copied to {local_path}")
# ---------------------------------------------------------


# --- NEW STEP: Copy data to the faster working directory ---
source_path2 = "/kaggle/input/maize-adapters/kaggle/working/maize_expert_adapters"
local_path2 = "/kaggle/working/adapters_datasets/"

if not os.path.exists(local_path2):
    print(f"Copying data from {source_path2} to {local_path2} for faster access...")
    !cp -r {source_path2} {local_path2}
    print("✅ Data copy complete.")
else:
    print(f"✅ Data already copied to {local_path2}")
# ---------------------------------------------------------

Copying data from /kaggle/input/maize-dataset/ to /kaggle/working/local_datasets/ for faster access...
✅ Data copy complete.
Copying data from /kaggle/input/maize-adapters/kaggle/working/maize_expert_adapters to /kaggle/working/adapters_datasets/ for faster access...
✅ Data copy complete.


In [5]:
# ==============================================================================
# CELL 4: Prepare the Dataset as a Python List (FINAL CORRECTED VERSION)
# ==============================================================================
from pathlib import Path
from PIL import Image
from tqdm import tqdm

# --- DEFINE THE FUNCTION FIRST ---
def create_conversation_dict(image_path, class_name):
    """Creates the final dictionary structure for a single sample."""
    display_name = CLASS_NAME_MAPPING.get(class_name, "Unknown Maize Condition")
    
    # Load the actual image object here
    pil_image = Image.open(image_path).convert("RGB")
    
    return {
        "messages": [
            { "role": "user",
              "content": [
                {"type": "text", "text": "What is the condition of this maize plant?"},
                # The PIL Image object goes directly here
                {"type": "image", "image": pil_image}
              ]
            },
            { "role": "assistant",
              "content": [
                {"type": "text", "text": f"This is a {display_name}."}
              ]
            },
        ]
    }

# --- THEN, DEFINE YOUR MAPPING ---
CLASS_NAME_MAPPING = {
    "maize_healthy": "Healthy Maize Plant",
    "phosphorus_deficiency": "Maize Phosphorus Deficiency",
}

# --- FINALLY, RUN THE WORKFLOW ---

# 1. Point to the directory and get the list of STRING paths
dataset_path = Path("/kaggle/working/local_datasets/")
image_paths = list(dataset_path.glob("**/*.jpg")) + list(dataset_path.glob("**/*.jpeg"))
print(f"Found {len(image_paths)} images.")

# 2. Loop through the paths and create the final Python list directly
print("Creating the final dataset list...")
final_dataset_list = []
# Use tqdm for a progress bar
for path in tqdm(image_paths, desc="Processing images"):
    class_folder_name = path.parent.name
    final_dataset_list.append(create_conversation_dict(path, class_folder_name))

print("\n✅ Dataset preparation complete!")
print("\nExample of the final data format:")
# We print the structure to confirm the PIL Image object is now inside
print(final_dataset_list[0])

Found 176 images.
Creating the final dataset list...


Processing images: 100%|██████████| 176/176 [00:10<00:00, 16.97it/s]


✅ Dataset preparation complete!

Example of the final data format:
{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': 'What is the condition of this maize plant?'}, {'type': 'image', 'image': <PIL.Image.Image image mode=RGB size=4160x3120 at 0x7EFDA5A854D0>}]}, {'role': 'assistant', 'content': [{'type': 'text', 'text': 'This is a Maize Phosphorus Deficiency.'}]}]}





In [6]:
# ==============================================================================
# CELL 5: Define the W&B Sweep Configuration
# ==============================================================================
import yaml

# Here, we define the hyperparameters we want to search over.
# W&B will automatically try different combinations based on the 'method'.
# Method can be 'random', 'grid', or 'bayes'. 'bayes' is often a great choice.
sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'train/loss',  # The TRL trainer logs loss to this key
        'goal': 'minimize'
    },
    'parameters': {
        'learning_rate': {
            'distribution': 'log_uniform_values',
            'min': 5e-6,
            'max': 5e-4
        },
        'num_train_epochs': {
            'values': [10, 15, 18, 20]
        },
        'lora_r': {
            'values': [8, 16, 32]
        },
        'lora_alpha_multiplier': {
             'values': [1, 2]
        }
    }
}

print("Sweep Configuration:")
print(yaml.dump(sweep_config))

# Initialize the sweep on the W&B server
sweep_id = wandb.sweep(sweep_config, project="maize-expert-sweep")

Sweep Configuration:
method: bayes
metric:
  goal: minimize
  name: train/loss
parameters:
  learning_rate:
    distribution: log_uniform_values
    max: 0.0005
    min: 5.0e-06
  lora_alpha_multiplier:
    values:
    - 1
    - 2
  lora_r:
    values:
    - 8
    - 16
    - 32
  num_train_epochs:
    values:
    - 10
    - 15
    - 18
    - 20

Create sweep with ID: jgxtuq4b
Sweep URL: https://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep/sweeps/jgxtuq4b


In [7]:
# ==============================================================================
# CELL 6: Create the Main Training Function for the W&B Agent (with Memory Cleanup)
# ==============================================================================
from unsloth import FastVisionModel, FastModel
from transformers import AutoProcessor
from trl import SFTTrainer, SFTConfig
from unsloth.trainer import UnslothVisionDataCollator
import torch
import gc # Import the garbage collector module

def train():
    """
    This function is called by the W&B agent. It contains the entire
    model setup, training, and saving logic.
    A `try...finally` block has been added to ensure robust memory cleanup
    after each run completes or fails.
    """
    # Initialize variables to None to ensure they exist for the 'finally' block
    model, processor, trainer, run = None, None, None, None
    
    try:
        # --- Start of Original "Holy Grail" Code ---
        
        # Initialize a new W&B run with hyperparameters from the sweep
        # We will use wandb.init() and manually call run.finish() in the finally block
        run = wandb.init()
        
        # Get the hyperparameters for this specific run
        WANDB_CONFIG = wandb.config
        lora_r_value = WANDB_CONFIG.lora_r
        learning_rate_value = WANDB_CONFIG.learning_rate
        epochs_value = WANDB_CONFIG.num_train_epochs
        # lora_alpha is often set to 2*r, a common practice
        lora_alpha_value = lora_r_value * WANDB_CONFIG.lora_alpha_multiplier

        MODEL_NAME = "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit"

        print("--- New W&B Run ---")
        print(f"Parameters: LR={learning_rate_value}, Epochs={epochs_value}, LoRA r={lora_r_value}, LoRA alpha={lora_alpha_value}")

        # 1. Load Model, Tokenizer, and Processor
        model, tokenizer = FastVisionModel.from_pretrained(
            model_name=MODEL_NAME,
            max_seq_length=2048,
            dtype=None,
            load_in_4bit=True,
        )
        processor = AutoProcessor.from_pretrained(MODEL_NAME)
        print("✅ Base model, tokenizer, and processor loaded.")

        # 2. Add PEFT adapters with values from the sweep config
        model = FastVisionModel.get_peft_model(
            model,
            r=lora_r_value,
            lora_alpha=lora_alpha_value,
            finetune_vision_layers=True,
            finetune_language_layers=True,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        )
        print("✅ PEFT adapters added.")

        # 3. Configure and run the trainer
        FastModel.for_training(model)
        trainer = SFTTrainer(
            model=model,
            train_dataset=final_dataset_list,  # Use the globally prepared dataset
            processing_class=processor,
            data_collator=UnslothVisionDataCollator(model, processor=processor),
            args=SFTConfig(
                output_dir=f"./outputs_{run.name}",
                report_to="wandb",
                num_train_epochs=epochs_value,
                learning_rate=learning_rate_value,
                per_device_train_batch_size=2,
                gradient_accumulation_steps=4,
                gradient_checkpointing=False,
                remove_unused_columns=False,
                dataset_text_field="",
                dataset_kwargs={"skip_prepare_dataset": True},
                max_seq_length=1024,
                warmup_ratio=0.1,
                optim="adamw_torch_fused",
                save_strategy="no", # We save manually at the end
                seed=3407,
            ),
        )

        print(f"\n🔥 Starting training run: {run.name}...")
        trainer.train()
        print("✅ Training complete!")

        # 4. Save adapters and log them as a W&B Artifact
        output_save_dir = f"/kaggle/working/maize_expert_adapters_{run.name}"
        model.save_pretrained(output_save_dir)
        tokenizer.save_pretrained(output_save_dir)
        print(f"✅ Model adapters saved to {output_save_dir}")

        artifact = wandb.Artifact(f'maize-adapters-{run.name}', type='model')
        artifact.add_dir(output_save_dir)
        run.log_artifact(artifact)
        print("✅ Adapters logged as a W&B Artifact.")

        # --- End of Original "Holy Grail" Code ---
        
    finally:
        # =================================================================
        # MINIMAL ADDITION: ROBUST MEMORY CLEANUP
        # This block will execute after every run, ensuring a clean slate
        # for the next one, preventing CUDA out-of-memory errors.
        # =================================================================
        print("\n🧹 Starting cleanup for next run...")
        
        # Properly finish the wandb run
        if run:
            run.finish()
        
        # Delete the large objects to free their references
        del model
        del trainer
        del processor
        
        # Trigger Python's garbage collector to reclaim memory
        gc.collect()
        
        # Explicitly empty the CUDA cache to free up GPU memory
        torch.cuda.empty_cache()
        
        print("✅ Memory cleared. Ready for the next agent run.")


# Execute the sweep agent
# This part remains completely unchanged.
wandb.agent(sweep_id, function=train, count=5)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-07-28 22:03:18.868820: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753740199.225826      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753740199.330762      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!


[34m[1mwandb[0m: Agent Starting Run: a860i4eu with config:
[34m[1mwandb[0m: 	learning_rate: 2.9971431790259765e-05
[34m[1mwandb[0m: 	lora_alpha_multiplier: 1
[34m[1mwandb[0m: 	lora_r: 8
[34m[1mwandb[0m: 	num_train_epochs: 15
[34m[1mwandb[0m: Tracking run with wandb version 0.21.0
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250728_220349-a860i4eu[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33myoung-sweep-1[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep[0m
[34m[1mwandb[0m: 🧹 View sweep at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep/sweeps/jgxtuq4b[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep/runs/a860i4eu[0m


--- New W&B Run ---
Parameters: LR=2.9971431790259765e-05, Epochs=15, LoRA r=8, LoRA alpha=8
==((====))==  Unsloth 2025.7.9: Fast Gemma3N patching. Transformers: 4.55.0.dev0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Gemma3N does not support SDPA - switching to eager!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/2.65G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/469M [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/98.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/98.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

✅ Base model, tokenizer, and processor loaded.
Unsloth: Making `model.base_model.model.model.language_model` require gradients
✅ PEFT adapters added.
Unsloth: Model does not have a default image size - using 512

🔥 Starting training run: young-sweep-1...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 176 | Num Epochs = 15 | Total steps = 165
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 11,452,416 of 5,450,890,688 (0.21% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,10.9792
2,11.0591
3,11.0203
4,11.0303
5,11.0506
6,11.0448
7,10.9917
8,11.0002
9,11.0012
10,10.9808


✅ Training complete!


[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/maize_expert_adapters_young-sweep-1)... 

✅ Model adapters saved to /kaggle/working/maize_expert_adapters_young-sweep-1


Done. 0.2s


✅ Adapters logged as a W&B Artifact.

🧹 Starting cleanup for next run...


[34m[1mwandb[0m: uploading artifact maize-adapters-young-sweep-1
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         train/epoch ▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆████
[34m[1mwandb[0m:   train/global_step ▁▁▁▁▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇███
[34m[1mwandb[0m:     train/grad_norm    █▃▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: train/learning_rate ▁▄▅▆██████▇▇▇▆▆▆▆▅▅▅▄▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁▁
[34m[1mwandb[0m:          train/loss ████▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:               total_flos 1.318259011120128e+16
[34m[1mwandb[0m:              train/epoch 15
[34m[1mwandb[0m:        train/global_step 165
[34m[1mwandb[0m:          train/grad_norm 0.01816
[34m[1mwandb[0m:      train/learning_rate 0.0
[34m[1mwandb[0m:               train/loss 0
[34m

✅ Memory cleared. Ready for the next agent run.


[34m[1mwandb[0m: Agent Starting Run: 1zd2mzl2 with config:
[34m[1mwandb[0m: 	learning_rate: 6.384565250687325e-06
[34m[1mwandb[0m: 	lora_alpha_multiplier: 2
[34m[1mwandb[0m: 	lora_r: 16
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: Tracking run with wandb version 0.21.0
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250728_225144-1zd2mzl2[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33msoft-sweep-2[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep[0m
[34m[1mwandb[0m: 🧹 View sweep at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep/sweeps/jgxtuq4b[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep/runs/1zd2mzl2[0m


--- New W&B Run ---
Parameters: LR=6.384565250687325e-06, Epochs=10, LoRA r=16, LoRA alpha=32
==((====))==  Unsloth 2025.7.9: Fast Gemma3N patching. Transformers: 4.55.0.dev0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Gemma3N does not support SDPA - switching to eager!


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Base model, tokenizer, and processor loaded.
Unsloth: Making `model.base_model.model.model.language_model` require gradients
✅ PEFT adapters added.
Unsloth: Model does not have a default image size - using 512

🔥 Starting training run: soft-sweep-2...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 176 | Num Epochs = 10 | Total steps = 110
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 22,904,832 of 5,462,343,104 (0.42% trained)


Step,Training Loss
1,10.9811
2,11.0591
3,11.0203
4,11.0303
5,11.0506
6,11.0448
7,10.9917
8,11.0002
9,11.0012
10,10.9808


✅ Training complete!


[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/maize_expert_adapters_soft-sweep-2)... 

✅ Model adapters saved to /kaggle/working/maize_expert_adapters_soft-sweep-2


Done. 0.4s


✅ Adapters logged as a W&B Artifact.

🧹 Starting cleanup for next run...


[34m[1mwandb[0m: uploading artifact maize-adapters-soft-sweep-2
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         train/epoch ▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
[34m[1mwandb[0m:   train/global_step ▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
[34m[1mwandb[0m:     train/grad_norm      █▂▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: train/learning_rate ▂▃▅▅███▇▇▇▇▇▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁
[34m[1mwandb[0m:          train/loss ██████▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:               total_flos 8823102473687040.0
[34m[1mwandb[0m:              train/epoch 10
[34m[1mwandb[0m:        train/global_step 110
[34m[1mwandb[0m:          train/grad_norm 2.03586
[34m[1mwandb[0m:      train/learning_rate 0.0
[34m[1mwandb[0m:               train/loss 0.0002
[34m

✅ Memory cleared. Ready for the next agent run.


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: luyw8zqt with config:
[34m[1mwandb[0m: 	learning_rate: 1.0206824506488766e-05
[34m[1mwandb[0m: 	lora_alpha_multiplier: 2
[34m[1mwandb[0m: 	lora_r: 32
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: Tracking run with wandb version 0.21.0
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250728_232219-luyw8zqt[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33matomic-sweep-3[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep[0m
[34m[1mwandb[0m: 🧹 View sweep at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep/sweeps/jgxtuq4b[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep/runs/luyw8zqt

--- New W&B Run ---
Parameters: LR=1.0206824506488766e-05, Epochs=10, LoRA r=32, LoRA alpha=64
==((====))==  Unsloth 2025.7.9: Fast Gemma3N patching. Transformers: 4.55.0.dev0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Gemma3N does not support SDPA - switching to eager!


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Base model, tokenizer, and processor loaded.
Unsloth: Making `model.base_model.model.model.language_model` require gradients
✅ PEFT adapters added.
Unsloth: Model does not have a default image size - using 512

🔥 Starting training run: atomic-sweep-3...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 176 | Num Epochs = 10 | Total steps = 110
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 45,809,664 of 5,485,247,936 (0.84% trained)


Step,Training Loss
1,10.9811
2,11.0591
3,11.0203
4,11.0303
5,11.0506
6,11.0448
7,10.9917
8,11.0002
9,11.0012
10,10.9808


✅ Training complete!


[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/maize_expert_adapters_atomic-sweep-3)... 

✅ Model adapters saved to /kaggle/working/maize_expert_adapters_atomic-sweep-3


Done. 0.7s


✅ Adapters logged as a W&B Artifact.

🧹 Starting cleanup for next run...


[34m[1mwandb[0m: uploading artifact maize-adapters-atomic-sweep-3
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         train/epoch ▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
[34m[1mwandb[0m:   train/global_step ▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
[34m[1mwandb[0m:     train/grad_norm       █▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: train/learning_rate ▁▂▃▄▅▇███▇▇▇▇▇▇▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁
[34m[1mwandb[0m:          train/loss █████▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:               total_flos 8892520606126080.0
[34m[1mwandb[0m:              train/epoch 10
[34m[1mwandb[0m:        train/global_step 110
[34m[1mwandb[0m:          train/grad_norm 0.05253
[34m[1mwandb[0m:      train/learning_rate 0.0
[34m[1mwandb[0m:               train/loss 0.0001
[3

✅ Memory cleared. Ready for the next agent run.


[34m[1mwandb[0m: Agent Starting Run: ef859az1 with config:
[34m[1mwandb[0m: 	learning_rate: 0.00016370994998364015
[34m[1mwandb[0m: 	lora_alpha_multiplier: 1
[34m[1mwandb[0m: 	lora_r: 32
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: Tracking run with wandb version 0.21.0
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250728_235219-ef859az1[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33melectric-sweep-4[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep[0m
[34m[1mwandb[0m: 🧹 View sweep at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep/sweeps/jgxtuq4b[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep/runs/ef859az1[0m


--- New W&B Run ---
Parameters: LR=0.00016370994998364015, Epochs=10, LoRA r=32, LoRA alpha=32
==((====))==  Unsloth 2025.7.9: Fast Gemma3N patching. Transformers: 4.55.0.dev0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Gemma3N does not support SDPA - switching to eager!


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Base model, tokenizer, and processor loaded.
Unsloth: Making `model.base_model.model.model.language_model` require gradients
✅ PEFT adapters added.
Unsloth: Model does not have a default image size - using 512

🔥 Starting training run: electric-sweep-4...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 176 | Num Epochs = 10 | Total steps = 110
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 45,809,664 of 5,485,247,936 (0.84% trained)


Step,Training Loss
1,10.9811
2,11.0591
3,11.0203
4,11.0303
5,11.0506
6,11.0448
7,10.9917
8,11.0002
9,11.0012
10,10.9808


✅ Training complete!


[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/maize_expert_adapters_electric-sweep-4)... 

✅ Model adapters saved to /kaggle/working/maize_expert_adapters_electric-sweep-4


Done. 0.7s


✅ Adapters logged as a W&B Artifact.

🧹 Starting cleanup for next run...


[34m[1mwandb[0m: uploading artifact maize-adapters-electric-sweep-4
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         train/epoch ▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇█████
[34m[1mwandb[0m:   train/global_step ▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
[34m[1mwandb[0m:     train/grad_norm      █▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: train/learning_rate ▁▂▄▆▇██▇▇▇▇▇▇▆▆▆▆▅▅▅▅▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁
[34m[1mwandb[0m:          train/loss ██████▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:               total_flos 8892520606126080.0
[34m[1mwandb[0m:              train/epoch 10
[34m[1mwandb[0m:        train/global_step 110
[34m[1mwandb[0m:          train/grad_norm 0.00199
[34m[1mwandb[0m:      train/learning_rate 0.0
[34m[1mwandb[0m:               train/loss 0
[34m

✅ Memory cleared. Ready for the next agent run.


[34m[1mwandb[0m: Agent Starting Run: ow0kjgb9 with config:
[34m[1mwandb[0m: 	learning_rate: 4.047772021681469e-05
[34m[1mwandb[0m: 	lora_alpha_multiplier: 1
[34m[1mwandb[0m: 	lora_r: 16
[34m[1mwandb[0m: 	num_train_epochs: 15
[34m[1mwandb[0m: Tracking run with wandb version 0.21.0
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250729_002221-ow0kjgb9[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mbrisk-sweep-5[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep[0m
[34m[1mwandb[0m: 🧹 View sweep at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep/sweeps/jgxtuq4b[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/maize-expert-sweep/runs/ow0kjgb9[0m


--- New W&B Run ---
Parameters: LR=4.047772021681469e-05, Epochs=15, LoRA r=16, LoRA alpha=16
==((====))==  Unsloth 2025.7.9: Fast Gemma3N patching. Transformers: 4.55.0.dev0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Gemma3N does not support SDPA - switching to eager!


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Base model, tokenizer, and processor loaded.
Unsloth: Making `model.base_model.model.model.language_model` require gradients
✅ PEFT adapters added.
Unsloth: Model does not have a default image size - using 512

🔥 Starting training run: brisk-sweep-5...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 176 | Num Epochs = 15 | Total steps = 165
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 22,904,832 of 5,462,343,104 (0.42% trained)


Step,Training Loss
1,10.9811
2,11.0591
3,11.0203
4,11.0303
5,11.0506
6,11.0448
7,10.9917
8,11.0002
9,11.0012
10,10.9808


✅ Training complete!


[34m[1mwandb[0m: Adding directory to artifact (/kaggle/working/maize_expert_adapters_brisk-sweep-5)... 

✅ Model adapters saved to /kaggle/working/maize_expert_adapters_brisk-sweep-5


Done. 0.4s


✅ Adapters logged as a W&B Artifact.

🧹 Starting cleanup for next run...


[34m[1mwandb[0m: uploading artifact maize-adapters-brisk-sweep-5
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         train/epoch ▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇██
[34m[1mwandb[0m:   train/global_step ▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇█
[34m[1mwandb[0m:     train/grad_norm        █▇█▃▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: train/learning_rate ▁▁▂▃▄▆▇▇██▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▅▄▄▃▃▃▂▂▂▂▂▂▂▁
[34m[1mwandb[0m:          train/loss ████▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:               total_flos 1.323465371053056e+16
[34m[1mwandb[0m:              train/epoch 15
[34m[1mwandb[0m:        train/global_step 165
[34m[1mwandb[0m:          train/grad_norm 0.00609
[34m[1mwandb[0m:      train/learning_rate 0.0
[34m[1mwandb[0m:               train/loss 0
[34m

✅ Memory cleared. Ready for the next agent run.
