In [3]:
# ==============================================================================
# CELL 0: ENVIRONMENT SETUP (THE DEFINITIVE STABILITY FIX)
# This cell MUST be run first.
# ==============================================================================
import os

# Expose only the first CUDA device (index 0) to this process, so libraries
# that enumerate GPUs afterwards (PyTorch, Unsloth, Accelerate) see one device.
# The variable is set before those libraries are imported in later cells.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

print(
    "✅ Environment configured to use only a single GPU (GPU 0).\n"
    "This will prevent multi-GPU errors."
)

✅ Environment configured to use only a single GPU (GPU 0).
This will prevent multi-GPU errors.


In [4]:
%%capture
# Install latest transformers for Gemma 3N
!pip install transformers=4.45.1
!pip install --no-deps git+https://github.com/huggingface/transformers.git -qU # Only for Gemma 3N 
!pip install --no-deps --upgrade timm # Only for Gemma 3N

In [5]:
%%capture
# ==============================================================================
# CELL 1: Install all necessary libraries (Same as Colab)
# ==============================================================================

# Experiment tracking (wandb) and evaluation logging (weave).
!pip install wandb -qU
!pip install weave
# Training stack installed with --no-deps so pip does not pull in or replace
# their dependencies; only xformers is version-pinned here.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3  trl triton cut_cross_entropy 
# Tokenizer / dataset / Hugging Face Hub helpers.
!pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
!pip install -U peft
# Alternative (disabled): install unsloth and unsloth-zoo straight from GitHub.
#!pip install --force-reinstall --no-deps git+https://github.com/unslothai/unsloth-zoo.git
#!pip install --force-reinstall --no-deps git+https://github.com/unslothai/unsloth.git
# Pinned Unsloth releases; these match the "Unsloth 2025.7.10" banner in the training log.
!pip install unsloth==2025.7.10 unsloth-zoo==2025.7.11 --no-cache -q

In [6]:
# ==============================================================================
# CELL 2: Login to Weights & Biases
# ==============================================================================
import wandb
from kaggle_secrets import UserSecretsClient

# --- PRE-REQUISITE ---
# Store your W&B API key as a Kaggle secret labelled "wandb_api_key" so the key
# itself never appears in the notebook.
# ---------------------

try:
    user_secrets = UserSecretsClient()
    wandb_api_key = user_secrets.get_secret("wandb_api_key")
    wandb.login(key=wandb_api_key)
except Exception as e:
    # Best effort: report the problem and let the notebook keep running.
    print(
        "Could not log into W&B. Please ensure the 'wandb_api_key' secret is set in your Kaggle notebook.\n"
        f"Error: {e}"
    )
else:
    print("✅ Successfully logged into Weights & Biases.")

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mjdmasciano2[0m ([33mjdmasciano2-university-of-lagos[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


✅ Successfully logged into Weights & Biases.


In [7]:

# ==============================================================================
# CELL 3: Copy Datasets to Working Directory (No Changes)
# ==============================================================================
from datasets import Dataset, Image as HFImage
from pathlib import Path
import os

def copy_dataset_to_working_dir(source_dir, target_dir):
    """Copy a dataset directory tree into the working directory once.

    Args:
        source_dir: Directory to copy from.
        target_dir: Destination directory; if it already exists the copy is
            skipped.

    Returns:
        True if the tree was copied by this call, False if nothing was copied
        (destination already present, or source directory missing).

    Raises:
        OSError (or shutil.Error): if the copy itself fails part-way, so a
        failed copy is never reported as complete.
    """
    import shutil  # local import keeps this cell self-contained

    if os.path.exists(target_dir):
        print(f"✅ Data already copied to {target_dir}")
        return False

    if not os.path.isdir(source_dir):
        # Keep the notebook running (as the shell-based copy did), but say
        # clearly that no data was copied instead of printing a success message.
        print(f"⚠️ Source directory {source_dir} not found; nothing was copied.")
        return False

    print(f"Copying data from {source_dir} to {target_dir} for faster access...")
    shutil.copytree(source_dir, target_dir)
    print("✅ Data copy complete.")
    return True


# --- Copy the training data to the faster working directory ---
source_path = "/kaggle/input/maize-dataset/"
local_path = "/kaggle/working/local_datasets/"

# --- Copy the validation data to the faster working directory ---
source_path2 = "/kaggle/input/aura-mind-maize-validation/"
local_path2 = "/kaggle/working/validation_datasets/"

for _source_dir, _target_dir in ((source_path, local_path), (source_path2, local_path2)):
    copy_dataset_to_working_dir(_source_dir, _target_dir)
# ---------------------------------------------------------

✅ Data already copied to /kaggle/working/local_datasets/
✅ Data already copied to /kaggle/working/validation_datasets/


In [8]:
# ==============================================================================
# CELL 4: Prepare the Dataset as a Python List (FINAL CORRECTED VERSION)
# ==============================================================================
from pathlib import Path
from PIL import Image
from tqdm import tqdm

# --- DEFINE THE FUNCTION FIRST ---
def create_conversation_dict(image_path, class_name):
    """Build one chat-format training sample for an image.

    Args:
        image_path: Path of the image file to load.
        class_name: Class key looked up in CLASS_NAME_MAPPING; keys that are
            not in the mapping are labelled "Unknown Maize Condition".

    Returns:
        A dict with a "messages" list holding a user turn (instruction text
        plus the RGB PIL image) and an assistant turn (the label sentence).
    """
    label_text = CLASS_NAME_MAPPING.get(class_name, "Unknown Maize Condition")

    # The decoded RGB image object itself is embedded in the sample.
    rgb_image = Image.open(image_path).convert("RGB")

    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Classify the condition of this maize plant. Choose from: Healthy Maize Plant, Maize Phosphorus Deficiency."},
            {"type": "image", "image": rgb_image},
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "content": [{"type": "text", "text": f"This is a {label_text}."}],
    }
    return {"messages": [user_turn, assistant_turn]}

# --- THEN, DEFINE YOUR MAPPING ---
# Folder name -> human-readable label used in the assistant's answer.
CLASS_NAME_MAPPING = {
    "maize_healthy": "Healthy Maize Plant",
    "phosphorus_deficiency": "Maize Phosphorus Deficiency",
}

# --- FINALLY, RUN THE WORKFLOW ---

# 1. Point to the directory and collect the image files (pathlib.Path objects)
dataset_path = Path("/kaggle/working/local_datasets/")
image_paths = list(dataset_path.glob("**/*.jpg")) + list(dataset_path.glob("**/*.jpeg"))
print(f"Found {len(image_paths)} images.")

# Fail early with a clear message; otherwise the preview at the end of this
# cell would raise a bare IndexError on an empty list.
if not image_paths:
    raise FileNotFoundError(
        f"No .jpg/.jpeg images found under {dataset_path}. "
        "Check that the dataset was copied to the working directory."
    )

# 2. Loop through the paths and create the final Python list directly
print("Creating the final dataset list...")
final_dataset_list = []
# Use tqdm for a progress bar
for path in tqdm(image_paths, desc="Processing images"):
    # The name of the folder that directly contains the image is its class key.
    class_folder_name = path.parent.name
    final_dataset_list.append(create_conversation_dict(path, class_folder_name))

print("\n✅ Dataset preparation complete!")
print("\nExample of the final data format:")
# We print the structure to confirm the PIL Image object is now inside
print(final_dataset_list[0])

Found 176 images.
Creating the final dataset list...


Processing images: 100%|██████████| 176/176 [00:09<00:00, 18.66it/s]


✅ Dataset preparation complete!

Example of the final data format:
{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': 'Classify the condition of this maize plant. Choose from: Healthy Maize Plant, Maize Phosphorus Deficiency.'}, {'type': 'image', 'image': <PIL.Image.Image image mode=RGB size=4160x3120 at 0x7B04BA2EA990>}]}, {'role': 'assistant', 'content': [{'type': 'text', 'text': 'This is a Maize Phosphorus Deficiency.'}]}]}





In [9]:
# ==============================================================================
# CELL 5: Define the W&B Sweep Configuration (epoch-based, using num_train_epochs)
# ==============================================================================
import yaml
import wandb

# Bayesian search that maximises the 'eval_accuracy' metric.
sweep_config = dict(
    method='bayes',
    metric=dict(name='eval_accuracy', goal='maximize'),
    parameters=dict(
        learning_rate=dict(distribution='log_uniform_values', min=5e-6, max=1e-4),
        # A fixed epoch count is used for this small dataset; only 1 epoch is tried.
        num_train_epochs=dict(values=[1]),
        lora_r=dict(values=[16, 32, 64]),
        # LoRA alpha is derived in train() as lora_r * lora_alpha_multiplier.
        lora_alpha_multiplier=dict(values=[1, 2]),
        lora_dropout=dict(values=[0.05, 0.1]),
        weight_decay=dict(values=[0.01, 0.05]),
    ),
)

print("New Sweep Configuration (Epochs-based):")
print(yaml.dump(sweep_config))

# Register the sweep on the W&B server; the returned ID is what the agent runs.
sweep_id = wandb.sweep(sweep_config, project="e4b-correct-test-x4")

Sweep Configuration:
method: bayes
metric:
  goal: minimize
  name: train/loss
parameters:
  learning_rate:
    distribution: log_uniform_values
    max: 0.0001
    min: 5.0e-06
  lora_alpha_multiplier:
    values:
    - 1
    - 2
  lora_r:
    values:
    - 8
    - 16
    - 32
  num_train_epochs:
    values:
    - 1
    - 2
    - 3

Create sweep with ID: 3vfe4mo5
Sweep URL: https://wandb.ai/jdmasciano2-university-of-lagos/e4b-correct-test-3/sweeps/3vfe4mo5


In [10]:
# ==============================================================================
# CELL 6: EVALUATION FRAMEWORK (ULTIMATE SIMPLICITY VERSION)
# ==============================================================================
import weave
from PIL import Image
from pathlib import Path
from tqdm import tqdm
import torch
import transformers

# --- Build the evaluation dataset ---
# Folder name -> exact target sentence. This mapping has its own name so it does
# not overwrite CLASS_NAME_MAPPING, which create_conversation_dict() reads at
# call time when building training samples (its values are bare labels, not
# full sentences).
EVAL_TARGET_MAPPING = {
    "maize_healthy": "This is a Healthy Maize Plant.",
    "phosphorus_deficiency": "This is a Maize Phosphorus Deficiency.",
}
print("Building evaluation dataset from validation files...")
validation_data_path = Path("/kaggle/working/validation_datasets/")
eval_image_paths = list(validation_data_path.glob("**/*.jpg")) + list(validation_data_path.glob("**/*.jpeg"))
eval_dataset = []
for path in tqdm(eval_image_paths, desc="Processing validation images"):
    # The name of the folder that directly contains the image is its class key.
    class_folder_name = path.parent.name
    target_label = EVAL_TARGET_MAPPING.get(class_folder_name)
    # Images in folders that are not in the mapping are skipped.
    if target_label:
        eval_dataset.append({
            "image_path": str(path),
            "question": "Classify the condition of this maize plant. Choose from: Healthy Maize Plant, Maize Phosphorus Deficiency.",
            "target": target_label,
        })
print(f"✅ Created an evaluation dataset with {len(eval_dataset)} examples.")

# --- Define the Weave Model ---
from typing import Any  # imported here so this block works on its own


class MaizeExpertModel(weave.Model):
    """Weave model wrapper that answers a question about a single image.

    Attributes are deliberately untyped: any object with the methods used in
    predict() is accepted.
    """
    # `typing.Any` is the real "no constraint" annotation. The builtin `any`
    # used previously is a function, not a type.
    model: Any
    processor: Any

    @weave.op()
    @torch.inference_mode()
    def predict(self, image_path: str, question: str) -> dict:
        """Generate the model's answer for one image/question pair.

        Args:
            image_path: Path of the image file to classify.
            question: Instruction text sent together with the image.

        Returns:
            {"generated_text": answer}, where answer is the decoded text after
            the last "model\\n" marker, or "Could not parse." if the marker is
            not present in the decoded output.
        """
        image = Image.open(image_path).convert("RGB")
        messages = [{"role": "user", "content": [{"type": "text", "text": question}, {"type": "image", "image": image}]}]
        text_prompt = self.processor.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = self.processor(text=text_prompt, images=image, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(**inputs, max_new_tokens=20, use_cache=True)
        response = self.processor.batch_decode(outputs, skip_special_tokens=True)[0]
        # Everything after the last "model\n" marker is treated as the answer.
        prompt_marker = "model\n"
        answer_start_index = response.rfind(prompt_marker)
        final_answer = response[answer_start_index + len(prompt_marker):].strip() if answer_start_index != -1 else "Could not parse."
        return {"generated_text": final_answer}

# --- 4. Define the Intelligent Scorer ---
@weave.op()
def calculate_accuracy(target: str, output: dict) -> dict:
    """
    Score one prediction by keyword matching, so it is robust to phrasing.

    Args:
        target: Expected answer, e.g. "This is a Healthy Maize Plant.".
        output: Model output dict; its 'generated_text' entry is scored. A
            missing/None output or text is treated as an empty prediction.

    Returns:
        {"accuracy": 1} if the prediction contains both "maize" and the
        target's diagnostic keyword ("healthy" or "phosphorus"), else
        {"accuracy": 0}. Matching is case-insensitive.
    """
    prediction = ((output or {}).get('generated_text') or '').lower()

    # Extract the key diagnostic word from the target
    # Ex: "This is a Healthy Maize Plant." -> "healthy"
    # Ex: "This is a Maize Phosphorus Deficiency." -> "phosphorus"
    target_text = target.lower()
    if "healthy" in target_text:
        target_keyword = "healthy"
    elif "phosphorus" in target_text:
        target_keyword = "phosphorus"
    else:
        # Unknown target: an empty keyword would match every prediction
        # ("" is a substring of any string), so score it as incorrect instead.
        return {"accuracy": 0}

    # A prediction is correct if it contains BOTH "maize" and the target keyword
    is_correct = 1 if "maize" in prediction and target_keyword in prediction else 0
    return {"accuracy": is_correct}

# --- 5. Define the Async Evaluation Wrapper ---
async def evaluate_and_log(model, processor, eval_dataset):
    """Run the Weave evaluation and return its summary.

    Args:
        model: Model object wrapped by MaizeExpertModel for prediction.
        processor: Processor object wrapped by MaizeExpertModel.
        eval_dataset: List of example dicts with "image_path", "question"
            and "target" keys.

    Returns:
        The results object produced by weave.Evaluation.evaluate(), so the
        caller can read the metrics and log them.
    """
    print("\n🔬 Starting evaluation...")
    eval_model = MaizeExpertModel(model=model, processor=processor)
    evaluation = weave.Evaluation(dataset=eval_dataset, scorers=[calculate_accuracy])
    results = await evaluation.evaluate(eval_model)
    print(f"✅ Evaluation complete. Full results object: {results}")
    # Previously nothing was returned, so callers always received None.
    return results

print("✅ W&B Weave evaluation components are updated and ready.")

error: XDG_RUNTIME_DIR not set in the environment.
ALSA lib confmisc.c:855:(parse_card) cannot find card '0'
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_card_inum returned error: No such file or directory
ALSA lib confmisc.c:422:(snd_func_concat) error evaluating strings
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_concat returned error: No such file or directory
ALSA lib confmisc.c:1334:(snd_func_refer) error evaluating name
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_refer returned error: No such file or directory
ALSA lib conf.c:5701:(snd_config_expand) Evaluate error: No such file or directory
ALSA lib pcm.c:2664:(snd_pcm_open_noupdate) Unknown PCM default
ALSA lib confmisc.c:855:(parse_card) cannot find card '0'
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_card_inum returned error: No such file or directory
ALSA lib confmisc.c:422:(snd_func_concat) error evaluating strings
ALSA lib conf.c:5178:(_snd_config_evalu

Building evaluation dataset from validation files...


Processing validation images: 100%|██████████| 21/21 [00:00<00:00, 90061.74it/s]

✅ Created an evaluation dataset with 21 examples.
✅ W&B Weave evaluation components are updated and ready.





In [11]:
# ==============================================================================
# CELL 7: Create the Main Training Function for the W&B Agent (CORRECTED)
# ==============================================================================
from unsloth import FastVisionModel, FastModel
from transformers import AutoProcessor
from trl import SFTTrainer, SFTConfig
from unsloth.trainer import UnslothVisionDataCollator
import torch
import gc # Import the garbage collector module
import asyncio 


def train():
    """
    Run one W&B sweep trial end to end.

    Called by the W&B agent with no arguments. Hyperparameters are read from
    ``wandb.config``: learning_rate, num_train_epochs, lora_r,
    lora_alpha_multiplier, lora_dropout and weight_decay.

    Steps: load the 4-bit base model, attach LoRA adapters, train on
    ``final_dataset_list``, save the adapters and log them as a W&B artifact,
    run the Weave evaluation on ``eval_dataset`` and log ``eval_accuracy``
    (the metric the sweep optimises).

    The ``try...finally`` block finishes the W&B run and releases memory
    whether the trial completes or fails; exceptions still propagate.
    """
    # Pre-bind everything the 'finally' block deletes so cleanup never hits an
    # unbound name, even if the failure happens before the model is loaded.
    model = tokenizer = processor = trainer = run = None

    try:
        run = wandb.init()

        WANDB_CONFIG = wandb.config
        lora_r_value = WANDB_CONFIG.lora_r
        learning_rate_value = WANDB_CONFIG.learning_rate
        epochs_value = WANDB_CONFIG.num_train_epochs
        lora_alpha_value = lora_r_value * WANDB_CONFIG.lora_alpha_multiplier
        lora_dropout_value = WANDB_CONFIG.lora_dropout
        weight_decay_value = WANDB_CONFIG.weight_decay

        MODEL_NAME = "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit"

        print("--- New W&B Run ---")
        print(f"Parameters: LR={learning_rate_value}, Epochs Value={epochs_value}, LoRA r={lora_r_value}, LoRA alpha={lora_alpha_value}, Dropout={lora_dropout_value}, Weight Decay={weight_decay_value}")

        model, tokenizer = FastVisionModel.from_pretrained(
            model_name=MODEL_NAME,
            max_seq_length=2048,
            dtype=None,
            load_in_4bit=True,
            device_map={"": "cuda:0"},
        )
        processor = AutoProcessor.from_pretrained(MODEL_NAME)
        print("✅ Base model, tokenizer, and processor loaded.")

        model = FastVisionModel.get_peft_model(
            model,
            r=lora_r_value,
            lora_alpha=lora_alpha_value,
            # Swept value; previously read from the config but never applied.
            lora_dropout=lora_dropout_value,
            finetune_vision_layers=True,
            finetune_language_layers=True,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        )
        print("✅ PEFT adapters added.")

        FastModel.for_training(model)
        trainer = SFTTrainer(
            model=model,
            train_dataset=final_dataset_list,
            # Samples are already chat dicts with PIL images; the vision
            # collator turns them into model inputs.
            data_collator=UnslothVisionDataCollator(model, processor=processor),
            args=SFTConfig(
                output_dir=f"./outputs_{run.name}",
                report_to="wandb",
                num_train_epochs=epochs_value,
                learning_rate=learning_rate_value,
                # Swept value; previously read from the config but never applied.
                weight_decay=weight_decay_value,
                per_device_train_batch_size=2,
                gradient_accumulation_steps=4,
                gradient_checkpointing=False,
                remove_unused_columns=False,
                dataset_text_field="",
                dataset_kwargs={"skip_prepare_dataset": True},
                warmup_ratio=0.1,
                optim="adamw_torch_fused",
                save_strategy="no",
                seed=3407,
            ),
        )

        print(f"\n🔥 Starting training run: {run.name}...")
        trainer.train()
        print("✅ Training complete!")

        output_save_dir = f"/kaggle/working/maize_expert_adapters_{run.name}"
        model.save_pretrained(output_save_dir)
        tokenizer.save_pretrained(output_save_dir)
        print(f"✅ Model adapters saved to {output_save_dir}")

        artifact = wandb.Artifact(f'maize-adapters-{run.name}', type='model')
        artifact.add_dir(output_save_dir)
        run.log_artifact(artifact)
        print("✅ Adapters logged as a W&B Artifact.")

        # evaluate_and_log is async, so it is driven with asyncio.run().
        results = asyncio.run(evaluate_and_log(model, processor, eval_dataset))

        # Log the metric the sweep optimises. If the evaluation returned no
        # summary (None) or an unexpected structure, report it without
        # failing the whole trial.
        try:
            mean_accuracy = results['calculate_accuracy']['accuracy']['mean']
            wandb.log({"eval_accuracy": mean_accuracy})
            print(f"✅ Logged evaluation metrics to W&B: Accuracy={mean_accuracy}")
        except (KeyError, TypeError) as e:
            print(f"⚠️ Could not log evaluation metrics to W&B: {e}")

    finally:
        print("\n🧹 Starting cleanup for next run...")
        if run:
            run.finish()

        # Drop the local references to the large objects before collecting, so
        # the garbage collector and the CUDA cache can reclaim their memory.
        del trainer, model, processor, tokenizer

        gc.collect()
        torch.cuda.empty_cache()
        print("✅ Memory cleared. Ready for the next agent run.")



# Execute the sweep agent: call train() for up to 10 runs of the sweep
# identified by sweep_id.
wandb.agent(sweep_id, function=train, count=10)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-08-19 16:20:05.515341: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755620405.734343      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755620405.797632      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!


--- Starting Diagnostic Run (with Mixed Precision Fix) ---
Fixed Parameters: LR=2e-05, Epochs=1, LoRA r=16, LoRA alpha=32
==((====))==  Unsloth 2025.7.10: Fast Gemma3N patching. Transformers: 4.56.0.dev0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Gemma3N does not support SDPA - switching to eager!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/3.72G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/1.15G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/98.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/98.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

✅ Base model, tokenizer, and processor loaded.
Unsloth: Making `model.base_model.model.model.language_model` require gradients
✅ PEFT adapters added.
Unsloth: Model does not have a default image size - using 512

🔥 Starting training run: diagnostic-run-fp16-fix...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 176 | Num Epochs = 1 | Total steps = 22
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 40,189,952 of 7,890,168,144 (0.51% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,14.3983
2,14.3478
3,14.6269
4,14.5291
5,14.2445
6,14.4971
7,14.4726
8,14.3535
9,14.4302
10,14.4524


✅ Training complete!

🧹 Starting cleanup...


0,1
train/epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇███
train/global_step,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇███
train/grad_norm,█▁▁▁▁
train/learning_rate,▁▃▆██▇▇▇▆▆▅▅▅▄▄▄▃▃▂▂▂▁
train/loss,██████████████████▂▂▁▁

0,1
total_flos,1574542651646976.0
train/epoch,1.0
train/global_step,22.0
train/grad_norm,5354.84082
train/learning_rate,0.0
train/loss,1.2996
train_loss,12.24251
train_runtime,395.3886
train_samples_per_second,0.445
train_steps_per_second,0.056


✅ Memory cleared.


# ==============================================================================
# CELL 7: Create the Main Training Function for the W&B Agent (CORRECTED)
# ==============================================================================
from unsloth import FastVisionModel, FastModel
from transformers import AutoProcessor
from trl import SFTTrainer, SFTConfig
from unsloth.trainer import UnslothVisionDataCollator
import torch
import gc # Import the garbage collector module
import asyncio 


def train():
    """
    Run one W&B sweep trial end to end and log its evaluation metrics.

    NOTE: this redefines the ``train`` function from the previous cell;
    whichever definition was executed last is the one the agent calls.

    Called by the W&B agent with no arguments. Required hyperparameters are
    read from ``wandb.config``: learning_rate, num_train_epochs, lora_r and
    lora_alpha_multiplier. ``lora_dropout`` and ``weight_decay`` are applied
    when the sweep defines them; otherwise the library defaults are used.

    The ``try...finally`` block finishes the W&B run and releases memory
    whether the trial completes or fails; exceptions still propagate.
    """
    # Pre-bind everything the 'finally' block deletes so cleanup never hits an
    # unbound name, even if the failure happens before the model is loaded.
    model = tokenizer = processor = trainer = run = None

    try:
        run = wandb.init()

        WANDB_CONFIG = wandb.config
        lora_r_value = WANDB_CONFIG.lora_r
        learning_rate_value = WANDB_CONFIG.learning_rate
        epochs_value = WANDB_CONFIG.num_train_epochs
        lora_alpha_value = lora_r_value * WANDB_CONFIG.lora_alpha_multiplier

        # Optional swept parameters: forwarded only when present, so sweeps
        # that do not define them behave exactly as before.
        lora_dropout_value = WANDB_CONFIG.get("lora_dropout")
        weight_decay_value = WANDB_CONFIG.get("weight_decay")
        peft_overrides = {} if lora_dropout_value is None else {"lora_dropout": lora_dropout_value}
        sft_overrides = {} if weight_decay_value is None else {"weight_decay": weight_decay_value}

        MODEL_NAME = "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit"

        print("--- New W&B Run ---")
        print(f"Parameters: LR={learning_rate_value}, Epochs={epochs_value}, LoRA r={lora_r_value}, LoRA alpha={lora_alpha_value}")

        model, tokenizer = FastVisionModel.from_pretrained(
            model_name=MODEL_NAME,
            max_seq_length=2048,
            dtype=None,
            load_in_4bit=True,
            device_map={"": "cuda:0"},
        )
        processor = AutoProcessor.from_pretrained(MODEL_NAME)
        print("✅ Base model, tokenizer, and processor loaded.")

        model = FastVisionModel.get_peft_model(
            model,
            r=lora_r_value,
            lora_alpha=lora_alpha_value,
            finetune_vision_layers=True,
            finetune_language_layers=True,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            **peft_overrides,
        )
        print("✅ PEFT adapters added.")

        FastModel.for_training(model)
        trainer = SFTTrainer(
            model=model,
            train_dataset=final_dataset_list,
            # Samples are already chat dicts with PIL images; the vision
            # collator turns them into model inputs.
            data_collator=UnslothVisionDataCollator(model, processor=processor),
            args=SFTConfig(
                output_dir=f"./outputs_{run.name}",
                report_to="wandb",
                num_train_epochs=epochs_value,
                learning_rate=learning_rate_value,
                per_device_train_batch_size=2,
                gradient_accumulation_steps=4,
                gradient_checkpointing=False,
                remove_unused_columns=False,
                dataset_text_field="",
                dataset_kwargs={"skip_prepare_dataset": True},
                warmup_ratio=0.1,
                optim="adamw_torch_fused",
                save_strategy="no",
                seed=3407,
                **sft_overrides,
            ),
        )

        print(f"\n🔥 Starting training run: {run.name}...")
        trainer.train()
        print("✅ Training complete!")

        output_save_dir = f"/kaggle/working/maize_expert_adapters_{run.name}"
        model.save_pretrained(output_save_dir)
        tokenizer.save_pretrained(output_save_dir)
        print(f"✅ Model adapters saved to {output_save_dir}")

        artifact = wandb.Artifact(f'maize-adapters-{run.name}', type='model')
        artifact.add_dir(output_save_dir)
        run.log_artifact(artifact)
        print("✅ Adapters logged as a W&B Artifact.")

        # evaluate_and_log is async, so it is driven with asyncio.run().
        results = asyncio.run(evaluate_and_log(model, processor, eval_dataset))

        # Log evaluation metrics to W&B. Accuracy (the sweep's target metric)
        # is extracted first; latency is added only if the summary has it, so
        # a missing latency entry no longer prevents accuracy from being logged.
        try:
            mean_accuracy = results['calculate_accuracy']['accuracy']['mean']
            eval_metrics = {"eval_accuracy": mean_accuracy}
            try:
                # NOTE(review): the key says "ms"; confirm the unit Weave reports.
                eval_metrics["eval_latency_ms"] = results['model_latency']['mean']
            except (KeyError, TypeError):
                pass  # latency is optional
            wandb.log(eval_metrics)
            print(f"✅ Logged evaluation metrics to W&B: Accuracy={mean_accuracy}")
        except (KeyError, TypeError) as e:
            # Also reached when the evaluation returned None instead of a summary.
            print(f"⚠️ Could not log evaluation metrics to W&B: {e}")

    finally:
        print("\n🧹 Starting cleanup for next run...")
        if run:
            run.finish()

        # Drop the local references to the large objects before collecting, so
        # the garbage collector and the CUDA cache can reclaim their memory.
        del trainer, model, processor, tokenizer

        gc.collect()
        torch.cuda.empty_cache()
        print("✅ Memory cleared. Ready for the next agent run.")



# Execute the sweep agent
# This remains the same
#wandb.agent(sweep_id, function=train, count=5)