In [1]:
%%capture
# Environment setup for the fine-tuning pipeline. %%capture hides pip's output.
!pip install pip3-autoremove
# PyTorch stack, resolved against the CUDA 12.4 wheel index.
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu124
!pip install accelerate==1.7.0
# --no-deps: install exactly these packages without letting pip re-resolve
# (and possibly replace) the versions installed above.
# NOTE(review): xformers is installed twice (unpinned above, pinned to
# 0.0.29.post3 here) and accelerate is listed again after being pinned to 1.7.0
# — confirm which versions are intended.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3  trl triton cut_cross_entropy 
!pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
!pip install -U peft
!pip install --no-deps --upgrade timm # Only for Gemma 3N

In [2]:
%%capture
# OpenSloth provides the config classes and multi-GPU entry points imported by
# train_vision_multiGPU.py.
# NOTE(review): unpinned install; pin a version for reproducible runs.
!pip install opensloth 

In [3]:
# Install transformers straight from the GitHub repository's default branch,
# without touching already-installed dependencies (--no-deps).
# NOTE(review): this tracks a moving target (the captured run resolved to
# 4.54.0.dev0); pin a commit or tag for reproducible runs.
!pip install --no-deps git+https://github.com/huggingface/transformers.git

Collecting git+https://github.com/huggingface/transformers.git
  Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-bjdeldb_
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-bjdeldb_
  Resolved https://github.com/huggingface/transformers.git to commit c6d0500d15b9eedc33e9131a6bec6db56282b875
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: transformers
  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
  Created wheel for transformers: filename=transformers-4.54.0.dev0-py3-none-any.whl size=11988536 sha256=071afb39a71f72d06523b2f9360dace82e28c94ce5101bff2f0f48a794bd3816
  Stored in directory: /tmp/pip-ephem-wheel-cache-b5wa9raz/wheels/32/4b/78/f195c684dd3a9ed21f3b39fe8f85b48df7918581b6437be143
Successfully b

In [4]:
%%capture
# Reinstall unsloth-zoo and unsloth from their GitHub repositories.
# --force-reinstall replaces any copy that is already installed; --no-deps keeps
# the dependency versions chosen by the earlier install cells.
# NOTE(review): no commit/tag is pinned, so the installed code changes over time.
!pip install --force-reinstall --no-deps git+https://github.com/unslothai/unsloth-zoo.git
!pip install --force-reinstall --no-deps git+https://github.com/unslothai/unsloth.git

In [5]:

from datasets import Dataset, Image as HFImage
from pathlib import Path
import os
import shutil

# --- Copy data to the faster working directory ---
# The copy uses shutil.copytree instead of `!cp -r`: a shell escape never raises,
# so a failed copy would still print the success message and leave a partial
# directory that the existence check below would treat as "already copied".
source_path = "/kaggle/input/maize1-dataset/"
local_path = "/kaggle/working/local_datasets/"

if not os.path.exists(local_path):
    print(f"Copying data from {source_path} to {local_path} for faster access...")
    try:
        shutil.copytree(source_path, local_path)
    except Exception:
        # Remove whatever was copied so the next run retries from scratch
        # instead of skipping the copy because the directory already exists.
        shutil.rmtree(local_path, ignore_errors=True)
        raise
    print("✅ Data copy complete.")
else:
    print(f"✅ Data already copied to {local_path}")
# ---------------------------------------------------------

Copying data from /kaggle/input/maize1-dataset/ to /kaggle/working/local_datasets/ for faster access...
✅ Data copy complete.


In [6]:
%%writefile cache_vision_dataset.py
# ==============================================================================
# STEP 1: Cache Vision Dataset to Disk (FINAL - MANUAL COMPONENT VERSION)
# ==============================================================================

"""
Pre-processes and caches a vision dataset by calling the tokenizer and image
processor components separately. This is the most robust method and bypasses
the error-prone main processor call.
"""

from datasets import Dataset
from pathlib import Path
from PIL import Image
from tqdm import tqdm
from transformers import AutoProcessor
import os

# --- Mappings and Functions ---
# Maps a class name (dump_vision_data passes the image's parent folder name) to
# the label text used in the assistant's reply. Names missing from this table
# fall back to "Unknown Maize Condition" in create_conversation_dict.
CLASS_NAME_MAPPING = {
    "maize_healthy": "Healthy Maize Plant",
    "phosphorus_deficiency": "Maize Phosphorus Deficiency",
}

def create_conversation_dict(image_path, class_name):
    """Build one chat-format sample (user question + image, assistant label).

    Args:
        image_path: Path of the image file to open.
        class_name: Key looked up in CLASS_NAME_MAPPING; unknown keys are
            labelled "Unknown Maize Condition".

    Returns:
        A dict with a single "messages" key: a user turn holding the question
        text and the image converted to RGB, followed by an assistant turn
        holding the label sentence.
    """
    label = CLASS_NAME_MAPPING.get(class_name, "Unknown Maize Condition")
    rgb_image = Image.open(image_path).convert("RGB")

    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is the condition of this maize plant?"},
            {"type": "image", "image": rgb_image},
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "content": [
            {"type": "text", "text": f"This is a {label}."},
        ],
    }
    return {"messages": [user_turn, assistant_turn]}

def dump_vision_data():
    """Tokenize and image-process every JPEG in the local dataset, then save it.

    Each image becomes a two-turn conversation (see create_conversation_dict).
    The chat template is rendered to text, the text is tokenized, and the image
    is run through the image processor, in batches of 8. The resulting
    "input_ids", "attention_mask" and "pixel_values" rows are written to
    data/cached_vision_dataset_hf with Dataset.save_to_disk.

    Raises:
        FileNotFoundError: If no .jpg/.jpeg file exists under the dataset
            directory (previously an empty dataset was saved silently).
    """
    MODEL_NAME = "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit"
    print(f"Loading processor for '{MODEL_NAME}'...")
    processor = AutoProcessor.from_pretrained(MODEL_NAME)

    dataset_path = Path("/kaggle/working/local_datasets/")
    # Match the extension case-insensitively (.jpg, .JPG, .jpeg, ...) and sort so
    # the sample order does not depend on filesystem enumeration order.
    image_paths = sorted(
        p for p in dataset_path.rglob("*")
        if p.is_file() and p.suffix.lower() in {".jpg", ".jpeg"}
    )
    print(f"Found {len(image_paths)} images.")
    if not image_paths:
        raise FileNotFoundError(f"No .jpg/.jpeg images found under {dataset_path}")

    print("Processing dataset manually using separate tokenizer and image processor...")

    batch_size = 8
    processed_list = []

    for start in tqdm(range(0, len(image_paths), batch_size), desc="Processing batches"):
        # Decode images one batch at a time so the decoded PIL images for the
        # whole dataset are never all held in memory at once.
        batch = [
            create_conversation_dict(path, path.parent.name)
            for path in image_paths[start : start + batch_size]
        ]

        # 1. Render each conversation to prompt text and collect its image.
        batch_texts = [
            processor.tokenizer.apply_chat_template(
                sample["messages"], tokenize=False, add_generation_prompt=False
            )
            for sample in batch
        ]
        batch_images = [sample["messages"][0]["content"][1]["image"] for sample in batch]

        # 2. Call the tokenizer and the image processor separately.
        # NOTE(review): padding=True pads to the longest text in *this* batch, so
        # rows coming from different batches may differ in length — confirm the
        # training collator handles that.
        text_inputs = processor.tokenizer(
            batch_texts,
            padding=True,
            truncation=True,
            return_tensors="pt",
        )
        image_inputs = processor.image_processor(
            images=batch_images,
            return_tensors="pt",
        )

        # 3. Split the batched tensors back into one record per sample.
        for row in range(len(batch_texts)):
            processed_list.append({
                "input_ids": text_inputs.input_ids[row],
                "attention_mask": text_inputs.attention_mask[row],
                "pixel_values": image_inputs.pixel_values[row],
            })

    # 4. Create the final dataset from the list of processed dictionaries
    print("\nCreating final dataset from processed list...")
    processed_dataset = Dataset.from_list(processed_list)

    # 5. Save the final, processed dataset to disk
    output_dir = "data/cached_vision_dataset_hf"
    os.makedirs(output_dir, exist_ok=True)

    print(f"Saving processed dataset to directory: {output_dir}")
    processed_dataset.save_to_disk(output_dir)

    print(f"\n✅ Dataset successfully processed and saved to '{output_dir}'.")
    print(f"The dataset now contains the required columns: {processed_dataset.column_names}")

if __name__ == "__main__":
    dump_vision_data()

Writing cache_vision_dataset.py


In [7]:
# Run the caching script written by the previous cell. It saves the processed
# dataset to data/cached_vision_dataset_hf, which the training config reads via
# data_cache_path.
!python cache_vision_dataset.py

2025-07-22 20:59:59.200956: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753217999.386851     281 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753217999.443120     281 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Loading processor for 'unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit'...
processor_config.json: 100%|██████████████████| 98.0/98.0 [00:00<00:00, 830kB/s]
chat_template.jinja: 1.63kB [00:00, 7.29MB/s]
preprocessor_config.json: 1.09kB [00:00, 8.03MB/s]
config.json: 5.21kB [00:00, 23.0MB/s]
tokenizer_config.json: 1.20MB [00:00, 201MB/s]
tokenizer.model: 100%|█████████████████████| 4.70M/4.70M [00:00<00:00, 7.29MB/s]
tokenizer.json: 100%|████

In [8]:
# Authenticate the Weights & Biases CLI; the training config sets report_to="wandb".
# NOTE(review): "token" is presumably a redacted placeholder for the API key.
# Avoid pasting a real key into the notebook — read it from Kaggle Secrets, as
# the Hugging Face upload cell does for its token.
!wandb login token

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


In [10]:
%%writefile train_vision_multiGPU.py
# ==============================================================================
# STEP 2: Multi-GPU Training Script
# ==============================================================================

"""
Multi-GPU Vision Model Training with OpenSloth
"""

import os
import pickle
from opensloth.opensloth_config import (
    FastModelArgs,
    LoraArgs,
    OpenSlothConfig,
    TrainingArguments,
)
from opensloth.scripts.opensloth_sft_trainer import run_mp_training, setup_envs

# Set PyTorch memory allocation configuration
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Multi-GPU Configuration
# GLOBAL_BZ is the target effective batch size; gradient accumulation is derived
# from it below as GLOBAL_BZ // (len(DEVICES) * BZ).
GLOBAL_BZ = 16
# GPU indices handed to OpenSlothConfig(devices=...) and run_mp_training.
DEVICES = [0, 1]
# Per-device batch size (per_device_train_batch_size).
BZ = 1

# OpenSloth Configuration for Vision Models
# data_cache_path points at the directory written by cache_vision_dataset.py.
# NOTE(review): max_seq_length is 2048 here but 1024 in TrainingArguments below —
# confirm which limit is intended to apply.
opensloth_config = OpenSlothConfig(
    data_cache_path="data/cached_vision_dataset_hf",
    devices=DEVICES,
    fast_model_args=FastModelArgs(
        model_name="unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
        max_seq_length=2048,
        load_in_4bit=True,
        dtype=None,
        use_gradient_checkpointing="unsloth",  # Use Unsloth's optimized gradient checkpointing
    ),
    lora_args=LoraArgs(
        r=16, # Consider reducing to 8 if memory is still an issue
        lora_alpha=16,
        # LoRA is attached to these attention and MLP projection modules.
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_dropout=0,
        bias="none",
        use_rslora=False,
        finetune_vision_layers=True,
        finetune_language_layers=True,
    ),
    sequence_packing=False,
)

# Trainer hyperparameters. Checkpoints and the final adapter are written under
# output_dir, which the inference, app and upload scripts later read from.
training_config = TrainingArguments(
    output_dir="outputs/vision_multiGPU_experiment",
    per_device_train_batch_size=BZ,
    # Floor division: if GLOBAL_BZ is not a multiple of len(DEVICES) * BZ, the
    # effective batch size ends up smaller than GLOBAL_BZ.
    gradient_accumulation_steps=GLOBAL_BZ // (len(DEVICES) * BZ),
    learning_rate=1e-4,
    logging_steps=10,
    num_train_epochs=18,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    save_total_limit=2,
    save_steps=100,
    weight_decay=0.01,
    optim="adamw_torch_fused",
    seed=3407,
    # Keep every dataset column (input_ids, attention_mask, pixel_values).
    remove_unused_columns=False,
    dataset_text_field="",
    # NOTE(review): 1024 here vs. max_seq_length=2048 in FastModelArgs — confirm.
    max_seq_length=1024,
    dataloader_pin_memory=True,
    fp16=True,  # Enable mixed-precision training
    report_to="wandb",
    resume_from_checkpoint="",

)

# Entry point: configure wandb naming, report the effective batch size, then hand
# both configs to OpenSloth's multi-process trainer.
if __name__ == "__main__":
    # Setup environment variables for logging
    os.environ["WANDB_PROJECT"] = "open-maize-vision1"
    os.environ["WANDB_NAME"] = f"vision_multiGPU_globalbz{GLOBAL_BZ}_epochs{training_config.num_train_epochs}"

    # Recomputed from the actual settings, so it can differ from GLOBAL_BZ when
    # the floor division above is not exact.
    print(f"Global batch size: {len(DEVICES) * BZ * training_config.gradient_accumulation_steps}")
    print(f"Gradient accumulation steps: {training_config.gradient_accumulation_steps}")

    setup_envs(opensloth_config, training_config)
    run_mp_training(opensloth_config.devices, opensloth_config, training_config)

Overwriting train_vision_multiGPU.py


In [11]:
# Run the training script written by the previous cell. Outputs are written to
# outputs/vision_multiGPU_experiment (the script's output_dir).
!python train_vision_multiGPU.py

Global batch size: 16
Gradient accumulation steps: 8
Global batch size: 16
[MP] Running on 2 GPUs
2025-07-22 21:02:07.046301: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-22 21:02:07.046302: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753218127.069956     320 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753218127.069990     321 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753218127.076952     320 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugi

In [None]:
%%writefile run_multiGPU_training.sh
#!/bin/bash
# ==============================================================================
# ALTERNATIVE: launch the training script with torchrun
# ==============================================================================
#
# %%writefile has to be the first line of the notebook cell, and every line after
# it is written to this file verbatim. Notebook-only commands (lines starting
# with `!`) therefore cannot live in this cell. To run the script, use a
# separate cell:
#
#     !bash run_multiGPU_training.sh
#
# (`bash <file>` does not need the executable bit, so no chmod is required.)
#
# NOTE(review): train_vision_multiGPU.py already appears to start one worker per
# GPU itself via run_mp_training; confirm it is meant to be launched under
# torchrun before using this script.

torchrun --nproc_per_node=2 --nnodes=1 train_vision_multiGPU.py

In [46]:
%%writefile test_trained_model.py
# ==============================================================================
# STEP 3: Inference with the Fine-Tuned LoRA Model (DEFINITIVE FINAL VERSION)
# ==============================================================================

"""
This script uses the definitive, correct method for Gemma3N inference by
explicitly separating chat templating from data processing to resolve the
token/image mismatch error.
"""

from unsloth import FastVisionModel
from transformers import AutoProcessor
from PIL import Image
import torch
import requests
from io import BytesIO

def load_image_from_url(url: str, timeout: float = 30.0) -> "Image.Image | None":
    """Download an image and return it as an RGB PIL image.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests.get can block indefinitely on an unresponsive
            host.

    Returns:
        The decoded image converted to RGB, or None when the download fails
        (connection error, timeout, non-2xx status) or the downloaded bytes are
        not a recognisable image. The reason is printed in both failure cases.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGB")
    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and HTTP error statuses.
        print(f"Error loading image from URL: {e}")
        return None
    except Image.UnidentifiedImageError:
        print("Error: The content downloaded from the URL is not a valid image.")
        return None

    print("Image loaded successfully from URL.")
    return image

def run_inference():
    """Load the base model plus the fine-tuned LoRA adapter and diagnose one image.

    Downloads a sample image, builds a single-turn chat prompt asking for the
    plant's condition, generates up to 128 new tokens and prints the answer.
    Returns early (printing nothing further) if the sample image cannot be
    loaded.
    """
    # Define paths
    base_model_name = "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit"
    adapter_path = "outputs/vision_multiGPU_experiment"

    # Load base model and processor
    print(f"Loading base model: {base_model_name}")
    model, processor = FastVisionModel.from_pretrained(
        model_name = base_model_name,
        max_seq_length = 2048,
        load_in_4bit = True,
        dtype = None,
    )

    print("\nPreparing model for inference...")
    FastVisionModel.for_inference(model)

    # Load LoRA adapter
    print(f"Loading adapter from: {adapter_path}")
    model.load_adapter(adapter_path)

    print("\nModel and adapter loaded successfully!")

    # --- Test with a sample image ---
    test_image_url = "https://github.com/surfiniaburger/tune/blob/main/sample_images/phosphorus_deficiency_test_2.jpg?raw=true"
    image = load_image_from_url(test_image_url)

    if image is None:
        return

    # 1. Build the multimodal message structure (question text + image object).
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is the condition of this maize plant?"},
                {"type": "image", "image": image},
            ],
        }
    ]

    # 2. Render the chat template to a prompt string (not tokenized yet).
    text_prompt = processor.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # 3. Let the main processor tokenize the prompt and preprocess the image.
    inputs = processor(
        text=text_prompt,
        images=image,
        return_tensors="pt"
    ).to(model.device)

    print("\nGenerating response...")
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            use_cache=True,
        )

    # generate() returns the prompt tokens followed by the new tokens. Decode
    # only the new tokens instead of searching the decoded text for "model\n",
    # which breaks if the answer itself contains that marker.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[:, prompt_length:]
    final_answer = processor.batch_decode(generated_tokens, skip_special_tokens=True)[0].strip()

    if not final_answer:
        final_answer = "Could not parse the model's response."

    print("="*40)
    print(f"✅ Model's Answer: {final_answer}")
    print("="*40)

if __name__ == "__main__":
    run_inference()

Overwriting test_trained_model.py


In [47]:
# Run the single-image inference test written by the previous cell.
!python test_trained_model.py

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
2025-07-23 00:10:28.733637: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753229428.756383    3348 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753229428.763162    3348 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
🦥 Unsloth Zoo will now patch everything to make training faster!
Loading base model: unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit
==((====))==  Unsloth 2025.7.7: Fast Gemma3N patching. Transformers: 4.54.0.dev0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0

In [51]:
%%writefile app.py
# ==============================================================================
# GRADIO APP FOR MAIZE DIAGNOSIS (DEFINITIVE CORRECTED VERSION)
# ==============================================================================

"""
This script launches a Gradio web interface for the fine-tuned maize
vision model, using the definitive, correct two-step processing logic.
"""

import gradio as gr
from unsloth import FastVisionModel
from transformers import AutoProcessor
from PIL import Image
import torch
import os

# --- 1. Global Setup: Load Model and Processor ---
# This section runs only ONCE when the application starts.

print("Performing initial model setup...")

BASE_MODEL_NAME = "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit"
ADAPTER_PATH = "outputs/vision_multiGPU_experiment"
model = None
processor = None

try:
    print(f"Loading base model: {BASE_MODEL_NAME}")
    model, processor = FastVisionModel.from_pretrained(
        model_name=BASE_MODEL_NAME,
        max_seq_length=2048,
        load_in_4bit=True,
        dtype=None,
    )
    FastVisionModel.for_inference(model)
    print(f"Loading adapter from: {ADAPTER_PATH}")
    model.load_adapter(ADAPTER_PATH)
    print("\n✅ Model and adapter loaded successfully!")

except Exception as e:
    print(f"❌ Critical error during model loading: {e}")
    # Reset both globals: if the base model loaded but the adapter did not, the
    # app must not silently serve predictions from the un-fine-tuned base model.
    # diagnose_maize_plant reports "Model is not loaded" when these are None.
    model = None
    processor = None


# --- 2. Define the Core Prediction Function (Corrected Logic) ---

def diagnose_maize_plant(uploaded_image: Image.Image) -> str:
    """Run the model on an uploaded image and return its diagnosis text.

    Args:
        uploaded_image: PIL image supplied by the Gradio input, or None when
            nothing was uploaded.

    Returns:
        The model's answer with surrounding whitespace stripped. If the model
        or processor failed to load, or no image was given, a fixed explanatory
        message is returned instead. If the model generates no text, a
        "Could not parse" message containing the raw decoded output is returned.
    """
    if model is None or processor is None or uploaded_image is None:
        return "Model is not loaded or no image was uploaded. Please check the console for errors."

    image = uploaded_image.convert("RGB")

    # 1. Multimodal message structure: question text plus the image object.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is the condition of this maize plant?"},
                {"type": "image", "image": image},
            ],
        }
    ]

    # 2. Render the chat template to a prompt string (not tokenized yet).
    text_prompt = processor.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # 3. Let the main processor tokenize the prompt and preprocess the image.
    inputs = processor(
        text=text_prompt,
        images=image,
        return_tensors="pt"
    ).to(model.device)

    # Generate the response
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=128, use_cache=True)

    # generate() returns the prompt tokens followed by the new tokens. Decode
    # only the new tokens instead of searching the decoded text for "model\n",
    # which breaks if the answer itself contains that marker.
    prompt_length = inputs["input_ids"].shape[1]
    final_answer = processor.batch_decode(
        outputs[:, prompt_length:], skip_special_tokens=True
    )[0].strip()

    if not final_answer:
        raw_output = processor.batch_decode(outputs, skip_special_tokens=True)[0]
        return "Could not parse model's response. Raw output: " + raw_output

    return final_answer


# --- 3. Build and Launch the Gradio Interface ---

print("Building Gradio interface...")

# Example inputs offered in the UI; each inner list holds the value for the
# interface's single image input.
example_images = [
    ["https://raw.githubusercontent.com/surfiniaburger/tune/main/sample_images/healthy_maize_test_1.jpg"],
    ["https://raw.githubusercontent.com/surfiniaburger/tune/main/sample_images/phosphorus_deficiency_test_2.jpg"]
]

# type="pil" makes Gradio pass a PIL image to diagnose_maize_plant, matching its
# uploaded_image parameter.
demo = gr.Interface(
    fn=diagnose_maize_plant,
    inputs=gr.Image(type="pil", label="Upload Maize Plant Image"),
    outputs=gr.Textbox(label="Diagnosis", lines=3),
    title="🌽 Maize Health Diagnosis Assistant",
    description="Upload an image of a maize plant, and the AI will analyze its condition. This tool is powered by a fine-tuned Gemma3N vision model.",
    article="Built with Unsloth, OpenSloth, and Gradio.",
    examples=example_images,
    allow_flagging="never",
)

# NOTE(review): launch() runs at import time here (no `if __name__ == "__main__"`
# guard), unlike the later deployment version of app.py.
print("Launching Gradio app... Access it at the URL provided below.")
demo.launch(share=True)

Overwriting app.py


In [49]:
# Gradio provides the web UI used by app.py.
# NOTE(review): unpinned install; pin a version for reproducible runs.
!pip install gradio



In [52]:
# Start the Gradio app written to app.py above (it calls demo.launch(share=True)).
!python app.py

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
2025-07-23 00:21:26.404383: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753230086.427747    3454 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753230086.434953    3454 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
🦥 Unsloth Zoo will now patch everything to make training faster!
Performing initial model setup...
Loading base model: unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit
==((====))==  Unsloth 2025.7.7: Fast Gemma3N patching. Transformers: 4.54.0.dev0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5.

In [53]:
%%writefile upload_adapter.py

from huggingface_hub import HfApi, create_repo

# --- CONFIGURATION ---
# Your username and a new name for this 2-GPU model version.
HF_USERNAME = "surfiniaburger"
REPO_NAME = "AuraMind-Maize-2GPU"

# The local folder containing your adapter
LOCAL_ADAPTER_FOLDER = "outputs/vision_multiGPU_experiment"
HF_REPO_ID = f"{HF_USERNAME}/{REPO_NAME}"

# --- SCRIPT LOGIC ---
# This script performs no login itself; Hugging Face credentials must already be
# configured for the process, otherwise create_repo fails with 401 Unauthorized.
api = HfApi()

print(f"Creating repository '{HF_REPO_ID}' on the Hugging Face Hub...")
try:
    create_repo(repo_id=HF_REPO_ID, repo_type="model", exist_ok=True)
    print("Repository created successfully (or already exists).")
except Exception as e:
    print(f"Error creating repository: {e}")
    # Exit with a non-zero status: a bare exit() reports success (status 0) to
    # whatever invoked the script.
    raise SystemExit(1)

print(f"\nUploading files from '{LOCAL_ADAPTER_FOLDER}' to '{HF_REPO_ID}'...")
try:
    api.upload_folder(
        folder_path=LOCAL_ADAPTER_FOLDER,
        repo_id=HF_REPO_ID,
        repo_type="model",
    )
    print(f"\n✅ Successfully uploaded adapter to: https://huggingface.co/{HF_REPO_ID}")
except Exception as e:
    print(f"Error uploading files: {e}")
    # A failed upload must not look like a successful run either.
    raise SystemExit(1)


Writing upload_adapter.py


In [54]:
# Run the upload script. It does not log in to the Hugging Face Hub itself; the
# captured run below failed with 401 Unauthorized.
!python upload_adapter.py

Creating repository 'surfiniaburger/AuraMind-Maize-2GPU' on the Hugging Face Hub...
Error creating repository: 401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-68802db2-58be25962cc8809a4b52cac0;3d35a5c8-3580-4e46-821a-34b42da7ad87)

Invalid username or password.


In [55]:
# ==============================================================================
# STEP 1: Securely Upload Your Adapter to the Hugging Face Hub
# ==============================================================================
from kaggle_secrets import UserSecretsClient
from huggingface_hub import login, HfApi, create_repo

# --- 1. Secure Login ---
# The token is read from Kaggle Secrets so it never appears in the notebook.
print("Attempting secure login to Hugging Face Hub...")
try:
    user_secrets = UserSecretsClient()
    secret_value = user_secrets.get_secret("HUGGINGFACE_API_KEY")
    login(token=secret_value)
    print("✅ Secure login successful!")
except Exception as e:
    print(f"❌ Could not log in. Please ensure 'HUGGINGFACE_API_KEY' is set in Kaggle Secrets. Error: {e}")
    # Raise instead of calling exit(): inside a notebook, exit() asks the kernel
    # itself to shut down, discarding all session state. Raising stops this cell
    # (and a "Run All") while keeping the kernel alive.
    raise RuntimeError("Hugging Face login failed; the upload cannot proceed.") from e

# --- 2. Define the Upload Script Configuration ---
HF_USERNAME = "surfiniaburger"
REPO_NAME = "AuraMind-Maize-2GPU"
LOCAL_ADAPTER_FOLDER = "outputs/vision_multiGPU_experiment"
HF_REPO_ID = f"{HF_USERNAME}/{REPO_NAME}"

# --- 3. Run the Upload Logic ---
api = HfApi()

print(f"\nCreating repository '{HF_REPO_ID}' on the Hugging Face Hub...")
try:
    create_repo(repo_id=HF_REPO_ID, repo_type="model", exist_ok=True)
    print("Repository created successfully (or already exists).")
except Exception as e:
    print(f"Error creating repository: {e}")
    raise RuntimeError(f"Could not create repository '{HF_REPO_ID}'.") from e

# NOTE(review): the whole output folder is uploaded; the captured run shows this
# includes training state such as optimizer.pt and rng_state.pth — confirm that
# is intended for a published adapter repository.
print(f"\nUploading files from '{LOCAL_ADAPTER_FOLDER}' to '{HF_REPO_ID}'...")
try:
    api.upload_folder(
        folder_path=LOCAL_ADAPTER_FOLDER,
        repo_id=HF_REPO_ID,
        repo_type="model",
    )
    print(f"\n✅✅ Successfully uploaded adapter to: https://huggingface.co/{HF_REPO_ID}")
except Exception as e:
    print(f"Error uploading files: {e}")
    # Surface the failure so later cells do not assume the adapter is on the Hub.
    raise RuntimeError(f"Upload to '{HF_REPO_ID}' failed.") from e

Attempting secure login to Hugging Face Hub...
✅ Secure login successful!

Creating repository 'surfiniaburger/AuraMind-Maize-2GPU' on the Hugging Face Hub...
Repository created successfully (or already exists).

Uploading files from 'outputs/vision_multiGPU_experiment' to 'surfiniaburger/AuraMind-Maize-2GPU'...


adapter_model.safetensors:   0%|          | 0.00/91.7M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/169M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/91.7M [00:00<?, ?B/s]

Upload 20 LFS files:   0%|          | 0/20 [00:00<?, ?it/s]

scaler.pt:   0%|          | 0.00/988 [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.82k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/91.7M [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/169M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scaler.pt:   0%|          | 0.00/988 [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.82k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.82k [00:00<?, ?B/s]


✅✅ Successfully uploaded adapter to: https://huggingface.co/surfiniaburger/AuraMind-Maize-2GPU


In [56]:
%%writefile app.py
# ==============================================================================
# AURA-MIND: MAIZE HEALTH DIAGNOSIS APP (DEPLOYMENT-READY)
# ==============================================================================

import gradio as gr
from unsloth import FastVisionModel
from transformers import AutoProcessor
from PIL import Image
import torch
import os

# --- 1. Global Setup: Load Model and Processor from Hub ---
# Runs once at import time. The adapter is loaded from the Hugging Face Hub
# repository named by ADAPTER_PATH rather than from a local folder.

print("Performing initial AuraMind model setup...")

BASE_MODEL_NAME = "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit"
ADAPTER_PATH = "surfiniaburger/AuraMind-Maize-2GPU"

model = None
processor = None

try:
    print(f"Loading base model: {BASE_MODEL_NAME}")
    model, processor = FastVisionModel.from_pretrained(
        model_name=BASE_MODEL_NAME, max_seq_length=2048, load_in_4bit=True, dtype=None
    )
    FastVisionModel.for_inference(model)

    print(f"Loading AuraMind adapter from Hub: {ADAPTER_PATH}")
    model.load_adapter(ADAPTER_PATH)

    print("\n✅ AuraMind model and adapter loaded successfully!")
except Exception as e:
    print(f"❌ Critical error during model loading: {e}")
    # Reset both globals: if the base model loaded but the adapter did not, the
    # app must not silently serve predictions from the un-fine-tuned base model.
    # diagnose_maize_plant reports "Model is not loaded" when these are None.
    model = None
    processor = None


# --- 2. Define the Core Prediction Function ---
def diagnose_maize_plant(uploaded_image: Image.Image) -> str:
    """Run the model on an uploaded image and return its diagnosis text.

    Args:
        uploaded_image: PIL image supplied by the Gradio input, or None when
            nothing was uploaded.

    Returns:
        The model's answer with surrounding whitespace stripped. If the model
        or processor failed to load, or no image was given, a fixed explanatory
        message is returned instead. If the model generates no text,
        "Could not parse model's response." is returned.
    """
    if model is None or processor is None or uploaded_image is None:
        return "Model is not loaded or no image was uploaded. Please check the console for errors."

    image = uploaded_image.convert("RGB")
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is the condition of this maize plant?"},
                {"type": "image", "image": image},
            ],
        }
    ]
    # Render the chat template to text, then let the processor tokenize the text
    # and preprocess the image together.
    text_prompt = processor.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(text=text_prompt, images=image, return_tensors="pt").to(model.device)

    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=128, use_cache=True)

    # generate() returns the prompt tokens followed by the new tokens. Decode
    # only the new tokens instead of searching the decoded text for "model\n",
    # which breaks if the answer itself contains that marker.
    prompt_length = inputs["input_ids"].shape[1]
    final_answer = processor.batch_decode(
        outputs[:, prompt_length:], skip_special_tokens=True
    )[0].strip()

    return final_answer if final_answer else "Could not parse model's response."

# --- 3. Build and Launch the Gradio Interface ---
print("Building Gradio interface...")

# type="pil" makes Gradio pass a PIL image to diagnose_maize_plant, matching its
# uploaded_image parameter.
demo = gr.Interface(
    fn=diagnose_maize_plant,
    inputs=gr.Image(type="pil", label="Upload Maize Plant Image"),
    outputs=gr.Textbox(label="Diagnosis", lines=3),
    title="🌽 AuraMind: Maize Health Diagnosis (2-GPU Ver.)",
    description="Upload an image of a maize plant, and the AuraMind AI will analyze its condition. This model was fine-tuned on two GPUs for enhanced performance.",
    article="Built with Unsloth and Gradio by surfiniaburger.",
    allow_flagging="never",
)

# Launch only when run as a script, so importing this module builds `demo`
# without starting a server.
if __name__ == "__main__":
    print("Launching Gradio app for deployment...")
    # For deployment, we don't need a share link. Gradio deploy handles it.
    demo.launch()

Overwriting app.py
