# Zero‑shot Weed Detection with Gemini VLMs (Batch Runner)

In [None]:
# --- Single Cell: Zero-shot Weed Detection with Google Gemini 2.5 (Flash / Flash-Lite) ---
import os
import time
from pathlib import Path

import pandas as pd
from tqdm import tqdm

# Google's official Generative AI SDK
# pip install -U google-generativeai pandas tqdm
import google.generativeai as genai

# =========================
# 🔐 CONFIG
# =========================
# Option A (recommended): export the key before launching Python/Jupyter, e.g.
#   export GOOGLE_API_KEY="AI...your_key..."      (Linux/macOS)
#   set GOOGLE_API_KEY=AI...your_key...           (Windows cmd)
# Option B: replace the "" fallback below with the key (okay for private runs;
#   never commit a real key).
# The environment variable is read here so that Option A actually takes effect.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")

# An empty string is passed on as None so the SDK applies its own credential
# lookup instead of being configured with a blank key.
genai.configure(api_key=GOOGLE_API_KEY or None)

# ==== User Inputs ====
# Folder scanned (non-recursively) for input images, and the two result files.
IMAGE_FOLDER = "/Joint_Val"
OUTPUT_CSV   = "/gemini_flash_LITE_25_Fixed_weed_results.csv"
OUTPUT_EXCEL = "/gemini_flash_LITE_25_Fixed_weed_results.xlsx"

# Choose a vision-capable Gemini model
# Use either:
#   "gemini-2.5-flash"      (faster, general)
#   "gemini-2.5-flash-lite" (even lighter, cheaper)
MODEL = "gemini-2.5-flash-lite"   # or "gemini-2.5-flash"

# ==================== PROMPT ====================
# Sent verbatim with every image. The model is asked for five labelled fields
# and, when it answers "No", a second "V2_"-prefixed set that assumes a weed
# is present.
PROMPT = (
    "Analyze the image and provide output in exactly this format, and nothing extra:\n"
    "Weed Detection: <Yes or No>\n"
    "Weed Location: <Mention Position in the Image>\n"
    "Reasoning: <Explain Reasoning>\n"
    "Crop Growth: <Early, Growing or Full Grown>\n"
    "Crop Type: <Predict Crop Type>\n"
    "If your main response is 'No' for Weed Detection, then rewrite the response again with V2_ "
    "at the start of each field, below the Original response, assuming weed is present\n"
)

# Supported image extensions (compared against the lower-cased file suffix)
IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

# Retry/backoff: up to MAX_RETRIES generation attempts per image; the wait
# starts at INITIAL_BACKOFF seconds and doubles after each failed attempt.
MAX_RETRIES = 5
INITIAL_BACKOFF = 2.0  # seconds

# =========================
# 🧰 Helpers
# =========================
def call_gemini_vision(image_path: Path, prompt: str, model_name: str) -> str:
    """
    Send one image plus a text prompt to a Gemini model and return the reply text.

    The image is uploaded with ``genai.upload_file``, the model is called with
    ``[prompt, image_file]``, and the uploaded copy is deleted again before the
    function returns so that a long batch run does not leave one stored file
    per image behind on the service.

    Generation is attempted up to ``MAX_RETRIES`` times. After each failed
    attempt the function sleeps, starting at ``INITIAL_BACKOFF`` seconds and
    doubling every time. The upload itself is attempted once.

    Args:
        image_path: Path of the image file to analyse.
        prompt: Instruction text sent alongside the image.
        model_name: Gemini model identifier passed to ``genai.GenerativeModel``.

    Returns:
        The stripped response text (possibly empty), or a string starting with
        ``"ERROR:"`` when the upload fails or every generation attempt raises.
        API errors are reported through the return value, not raised.
    """
    # Upload the image
    try:
        image_file = genai.upload_file(path=str(image_path))
    except Exception as e:
        return f"ERROR: Failed to upload file - {e}"

    try:
        model = genai.GenerativeModel(model_name)
        backoff = INITIAL_BACKOFF

        for attempt in range(1, MAX_RETRIES + 1):
            try:
                # Multimodal: prompt + image
                resp = model.generate_content([prompt, image_file])
                # getattr's default only covers a missing attribute; any other
                # error raised while reading `.text` lands in the outer handler
                # below and is treated like a failed attempt.
                text = getattr(resp, "text", "") or ""
                text = text.strip()
                if not text:
                    # Try to extract from candidates/parts if text empty
                    try:
                        cands = getattr(resp, "candidates", []) or []
                        if cands and hasattr(cands[0], "content") and hasattr(cands[0].content, "parts"):
                            parts = cands[0].content.parts
                            text = "".join(getattr(p, "text", "") for p in parts).strip()
                    except Exception:
                        # Deliberate best effort: if the fallback extraction
                        # fails, keep whatever text we have (empty string).
                        pass
                return text
            except Exception as e:
                if attempt == MAX_RETRIES:
                    return f"ERROR: Max retries exceeded - {e}"
                time.sleep(backoff)
                backoff *= 2

        # Only reachable when MAX_RETRIES < 1 (the loop body never runs).
        return "ERROR: Unknown error occurred."
    finally:
        # Remove the uploaded copy whatever the outcome. Cleanup is best
        # effort: a failed delete must not replace the result being returned.
        try:
            genai.delete_file(image_file.name)
        except Exception:
            pass

# =========================
# 🚀 Run Batch
# =========================
# Validate the input folder with a real exception: `assert` is removed when
# Python runs with -O, which would let a bad path slip through.
img_dir = Path(IMAGE_FOLDER)
if not img_dir.is_dir():
    raise NotADirectoryError(f"IMAGE_FOLDER does not exist: {img_dir}")

# Regular files only, so a sub-folder named like "x.jpg" is not sent as an image.
images = sorted(
    p for p in img_dir.iterdir()
    if p.is_file() and p.suffix.lower() in IMAGE_EXTS
)
print(f"Found {len(images)} images to process.")

out_csv = Path(OUTPUT_CSV)
out_xlsx = Path(OUTPUT_EXCEL)
out_csv.parent.mkdir(parents=True, exist_ok=True)
out_xlsx.parent.mkdir(parents=True, exist_ok=True)

RESULT_COLUMNS = ["Image", "Gemini_Response"]
EXCEL_SAVE_EVERY = 500  # images between Excel checkpoints

# Start a fresh CSV containing only the header; each result is appended below.
pd.DataFrame(columns=RESULT_COLUMNS).to_csv(out_csv, index=False, encoding="utf-8")

rows = []
for i, p in enumerate(tqdm(images, desc="Processing images"), start=1):
    try:
        gemini_response = call_gemini_vision(p, PROMPT, MODEL)
    except Exception as e:
        gemini_response = f"ERROR: {e}"

    row = {"Image": p.name, "Gemini_Response": gemini_response}
    rows.append(row)

    # Save incrementally: append just this row, so the CSV is always current
    # and the cost per image stays constant instead of growing with the
    # number of rows already written.
    pd.DataFrame([row], columns=RESULT_COLUMNS).to_csv(
        out_csv, mode="a", header=False, index=False, encoding="utf-8"
    )

    # Excel cannot be appended to cheaply, so the workbook is rewritten only
    # at checkpoints and after the last image. Between checkpoints the CSV is
    # the up-to-date record.
    if i % EXCEL_SAVE_EVERY == 0 or i == len(images):
        df = pd.DataFrame(rows, columns=RESULT_COLUMNS)
        df.to_excel(out_xlsx, index=False)
        print(f"Saved progress at {i}/{len(images)} images")

if not rows:
    # No images found: still write a header-only workbook so both paths in
    # the final message point at real files.
    df = pd.DataFrame(rows, columns=RESULT_COLUMNS)
    df.to_excel(out_xlsx, index=False)

print(f"\nFinal results saved to:\n CSV:   {out_csv}\n Excel: {out_xlsx}")


Found 441 images to process.


Processing images: 100%|██████████| 441/441 [39:50<00:00,  5.42s/it]

Saved progress at 441/441 images

Final results saved to:
 CSV:   D:\Khalifa University 2024\Conferences - Research\Al-Ain 2025\gemini_flash_LITE_25_Fixed_weed_results.csv
 Excel: D:\Khalifa University 2024\Conferences - Research\Al-Ain 2025\gemini_flash_LITE_25_Fixed_weed_results.xlsx



