# Zero‑shot Weed Detection with OpenAI VLMs (Batch Runner)

In [None]:
import os
import base64
import csv
import time
from io import BytesIO
from pathlib import Path

from PIL import Image
from tqdm import tqdm

import openai  # pip install --upgrade openai

# =========================
# 🔐 CONFIG
# =========================
# Take the key from the OPENAI_API_KEY environment variable so the real secret
# never has to be stored in this file. The literal is only the original
# placeholder and is used when the variable is unset or empty.
openai.api_key = os.environ.get("OPENAI_API_KEY") or "sk-proj-"

# Chat model used for every request; it must accept image input.
# "gpt-4.1" is an alternative if you have access to it.
MODEL: str = "gpt-4o"

# Input directory holding the images to analyse (441 images).
IMAGE_DIR: Path = Path("/Joint_Val")

# Results file; each row has exactly two columns: image_name, gpt_response.
OUTPUT_CSV: Path = Path("/WeedVLM_GPT_4o_outputs.csv")

# When True and the CSV already exists, images listed in it are skipped.
RESUME: bool = True

# Retry policy for failed API calls: attempt count and initial backoff delay.
MAX_RETRIES: int = 5
BASE_SLEEP: float = 2.0  # seconds

# File suffixes (lower-case, with leading dot) that count as images.
IMAGE_EXTS: set = {
    ".jpg",
    ".jpeg",
    ".png",
    ".bmp",
    ".webp",
    ".tif",
    ".tiff",
}

# =========================
# 📌 Prompt (exactly as provided)
# =========================
# One entry per prompt line; each is terminated with "\n" when joined below,
# so the final prompt also ends with a newline.
_PROMPT_LINES = (
    "Analyze the image and provide output in exactly this format, and nothing extra:",
    "Weed Detection: <Yes or No>",
    "Weed Location: <Mention Position in the Image>",
    "Reasoning: <Explain Reasoning>",
    "Crop Growth: <Early, Growing or Full Grown>",
    "Crop Type: <Predict Crop Type>",
    "If your main response is 'No Weed', then rewrite the response again with V2_ at the start of each field, assuming weed is present",
)
PROMPT = "".join(f"{line}\n" for line in _PROMPT_LINES)

# =========================
# 🧰 Helpers
# =========================
def encode_image_to_base64_jpeg(image_path: Path) -> str:
    """
    Load the image at `image_path`, convert it to RGB and re-encode it in
    memory as a JPEG (quality 92).

    Returns the JPEG bytes as a base64 string, ready to be placed after
    'data:image/jpeg;base64,' in a data URL.
    """
    jpeg_buffer = BytesIO()
    with Image.open(image_path) as source:
        # JPEG is written from the RGB copy, so convert before saving.
        source.convert("RGB").save(jpeg_buffer, format="JPEG", quality=92)
    jpeg_bytes = jpeg_buffer.getvalue()
    return base64.b64encode(jpeg_bytes).decode("utf-8")


def query_openai_vision(image_b64: str, prompt: str) -> str:
    """
    Send one text prompt plus one image to the Chat Completions API.

    Args:
        image_b64: Base64-encoded JPEG bytes; embedded in a
            'data:image/jpeg;base64,...' URL.
        prompt: Text instruction sent alongside the image.

    Returns:
        The stripped text content of the first choice, or an empty string
        when the response carries no text content.

    Any exception raised by the client call propagates to the caller.
    """
    resp = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
                    },
                ],
            }
        ],
        max_tokens=350,
        temperature=0.0,
    )
    # The SDK types `message.content` as optional. Calling .strip() on None
    # would raise AttributeError, which callers would then retry as if it
    # were a transient failure.
    content = resp.choices[0].message.content
    return (content or "").strip()


# =========================
# 🚀 Run Batch
# =========================
def _load_processed_names(csv_path: Path) -> set:
    """
    Collect the first-column values (image names) already present in `csv_path`.

    Best-effort: a read failure is reported and whatever was collected so far
    is returned, so the batch can still run.
    """
    names = set()
    try:
        with open(csv_path, "r", encoding="utf-8", newline="") as f:
            reader = csv.reader(f)
            header = next(reader, None)
            if header and len(header) >= 2:
                # Only the first column is inspected, so rows holding an error
                # message also count as already processed.
                for row in reader:
                    if row:
                        names.add(row[0])
        print(f"Resuming: found {len(names)} rows already in {csv_path}.")
    except Exception as exc:
        # Do not abort the run, but do not hide the problem either.
        print(f"Warning: could not read {csv_path} for resume: {exc}")
    return names


def _query_with_retries(image_b64: str) -> str:
    """
    Query the model for one image, sleeping BASE_SLEEP * 2**(attempt-1)
    seconds between failed attempts.

    Returns the model text, or an '[ERROR after N retries] ...' marker once
    MAX_RETRIES attempts have failed.
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            return query_openai_vision(image_b64, PROMPT)
        except Exception as e:
            # Broad on purpose: any failure is recorded per image so one bad
            # request does not stop the whole batch.
            if attempt == MAX_RETRIES:
                return f"[ERROR after {MAX_RETRIES} retries] {e}"
            time.sleep(BASE_SLEEP * (2 ** (attempt - 1)))
    # Reached only when MAX_RETRIES < 1 (no attempt was made).
    return ""


def main():
    """
    Run the batch: query the model for every image in IMAGE_DIR and append
    one (image_name, gpt_response) row per image to OUTPUT_CSV.

    With RESUME enabled, images whose name already appears in OUTPUT_CSV are
    skipped. Rows are flushed as they are written so an interrupted run
    keeps its progress.

    Raises:
        FileNotFoundError: if IMAGE_DIR contains no file with a suffix in
            IMAGE_EXTS.
    """
    IMAGE_DIR.mkdir(parents=True, exist_ok=True)
    # is_file() keeps directories that happen to have an image-like suffix
    # out of the work list.
    images = [
        p
        for p in sorted(IMAGE_DIR.iterdir())
        if p.is_file() and p.suffix.lower() in IMAGE_EXTS
    ]
    if not images:
        raise FileNotFoundError(f"No images found in: {IMAGE_DIR}")

    # Resume support
    seen = set()
    if RESUME and OUTPUT_CSV.exists():
        seen = _load_processed_names(OUTPUT_CSV)

    # A header is needed when the file is new, and also when it exists but is
    # empty (e.g. a previous run stopped before writing anything).
    write_header = not OUTPUT_CSV.exists() or OUTPUT_CSV.stat().st_size == 0
    with open(OUTPUT_CSV, "a", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow(["image_name", "gpt_response"])

        for img_path in tqdm(images, desc="Processing images"):
            if img_path.name in seen:
                continue

            # Encode image
            try:
                b64 = encode_image_to_base64_jpeg(img_path)
            except Exception as e:
                writer.writerow([img_path.name, f"ERROR: failed to encode image: {e}"])
                f.flush()
                continue

            # Write exactly two columns
            writer.writerow([img_path.name, _query_with_retries(b64)])
            f.flush()

    print(f"Done. Wrote results to: {OUTPUT_CSV.resolve()}")


# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
