In [None]:
!pip install openai pandas aiohttp tqdm

In [2]:
# Attach Google Drive at /content/drive; the data paths used later live under it.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# --- Step 3: Setup ---
import asyncio
import base64
import getpass
import json
import os
import time
from pathlib import Path

import nest_asyncio
import pandas as pd
from openai import AsyncOpenAI

# Patch asyncio for Jupyter/Colab so run_until_complete works inside the
# notebook's already-running event loop.
nest_asyncio.apply()

# GitHub token: never paste the secret into the notebook. A token already present
# in the environment is reused; otherwise it is requested interactively so it is
# not stored in the saved file.
if not os.environ.get("GITHUB_TOKEN"):
    os.environ["GITHUB_TOKEN"] = getpass.getpass("GitHub token: ")

# Instantiate client with GitHub endpoint
client = AsyncOpenAI(
    base_url="https://models.github.ai/inference",
    api_key=os.environ["GITHUB_TOKEN"],
)

MODEL = "openai/gpt-4o-mini"

# Rate limits
RPM = 15        # requests per minute
RPD = 150       # requests per day
CONCURRENT = 5  # max concurrent requests

# --- Step 4: Utils & G-VEval style prompt ---
def encode_image(image_path: Path):
    """Return the full contents of ``image_path`` as base64 text.

    The file is read in binary mode, base64-encoded, and the encoded bytes are
    decoded to a ``str`` using UTF-8.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

async def gveval_score_async(prompt_text: str, image_path: Path, meta_caption: str):
    """Ask the chat model to score a candidate caption against a reference and an image.

    Args:
        prompt_text: Reference caption; sent to the model as ``Reference: ...``.
        image_path: Image file to attach. It is base64-encoded and embedded in the
            request as a data URL.
        meta_caption: Candidate caption; sent to the model as ``Candidate: ...``.

    Returns:
        The ``message.content`` of the first choice, unparsed. The system prompt
        asks for a JSON object, but no parsing or validation happens here.
    """
    image_b64 = encode_image(image_path)

    # The data URL has to advertise the real image format. Images may be PNG or
    # JPEG, so derive the MIME type from the file suffix instead of always
    # claiming PNG. Unknown suffixes keep the previous "image/png" label.
    mime_by_suffix = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg"}
    mime_type = mime_by_suffix.get(Path(image_path).suffix.lower(), "image/png")

    system_prompt = """You are a caption evaluation module following G-VEval from AAAI 2025.
You will be given:
- A reference caption.
- A candidate caption (meta-caption) generated from the image.
- The image itself.

You must produce:
- Four scores (Accuracy, Completeness, Conciseness, Relevance), each from 0 to 100.
- An overall score from 0 to 100.
- A short reasoning explaining the main strengths & weaknesses.

Return strictly JSON, exactly this structure:

{
  "accuracy": <int 0-100>,
  "completeness": <int 0-100>,
  "conciseness": <int 0-100>,
  "relevance": <int 0-100>,
  "overall": <int 0-100>,
  "reason": "<short explanation>"
}"""

    user_prompt = f"Reference: {prompt_text}\nCandidate: {meta_caption}"

    resp = await client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": user_prompt},
                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image_b64}"}}
                ],
            },
        ],
        temperature=0,  # deterministic scoring
        max_tokens=400,
    )

    return resp.choices[0].message.content

def safe_append_jsonl(out_path: str, record: dict):
    """Append ``record`` to ``out_path`` as a single JSON line.

    The file is opened in append mode, so existing lines are preserved. After the
    write, the buffer is flushed and ``os.fsync`` is called on the descriptor so
    the line is pushed to storage before the function returns.
    """
    with open(out_path, mode="a", encoding="utf-8") as sink:
        serialized = json.dumps(record, ensure_ascii=False)
        sink.write(f"{serialized}\n")
        sink.flush()
        os.fsync(sink.fileno())

def _parse_model_json(raw_reply):
    """Convert a model reply into the value stored under ``"result"``.

    Strings are parsed as JSON after removing an optional Markdown code fence
    (``` or ```json) around the payload. Non-string, non-None values are passed
    through unchanged. ``None`` raises ``ValueError`` so an empty reply is never
    recorded as a finished evaluation.
    """
    if raw_reply is None:
        raise ValueError("model returned no content")
    if not isinstance(raw_reply, str):
        return raw_reply
    text = raw_reply.strip()
    if text.startswith("```"):
        # Drop the opening fence line, then everything from the closing fence on.
        text = text.split("\n", 1)[1] if "\n" in text else text[3:]
        text = text.rsplit("```", 1)[0].strip()
    return json.loads(text)


async def process_triple(triple, out_path: str):
    """Score one (image_name, prompt, meta_caption, image_path) tuple and persist it.

    Args:
        triple: 4-tuple ``(img_name, prompt, meta_caption, img_path)``.
        out_path: JSONL file that receives the record on success.

    Returns:
        The saved record ``{"image_name": ..., "result": ...}``, or ``None`` when
        the request, the parsing of the reply, or the write failed. Failures are
        printed and swallowed so one bad item does not abort the whole run.
    """
    img_name, prompt, meta_caption, img_path = triple
    try:
        json_result = await gveval_score_async(prompt, img_path, meta_caption)
        record = {
            "image_name": img_name,
            # Raises on empty or unparseable replies, so nothing is written for
            # them and the image is retried on the next run.
            "result": _parse_model_json(json_result),
        }
        safe_append_jsonl(out_path, record)
        print(f"Saved: {img_name}")
        return record
    except Exception as e:
        print(f"Error for {img_name}: {e}")
        return None

async def run_eval(triples, out_path: str):
    """Score every triple in order while respecting the RPM and RPD request caps.

    Requests are awaited one after another. Throttling uses a fixed 60-second
    window: once ``RPM`` requests have been sent inside the current window, the
    coroutine sleeps until the window ends. The window also restarts whenever
    60 seconds have passed, so the limit keeps being enforced after a slow
    stretch. The daily cap counts every request sent, including failed ones,
    because failures consume quota as well.

    Args:
        triples: Sequence of ``(img_name, prompt, meta_caption, img_path)`` tuples.
        out_path: JSONL file that ``process_triple`` appends successful records to.

    Returns:
        List of the records that were saved successfully, in processing order.
    """
    window_start = time.time()
    sent_in_window = 0
    sent_total = 0
    results = []

    total = len(triples)
    for i, triple in enumerate(triples, start=1):
        # Stop before sending anything beyond the daily allowance.
        if sent_total >= RPD:
            print("Hit daily cap, stopping for today.")
            break

        # enforce per-minute limit
        elapsed = time.time() - window_start
        if elapsed >= 60:
            # Window expired on its own: start a new one.
            window_start = time.time()
            sent_in_window = 0
        elif sent_in_window >= RPM:
            # Window is full: wait for it to end, then start a new one.
            await asyncio.sleep(60 - elapsed)
            window_start = time.time()
            sent_in_window = 0

        sent_in_window += 1
        sent_total += 1
        rec = await process_triple(triple, out_path)
        if rec:
            results.append(rec)
        # verbose progress
        print(f"[{i}/{total}] Processed image: {triple[0]} | Total saved: {len(results)}")
    return results

# --- Step 5: Load triples ---
def load_triples(prompt_csv: str, meta_csv: str, image_folder: str):
    """Build the list of (image_name, prompt, meta_caption, image_path) tuples.

    Rows come from ``meta_csv``, which supplies the ``image_name``, ``Prompts``
    and ``Meta Caption`` columns. A row is kept only when its meta caption is a
    non-blank string and an image named ``<image_name>.png``, ``.jpg`` or
    ``.jpeg`` (checked in that order) exists in ``image_folder``.

    ``prompt_csv`` is read as well, but its contents are not used by this
    function.
    """
    # Loaded first, exactly like before; nothing below consults this frame.
    pd.read_csv(prompt_csv)
    meta_frame = pd.read_csv(meta_csv)

    collected = []
    for _, entry in meta_frame.iterrows():
        name = str(entry["image_name"]).strip()
        reference = entry["Prompts"]
        caption = entry.get("Meta Caption", "")

        has_caption = isinstance(caption, str) and caption.strip() != ""
        if not has_caption:
            continue

        # First existing file among the supported extensions, or None.
        candidates = (
            Path(image_folder) / f"{name}{suffix}"
            for suffix in (".png", ".jpg", ".jpeg")
        )
        located = next((path for path in candidates if path.exists()), None)
        if located is not None:
            collected.append((name, reference, caption, located))
    return collected

# --- Step 6: Run for SD2 model ---
base = "/content/drive/MyDrive/gveval"
prompt_csv = f"{base}/DrawBenchPrompts.csv"
meta_csv = f"{base}/meta_captions_sd2.csv"
image_folder = f"{base}/sd2"
out_file = f"{base}/results_sd2_gveval.jsonl"

# Prepare data
triples = load_triples(prompt_csv, meta_csv, image_folder)
print(f"Loaded {len(triples)} triples.")

# Load already processed IDs and **filter triples before sending to GPT**
done_ids = set()
if Path(out_file).exists():
    with open(out_file, "r", encoding="utf-8") as f:
        for line in f:
            try:
                rec = json.loads(line)
                done_ids.add(rec["image_name"])
            except (json.JSONDecodeError, KeyError, TypeError):
                # Skip blank, truncated or malformed lines (e.g. from an
                # interrupted write); anything else should surface normally.
                continue
print(f"Already done: {len(done_ids)}")

# Filter out already processed triples
triples_to_process = [t for t in triples if t[0] not in done_ids]
print(f"Remaining images to process: {len(triples_to_process)}")

# Run evaluation (Colab-safe): nest_asyncio lets run_until_complete be called
# although the notebook's event loop is already running.
loop = asyncio.get_event_loop()
results = loop.run_until_complete(run_eval(triples_to_process, out_file))
print("Evaluation completed.")


Loaded 200 triples.
Already done: 20
Remaining images to process: 180
Saved: 20
[1/180] Processed image: 20 | Total saved: 1
Saved: 21
[2/180] Processed image: 21 | Total saved: 2
Saved: 22
[3/180] Processed image: 22 | Total saved: 3
Saved: 23
[4/180] Processed image: 23 | Total saved: 4
Saved: 24
[5/180] Processed image: 24 | Total saved: 5
Saved: 25
[6/180] Processed image: 25 | Total saved: 6
Saved: 26
[7/180] Processed image: 26 | Total saved: 7
Saved: 27
[8/180] Processed image: 27 | Total saved: 8
Saved: 28
[9/180] Processed image: 28 | Total saved: 9
Saved: 29
[10/180] Processed image: 29 | Total saved: 10
Saved: 30
[11/180] Processed image: 30 | Total saved: 11
