In [None]:
import pandas as pd

In [None]:
# Kaggle input directory; presumably holds the curated ABO images (TODO confirm).
# Not referenced again in the visible cells: the inference cell defines its
# own image_folder instead.
curated_path = "/kaggle/input/curated-data-abo/finl/"

In [None]:
pip install transformers torch pandas scikit-learn nltk

In [None]:
import nltk
# Download the NLTK resource named 'punkt'. No cell visible in this notebook
# calls into nltk afterwards, so this may be a leftover (TODO confirm).
nltk.download('punkt')

In [None]:
import pandas as pd
import torch
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
import re
import os
from tqdm import tqdm

# ========== Load BLIP-2 Model ==========
# Half precision is only used when a GPU is available; on CPU the weights
# stay in float32.
if torch.cuda.is_available():
    device = "cuda"
    model_dtype = torch.float16
else:
    device = "cpu"
    model_dtype = torch.float32

checkpoint = "Salesforce/blip2-opt-2.7b"
processor = Blip2Processor.from_pretrained(checkpoint, use_fast=True)
model = Blip2ForConditionalGeneration.from_pretrained(checkpoint, torch_dtype=model_dtype)
model.to(device)

In [None]:
import pandas as pd
# CSV of VQA examples. The inference cell reads its 'image_path', 'question'
# and 'answer' columns.
vqapath = "/kaggle/input/vqa23left/vqa23.csv"
df = pd.read_csv(vqapath)

In [None]:
# Quick sanity check of the load: prints column names, dtypes and non-null counts.
df.info()

In [None]:
# NOTE(review): BATCH_SIZE and SAVE_PATH are assigned again in the inference
# cell further down, so the values here only affect the resume check that
# follows. SAVE_EVERY is not referenced in any visible cell.
BATCH_SIZE = 32
SAVE_EVERY = 32  # Save every N examples
SAVE_PATH = "qa_predictions_with_eval.csv"


In [None]:
# Pick up from an earlier run when its CSV is present; otherwise start with an
# empty frame holding the input columns plus the three result columns.
if not os.path.exists(SAVE_PATH):
    start_idx = 0
    result_columns = df.columns.tolist() + ['predicted_answer', 'exact_match', 'f1_score']
    result_df = pd.DataFrame(columns=result_columns)
else:
    result_df = pd.read_csv(SAVE_PATH)
    start_idx = len(result_df)
    print(f"🔁 Resuming from index {start_idx}")

In [None]:
def normalize_number_words(text):
    """Replace the spelled-out numbers "zero" through "ten" with digits.

    The input is lower-cased and split on whitespace; each token that is
    exactly one of the eleven number words is swapped for its digit string,
    and the tokens are re-joined with single spaces. Tokens with attached
    punctuation (e.g. "two.") are not exact matches and pass through unchanged.

    Args:
        text: String to convert.

    Returns:
        The lower-cased, single-space-joined string with number words replaced.
    """
    number_words = ("zero", "one", "two", "three", "four", "five",
                    "six", "seven", "eight", "nine", "ten")
    # Position in the tuple is the numeric value of the word.
    digit_for = {word: str(value) for value, word in enumerate(number_words)}
    return " ".join(digit_for.get(token, token) for token in text.lower().split())

def normalize_text(text):
    """Canonicalise an answer string before exact-match / F1 scoring.

    Steps, in order:
      1. lower-case the text;
      2. drop every character that is not a-z, 0-9 or whitespace;
      3. map number words to digits via ``normalize_number_words``.

    Punctuation is removed *before* the number-word mapping so that "Two."
    and "2" normalise to the same string. The final step splits on whitespace
    and re-joins with single spaces, so gaps left by removed punctuation
    ("a - b") collapse and the result has no leading or trailing whitespace.

    Args:
        text: Raw answer string.

    Returns:
        The normalised string.
    """
    text = text.lower()
    text = re.sub(r"[^a-z0-9\s]", "", text)
    return normalize_number_words(text)

def compute_f1(pred, truth):
    """Token-level F1 between a predicted and a reference answer.

    Both strings are split on whitespace. Overlap is counted as a multiset
    intersection, so a token repeated in both answers counts once per
    matching occurrence and identical answers always score 1.0.

    Args:
        pred: Normalised predicted answer.
        truth: Normalised ground-truth answer.

    Returns:
        F1 in [0.0, 1.0]. Two empty answers score 1.0; if only one side is
        empty, or no token is shared, the score is 0.0.
    """
    # Local import keeps this cell runnable without touching the import cell.
    from collections import Counter

    pred_tokens = pred.split()
    truth_tokens = truth.split()
    if not pred_tokens or not truth_tokens:
        # Avoids dividing by zero and treats "both empty" as agreement.
        return float(pred_tokens == truth_tokens)

    # Counter & Counter keeps the minimum count of each shared token.
    overlap = sum((Counter(pred_tokens) & Counter(truth_tokens)).values())
    if overlap == 0:
        return 0.0
    precision = overlap / len(pred_tokens)
    recall = overlap / len(truth_tokens)
    return 2 * (precision * recall) / (precision + recall)

In [None]:
import os
import re
from PIL import Image
import pandas as pd
from tqdm import tqdm

# Config
image_folder = "/kaggle/input/curated-data-abo/finl"
# Number of rows processed between checkpoint saves. Inference below handles
# one image at a time, so this does not change GPU memory use.
BATCH_SIZE = 128
SAVE_PATH = "result1.csv"
# Every run starts at the first row and rewrites SAVE_PATH from scratch.
start_idx = 0

# Matches a "Question: ... Answer:" span echoed back in the decoded output.
_PROMPT_ECHO_RE = re.compile(r"(?i)question:.*?answer: ?")


def _extract_short_answer(full_output):
    """Reduce decoded model output to a single-word answer.

    Removes any echoed "Question: ... Answer:" span, keeps the text before the
    first comma or period, and returns its first whitespace-separated word.
    Returns "unknown" when nothing is left.
    """
    answer_only = _PROMPT_ECHO_RE.sub("", full_output).strip()
    answer_only = answer_only.split(",")[0].split(".")[0]
    words = answer_only.split()
    return words[0] if words else "unknown"


def _answer_question(image_path, question):
    """Run the model on one image/question pair and return the short answer."""
    # The context manager releases the file handle once the pixels are
    # decoded; convert() returns an in-memory image that outlives it.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    prompt = f"Question: {question} Answer:"
    # Cast floating-point inputs to the model's dtype (float16 on GPU) so the
    # pixel values match the half-precision weights.
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(device, model.dtype)
    output = model.generate(**inputs, max_new_tokens=20, num_beams=5, early_stopping=True)
    full_output = processor.decode(output[0], skip_special_tokens=True).strip()
    return _extract_short_answer(full_output)


# Initialize results
all_results = []

for batch_start in tqdm(range(start_idx, len(df), BATCH_SIZE)):
    batch_end = min(batch_start + BATCH_SIZE, len(df))
    batch = df.iloc[batch_start:batch_end]

    for idx, row in batch.iterrows():
        image_path = os.path.join(image_folder, row['image_path'])
        question = str(row['question'])
        gt_answer = str(row['answer'])

        if not os.path.exists(image_path):
            pred_answer = "image_not_found"
            em = 0
            f1 = 0.0
        else:
            try:
                pred_answer = _answer_question(image_path, question)

                # Normalize and score
                pred_norm = normalize_text(pred_answer)
                gt_norm = normalize_text(gt_answer)
                em = int(pred_norm == gt_norm)
                f1 = compute_f1(pred_norm, gt_norm)

            except Exception as e:
                # Best effort: record the failure for this row and keep going
                # so one bad image does not abort the whole run.
                print(f"❌ Error at index {idx}: {e}")
                pred_answer = "error"
                em = 0
                f1 = 0.0

        all_results.append({
            "image_path": row['image_path'],
            "question": question,
            "answer": gt_answer,
            "predicted_answer": pred_answer,
            "exact_match": em,
            "f1_score": f1
        })

    # Save to disk every batch (rewrites the full file with all rows so far)
    pd.DataFrame(all_results).to_csv(SAVE_PATH, index=False)
    print(f"💾 Saved up to index {batch_end}")
