### Installing and importing the necessary libraries

In [None]:
!pip install transformers accelerate git+https://github.com/salesforce/BLIP.git -q
!pip install torch torchvision -q


In [None]:
from transformers import BlipProcessor, BlipForQuestionAnswering
from PIL import Image
import torch

### Loading the BLIP VQA model

In [None]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# BLIP base VQA checkpoint: processor first.
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")

# Re-create the tokenizer from the same class and checkpoint path,
# explicitly requesting the fast implementation.
tokenizer_cls = type(processor.tokenizer)
processor.tokenizer = tokenizer_cls.from_pretrained(
    processor.tokenizer.name_or_path,
    use_fast=True,
)

# Then the model weights, moved onto the selected device.
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
model = model.to(device)

In [None]:

# Smoke test: ask the model one question about one image.
# NOTE(review): this path lives inside the directory created by the zip
# extraction step, so the archive must already be extracted when this runs.
image_path = "/content/abo-images-small/abo-images-small/00/00000529.jpg"
question = "How many wheels does the object have? Answer in a ONE WORD ONLY"

# Use a context manager so the underlying file handle is closed as soon as the
# pixel data has been converted.
with Image.open(image_path) as img:
    image = img.convert("RGB")

# Only move the inputs to the device. The model is loaded in its default
# precision, so casting the inputs to float16 here would make the image tensor's
# dtype disagree with the model weights and fail inside the vision encoder.
inputs = processor(images=image, text=question, return_tensors="pt").to(device)

out = model.generate(**inputs)
answer = processor.decode(out[0], skip_special_tokens=True)

print(f"Q: {question}\nA: {answer}")

### Loading the Data

In [None]:
import zipfile
import os

def extract_zip(zip_file_path, extract_to_path='.'):
    """Unpack every member of a zip archive into a directory.

    Failures are reported with a printed message instead of being raised, so
    the function always returns ``None``.

    Args:
        zip_file_path: Path of the ``.zip`` archive to read.
        extract_to_path: Directory that receives the archive contents
            (defaults to the current directory).
    """
    try:
        with zipfile.ZipFile(zip_file_path, mode='r') as archive:
            archive.extractall(path=extract_to_path)
        print(f"Successfully extracted '{zip_file_path}' to '{extract_to_path}'")
    except zipfile.BadZipFile:
        # The file exists but is not a readable zip archive.
        print(f"Error: '{zip_file_path}' is not a valid zip file.")
    except FileNotFoundError:
        # No file at the given archive path.
        print(f"Error: Zip file not found at '{zip_file_path}'")
    except Exception as err:
        # Anything else (permissions, disk errors, ...) is reported as-is.
        print(f"An error occurred during extraction: {err}")

# Where the image archive lives and where its contents should be unpacked.
# Adjust both paths to match your environment.
extraction_directory = '/content/abo-images-small'
zip_file = '/content/abo-images-small.zip'

# Make sure the target directory exists, then unpack the archive into it.
os.makedirs(extraction_directory, exist_ok=True)
extract_zip(zip_file, extraction_directory)

In [None]:
import pandas as pd
import re

# Read the source table of image/question rows.
df = pd.read_csv("/content/proper_result_2_final.csv")

# Prefix every question with the one-word instruction, trimming surrounding
# whitespace from the original text first.
df['question'] = df['question'].map(
    lambda text: f"Answer in one word: {text.strip()}"
)

# Write the re-prompted table to a new CSV (no index column).
df.to_csv("test_1_oneword.csv", index=False)
print("Updated CSV saved as test_1_oneword.csv with 'Answer in one word' prompt.")


In [None]:
# Display (n_rows, n_columns) of the current `df` as a quick sanity check.
df.shape

### Running the BLIP model on each image–question pair and generating results

In [None]:
from tqdm import tqdm
import os
import pandas as pd

# Load the prompt-augmented dataset.
df = pd.read_csv("test_1_oneword.csv")

# Row range to process in this run.
start_idx = 25002
end_idx = 45002  # exclusive: rows start_idx .. end_idx - 1 are processed

# Take an explicit copy of the slice so that adding the answer column below
# writes to an independent frame rather than to a slice of `df`
# (this is what triggers pandas' SettingWithCopyWarning otherwise).
subset_df = df.iloc[start_idx:end_idx].copy()

# One entry per row of `subset_df`, in order: the cleaned answer, the marker
# "image_not_found", or an "error: ..." string.
results = []

for idx, row in tqdm(subset_df.iterrows(), total=len(subset_df), desc="Running BLIP on selected rows"):
    image_path = os.path.join("/content/abo-images-small/abo-images-small", row['path'])  # change if needed
    if not os.path.exists(image_path):
        results.append("image_not_found")
        continue

    try:
        # Close each image file as soon as it has been decoded; the loop
        # covers thousands of files, so handles must not be left to the GC.
        with Image.open(image_path) as img:
            image = img.convert("RGB")
        question = row["question"]
        inputs = processor(image, question, return_tensors="pt").to(device)
        out = model.generate(**inputs)
        decoded = processor.decode(out[0], skip_special_tokens=True)

        # Keep only the first word, stripped of punctuation such as '.' or ','.
        # An empty decode is recorded as an empty answer instead of surfacing
        # as an IndexError message in the results column.
        words = decoded.split()
        answer = re.sub(r"[^\w]", "", words[0]) if words else ""
    except Exception as e:
        # Best effort: record the failure for this row and keep going.
        answer = f"error: {str(e)}"

    results.append(answer)

# Attach the answers and save this range under a range-specific file name.
subset_df['blip_answer'] = results
subset_df.to_csv(f"blip_vqa_results_{start_idx}_{end_idx}.csv", index=False)



In [None]:
import pandas as pd
import re

# Load a saved BLIP results CSV for post-processing.
# NOTE(review): the file name refers to the 15002-25002 row range and carries
# a " (1)" suffix — confirm this is the results file you intend to clean.
df = pd.read_csv("/content/blip_vqa_results_15002_25002 (1).csv")  # change to your actual file if different

# Clean the blip_answer column
def clean_answer(ans):
    """Reduce a raw answer cell to its first word with punctuation removed.

    Args:
        ans: A cell value from the ``blip_answer`` column. It may be a string,
            a missing value (NaN/None), or a number when pandas parsed the
            cell as numeric.

    Returns:
        The first whitespace-separated token with every non-word character
        removed, or ``""`` when the cell is missing, empty, or whitespace-only.
    """
    if pd.isna(ans):
        return ""

    # A numeric-looking cell can come back as a float (e.g. 4.0); render whole
    # numbers without the decimal part so that "4.0" does not turn into "40"
    # once punctuation is stripped.
    if isinstance(ans, float) and ans.is_integer():
        ans = int(ans)

    # Coerce to text so non-string cells do not fail on the string methods.
    words = str(ans).split()
    if not words:
        return ""

    return re.sub(r"[^\w]", "", words[0])  # remove punctuation

# Normalise every answer cell with clean_answer.
df["blip_answer"] = df["blip_answer"].map(clean_answer)

# Persist the cleaned table (no index column) and confirm.
df.to_csv("blip_vqa_results_2_cleaned.csv", index=False)
print("✅ Cleaned file saved as blip_vqa_results_2_cleaned.csv")


In [None]:
# Display (n_rows, n_columns) of the current `df` as a quick sanity check.
df.shape