In [None]:
pip install transformers timm fairscale

In [None]:
import sys
sys.path.append('/shared/home/mis6559/neurobio240/BLIP')

In [None]:
from BLIP.models.blip import blip_decoder

In [None]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch
from torchvision import transforms
from models.blip import blip_decoder
from nsd_access import NSDAccess

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
image_size = 240   # square side length handed to blip_decoder
n_images = 73000   # exclusive upper bound of the image indices to process

# Checkpoint to load into the BLIP model.
model_url = "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_capfilt_large.pth"
# Root directory passed to NSDAccess.
nsd_path = "/shared/home/mis6559/neurobio240/nsd/"
# Directory that receives the per-image feature files.
outdir = "/shared/home/mis6559/neurobio240/nsdfeat/blip/"

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# ---------------------------------------------------------------------------
# BLIP model
# ---------------------------------------------------------------------------
print("Loading BLIP model...")
model = blip_decoder(
    pretrained=model_url,
    image_size=image_size,
    vit="base",
    med_config="/shared/home/mis6559/neurobio240/BLIP/configs/med_config.json",
)
# Place the weights on the chosen device and switch to inference mode.
model = model.to(device)
model.eval()

# ---------------------------------------------------------------------------
# NSD image access
# ---------------------------------------------------------------------------
print("Loading NSD image dataset...")
nsda = NSDAccess(nsd_path)

# ---------------------------------------------------------------------------
# Output location (created on demand; an existing directory is fine)
# ---------------------------------------------------------------------------
os.makedirs(outdir, exist_ok=True)

# Process and save
#
# For each image index in [0, n_images): read the image through NSDAccess,
# resize it to image_size x image_size, run it through the BLIP visual
# encoder and store the result as "<outdir>/<idx:06>.npy". An index whose
# output file already exists is skipped, so the loop can be re-run to resume
# an interrupted extraction. Any per-image failure is reported and the loop
# moves on to the next index.
print("Extracting BLIP visual features...")

# Build the tensor conversion once rather than on every iteration.
# The Normalize statistics are the channel mean/std used by BLIP's reference
# image preprocessing; without them the encoder receives raw [0, 1] pixel
# values instead of the standardised inputs it was trained on.
# NOTE(review): feature files produced before this normalisation was added
# are not comparable with new ones -- clear outdir before re-running.
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        (0.48145466, 0.4578275, 0.40821073),
        (0.26862954, 0.26130258, 0.27577711),
    ),
])

for idx in tqdm(range(n_images)):
    out_path = os.path.join(outdir, f"{idx:06}.npy")
    if os.path.exists(out_path):
        continue

    # Features are written here first and renamed into place only after the
    # write has completed, so a crash or kill mid-write can never leave a
    # truncated file at out_path that the existence check above would skip.
    tmp_path = out_path + ".tmp"

    try:
        img_arr = nsda.read_images(idx)
        image = Image.fromarray(img_arr).convert("RGB")
        image = image.resize((image_size, image_size), resample=Image.LANCZOS)
        # unsqueeze(0) adds the batch axis the encoder is called with.
        img_tensor = preprocess(image).unsqueeze(0).to(device)

        with torch.no_grad():
            features = model.visual_encoder(img_tensor)
        # Drop only the batch axis added above; a bare squeeze() would also
        # collapse any other axis that happens to have length 1.
        features = features.squeeze(0).cpu().numpy()

        # Saving through a file object keeps np.save from appending ".npy"
        # to the temporary name.
        with open(tmp_path, "wb") as fh:
            np.save(fh, features)
        os.replace(tmp_path, out_path)

    except Exception as e:
        # Best-effort extraction: report the failure and continue.
        print(f"Skipping image {idx}: {e}")
        # Remove a partially written temporary file, if one was created.
        try:
            os.remove(tmp_path)
        except OSError:
            pass