Load images with Pillow

In [2]:
import os
from PIL import Image

# Read the test-split identifiers: one image id (file stem) per line.
with open(os.path.join(os.pardir, '2_test.txt'), 'r') as file:
  test_items = file.read().splitlines()

# Decoded PIL images, in the same order as `test_items`.
# NOTE: files that fail to load are skipped, so `images` can be shorter than
# `test_items` and positions may no longer line up one-to-one.
images = []

for item in test_items:
  image_path = os.path.join(os.pardir,'dataset', 'ArtDL', 'JPEGImages', f"{item}.jpg")
  try:
    # Image.open() is lazy: it only reads the header and keeps the file
    # handle open until the pixel data is needed. Decode right away inside
    # a `with` block so the handle is released immediately instead of
    # holding one open descriptor per image, and so corrupt files are
    # reported here rather than in a later cell.
    with Image.open(image_path) as image:
      image.load()
    images.append(image)
  except Exception as e:
    print(f"Error loading image {image_path}: {e}")

print(f"Loaded {len(images)} images")


Loaded 1864 images


Test CLIP with these models:

* openai/clip-vit-base-patch32
* openai/clip-vit-base-patch16
* openai/clip-vit-large-patch14


Process the images and compute, for each image, its probability for every class.
Use small batches (16 images each).

In [3]:
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
from tqdm import tqdm
import pandas as pd
import torch
import os

# Environment switches set before the model/tokenizer are created.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["PYDEVD_DISABLE_FILE_VALIDATION"] = "true"

# Pick exactly one checkpoint; the name is also used as the output folder.
model_name = "clip-vit-base-patch32"
#model_name = "clip-vit-base-patch16"
#model_name = "clip-vit-large-patch14"

print(f"Number of images: {len(images)}")

# Load the model and processor
processor = AutoProcessor.from_pretrained(f'openai/{model_name}')
model = AutoModelForZeroShotImageClassification.from_pretrained(f'openai/{model_name}')

# Load classes: each non-empty line is split on commas into a tuple whose
# second field (index 1) is used as the text prompt for that class.
classes = []
with open(os.path.join(os.pardir, 'classes_desc.txt'), 'r') as f:
  for line_number, line in enumerate(f, start=1):
    line = line.strip()
    if not line:
      # A blank (e.g. trailing) line would otherwise become a 1-tuple and
      # break the prompt lookup later on.
      continue
    fields = tuple(line.split(','))
    if len(fields) < 2:
      # Fail here with a precise message instead of an IndexError that
      # surfaces much later, inside the batch loop.
      raise ValueError(
        f"classes_desc.txt line {line_number}: expected at least two "
        f"comma-separated fields, got {line!r}"
      )
    classes.append(fields)

# Break images into smaller batches
batch_size = 16
images_batches = [images[i:i + batch_size] for i in range(0, len(images), batch_size)]

# The text prompts are the same for every batch, so build the list once.
class_prompts = [cls[1] for cls in classes]

all_probs = []
with tqdm(total=len(images), desc="Processing Images", unit="image") as pbar:
    for batch_index, batch in enumerate(images_batches):
        try:
            # Process the batch
            inputs = processor(text=class_prompts, images=batch, return_tensors="pt", padding=True)
            # Inference only: skip building the autograd graph.
            with torch.no_grad():
                outputs = model(**inputs)

            # Get probabilities for the batch (softmax over the class axis)
            logits_per_image = outputs.logits_per_image
            batch_probs = logits_per_image.softmax(dim=1)
        except Exception as e:
            print(f"Error processing batch {batch_index + 1}: {e}")
            # Keep one row per image even when a batch fails; dropping the
            # batch would shift every later row out of alignment with
            # `images`. NaN marks the rows that have no valid prediction.
            batch_probs = torch.full((len(batch), len(class_prompts)), float("nan"))

        all_probs.append(batch_probs.detach())
        pbar.update(len(batch))

# Get one tensor with all the probabilities (rows follow the order of `images`)
if all_probs:
    all_probs = torch.cat(all_probs, dim=0)
else:
    # No images at all: torch.cat() rejects an empty list.
    all_probs = torch.empty((0, len(class_prompts)))
print(f"Probabilities shape: {all_probs.shape}")

# Keep a DataFrame copy of the probabilities and persist the tensor itself
# under ./<model_name>/probs.pt (no CSV file is written here).
all_probs_df = pd.DataFrame(all_probs.numpy())
output_dir = os.path.join(os.curdir, model_name)
# torch.save() does not create missing parent directories.
os.makedirs(output_dir, exist_ok=True)
torch.save(all_probs, os.path.join(output_dir, 'probs.pt'))

  from .autonotebook import tqdm as notebook_tqdm


Number of images: 1864


Processing Images: 100%|██████████| 1864/1864 [00:36<00:00, 50.61image/s]

Probabilities shape: torch.Size([1864, 10])



