In [1]:
!pip install git+https://github.com/facebookresearch/ImageBind.git
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/facebookresearch/ImageBind.git
  Cloning https://github.com/facebookresearch/ImageBind.git to /tmp/pip-req-build-igppdcgb
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/ImageBind.git /tmp/pip-req-build-igppdcgb
  Resolved https://github.com/facebookresearch/ImageBind.git to commit 3fcf5c9039de97f6ff5528ee4a9dce903c5979b3
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting pytorchvideo@ git+https://github.com/facebookresearch/pytorchvideo.git@28fe037d212663c6a24f373b94cc5d478c8c1a1d (from imagebind==0.1.0)
  Cloning https://github.com/facebookresearch/pytorchvideo.git (to revision 28fe037d212663c6a24f373b94cc5d478c8c1a1d) to /tmp/pip-install-h99zb212/pytorchvideo_74a3d0ef3fdf45af84c6e0dcdab10e8c
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/pytorchvideo.git /tmp/pip-install-h99zb212/pytorchvideo_74a3d0ef3fdf45af84c6e0dcdab10e8c
  Running command git rev-

In [3]:
import torch
import numpy as np
from PIL import Image
import pandas as pd
from torchvision import transforms
import os

# Import ImageBind modules
from imagebind.models import imagebind_model
from imagebind.models.imagebind_model import ModalityType

# Load the DataFrame that lists the images to embed
df = pd.read_csv("/output/6_cleaned_use_for_CRL.csv")

# Run on the GPU when one is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the pretrained ImageBind model, switch it to eval mode and move it to the device
model = imagebind_model.imagebind_huge(pretrained=True)
model.eval()
model.to(device)

# Image preprocessing: produces a normalized 3x224x224 tensor per image.
transform = transforms.Compose([
    # Resize the shorter side to 224 px. Bicubic interpolation is used (instead of
    # torchvision's bilinear default) to match ImageBind's reference vision loader.
    transforms.Resize(224, interpolation=transforms.InterpolationMode.BICUBIC),
    # Crop the central 224x224 region so every image has the same spatial size
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # Per-channel mean/std normalization
    transforms.Normalize(
        mean=[0.48145466, 0.4578275, 0.40821073],
        std=[0.26862954, 0.26130258, 0.27577711]
    )
])

# Function to extract embeddings in batches
def extract_embeddings_batch(image_paths, batch_size=32):
    """Extract ImageBind vision embeddings for many images, batch by batch.

    Images that do not exist on disk or that fail to load/preprocess are
    reported with a printed message and skipped; they do not abort the run.

    Args:
        image_paths: Sequence of image file paths (must support ``len`` and slicing).
        batch_size: Maximum number of images sent through the model at once.

    Returns:
        A tuple ``(valid_embeddings, valid_indices, index_mapping)`` where
        ``valid_embeddings`` is a float32 array with one 1024-wide row per
        successfully processed image, ``valid_indices`` is the sorted list of
        positions in ``image_paths`` those rows come from, and ``index_mapping``
        maps each original position to its row in ``valid_embeddings``.

    Raises:
        ValueError: If ``batch_size`` is not a positive number.
    """
    # A negative step would make range() empty and silently return no embeddings.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    total_images = len(image_paths)
    embedding_dim = 1024  # Width of the embedding rows stored below
    # Pre-allocate one row per input; rows of skipped images are dropped at the end
    all_embeddings = np.zeros((total_images, embedding_dim), dtype=np.float32)
    valid_indices = []  # Original positions of images that were embedded
    total_batches = (total_images - 1) // batch_size + 1

    for batch_number, start in enumerate(range(0, total_images, batch_size), start=1):
        batch_tensors = []
        batch_indices = []  # Original positions of the valid images in this batch

        for offset, path in enumerate(image_paths[start:start + batch_size]):
            try:
                if not os.path.exists(path):
                    print(f"Warning: File not found: {path}")
                    continue

                # The context manager closes the underlying file handle as soon as
                # the pixels have been converted, instead of leaving it to the GC.
                with Image.open(path) as image:
                    image_tensor = transform(image.convert('RGB')).unsqueeze(0)
            except Exception as e:
                print(f"Error processing {path}: {e}")
                continue

            batch_tensors.append(image_tensor)
            batch_indices.append(start + offset)

        # Nothing usable in this batch: skip the forward pass entirely
        if not batch_tensors:
            continue

        # Concatenate the per-image (1, C, H, W) tensors into one batch on the device
        batch = torch.cat(batch_tensors, dim=0).to(device)

        # Forward pass without gradient tracking
        with torch.no_grad():
            embeddings = model({ModalityType.VISION: batch})

        batch_embeddings = embeddings[ModalityType.VISION].cpu().numpy()

        # Write each embedding into the row matching its original position
        all_embeddings[batch_indices] = batch_embeddings
        valid_indices.extend(batch_indices)

        print(f"Processed batch {batch_number}/{total_batches} ({len(batch_indices)} images)")

    # Keep only the rows that belong to successfully processed images
    valid_indices = sorted(valid_indices)
    valid_embeddings = all_embeddings[valid_indices]

    # Map original positions to row numbers in the compacted matrix
    index_mapping = {orig_idx: new_idx for new_idx, orig_idx in enumerate(valid_indices)}

    return valid_embeddings, valid_indices, index_mapping

# Build the full path of every image listed in the DataFrame
image_paths = ["/athlete/" + relative_path for relative_path in df['image_path']]

print(f"Total images to process: {len(image_paths)}")

# Extract embeddings
embeddings_matrix, valid_indices, index_mapping = extract_embeddings_batch(image_paths, batch_size=32)

# Record which images were successfully processed. Rows of embeddings_matrix
# follow the order of valid_indices, not the original DataFrame order, whenever
# any image was skipped.
valid_images = [image_paths[i] for i in valid_indices]
valid_relative_paths = df['image_path'].iloc[valid_indices].tolist()

# Save the embeddings matrix (n×h format)
output_path = "dataset/athlete_3m/X1_imagebind_embeddings.npy"
# Create the target directory first so the save cannot fail after the long extraction
os.makedirs(os.path.dirname(output_path), exist_ok=True)
np.save(output_path, embeddings_matrix)
print("X1_imagebind_embeddings shape:", embeddings_matrix.shape)


Using device: cuda
Downloading imagebind weights to .checkpoints/imagebind_huge.pth ...


100%|██████████| 4.47G/4.47G [03:24<00:00, 23.5MB/s]


Total images to process: 5778
Processed batch 1/181 (32 images)
Processed batch 2/181 (32 images)
Processed batch 3/181 (32 images)
Processed batch 4/181 (32 images)
Processed batch 5/181 (32 images)
Processed batch 6/181 (32 images)
Processed batch 7/181 (32 images)
Processed batch 8/181 (32 images)
Processed batch 9/181 (32 images)
Processed batch 10/181 (32 images)
Processed batch 11/181 (32 images)
Processed batch 12/181 (32 images)
Processed batch 13/181 (32 images)
Processed batch 14/181 (32 images)
Processed batch 15/181 (32 images)
Processed batch 16/181 (32 images)
Processed batch 17/181 (32 images)
Processed batch 18/181 (32 images)
Processed batch 19/181 (32 images)
Processed batch 20/181 (32 images)
Processed batch 21/181 (32 images)
Processed batch 22/181 (32 images)
Processed batch 23/181 (32 images)
Processed batch 24/181 (32 images)
Processed batch 25/181 (32 images)
Processed batch 26/181 (32 images)
Processed batch 27/181 (32 images)
Processed batch 28/181 (32 images)