In [11]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
import os
import numpy as np
from PIL import Image
from tqdm import tqdm  # Import tqdm for the progress bar

# Preprocessing pipeline applied to every image before it reaches the model:
# resize, crop the central 224x224 window, convert to a tensor, then normalise
# each channel with the given mean/std (the statistics torchvision documents for
# its ImageNet-pretrained models).
preprocess = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Define a custom dataset class
class ImageDataset(Dataset):
    """Dataset yielding ``(image, path)`` pairs for the image files in a directory.

    Only regular files whose extension appears in ``IMAGE_EXTENSIONS`` are
    indexed, so stray directory entries (sub-directories, ``Thumbs.db``,
    ``.DS_Store``, ...) are skipped instead of being handed to ``Image.open``.
    Paths are sorted by file name so the index -> file mapping is reproducible;
    ``os.listdir`` returns entries in arbitrary order.

    Args:
        image_dir: Directory to scan (non-recursively).
        transform: Optional callable applied to each RGB ``PIL.Image``.
    """

    # Lower-case file extensions that are treated as images.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff", ".webp")

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        candidates = (
            os.path.join(image_dir, fname) for fname in sorted(os.listdir(image_dir))
        )
        self.image_paths = [
            path
            for path in candidates
            if os.path.isfile(path) and path.lower().endswith(self.IMAGE_EXTENSIONS)
        ]

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, apply the transform, and return ``(image, path)``."""
        img_path = self.image_paths[idx]
        # Image.open is lazy; the context manager releases the file handle as
        # soon as the pixel data has been converted into a new RGB image.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, img_path

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Load the pre-trained MobileNetV2 and keep only its convolutional feature
# extractor (`.features`), dropping the classifier head.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; switch once the minimum supported torchvision version is confirmed.
model = models.mobilenet_v2(pretrained=True).features

# Place the weights on the chosen device and switch to evaluation mode
model = model.to(device)
model.eval()

# Function to generate embeddings for every image in a directory
def generate_embeddings(image_dir, batch_size=32):
    """Embed every image found in ``image_dir`` with the module-level ``model``.

    Images are loaded through ``ImageDataset`` with the ``preprocess`` pipeline,
    pushed through ``model`` in batches on ``device``, and reduced to one vector
    per image by averaging over the two spatial axes.

    Args:
        image_dir: Directory containing the images to embed.
        batch_size: Number of images per forward pass (default 32).

    Returns:
        tuple: ``(embeddings, image_paths)`` where ``embeddings`` is a NumPy
        array with one row per image and ``image_paths`` is the list of file
        paths in the same row order. When the directory yields no images the
        result is an empty ``(0, 0)`` float32 array and an empty list.
    """
    # Create the dataset and dataloader; shuffle stays off so that the rows of
    # the result line up with the order of the collected paths.
    dataset = ImageDataset(image_dir, transform=preprocess)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    embeddings = []
    image_paths = []

    # Extract embeddings
    with torch.no_grad():  # Disable gradient tracking for inference
        for imgs, paths in tqdm(dataloader, desc="Processing Images", unit="batch"):
            # Move images to the same device as the model
            imgs = imgs.to(device)

            # Pass the images through the feature extractor
            features = model(imgs)

            # Global average pooling over the spatial axes -> (batch_size, channels)
            pooled_features = features.mean([2, 3])

            # Copy each batch to the CPU immediately so results do not pile up
            # in GPU memory for the whole run.
            embeddings.append(pooled_features.cpu())
            image_paths.extend(paths)

    if not embeddings:
        # torch.cat rejects an empty list; return an explicit empty result instead.
        return np.empty((0, 0), dtype=np.float32), image_paths

    return torch.cat(embeddings, dim=0).numpy(), image_paths

# Function to save embeddings and their image paths to disk
def save_embeddings(embeddings, image_paths, filename="embeddings.npy",
                    paths_filename="image_paths.txt"):
    """Persist embeddings and the matching image paths.

    Args:
        embeddings: Array-like of embeddings, written with ``np.save``.
        image_paths: Iterable of path strings, one per embedding row.
        filename: Destination of the ``.npy`` file. ``np.save`` appends
            ``.npy`` if the name does not already end with it.
        paths_filename: Destination of the text file holding one path per
            line. Defaults to ``"image_paths.txt"``.
    """
    np.save(filename, embeddings)  # Save embeddings as a .npy file
    # Explicit UTF-8 so non-ASCII file names are written identically on every
    # platform instead of depending on the locale's default encoding.
    with open(paths_filename, "w", encoding="utf-8") as f:
        f.writelines(f"{path}\n" for path in image_paths)




Using device: cuda


In [12]:
# Example usage: embed every image in the folder, then persist the results
image_directory = 'BackgroundRemoved'  # Folder that holds the input images
embeddings, image_paths = generate_embeddings(image_dir=image_directory)

# Write the embeddings and the matching image paths to disk (default file names)
save_embeddings(embeddings=embeddings, image_paths=image_paths)

print("Embeddings saved to 'embeddings.npy' and image paths to 'image_paths.txt'")

Processing Images: 100%|██████████████████████████████████████████████████████████| 277/277 [01:02<00:00,  4.40batch/s]


Embeddings saved to 'embeddings.npy' and image paths to 'image_paths.txt'
