<a href="https://colab.research.google.com/github/PragyaKumari2203/Image-Captioning-Model/blob/main/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torchvision import models, transforms
from PIL import Image
import os

In [2]:
# Colab-only step: mount Google Drive at /content/drive so that files stored
# there can be accessed through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:

def get_image_transforms():
    """Build the preprocessing pipeline applied to each image before the CNN.

    Steps, in order: resize to 456, center-crop to 380x380, convert to a
    tensor, then normalize each channel with the ImageNet mean and standard
    deviation.

    Returns:
        A ``transforms.Compose`` object wrapping the four steps above.
    """
    # Per-channel ImageNet statistics used for normalization.
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]

    steps = [
        transforms.Resize(456),
        transforms.CenterCrop(380),
        transforms.ToTensor(),
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]
    return transforms.Compose(steps)

def extract_features(image_dir, batch_size=32, device='cuda'):
    """Extract one EfficientNet-B4 feature vector per image in a directory.

    Every file in ``image_dir`` whose name ends in .png, .jpg or .jpeg
    (case-insensitive) is loaded, preprocessed with ``get_image_transforms()``
    and passed through the network in batches. Files that fail to load or
    preprocess are reported on stdout and left out of the result.

    Args:
        image_dir: Path of the directory containing the images.
        batch_size: Maximum number of images per forward pass.
        device: Torch device on which the model runs (e.g. 'cuda' or 'cpu').

    Returns:
        dict mapping each successfully processed filename to its feature
        tensor, moved to the CPU. Each value is a 1-D tensor (the batch
        output flattened per image).
    """
    model = models.efficientnet_b4(weights='DEFAULT')

    # Keep every child module except the last one, so the network outputs
    # features instead of class scores.
    model = torch.nn.Sequential(*list(model.children())[:-1])
    model = model.to(device).eval()

    transform = get_image_transforms()

    image_files = [f for f in os.listdir(image_dir)
                   if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

    features = {}

    for i in range(0, len(image_files), batch_size):
        batch_files = image_files[i:i+batch_size]
        # Names of the files that actually made it into the batch. Kept in
        # lockstep with batch_images so skipped files cannot shift the
        # filename -> feature pairing below.
        loaded_files = []
        batch_images = []

        for file in batch_files:
            try:
                # The context manager closes the file handle once the image
                # has been converted and transformed.
                with Image.open(os.path.join(image_dir, file)) as img:
                    tensor = transform(img.convert('RGB'))
            except Exception as e:
                print(f"Skipping {file}: {e}")
                continue
            batch_images.append(tensor)
            loaded_files.append(file)

        if not batch_images:
            continue

        batch_tensor = torch.stack(batch_images).to(device)

        with torch.no_grad():
            # Flatten everything after the batch dimension. Unlike squeeze(),
            # this keeps the batch dimension even for a batch of one image,
            # so iterating the result always yields one vector per image.
            batch_features = model(batch_tensor).flatten(start_dim=1)

        for file, feat in zip(loaded_files, batch_features):
            features[file] = feat.cpu()

    return features




# Location of the Flickr8k images on the mounted Google Drive.
IMAGE_DIR = "/content/drive/MyDrive/Image Captioning Model Dataset/Flickr8k_Dataset/Flicker8k_Dataset"

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Run feature extraction over the whole image directory.
features = extract_features(IMAGE_DIR, batch_size=64, device=DEVICE)


In [7]:
# Sanity check: report how many images were processed, then inspect the
# feature tensor stored for the first filename in the dictionary.
print(f"Number of images processed: {len(features)}")

first_filename = list(features)[0]
first_features = features[first_filename]

print(
    f"\nFeature details for {first_filename}:",
    f"Shape: {first_features.shape}",
    f"Data type: {first_features.dtype}",
    f"Values:\n{first_features[:]}",
    sep="\n",
)


Number of images processed: 8091

Feature details for 3361990489_92244a58ef.jpg:
Shape: torch.Size([1792])
Data type: torch.float32
Values:
tensor([-0.1170, -0.0843, -0.0572,  ...,  0.0172, -0.1116, -0.0325])
