## Introduction

In this notebook, we generate image embeddings with an EfficientViT model whose weights were pretrained on the ImageNet-1K dataset.

In [1]:
import numpy as np
import pandas as pd
import torch
import timm
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from tqdm import tqdm
from pathlib import Path

# Use the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

## Loading the data

In [2]:
# Root of the competition data, with one image folder per split.
path = Path('/kaggle/input/planttraits2024/')
imgs_train, imgs_test = path / 'train_images', path / 'test_images'

# Each image file is named after the row's `id`, so the path is built from it.
train = pd.read_csv(path / 'train.csv')
train['img_path'] = train['id'].map(lambda img_id: imgs_train / f'{img_id}.jpeg')

test = pd.read_csv(path / 'test.csv')
test['img_path'] = test['id'].map(lambda img_id: imgs_test / f'{img_id}.jpeg')

## Load model and transforms

In [3]:
# Build the pretrained backbone. num_classes=0 requests the model without a
# classification head, so its output is used as the embedding.
model = timm.create_model('efficientvit_b0.r224_in1k', pretrained=True, num_classes=0)
model = model.to(device).eval()

# Derive the preprocessing pipeline from the configuration attached to the
# pretrained weights, so inputs are prepared the way the checkpoint expects.
data_config = timm.data.resolve_data_config(pretrained_cfg=model.pretrained_cfg)
transforms = timm.data.create_transform(**data_config)

model.safetensors:   0%|          | 0.00/13.7M [00:00<?, ?B/s]

## Image datasets

In [4]:
class CustomDataset(Dataset):
    """Map-style dataset that loads images from a sequence of file paths.

    Args:
        img_paths: Indexable, sized collection of image file paths.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self, img_paths, transform=None):
        self.transform = transform
        self.img_paths = img_paths

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        n_images = len(self.img_paths)
        return n_images

    def __getitem__(self, idx):
        """Open the image at position ``idx`` as RGB and apply the transform, if any."""
        rgb_image = Image.open(self.img_paths[idx]).convert('RGB')
        # A falsy transform (e.g. None) means the raw PIL image is returned.
        return self.transform(rgb_image) if self.transform else rgb_image
    

# The datasets receive the image paths as NumPy arrays of plain strings.
train_paths = train['img_path'].astype(str).values
test_paths = test['img_path'].astype(str).values
train_dataset = CustomDataset(train_paths, transform=transforms)
test_dataset = CustomDataset(test_paths, transform=transforms)

# Create data loaders
# shuffle=False keeps the batches in dataset order, i.e. the row order of the
# `train` / `test` DataFrames the paths were taken from.
loader_kwargs = dict(batch_size=1024, shuffle=False)
train_loader = DataLoader(train_dataset, **loader_kwargs)
test_loader = DataLoader(test_dataset, **loader_kwargs)

## Generate embeddings

In [5]:
# Embed the training images batch by batch. Gradient tracking is switched off
# for the forward passes, and each batch of outputs is copied to the CPU as a
# NumPy array before being collected.
with torch.no_grad():
    train_embeddings = [
        model(batch.to(device)).cpu().numpy()
        for batch in tqdm(train_loader)
    ]

# Stack the per-batch arrays along the first axis into a single array.
train_embeddings = np.concatenate(train_embeddings, axis=0)

100%|██████████| 55/55 [12:12<00:00, 13.32s/it]


In [6]:
# Embed the test images batch by batch. Gradient tracking is switched off for
# the forward passes, and each batch of outputs is copied to the CPU as a
# NumPy array before being collected.
with torch.no_grad():
    test_embeddings = [
        model(batch.to(device)).cpu().numpy()
        for batch in tqdm(test_loader)
    ]

# Stack the per-batch arrays along the first axis into a single array.
test_embeddings = np.concatenate(test_embeddings, axis=0)

100%|██████████| 7/7 [01:21<00:00, 11.59s/it]


In [7]:
# Name the embedding columns emb_0, emb_1, ... and key the rows by `id`.
# NOTE: this rebinds `train_embeddings` from the array to a DataFrame, so the
# cell is not idempotent; re-run the embedding cell before re-running it.
train_emb_cols = [f'emb_{i}' for i in range(train_embeddings.shape[1])]
train_embeddings = pd.DataFrame(train_embeddings, index=train['id'], columns=train_emb_cols)

# Turn the `id` index into a regular column so it is written with index=False.
train_embeddings = train_embeddings.reset_index()
train_embeddings.to_parquet('train_embeddings.parquet', index=False)

In [8]:
# Name the embedding columns emb_0, emb_1, ... and key the rows by `id`.
# NOTE: this rebinds `test_embeddings` from the array to a DataFrame, so the
# cell is not idempotent; re-run the embedding cell before re-running it.
test_emb_cols = [f'emb_{i}' for i in range(test_embeddings.shape[1])]
test_embeddings = pd.DataFrame(test_embeddings, index=test['id'], columns=test_emb_cols)

# Turn the `id` index into a regular column so it is written with index=False.
test_embeddings = test_embeddings.reset_index()
test_embeddings.to_parquet('test_embeddings.parquet', index=False)