# Jaguar Re-Identification

## Score: .346

In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

Device: cuda


In [2]:
# =============================================================================
# CONFIG
# =============================================================================
class CFG:
    """Static settings shared by the data, model and inference cells."""

    # --- input locations (all derived from data_dir) ---
    data_dir = Path('jaguar-re-id')
    test_csv = data_dir.joinpath('test.csv')
    test_dir = data_dir.joinpath('test', 'test')

    # --- model / inference ---
    # Identifier handed to timm.create_model.
    backbone = 'hf-hub:BVRA/MegaDescriptor-L-384'
    # Side length images are resized and padded to.
    image_size = 384
    # Batch size of the inference DataLoader.
    batch_size = 8
    # When True, original and horizontally flipped embeddings are combined.
    use_tta = True

In [3]:
# =============================================================================
# DATA
# =============================================================================
test_df = pd.read_csv(CFG.test_csv)
# An image can appear as a query, as a gallery item, or both; collect each
# filename once and sort so the extraction order is deterministic.
unique_images = sorted(
    set(test_df['query_image']).union(test_df['gallery_image'])
)
print(f"Test pairs: {len(test_df)} | Unique images: {len(unique_images)}")

Test pairs: 137270 | Unique images: 371


In [4]:
# =============================================================================
# DATASET
# =============================================================================
def get_transforms(flip=False):
    """Build the albumentations pipeline used at inference time.

    The longest image side is resized to ``CFG.image_size``, the image is
    padded up to a ``CFG.image_size`` square, optionally mirrored, then
    normalised and converted to a tensor.

    Args:
        flip: When True, a horizontal flip is always applied (p=1.0); this is
            the second view used for test-time augmentation.

    Returns:
        An ``A.Compose`` object wrapping the transform steps.
    """
    side = CFG.image_size
    steps = [
        A.LongestMaxSize(max_size=side),
        A.PadIfNeeded(side, side, border_mode=0),
    ]
    if flip:
        steps += [A.HorizontalFlip(p=1.0)]
    steps += [
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
    return A.Compose(steps)

class TestDataset(Dataset):
    """Dataset yielding ``(transformed_image, filename)`` for each listed file.

    Args:
        image_files: Indexable collection of image filenames.
        img_dir: Directory the filenames are resolved against.
        transform: Callable invoked as ``transform(image=array)`` whose result
            is indexed with ``'image'``.
    """

    def __init__(self, image_files, img_dir, transform):
        self.transform = transform
        self.img_dir = Path(img_dir)
        self.image_files = image_files

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, transform it and return it with its name."""
        name = self.image_files[idx]
        pil_image = Image.open(self.img_dir / name)
        pixels = np.array(pil_image.convert('RGB'))
        transformed = self.transform(image=pixels)
        return transformed['image'], name

In [5]:
# =============================================================================
# MODEL
# =============================================================================
print(f"Loading {CFG.backbone}...")
# num_classes=0 is passed so the model output can be used directly as an embedding.
model = timm.create_model(CFG.backbone, pretrained=True, num_classes=0)
model = model.to(device)
model.eval()
print(f"Embedding: {model.num_features} | Params: {sum(weight.numel() for weight in model.parameters()):,}")

Loading hf-hub:BVRA/MegaDescriptor-L-384...
Embedding: 1536 | Params: 195,198,516


In [6]:
# =============================================================================
# INFERENCE
# =============================================================================
@torch.no_grad()
def extract_embeddings(transform):
    """Embed every image in ``unique_images`` with the global ``model``.

    Args:
        transform: Transform passed to ``TestDataset`` for preprocessing.

    Returns:
        dict mapping each filename to its L2-normalised embedding as a
        NumPy array.
    """
    dataset = TestDataset(unique_images, CFG.test_dir, transform)
    loader = DataLoader(
        dataset,
        batch_size=CFG.batch_size,
        shuffle=False,
        num_workers=0,
    )

    emb_dict = {}
    for images, fnames in tqdm(loader, desc='Extracting'):
        features = model(images.to(device))
        # Unit-length rows, so a dot product between two embeddings is a cosine.
        unit_features = F.normalize(features, p=2, dim=1)
        emb_dict.update(
            (name, vector.cpu().numpy())
            for name, vector in zip(fnames, unit_features)
        )
    return emb_dict

if not CFG.use_tta:
    embeddings = extract_embeddings(get_transforms(flip=False))
else:
    print("TTA: original + flip")
    emb_orig = extract_embeddings(get_transforms(flip=False))
    emb_flip = extract_embeddings(get_transforms(flip=True))
    # Add the two views per image and rescale the sum back to unit length.
    embeddings = {
        name: summed / np.linalg.norm(summed)
        for name, summed in (
            (f, emb_orig[f] + emb_flip[f]) for f in unique_images
        )
    }

TTA: original + flip


Extracting:   0%|          | 0/47 [00:00<?, ?it/s]

Extracting:   0%|          | 0/47 [00:00<?, ?it/s]

In [7]:
# =============================================================================
# SUBMISSION
# =============================================================================
print("Computing similarities...")
# All pairs are drawn from the same pool of embedded images, so build the full
# image-by-image cosine matrix once and look each pair up by index. This
# replaces a Python-level iterrows() loop with per-row dict lookups and dots.
image_names = list(embeddings)
row_of = {name: i for i, name in enumerate(image_names)}
if image_names:
    # float64 keeps the accumulated dot products as accurate as possible.
    emb_matrix = np.stack([embeddings[name] for name in image_names]).astype(np.float64)
else:
    # No embeddings (empty test set): keep the lookup below well-defined.
    emb_matrix = np.zeros((0, 0), dtype=np.float64)
cos_matrix = emb_matrix @ emb_matrix.T

# Series.map yields NaN for filenames that have no embedding.
query_rows = test_df['query_image'].map(row_of)
gallery_rows = test_df['gallery_image'].map(row_of)
missing = query_rows.isna() | gallery_rows.isna()
if missing.any():
    raise KeyError(f"{int(missing.sum())} test pairs reference images without embeddings")

cos = cos_matrix[
    query_rows.to_numpy(dtype=np.intp),
    gallery_rows.to_numpy(dtype=np.intp),
]
# Rescale cosine from [-1, 1] to [0, 1]; the clip only guards against
# floating-point overshoot at the ends of the range.
sims = np.clip((cos + 1) / 2, 0.0, 1.0)

submission = pd.DataFrame({'row_id': test_df['row_id'], 'similarity': sims})
submission.to_csv('submission.csv', index=False)
print(f"Saved | Mean: {submission['similarity'].mean():.4f} | Std: {submission['similarity'].std():.4f}")

Computing similarities...


  0%|          | 0/137270 [00:00<?, ?it/s]

Saved | Mean: 0.5826 | Std: 0.0638
