In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import timm
from torchvision import transforms as T
from tqdm import tqdm
import gc

class CFG:
    """Static settings for the inference run: device, file locations, model and batching."""

    # --- Runtime ---
    # Prefer the GPU when torch reports one, otherwise fall back to the CPU.
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    num_workers = 4

    # --- Input locations (everything below data_dir is resolved relative to it) ---
    data_dir = './'
    test_img_dir = os.path.join(data_dir, 'test')
    labels_csv_path = os.path.join(data_dir, 'labels.csv')
    sample_submission_path = os.path.join(data_dir, 'sample_submission.csv')
    # Saved NumPy array holding the final decision thresholds.
    thresholds_path = 'thresholds_final.npy'

    # --- Model ---
    model_name = 'tf_efficientnet_b4_ns'
    # Side length (pixels) used for both the resize and the center crop.
    img_size = 384
    # One checkpoint per fold; their predictions are averaged at inference time.
    model_paths = [
        'models/tf_efficientnet_b4_ns_fold0_best.pth',
        'models/tf_efficientnet_b4_ns_fold1_best.pth',
        'models/tf_efficientnet_b4_ns_fold2_best.pth'
    ]

    # --- Inference ---
    # Deliberately small batch size, chosen as a safe value.
    batch_size = 8

# Read the label table and attach the lookup structures to CFG.
labels_df = pd.read_csv(CFG.labels_csv_path)
CFG.num_classes = len(labels_df)
CFG.attr_ids = labels_df['attribute_id'].values
# Row position <-> attribute id, in both directions.
CFG.idx_to_attr_id = dict(enumerate(CFG.attr_ids))
CFG.attr_id_to_idx = {attr_id: idx for idx, attr_id in CFG.idx_to_attr_id.items()}

# Release cached GPU memory and run a garbage-collection pass.
torch.cuda.empty_cache()
gc.collect()

0

In [2]:
def get_test_transforms():
    """Build the preprocessing pipeline applied to each test image.

    The pipeline resizes to CFG.img_size (aspect ratio preserved), center-crops
    to a CFG.img_size square, converts to a tensor and normalises with the
    ImageNet channel statistics. No test-time augmentation is applied.

    Side effect: prints a one-line banner every time it is called.

    Returns:
        A ``T.Compose`` wrapping the four steps above.
    """
    print("--- Applying CORRECTED validation transforms (Resize+CenterCrop, ImageNet Norm) ---")

    # ImageNet per-channel statistics.
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]
    side = CFG.img_size

    steps = [
        T.Resize(side),      # aspect ratio is preserved
        T.CenterCrop(side),
        T.ToTensor(),
        T.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]
    return T.Compose(steps)

class iMetTestDataset(Dataset):
    """Label-free dataset that yields one (optionally transformed) image per row.

    Args:
        df: table-like object supporting ``df['filepath'].values`` and ``len(df)``;
            each entry of the 'filepath' column is the path of one image.
        transforms: optional callable applied to the RGB PIL image before it is
            returned. When falsy, the PIL image itself is returned.
    """

    def __init__(self, df, transforms=None):
        self.df = df
        self.filepaths = df['filepath'].values
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows in the backing table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` as RGB and apply the transforms."""
        filepath = self.filepaths[idx]
        # Open inside a context manager so the underlying file handle is closed
        # deterministically instead of waiting for garbage collection.
        # convert() returns a new, fully loaded image, so it stays valid after
        # the source image is closed.
        with Image.open(filepath) as source:
            image = source.convert('RGB')

        if self.transforms:
            image = self.transforms(image)

        return image

class iMetModel(nn.Module):
    """Thin ``nn.Module`` wrapper around a timm network with CFG.num_classes outputs."""

    def __init__(self, model_name, pretrained=False):
        """Create the timm network named ``model_name``.

        Args:
            model_name: architecture name passed to ``timm.create_model``.
            pretrained: forwarded to timm; False builds the network without
                requesting pretrained weights.
        """
        super().__init__()
        backbone = timm.create_model(
            model_name,
            pretrained=pretrained,
            num_classes=CFG.num_classes,
        )
        # The attribute must stay named `model`: parameter names in a state
        # dict are prefixed with the attribute they are registered under.
        self.model = backbone

    def forward(self, x):
        """Return the wrapped network's output for the batch ``x``."""
        outputs = self.model(x)
        return outputs

In [3]:
## Prepare Test Data
sub_df = pd.read_csv(CFG.sample_submission_path)
# Every id maps to <test_img_dir>/<id>.png
sub_df['filepath'] = [
    os.path.join(CFG.test_img_dir, image_id + '.png') for image_id in sub_df['id']
]
display(sub_df.head())

# Fixed-order loader over the test images (NO TTA)
test_dataset = iMetTestDataset(df=sub_df, transforms=get_test_transforms())
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=CFG.batch_size,
    num_workers=CFG.num_workers,
    shuffle=False,
)

print("Test data prepared.")

Unnamed: 0,id,attribute_ids,filepath
0,347c119163f84420f10f7a8126c1b8a2,0 1 2,./test/347c119163f84420f10f7a8126c1b8a2.png
1,98c91458324cba5415c5f5d8ead68328,0 1 2,./test/98c91458324cba5415c5f5d8ead68328.png
2,3f75d332f579af62ff88d369c0736c76,0 1 2,./test/3f75d332f579af62ff88d369c0736c76.png
3,3fa35a29218b7449c8f03e2a368a708d,0 1 2,./test/3fa35a29218b7449c8f03e2a368a708d.png
4,c848b91558e4edd8034cb7d334b4e448,0 1 2,./test/c848b91558e4edd8034cb7d334b4e448.png


--- Applying CORRECTED validation transforms (Resize+CenterCrop, ImageNet Norm) ---
Test data prepared.


In [5]:
## Inference (Ensemble, NO TTA)

# Running sum of the per-model probabilities, one row per test image.
n_samples = len(sub_df)
total_preds = np.zeros((n_samples, CFG.num_classes), dtype=np.float32)

for i, model_path in enumerate(CFG.model_paths):
    print(f"--- Inferencing with model {i+1}/{len(CFG.model_paths)}: {model_path} ---")

    # OOM guard: weights are read onto the CPU and loaded into a CPU-resident
    # model; only the fully initialised model is moved to the target device.
    state_dict = torch.load(model_path, map_location='cpu', weights_only=True)
    model = iMetModel(CFG.model_name, pretrained=False)
    model.load_state_dict(state_dict)
    model.to(CFG.device)
    model.eval()

    # The loader is not shuffled, so consecutive batches fill consecutive rows.
    row_start = 0
    with torch.no_grad():
        for images in tqdm(test_loader, desc=f"Predicting (Model {i+1})"):
            batch_probs = torch.sigmoid(model(images.to(CFG.device))).cpu().numpy()

            row_stop = row_start + len(batch_probs)
            total_preds[row_start:row_stop] += batch_probs
            row_start = row_stop

    # Drop this model before the next one is built.
    del model, state_dict
    torch.cuda.empty_cache()
    gc.collect()

# Mean probability across the ensemble members.
all_preds = total_preds / len(CFG.model_paths)
print("\nEnsemble predictions calculated.")

In [4]:
## Create Submission with Guarded, Robust Thresholds

# Read the saved threshold array, then report its source file and shape.
thresholds = np.load(CFG.thresholds_path)
print(
    f"Loaded final thresholds from: {CFG.thresholds_path}",
    f"Thresholds shape: {thresholds.shape}",
    sep="\n",
)

# Define the guarded apply_thresholds function from expert advice
def apply_thresholds_guarded(probs, th, max_k=3):
    """Binarise scores with thresholds, then force every row to hold 1..max_k labels.

    Steps:
      1. ``probs > th`` gives the raw 0/1 decisions.
      2. Rows with no positive get their single highest-scoring class.
      3. Rows with more than ``max_k`` positives keep only the ``max_k``
         highest-scoring classes *among those that passed their threshold*.
         A class that failed its threshold is never switched on by the cap.

    Args:
        probs: 2-D array-like of shape (n_rows, n_classes) holding the scores.
        th: threshold(s) used in ``probs > th``; a scalar or anything NumPy can
            broadcast against ``probs``.
        max_k: maximum number of positives kept per row. Defaults to 3, the
            value that used to be hard-coded. ``None`` disables the cap.

    Returns:
        ``np.uint8`` array with the same shape as ``probs`` containing 0/1.

    Raises:
        ValueError: if ``max_k`` is given and is smaller than 1 (it would
            contradict the at-least-one fallback).
    """
    if max_k is not None and max_k < 1:
        raise ValueError("max_k must be >= 1 or None")

    probs = np.asarray(probs)
    y = (probs > np.asarray(th)).astype(np.uint8)
    if y.size == 0:
        # Nothing to guard; also avoids argmax on zero-length rows.
        return y

    # At-least-one fallback: empty rows get their arg-max class.
    empty_rows = np.flatnonzero(y.sum(axis=1) == 0)
    if empty_rows.size:
        y[empty_rows, probs[empty_rows].argmax(axis=1)] = 1

    if max_k is not None:
        crowded_rows = np.flatnonzero(y.sum(axis=1) > max_k)
        if crowded_rows.size:
            # Classes that failed their threshold are pushed to -inf so the
            # ranking only ever considers labels that are already positive.
            passed = y[crowded_rows].astype(bool)
            ranked = np.where(passed, probs[crowded_rows], -np.inf)
            # Each crowded row has more than max_k finite entries, so the
            # first max_k partitioned positions are all passed labels.
            keep = np.argpartition(-ranked, max_k - 1, axis=1)[:, :max_k]
            capped = np.zeros_like(y[crowded_rows])
            capped[np.arange(crowded_rows.size)[:, None], keep] = 1
            y[crowded_rows] = capped

    return y

print("Applying final thresholds with guards...")
final_preds_binary = apply_thresholds_guarded(all_preds, thresholds)

# Turn each 0/1 row into a space-separated string of attribute ids.
predictions = [
    ' '.join(
        str(CFG.idx_to_attr_id[pos]) for pos in np.flatnonzero(row == 1).tolist()
    )
    for row in tqdm(final_preds_binary, desc="Formatting submission strings")
]

# Only the id and attribute_ids columns go into the submission file.
sub_df['attribute_ids'] = predictions
submission_columns = ['id', 'attribute_ids']
sub_df[submission_columns].to_csv('submission.csv', index=False)
print("Submission file created successfully!")
display(sub_df.head())