# Plan

- Goal: Build a strong, GPU-accelerated image classification pipeline for iWildCam 2019 with reliable CV and fast iterations. Target: ≥ bronze (macro-F1 ≥ 0.114), aim ≥ silver.
- Validation:
  - Stratified KFold on category_id (5 folds). If metadata like site/sequence is available, consider GroupKFold by location/sequence; otherwise stick to StratifiedKFold for speed initially.
- Baseline model:
  - Pretrained timm backbone (e.g., convnext_tiny or efficientnet_b0) fine-tuned with mixed precision.
  - Input size 224, augmentations (RandomResizedCrop, HFlip, ColorJitter, AutoAugment).
  - Class-balanced sampler or weighted loss (CrossEntropy with class weights or focal).
  - Early stopping, cosine schedule, warmup.
- Iteration steps:
  1) Environment + GPU check
  2) Data load & sanity checks; unzip images (to /mnt for I/O speed), verify paths
  3) Fast smoke run on small subset (e.g., 5k images, 1 fold) to validate pipeline
  4) Full training with 5-fold CV; save OOF and test logits
  5) Error analysis; try alternative backbones (beit_base_224, eva02, nfnet), resolutions 256/320; tune class weights
  6) Blend top-2 models by logits

- Deliverables:
  - Deterministic folds saved
  - submission.csv
  - Logged training progress per epoch/fold

Request expert review after: plan (now), data EDA, baseline smoke OOF, first full CV, and any poor score.

In [2]:
# Environment + Data EDA + Prepare fast storage
import os, sys, shutil, time, json, math, subprocess, zipfile, io
import pandas as pd
from pathlib import Path

print('=== GPU CHECK (nvidia-smi) ===', flush=True)
try:
    out = subprocess.run(['bash','-lc','nvidia-smi || true'], capture_output=True, text=True, check=False)
    print(out.stdout)
except Exception as e:
    print('nvidia-smi failed:', e)

print('=== Python & Disk Info ===')
print(sys.version)
print('CWD:', os.getcwd())
print('Free space on / and CWD (GB):', shutil.disk_usage('/').free/1e9, shutil.disk_usage('.').free/1e9)

DATA_DIR = Path('.')
# Use a writable local fast directory instead of /mnt (permission denied)
FAST_DIR = Path('./fast_storage/iwildcam')
FAST_DIR.mkdir(parents=True, exist_ok=True)

train_csv = DATA_DIR/'train.csv'
test_csv = DATA_DIR/'test.csv'
train_zip = DATA_DIR/'train_images.zip'
test_zip = DATA_DIR/'test_images.zip'

print('=== Load CSVs ===')
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)
print('train_df shape:', train_df.shape)
print('test_df shape:', test_df.shape)
print('Columns:', list(train_df.columns))
print('Head:\n', train_df.head(3))

# Identify potential grouping columns to avoid leakage
candidate_groups = [c for c in ['sequence_id','location','site','seq_id','sequence','location_id'] if c in train_df.columns]
print('Candidate group columns:', candidate_groups)
for c in candidate_groups:
    vc = train_df[c].value_counts().head(5)
    print(f'Top groups for {c}:\n{vc}')

# Target distribution
target_col = 'category_id'
if target_col in train_df.columns:
    n_classes = train_df[target_col].nunique()
    print('Num classes:', n_classes)
    print('Target sample:\n', train_df[target_col].value_counts().head(10))

# Plan image extraction to fast disk if not already extracted
fast_train_dir = FAST_DIR/'train_images'
fast_test_dir = FAST_DIR/'test_images'

def unzip_if_needed(zip_path: Path, out_dir: Path, label: str):
    if out_dir.exists() and any(out_dir.iterdir()):
        print(f'{label}: already extracted at {out_dir}')
        return
    out_dir.mkdir(parents=True, exist_ok=True)
    t0 = time.time()
    print(f'Extracting {zip_path} -> {out_dir} ...', flush=True)
    # Use system unzip for speed if available
    if shutil.which('unzip') is not None:
        cmd = f"unzip -qq -o '{zip_path}' -d '{out_dir}'"
        print('Running:', cmd)
        rc = subprocess.call(['bash','-lc', cmd])
        if rc != 0:
            print('unzip failed with code', rc, '; falling back to python zipfile')
        else:
            print(f'{label}: unzip completed in {time.time()-t0:.1f}s')
            return
    # Fallback: python zipfile (slower)
    with zipfile.ZipFile(zip_path) as zf:
        members = zf.namelist()
        n = len(members)
        for i, m in enumerate(members, 1):
            zf.extract(m, out_dir)
            if i % 5000 == 0:
                print(f'{label}: extracted {i}/{n}', flush=True)
    print(f'{label}: extraction completed in {time.time()-t0:.1f}s')

print('=== Ensure images on local fast storage (./fast_storage) ===')
if train_zip.exists():
    unzip_if_needed(train_zip, fast_train_dir, 'train')
else:
    print('Missing train_images.zip')
if test_zip.exists():
    unzip_if_needed(test_zip, fast_test_dir, 'test')
else:
    print('Missing test_images.zip')

# Sanity: sample a few image paths from train_df
img_col_candidates = [c for c in ['file_name','filename','image_id','id','image_path'] if c in train_df.columns]
print('Image id/path columns candidates:', img_col_candidates)
if img_col_candidates:
    img_col = img_col_candidates[0]
    print('Using image column:', img_col)
    # Try to construct absolute paths under fast dir
    sample_rows = train_df.head(5)
    for _, r in sample_rows.iterrows():
        fn = str(r[img_col])
        # Many competitions store just filename; adapt if nested
        p1 = fast_train_dir/fn
        # If not exists, try basename
        if not p1.exists():
            p1 = fast_train_dir/Path(fn).name
        print('Sample image path candidate:', p1, 'exists:', p1.exists())
else:
    print('Could not identify image filename column; will inspect in later step.')

print('=== EDA step complete ===')

# Next steps (for operator log):
# - Decide grouping key for CV based on candidate_groups (prefer sequence_id, else location).
# - Build folds and persist to disk.
# - Implement dataloaders and baseline model training script.

=== GPU CHECK (nvidia-smi) ===


Wed Sep 24 19:30:42 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.144.06             Driver Version: 550.144.06     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A10-24Q                 On  |   00000002:00:00.0 Off |                    0 |
| N/A   N/A    P0             N/A /  N/A  |     182MiB /  24512MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

train_df shape: (179422, 11)
test_df shape: (16877, 10)
Columns: ['category_id', 'date_captured', 'file_name', 'frame_num', 'id', 'location', 'rights_holder', 'seq_id', 'seq_num_frames', 'width', 'height']
Head:
    category_id        date_captured                                 file_name  \
0           19  2012-03-17 03:48:44  588a679f-23d2-11e8-a6a3-ec086b02610b.jpg   
1            0  2014-05-11 11:56:46  59279ce3-23d2-11e8-a6a3-ec086b02610b.jpg   
2            0  2013-10-06 02:00:00  5a2af4ab-23d2-11e8-a6a3-ec086b02610b.jpg   

   frame_num                                    id  location  rights_holder  \
0          2  588a679f-23d2-11e8-a6a3-ec086b02610b       115   Justin Brown   
1          1  59279ce3-23d2-11e8-a6a3-ec086b02610b        96  Erin Boydston   
2          1  5a2af4ab-23d2-11e8-a6a3-ec086b02610b        57  Erin Boydston   

                                 seq_id  seq_num_frames  width  height  
0  6f12067d-5567-11e8-b3c0-dca9047ef277               3   1024     747  

train: extracted 5000/179224


train: extracted 10000/179224


train: extracted 15000/179224


train: extracted 20000/179224


train: extracted 25000/179224


train: extracted 30000/179224


train: extracted 35000/179224


train: extracted 40000/179224


train: extracted 45000/179224


train: extracted 50000/179224


train: extracted 55000/179224


train: extracted 60000/179224


train: extracted 65000/179224


train: extracted 70000/179224


train: extracted 75000/179224


train: extracted 80000/179224


train: extracted 85000/179224


train: extracted 90000/179224


train: extracted 95000/179224


train: extracted 100000/179224


train: extracted 105000/179224


train: extracted 110000/179224


train: extracted 115000/179224


train: extracted 120000/179224


train: extracted 125000/179224


train: extracted 130000/179224


train: extracted 135000/179224


train: extracted 140000/179224


train: extracted 145000/179224


train: extracted 150000/179224


train: extracted 155000/179224


train: extracted 160000/179224


train: extracted 165000/179224


train: extracted 170000/179224


train: extracted 175000/179224


train: extraction completed in 93.2s
Extracting test_images.zip -> fast_storage/iwildcam/test_images ...


test: extracted 5000/16862


test: extracted 10000/16862


test: extracted 15000/16862


test: extraction completed in 7.7s
Image id/path columns candidates: ['file_name', 'id']
Using image column: file_name
Sample image path candidate: fast_storage/iwildcam/train_images/588a679f-23d2-11e8-a6a3-ec086b02610b.jpg exists: True
Sample image path candidate: fast_storage/iwildcam/train_images/59279ce3-23d2-11e8-a6a3-ec086b02610b.jpg exists: True
Sample image path candidate: fast_storage/iwildcam/train_images/5a2af4ab-23d2-11e8-a6a3-ec086b02610b.jpg exists: True
Sample image path candidate: fast_storage/iwildcam/train_images/593d68d7-23d2-11e8-a6a3-ec086b02610b.jpg exists: True
Sample image path candidate: fast_storage/iwildcam/train_images/58782b45-23d2-11e8-a6a3-ec086b02610b.jpg exists: True
=== EDA step complete ===


In [3]:
# Build grouped CV folds, label maps, and class weights
import pandas as pd, numpy as np, json
from pathlib import Path
from collections import Counter
from sklearn.model_selection import GroupKFold
try:
    from sklearn.model_selection import StratifiedGroupKFold
    HAS_SGK = True
except Exception:
    HAS_SGK = False

DATA_DIR = Path('.')
FAST_DIR = Path('./fast_storage/iwildcam')
train_df = pd.read_csv(DATA_DIR/'train.csv')

# Determine groups and target
target_col = 'category_id'
group_col = 'seq_id' if 'seq_id' in train_df.columns else ('location' if 'location' in train_df.columns else None)
assert group_col is not None, 'No grouping column found (seq_id/location missing)'
y = train_df[target_col].values
groups = train_df[group_col].values

n_splits = 5
rng_seed = 42

folds = np.full(len(train_df), -1, dtype=int)
if HAS_SGK:
    sgk = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=rng_seed)
    for fi, (_, val_idx) in enumerate(sgk.split(train_df, y, groups=groups)):
        folds[val_idx] = fi
else:
    print('StratifiedGroupKFold not available; using GroupKFold (CV may be less balanced).')
    gk = GroupKFold(n_splits=n_splits)
    for fi, (_, val_idx) in enumerate(gk.split(train_df, y, groups=groups)):
        folds[val_idx] = fi

assert (folds >= 0).all(), 'Fold assignment failed'
train_df['fold'] = folds

# Create label maps (category_id -> idx 0..K-1)
classes = np.sort(train_df[target_col].unique())
class_to_idx = {int(c): i for i, c in enumerate(classes)}
idx_to_class = {i: int(c) for i, c in enumerate(classes)}
train_df['target_idx'] = train_df[target_col].map(class_to_idx).astype(int)

# Class weights: (freq + 1e-3)^-0.5 normalized to mean=1
counts = train_df[target_col].value_counts().reindex(classes, fill_value=0).astype(float).values
weights = (counts + 1e-3) ** -0.5
weights = weights / weights.mean()
class_weights = {int(c): float(w) for c, w in zip(classes, weights)}

# Persist artifacts
out_dir = Path('./artifacts')
out_dir.mkdir(parents=True, exist_ok=True)
(DATA_DIR/'folds.csv').write_text(train_df[['id','file_name',target_col, group_col, 'fold']].to_csv(index=False))
json.dump({'class_to_idx': class_to_idx, 'idx_to_class': idx_to_class}, open(out_dir/'label_maps.json','w'))
json.dump({'class_weights': class_weights}, open(out_dir/'class_weights.json','w'))

# Save a compact meta for training
meta_cols = ['id','file_name', target_col, 'target_idx', 'fold', group_col, 'location', 'width', 'height', 'seq_num_frames', 'frame_num']
meta_cols = [c for c in meta_cols if c in train_df.columns]
train_df[meta_cols].to_csv(out_dir/'train_meta.csv', index=False)

# Print summary
print('Folds distribution:')
print(train_df.groupby('fold')[target_col].agg(['count']).reset_index())
print('Num classes:', len(classes))
print('Artifacts written to:', out_dir.resolve())

Folds distribution:
   fold  count
0     0  35901
1     1  35915
2     2  35826
3     3  35870
4     4  35910
Num classes: 14
Artifacts written to: /var/lib/simon/agent_run_states/iwildcam-2019-fgvc6-20250924-184738/artifacts


In [4]:
# Install CUDA 12.1 torch stack and core deps
import os, sys, subprocess, shutil
from pathlib import Path

def pip(*args):
    print('>', *args, flush=True)
    subprocess.run([sys.executable, '-m', 'pip', *args], check=True)

# Uninstall any preexisting torch stacks (best-effort)
for pkg in ('torch','torchvision','torchaudio'):
    subprocess.run([sys.executable, '-m', 'pip', 'uninstall', '-y', pkg], check=False)

# Clean stray site dirs that can shadow correct wheels (idempotent)
for d in (
    '/app/.pip-target/torch',
    '/app/.pip-target/torch-2.8.0.dist-info',
    '/app/.pip-target/torch-2.4.1.dist-info',
    '/app/.pip-target/torchvision',
    '/app/.pip-target/torchvision-0.23.0.dist-info',
    '/app/.pip-target/torchvision-0.19.1.dist-info',
    '/app/.pip-target/torchaudio',
    '/app/.pip-target/torchaudio-2.8.0.dist-info',
    '/app/.pip-target/torchaudio-2.4.1.dist-info',
    '/app/.pip-target/torchgen',
    '/app/.pip-target/functorch',
):
    if os.path.exists(d):
        print('Removing', d)
        shutil.rmtree(d, ignore_errors=True)

# 1) Install EXACT cu121 torch stack
pip('install',
    '--index-url', 'https://download.pytorch.org/whl/cu121',
    '--extra-index-url', 'https://pypi.org/simple',
    'torch==2.4.1', 'torchvision==0.19.1', 'torchaudio==2.4.1')

# 2) Freeze torch versions
Path('constraints.txt').write_text(
    'torch==2.4.1\n'
    'torchvision==0.19.1\n'
    'torchaudio==2.4.1\n'
)

# 3) Install deps honoring constraints
pip('install', '-c', 'constraints.txt',
    'timm==1.0.9', 'albumentations==1.4.8', 'opencv-python-headless',
    'scikit-learn', 'pandas', 'numpy', 'matplotlib', 'seaborn',
    'einops', 'ema-pytorch',
    '--upgrade-strategy', 'only-if-needed')

# 4) Sanity gate
import torch
print('torch:', torch.__version__, 'built CUDA:', getattr(torch.version, 'cuda', None))
print('CUDA available:', torch.cuda.is_available())
assert str(getattr(torch.version,'cuda','')).startswith('12.1'), f'Wrong CUDA build: {torch.version.cuda}'
assert torch.cuda.is_available(), 'CUDA not available'
print('GPU:', torch.cuda.get_device_name(0))





> install --index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://pypi.org/simple torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1




Looking in indexes: https://download.pytorch.org/whl/cu121, https://pypi.org/simple


Collecting torch==2.4.1
  Downloading https://download.pytorch.org/whl/cu121/torch-2.4.1%2Bcu121-cp311-cp311-linux_x86_64.whl (799.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 799.0/799.0 MB 575.2 MB/s eta 0:00:00


Collecting torchvision==0.19.1
  Downloading https://download.pytorch.org/whl/cu121/torchvision-0.19.1%2Bcu121-cp311-cp311-linux_x86_64.whl (7.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.1/7.1 MB 437.8 MB/s eta 0:00:00


Collecting torchaudio==2.4.1
  Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.4.1%2Bcu121-cp311-cp311-linux_x86_64.whl (3.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.4/3.4 MB 416.0 MB/s eta 0:00:00


Collecting fsspec
  Downloading fsspec-2025.9.0-py3-none-any.whl (199 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 199.3/199.3 KB 7.8 MB/s eta 0:00:00


Collecting triton==3.0.0
  Downloading triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (209.4 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 209.4/209.4 MB 345.0 MB/s eta 0:00:00


Collecting nvidia-cuda-cupti-cu12==12.1.105
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.1/14.1 MB 222.0 MB/s eta 0:00:00


Collecting jinja2
  Downloading jinja2-3.1.6-py3-none-any.whl (134 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 134.9/134.9 KB 478.8 MB/s eta 0:00:00


Collecting typing-extensions>=4.8.0
  Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.6/44.6 KB 390.6 MB/s eta 0:00:00
Collecting nvidia-cusparse-cu12==12.1.0.106
  Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 196.0/196.0 MB 333.3 MB/s eta 0:00:00


Collecting nvidia-cudnn-cu12==9.1.0.70
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 664.8/664.8 MB 222.6 MB/s eta 0:00:00


Collecting nvidia-cusolver-cu12==11.4.5.107
  Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 124.2/124.2 MB 332.9 MB/s eta 0:00:00


Collecting nvidia-cublas-cu12==12.1.3.1
  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 410.6/410.6 MB 195.3 MB/s eta 0:00:00


Collecting nvidia-curand-cu12==10.3.2.106
  Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.5/56.5 MB 307.2 MB/s eta 0:00:00


Collecting nvidia-cufft-cu12==11.0.2.54
  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 121.6/121.6 MB 311.5 MB/s eta 0:00:00


Collecting networkx
  Downloading networkx-3.5-py3-none-any.whl (2.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 498.6 MB/s eta 0:00:00


Collecting nvidia-nvtx-cu12==12.1.105
  Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 99.1/99.1 KB 451.5 MB/s eta 0:00:00


Collecting nvidia-nccl-cu12==2.20.5
  Downloading nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 176.2/176.2 MB 314.0 MB/s eta 0:00:00


Collecting filelock
  Downloading filelock-3.19.1-py3-none-any.whl (15 kB)


Collecting nvidia-cuda-runtime-cu12==12.1.105
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 823.6/823.6 KB 467.6 MB/s eta 0:00:00
Collecting nvidia-cuda-nvrtc-cu12==12.1.105
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.7/23.7 MB 307.4 MB/s eta 0:00:00
Collecting sympy
  Downloading sympy-1.14.0-py3-none-any.whl (6.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.3/6.3 MB 492.4 MB/s eta 0:00:00


Collecting numpy
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 468.0 MB/s eta 0:00:00


Collecting pillow!=8.3.*,>=5.3.0
  Downloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.6/6.6 MB 492.0 MB/s eta 0:00:00
Collecting nvidia-nvjitlink-cu12
  Downloading nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (39.7 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 39.7/39.7 MB 313.2 MB/s eta 0:00:00


Collecting MarkupSafe>=2.0
  Downloading MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23 kB)


Collecting mpmath<1.4,>=1.1.0
  Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 536.2/536.2 KB 487.0 MB/s eta 0:00:00


Installing collected packages: mpmath, typing-extensions, sympy, pillow, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numpy, networkx, MarkupSafe, fsspec, filelock, triton, nvidia-cusparse-cu12, nvidia-cudnn-cu12, jinja2, nvidia-cusolver-cu12, torch, torchvision, torchaudio


Successfully installed MarkupSafe-3.0.2 filelock-3.19.1 fsspec-2025.9.0 jinja2-3.1.6 mpmath-1.3.0 networkx-3.5 numpy-1.26.4 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.9.86 nvidia-nvtx-cu12-12.1.105 pillow-11.3.0 sympy-1.14.0 torch-2.4.1+cu121 torchaudio-2.4.1+cu121 torchvision-0.19.1+cu121 triton-3.0.0 typing-extensions-4.15.0


> install -c constraints.txt timm==1.0.9 albumentations==1.4.8 opencv-python-headless scikit-learn pandas numpy matplotlib seaborn einops ema-pytorch --upgrade-strategy only-if-needed


Collecting timm==1.0.9
  Downloading timm-1.0.9-py3-none-any.whl (2.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.3/2.3 MB 71.7 MB/s eta 0:00:00
Collecting albumentations==1.4.8
  Downloading albumentations-1.4.8-py3-none-any.whl (156 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 156.8/156.8 KB 408.3 MB/s eta 0:00:00


Collecting opencv-python-headless
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (54.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 54.0/54.0 MB 234.8 MB/s eta 0:00:00
Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.7 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.7/9.7 MB 180.3 MB/s eta 0:00:00
Collecting pandas
  Downloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.4/12.4 MB 279.5 MB/s eta 0:00:00


Collecting numpy
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 216.0 MB/s eta 0:00:00


Collecting matplotlib
  Downloading matplotlib-3.10.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (8.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.7/8.7 MB 193.5 MB/s eta 0:00:00
Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 294.9/294.9 KB 160.6 MB/s eta 0:00:00
Collecting einops
  Downloading einops-0.8.1-py3-none-any.whl (64 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 64.4/64.4 KB 403.3 MB/s eta 0:00:00
Collecting ema-pytorch
  Downloading ema_pytorch-0.7.7-py3-none-any.whl (9.8 kB)


Collecting huggingface_hub
  Downloading huggingface_hub-0.35.1-py3-none-any.whl (563 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 563.3/563.3 KB 344.8 MB/s eta 0:00:00
Collecting torchvision
  Downloading torchvision-0.19.1-cp311-cp311-manylinux1_x86_64.whl (7.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.0/7.0 MB 210.8 MB/s eta 0:00:00
Collecting torch
  Downloading torch-2.4.1-cp311-cp311-manylinux1_x86_64.whl (797.1 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 797.1/797.1 MB 231.3 MB/s eta 0:00:00


Collecting pyyaml
  Downloading PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (762 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 763.0/763.0 KB 515.0 MB/s eta 0:00:00
Collecting safetensors
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (485 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 485.8/485.8 KB 479.5 MB/s eta 0:00:00


Collecting albucore>=0.0.4
  Downloading albucore-0.0.33-py3-none-any.whl (18 kB)
Collecting scipy>=1.10.0
  Downloading scipy-1.16.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (35.9 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 35.9/35.9 MB 151.0 MB/s eta 0:00:00
Collecting typing-extensions>=4.9.0
  Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.6/44.6 KB 392.3 MB/s eta 0:00:00


Collecting pydantic>=2.7.0
  Downloading pydantic-2.11.9-py3-none-any.whl (444 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 444.9/444.9 KB 534.6 MB/s eta 0:00:00
Collecting scikit-image>=0.21.0
  Downloading scikit_image-0.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.8/14.8 MB 236.2 MB/s eta 0:00:00


Collecting opencv-python-headless
  Downloading opencv_python_headless-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (50.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 50.0/50.0 MB 157.5 MB/s eta 0:00:00
Collecting joblib>=1.2.0
  Downloading joblib-1.5.2-py3-none-any.whl (308 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 308.4/308.4 KB 543.6 MB/s eta 0:00:00
Collecting threadpoolctl>=3.1.0
  Downloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)


Collecting python-dateutil>=2.8.2
  Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 229.9/229.9 KB 529.0 MB/s eta 0:00:00
Collecting pytz>=2020.1
  Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 509.2/509.2 KB 560.5 MB/s eta 0:00:00
Collecting tzdata>=2022.7
  Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 347.8/347.8 KB 552.2 MB/s eta 0:00:00
Collecting contourpy>=1.0.1


  Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (355 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 355.2/355.2 KB 435.7 MB/s eta 0:00:00


Collecting fonttools>=4.22.0
  Downloading fonttools-4.60.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (5.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.0/5.0 MB 207.4 MB/s eta 0:00:00


Collecting pillow>=8
  Downloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.6/6.6 MB 142.6 MB/s eta 0:00:00
Collecting pyparsing>=2.3.1
  Downloading pyparsing-3.2.5-py3-none-any.whl (113 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 113.9/113.9 KB 476.0 MB/s eta 0:00:00
Collecting cycler>=0.10
  Downloading cycler-0.12.1-py3-none-any.whl (8.3 kB)
Collecting packaging>=20.0
  Downloading packaging-25.0-py3-none-any.whl (66 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 66.5/66.5 KB 431.7 MB/s eta 0:00:00


Collecting kiwisolver>=1.3.1
  Downloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (1.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 116.2 MB/s eta 0:00:00


Collecting simsimd>=5.9.2
  Downloading simsimd-6.5.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (1.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 287.0 MB/s eta 0:00:00


Collecting stringzilla>=3.10.4
  Downloading stringzilla-4.0.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (496 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 496.5/496.5 KB 452.2 MB/s eta 0:00:00


Collecting pydantic-core==2.33.2
  Downloading pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 540.9 MB/s eta 0:00:00
Collecting typing-inspection>=0.4.0
  Downloading typing_inspection-0.4.1-py3-none-any.whl (14 kB)
Collecting annotated-types>=0.6.0
  Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB)
Collecting six>=1.5
  Downloading six-1.17.0-py2.py3-none-any.whl (11 kB)
Collecting networkx>=3.0
  Downloading networkx-3.5-py3-none-any.whl (2.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 528.4 MB/s eta 0:00:00
Collecting tifffile>=2022.8.12
  Downloading tifffile-2025.9.20-py3-none-any.whl (230 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 230.1/230.1 KB 488.7 MB/s eta 0:00:00


Collecting lazy-loader>=0.4
  Downloading lazy_loader-0.4-py3-none-any.whl (12 kB)
Collecting imageio!=2.35.0,>=2.33
  Downloading imageio-2.37.0-py3-none-any.whl (315 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 315.8/315.8 KB 513.0 MB/s eta 0:00:00
Collecting nvidia-nccl-cu12==2.20.5
  Downloading nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 176.2/176.2 MB 216.0 MB/s eta 0:00:00
Collecting nvidia-cublas-cu12==12.1.3.1
  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 410.6/410.6 MB 232.8 MB/s eta 0:00:00


Collecting nvidia-cusolver-cu12==11.4.5.107
  Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 124.2/124.2 MB 358.6 MB/s eta 0:00:00
Collecting nvidia-cusparse-cu12==12.1.0.106
  Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 196.0/196.0 MB 189.7 MB/s eta 0:00:00
Collecting jinja2
  Downloading jinja2-3.1.6-py3-none-any.whl (134 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 134.9/134.9 KB 487.7 MB/s eta 0:00:00
Collecting fsspec
  Downloading fsspec-2025.9.0-py3-none-any.whl (199 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 199.3/199.3 KB 508.3 MB/s eta 0:00:00


Collecting nvidia-cuda-runtime-cu12==12.1.105
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 823.6/823.6 KB 559.2 MB/s eta 0:00:00
Collecting nvidia-curand-cu12==10.3.2.106
  Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.5/56.5 MB 148.7 MB/s eta 0:00:00
Collecting filelock
  Downloading filelock-3.19.1-py3-none-any.whl (15 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 664.8/664.8 MB 112.9 MB/s eta 0:00:00


Collecting nvidia-nvtx-cu12==12.1.105
  Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 99.1/99.1 KB 470.9 MB/s eta 0:00:00
Collecting nvidia-cuda-nvrtc-cu12==12.1.105
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.7/23.7 MB 289.0 MB/s eta 0:00:00
Collecting triton==3.0.0
  Downloading triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (209.4 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 209.4/209.4 MB 97.5 MB/s eta 0:00:00
Collecting nvidia-cufft-cu12==11.0.2.54
  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 121.6/121.6 MB 228.7 MB/s eta 0:00:00
Collecting nvidia-cuda-cupti-cu12==12.1.105
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.1/14.1 MB 296.1 MB/s eta 0:00:00
Collecting sympy
  Downloading sympy-1.14.0-py3-none-any.whl (6.3 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.3/6.3 MB 327.5 MB/s eta 0:00:00
Collecting nvidia-nvjitlink-cu12
  Downloading nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (39.7 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 39.7/39.7 MB 233.6 MB/s eta 0:00:00
Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl (64 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 64.7/64.7 KB 449.1 MB/s eta 0:00:00


Collecting hf-xet<2.0.0,>=1.1.3
  Downloading hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.2/3.2 MB 577.8 MB/s eta 0:00:00
Collecting tqdm>=4.42.1
  Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.5/78.5 KB 479.1 MB/s eta 0:00:00


Collecting MarkupSafe>=2.0
  Downloading MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23 kB)
Collecting idna<4,>=2.5
  Downloading idna-3.10-py3-none-any.whl (70 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 70.4/70.4 KB 439.1 MB/s eta 0:00:00
Collecting urllib3<3,>=1.21.1
  Downloading urllib3-2.5.0-py3-none-any.whl (129 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 129.8/129.8 KB 467.5 MB/s eta 0:00:00
Collecting certifi>=2017.4.17
  Downloading certifi-2025.8.3-py3-none-any.whl (161 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 161.2/161.2 KB 480.8 MB/s eta 0:00:00


Collecting charset_normalizer<4,>=2
  Downloading charset_normalizer-3.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (150 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 150.3/150.3 KB 518.3 MB/s eta 0:00:00
Collecting mpmath<1.4,>=1.1.0
  Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 536.2/536.2 KB 188.4 MB/s eta 0:00:00


Installing collected packages: simsimd, pytz, mpmath, urllib3, tzdata, typing-extensions, tqdm, threadpoolctl, sympy, stringzilla, six, safetensors, pyyaml, pyparsing, pillow, packaging, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numpy, networkx, MarkupSafe, kiwisolver, joblib, idna, hf-xet, fsspec, fonttools, filelock, einops, cycler, charset_normalizer, certifi, annotated-types, typing-inspection, triton, tifffile, scipy, requests, python-dateutil, pydantic-core, opencv-python-headless, nvidia-cusparse-cu12, nvidia-cudnn-cu12, lazy-loader, jinja2, imageio, contourpy, scikit-learn, scikit-image, pydantic, pandas, nvidia-cusolver-cu12, matplotlib, huggingface_hub, albucore, torch, seaborn, albumentations, torchvision, ema-pytorch, timm


Successfully installed MarkupSafe-3.0.2 albucore-0.0.33 albumentations-1.4.8 annotated-types-0.7.0 certifi-2025.8.3 charset_normalizer-3.4.3 contourpy-1.3.3 cycler-0.12.1 einops-0.8.1 ema-pytorch-0.7.7 filelock-3.19.1 fonttools-4.60.0 fsspec-2025.9.0 hf-xet-1.1.10 huggingface_hub-0.35.1 idna-3.10 imageio-2.37.0 jinja2-3.1.6 joblib-1.5.2 kiwisolver-1.4.9 lazy-loader-0.4 matplotlib-3.10.6 mpmath-1.3.0 networkx-3.5 numpy-1.26.4 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.9.86 nvidia-nvtx-cu12-12.1.105 opencv-python-headless-4.11.0.86 packaging-25.0 pandas-2.3.2 pillow-11.3.0 pydantic-2.11.9 pydantic-core-2.33.2 pyparsing-3.2.5 python-dateutil-2.9.0.post0 pytz-2025.2 pyyaml-6.0.2 requests-2.32.5 safetensors-0.6.2 s









torch: 2.4.1+cu121 built CUDA: 12.1
CUDA available: True
GPU: NVIDIA A10-24Q


In [6]:
# Smoke training: fold 0, tf_efficientnet_b3_ns @300 for 3 epochs with weighted CE + smoothing
import os, time, math, random, json
from pathlib import Path
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import f1_score
import timm
from PIL import Image, ImageFile
import torchvision.transforms as T

torch.backends.cudnn.benchmark = True
ImageFile.LOAD_TRUNCATED_IMAGES = True

SEED = 42
def seed_everything(seed=SEED):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
seed_everything()

DATA_DIR = Path('.')
FAST_DIR = Path('./fast_storage/iwildcam')
IMG_DIR = FAST_DIR/'train_images'
assert IMG_DIR.exists(), 'Train images not found; ensure extraction complete.'

# Load metadata and build maps
df = pd.read_csv(DATA_DIR/'train.csv')
assert 'fold' in pd.read_csv(DATA_DIR/'folds.csv').columns, 'folds.csv missing or malformed'
folds_df = pd.read_csv(DATA_DIR/'folds.csv')
df = df.merge(folds_df[['id','fold']], on='id', how='left')
assert df['fold'].notna().all(), 'Some rows missing fold assignment'
classes = np.sort(df['category_id'].unique())
class_to_idx = {int(c): i for i, c in enumerate(classes)}
idx_to_class = {i: int(c) for i, c in enumerate(classes)}
df['target_idx'] = df['category_id'].map(class_to_idx).astype(int)
K = len(classes)

# Class weights (inverse sqrt freq, mean=1)
counts = df['category_id'].value_counts().reindex(classes, fill_value=0).astype(float).values
weights = (counts + 1e-3) ** -0.5
weights = weights / weights.mean()
class_weights_tensor = torch.tensor(weights, dtype=torch.float32).cuda()

IMG_SIZE = 300
VAL_SHORT = 336

train_tfm = T.Compose([
    T.RandomResizedCrop(IMG_SIZE, scale=(0.6,1.0), interpolation=T.InterpolationMode.BICUBIC),
    T.RandomHorizontalFlip(p=0.5),
    T.TrivialAugmentWide(num_magnitude_bins=31),
    T.ToTensor(),
    T.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
    T.RandomErasing(p=0.25, scale=(0.02,0.15), ratio=(0.3,3.3)),
])
val_tfm = T.Compose([
    T.Resize(VAL_SHORT, interpolation=T.InterpolationMode.BICUBIC),
    T.CenterCrop(IMG_SIZE),
    T.ToTensor(),
    T.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

class IWildCamDataset(Dataset):
    def __init__(self, df, img_dir, tfm, with_target=True):
        self.df = df.reset_index(drop=True)
        self.img_dir = Path(img_dir)
        self.tfm = tfm
        self.with_target = with_target
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        r = self.df.iloc[i]
        fn = str(r['file_name'])
        p = self.img_dir/fn
        if not p.exists():
            p = self.img_dir/Path(fn).name
        try:
            img = Image.open(p).convert('RGB')
        except Exception:
            # fallback: create a black image to avoid crash but log index
            img = Image.new('RGB', (IMG_SIZE, IMG_SIZE), (0,0,0))
        img = self.tfm(img)
        if self.with_target:
            return img, int(r['target_idx']), r['seq_id'], r['id']
        else:
            return img, r['seq_id'], r['id']

def build_loaders(fold, bs=32, nw=8):
    tr = df[df['fold'] != fold].copy()
    va = df[df['fold'] == fold].copy()
    # Print val class distribution for sanity
    print('Fold', fold, 'val class distribution (category_id top-10):')
    print(va['category_id'].value_counts().head(10))
    ds_tr = IWildCamDataset(tr, IMG_DIR, train_tfm, with_target=True)
    ds_va = IWildCamDataset(va, IMG_DIR, val_tfm, with_target=True)
    dl_tr = DataLoader(ds_tr, batch_size=bs, shuffle=True, num_workers=nw, pin_memory=True, persistent_workers=True, drop_last=True)
    dl_va = DataLoader(ds_va, batch_size=bs*2, shuffle=False, num_workers=nw, pin_memory=True, persistent_workers=True)
    return dl_tr, dl_va, va.reset_index(drop=True)

def macro_f1_from_logits_np(logits_np, targets_np, K):
    preds = logits_np.argmax(1)
    return f1_score(targets_np, preds, average='macro', labels=list(range(K)), zero_division=0)

def evaluate(model, dl, df_va, device, tta=False):
    model.eval()
    all_logits = []
    all_targets = []
    all_seq = []
    with torch.no_grad():
        for xb, yb, seqs, _ids in dl:
            xb = xb.to(device, non_blocking=True)
            yb = yb.to(device, non_blocking=True)
            logits = model(xb)
            if tta:
                xb_flip = torch.flip(xb, dims=[3])
                logits_flip = model(xb_flip)
                logits = 0.5*(logits + logits_flip)
            all_logits.append(logits.float().cpu())
            all_targets.append(yb.cpu())
            all_seq.extend(list(seqs))
    logits = torch.cat(all_logits, 0).numpy()
    targets = torch.cat(all_targets, 0).numpy()
    # Plain macro-F1 over all K classes
    f1_plain = macro_f1_from_logits_np(logits, targets, K)
    # Sequence-level averaging
    df_tmp = pd.DataFrame({'seq_id': all_seq})
    seq_indices = {}
    for i, s in enumerate(df_tmp['seq_id']):
        seq_indices.setdefault(s, []).append(i)
    logits_seq = logits.copy()
    for s, idxs in seq_indices.items():
        m = logits[idxs].mean(axis=0, keepdims=True)
        for i in idxs:
            logits_seq[i] = m
    f1_seq = macro_f1_from_logits_np(logits_seq, targets, K)
    return float(f1_plain), float(f1_seq)

def train_fold(fold=0, epochs=3, img_size=IMG_SIZE, bs=32, lr=3e-4, wd=1e-5):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'Training fold {fold} for {epochs} epochs on {device}', flush=True)
    dl_tr, dl_va, df_va = build_loaders(fold, bs=bs, nw=8)
    model = timm.create_model('tf_efficientnet_b3_ns', pretrained=True, num_classes=K)
    model = model.to(device).to(memory_format=torch.channels_last)
    opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    num_steps = epochs * len(dl_tr)
    warmup_steps = max(1, len(dl_tr))
    def lr_lambda(step):
        if step < warmup_steps:
            return float(step+1)/float(warmup_steps)
        progress = (step - warmup_steps) / max(1, num_steps - warmup_steps)
        return 0.5*(1.0 + math.cos(math.pi * progress))
    sch = torch.optim.lr_scheduler.LambdaLR(opt, lr_lambda=lr_lambda)
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor, label_smoothing=0.05)
    scaler = torch.cuda.amp.GradScaler(enabled=True)
    best_f1 = -1.0
    ckpt_dir = Path('checkpoints'); ckpt_dir.mkdir(exist_ok=True, parents=True)
    t_start = time.time()
    global_step = 0
    for epoch in range(1, epochs+1):
        model.train()
        epoch_loss = 0.0
        t0 = time.time()
        for it, (xb, yb, _seqs, _ids) in enumerate(dl_tr):
            xb = xb.to(device, non_blocking=True).to(memory_format=torch.channels_last)
            yb = yb.to(device, non_blocking=True)
            opt.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=True):
                logits = model(xb)
                loss = criterion(logits, yb)
            scaler.scale(loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(opt)
            scaler.update()
            sch.step()
            epoch_loss += loss.item()
            global_step += 1
            if (it+1) % 100 == 0:
                elapsed = time.time()-t0
                print(f'Epoch {epoch} iter {it+1}/{len(dl_tr)} loss {epoch_loss/(it+1):.4f} elapsed {elapsed:.1f}s', flush=True)
        # Eval
        f1_plain, f1_seq = evaluate(model, dl_va, df_va, device, tta=True)
        print(f'Epoch {epoch} done. TrainLoss {epoch_loss/len(dl_tr):.4f} | Val F1 {f1_plain:.4f} | Val F1 seq-avg {f1_seq:.4f} | elapsed {time.time()-t0:.1f}s', flush=True)
        if f1_seq > best_f1:
            best_f1 = f1_seq
            ckpt_path = ckpt_dir/f'b3_fold{fold}_best.pth'
            torch.save({'model': model.state_dict(), 'f1_seq': best_f1, 'epoch': epoch}, ckpt_path)
            print('Saved best checkpoint:', ckpt_path, flush=True)
    print(f'Fold {fold} training complete. Best Val F1 seq-avg: {best_f1:.4f}. Total time {time.time()-t_start:.1f}s', flush=True)

# Run smoke training on fold 0
train_fold(fold=0, epochs=3, bs=32, lr=3e-4, wd=1e-5)

Training fold 0 for 3 epochs on cuda


Fold 0 val class distribution (category_id top-10):
category_id
0     25727
19     2137
1      1226
8      1208
11     1175
13     1047
16      986
17      810
3       609
18      357
Name: count, dtype: int64


  model = create_fn(


  scaler = torch.cuda.amp.GradScaler(enabled=True)


  with torch.cuda.amp.autocast(enabled=True):


Epoch 1 iter 100/4506 loss 3.9344 elapsed 11.8s


Epoch 1 iter 200/4506 loss 3.6609 elapsed 23.0s


Epoch 1 iter 300/4506 loss 3.3738 elapsed 34.2s


Epoch 1 iter 400/4506 loss 3.1659 elapsed 45.4s


Epoch 1 iter 500/4506 loss 3.0023 elapsed 56.6s


Epoch 1 iter 600/4506 loss 2.8753 elapsed 67.8s


Epoch 1 iter 700/4506 loss 2.7643 elapsed 79.0s


Epoch 1 iter 800/4506 loss 2.6739 elapsed 90.2s


Epoch 1 iter 900/4506 loss 2.5908 elapsed 101.3s


Epoch 1 iter 1000/4506 loss 2.5219 elapsed 112.6s


Epoch 1 iter 1100/4506 loss 2.4629 elapsed 123.7s


Epoch 1 iter 1200/4506 loss 2.4095 elapsed 135.0s


Epoch 1 iter 1300/4506 loss 2.3644 elapsed 146.2s


Epoch 1 iter 1400/4506 loss 2.3208 elapsed 157.4s


Epoch 1 iter 1500/4506 loss 2.2813 elapsed 168.6s


Epoch 1 iter 1600/4506 loss 2.2460 elapsed 179.8s


Epoch 1 iter 1700/4506 loss 2.2152 elapsed 191.0s


Epoch 1 iter 1800/4506 loss 2.1841 elapsed 202.2s


Epoch 1 iter 1900/4506 loss 2.1577 elapsed 213.5s


Epoch 1 iter 2000/4506 loss 2.1326 elapsed 224.7s


Epoch 1 iter 2100/4506 loss 2.1089 elapsed 236.0s


Epoch 1 iter 2200/4506 loss 2.0865 elapsed 247.2s


Epoch 1 iter 2300/4506 loss 2.0650 elapsed 258.5s


Epoch 1 iter 2400/4506 loss 2.0442 elapsed 269.7s


Epoch 1 iter 2500/4506 loss 2.0261 elapsed 280.9s


Epoch 1 iter 2600/4506 loss 2.0089 elapsed 292.1s


Epoch 1 iter 2700/4506 loss 1.9907 elapsed 303.4s


Epoch 1 iter 2800/4506 loss 1.9745 elapsed 314.6s


Epoch 1 iter 2900/4506 loss 1.9601 elapsed 325.8s


Epoch 1 iter 3000/4506 loss 1.9467 elapsed 337.3s


Epoch 1 iter 3100/4506 loss 1.9355 elapsed 348.6s


Epoch 1 iter 3200/4506 loss 1.9228 elapsed 359.9s


Epoch 1 iter 3300/4506 loss 1.9118 elapsed 371.1s


Epoch 1 iter 3400/4506 loss 1.9012 elapsed 382.4s


Epoch 1 iter 3500/4506 loss 1.8910 elapsed 393.7s


Epoch 1 iter 3600/4506 loss 1.8822 elapsed 405.0s


Epoch 1 iter 3700/4506 loss 1.8725 elapsed 416.3s


Epoch 1 iter 3800/4506 loss 1.8627 elapsed 427.6s


Epoch 1 iter 3900/4506 loss 1.8552 elapsed 438.8s


Epoch 1 iter 4000/4506 loss 1.8484 elapsed 450.1s


Epoch 1 iter 4100/4506 loss 1.8411 elapsed 461.4s


Epoch 1 iter 4200/4506 loss 1.8341 elapsed 472.7s


Epoch 1 iter 4300/4506 loss 1.8271 elapsed 483.9s


Epoch 1 iter 4400/4506 loss 1.8199 elapsed 495.2s


Epoch 1 iter 4500/4506 loss 1.8131 elapsed 506.4s


Epoch 1 done. TrainLoss 1.8128 | Val F1 0.7811 | Val F1 seq-avg 0.7979 | elapsed 631.2s


Saved best checkpoint: checkpoints/b3_fold0_best.pth


  with torch.cuda.amp.autocast(enabled=True):


Epoch 2 iter 100/4506 loss 1.4339 elapsed 11.6s


Epoch 2 iter 200/4506 loss 1.4325 elapsed 22.8s


Epoch 2 iter 300/4506 loss 1.4274 elapsed 34.1s


Epoch 2 iter 400/4506 loss 1.4386 elapsed 45.4s


Epoch 2 iter 500/4506 loss 1.4383 elapsed 56.6s


Epoch 2 iter 600/4506 loss 1.4406 elapsed 67.8s


Epoch 2 iter 700/4506 loss 1.4388 elapsed 79.1s


Epoch 2 iter 800/4506 loss 1.4409 elapsed 90.3s


Epoch 2 iter 900/4506 loss 1.4459 elapsed 101.6s


Epoch 2 iter 1000/4506 loss 1.4490 elapsed 112.9s


Epoch 2 iter 1100/4506 loss 1.4476 elapsed 124.1s


Epoch 2 iter 1200/4506 loss 1.4487 elapsed 135.6s


Epoch 2 iter 1300/4506 loss 1.4446 elapsed 146.9s


Epoch 2 iter 1400/4506 loss 1.4417 elapsed 158.2s


Epoch 2 iter 1500/4506 loss 1.4428 elapsed 169.4s


Epoch 2 iter 1600/4506 loss 1.4434 elapsed 180.7s


Epoch 2 iter 1700/4506 loss 1.4418 elapsed 192.0s


Epoch 2 iter 1800/4506 loss 1.4406 elapsed 203.2s


Epoch 2 iter 1900/4506 loss 1.4388 elapsed 214.4s


Epoch 2 iter 2000/4506 loss 1.4373 elapsed 225.7s


Epoch 2 iter 2100/4506 loss 1.4376 elapsed 237.0s


Epoch 2 iter 2200/4506 loss 1.4387 elapsed 248.3s


Epoch 2 iter 2300/4506 loss 1.4376 elapsed 259.6s


Epoch 2 iter 2400/4506 loss 1.4394 elapsed 270.9s


Epoch 2 iter 2500/4506 loss 1.4389 elapsed 282.2s


Epoch 2 iter 2600/4506 loss 1.4393 elapsed 293.5s


Epoch 2 iter 2700/4506 loss 1.4396 elapsed 304.7s


Epoch 2 iter 2800/4506 loss 1.4394 elapsed 316.0s


Epoch 2 iter 2900/4506 loss 1.4392 elapsed 327.3s


Epoch 2 iter 3000/4506 loss 1.4408 elapsed 338.6s


Epoch 2 iter 3100/4506 loss 1.4412 elapsed 349.8s


Epoch 2 iter 3200/4506 loss 1.4423 elapsed 361.1s


Epoch 2 iter 3300/4506 loss 1.4427 elapsed 372.4s


Epoch 2 iter 3400/4506 loss 1.4439 elapsed 383.7s


Epoch 2 iter 3500/4506 loss 1.4440 elapsed 395.0s


Epoch 2 iter 3600/4506 loss 1.4429 elapsed 406.3s


Epoch 2 iter 3700/4506 loss 1.4430 elapsed 417.6s


KeyboardInterrupt: 

In [None]:
# Inference: TTA + optional sequence averaging, generate submission.csv aligned to sample_submission Ids
import os, json, math, time
from pathlib import Path
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import timm
from PIL import Image
import torchvision.transforms as T

DATA_DIR = Path('.')
FAST_DIR = Path('./fast_storage/iwildcam')
TEST_IMG_DIR = FAST_DIR/'test_images'
assert TEST_IMG_DIR.exists(), 'Test images dir missing'

# Load label maps from train
train_df_map = pd.read_csv(DATA_DIR/'train.csv')
classes = np.sort(train_df_map['category_id'].unique())
class_to_idx = {int(c): i for i, c in enumerate(classes)}
idx_to_class = {i: int(c) for i, c in enumerate(classes)}
K = len(classes)

# Load test and sample_submission to define required Id order
test_df = pd.read_csv(DATA_DIR/'test.csv')
has_seq = 'seq_id' in test_df.columns
n_test = len(test_df)
print('test.csv rows:', n_test, 'unique ids:', test_df['id'].nunique())
sample_df = pd.read_csv(DATA_DIR/'sample_submission.csv')
if sample_df.columns[0].startswith('Unnamed'):
    sample_df = sample_df.drop(columns=[sample_df.columns[0]])
assert set(['Id','Category']).issubset(sample_df.columns), 'sample_submission missing required columns'
sample_ids = sample_df['Id'].astype(str).values
print('sample_submission rows:', len(sample_ids))

IMG_SIZE = 300
VAL_SHORT = 336
val_tfm = T.Compose([
    T.Resize(VAL_SHORT, interpolation=T.InterpolationMode.BICUBIC),
    T.CenterCrop(IMG_SIZE),
    T.ToTensor(),
    T.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

class TestDataset(Dataset):
    def __init__(self, df, img_dir, tfm):
        self.df = df.reset_index(drop=True)
        self.img_dir = Path(img_dir)
        self.tfm = tfm
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        r = self.df.iloc[i]
        fn = str(r['file_name']) if 'file_name' in r else str(r['id'])
        p = self.img_dir/fn
        if not p.exists():
            p = self.img_dir/Path(fn).name
        try:
            img = Image.open(p).convert('RGB')
        except Exception:
            img = Image.new('RGB', (IMG_SIZE, IMG_SIZE), (0,0,0))
        img = self.tfm(img)
        sid = r['seq_id'] if 'seq_id' in r else None
        iid = str(r['id']) if 'id' in r else Path(fn).stem
        return img, sid, iid

def infer_checkpoint(ckpt_path, bs=64, tta=True):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    ds = TestDataset(test_df, TEST_IMG_DIR, val_tfm)
    dl = DataLoader(ds, batch_size=bs, shuffle=False, num_workers=8, pin_memory=True, persistent_workers=True)
    model = timm.create_model('tf_efficientnet_b3_ns', pretrained=False, num_classes=K)
    state = torch.load(ckpt_path, map_location='cpu')
    model.load_state_dict(state['model'] if isinstance(state, dict) and 'model' in state else state, strict=False)
    model = model.to(device).to(memory_format=torch.channels_last)
    model.eval()
    all_logits = []
    all_seqs = []
    all_ids = []
    with torch.no_grad():
        for xb, seqs, ids in dl:
            xb = xb.to(device, non_blocking=True)
            logits = model(xb)
            if tta:
                xb_flip = torch.flip(xb, dims=[3])
                logits_flip = model(xb_flip)
                logits = 0.5*(logits + logits_flip)
            all_logits.append(logits.float().cpu())
            all_seqs.extend(list(seqs))
            all_ids.extend(list(ids))
    logits = torch.cat(all_logits, 0).numpy()
    return logits, np.array(all_seqs, dtype=object), np.array(all_ids, dtype=object)

# Choose checkpoint
ckpt1 = Path('checkpoints/b3_fold0_best.pth')
if not ckpt1.exists():
    alt = FAST_DIR/'b3_fold0_best.pth'
    ckpt1 = alt if alt.exists() else ckpt1
print('Using checkpoint:', ckpt1)

logits, seqs, ids = infer_checkpoint(ckpt1, bs=64, tta=True)
assert logits.shape[0] == len(test_df), f'logits rows {logits.shape[0]} != test rows {len(test_df)}'

# Sequence-level averaging on test if seq_id present
if has_seq:
    df_tmp = pd.DataFrame({'seq_id': seqs})
    seq_indices = {}
    for i, s in enumerate(df_tmp['seq_id']):
        seq_indices.setdefault(s, []).append(i)
    logits_seq = logits.copy()
    for s, idxs in seq_indices.items():
        m = logits[idxs].mean(axis=0, keepdims=True)
        for i in idxs:
            logits_seq[i] = m
    logits = logits_seq
else:
    print('seq_id not in test.csv; skipping sequence averaging')

# Aggregate duplicate ids by mean logits
df_pred = pd.DataFrame({'id': ids})
for k in range(K):
    df_pred[f'l{k}'] = logits[:, k]
agg = df_pred.groupby('id', as_index=False).mean()
print('Unique ids predicted:', len(agg))

# Map predictions to sample_submission Id order; fallback to class 0 for missing ids
logit_cols = [c for c in agg.columns if c.startswith('l')]
id_to_vec = {iid: agg.loc[i, logit_cols].values for i, iid in enumerate(agg['id'].astype(str).values)}
vec0 = np.zeros(K, dtype=np.float32); vec0[class_to_idx.get(0, 0)] = 1.0
pred_idx_list = []
miss = 0
for iid in sample_ids:
    v = id_to_vec.get(str(iid))
    if v is None:
        miss += 1
        v = vec0
    pred_idx_list.append(int(np.argmax(v)))
print('Missing ids in predictions (filled with 0):', miss)
pred_cat = [idx_to_class[i] for i in pred_idx_list]

sub = pd.DataFrame({'Id': sample_ids, 'Category': pred_cat})
assert list(sub.columns) == ['Id','Category'] and len(sub) == len(sample_ids)
sub.to_csv('submission.csv', index=False)
print('Saved submission.csv with shape', sub.shape, 'and columns', list(sub.columns))

test.csv rows: 16877 unique ids: 16862
sample_submission rows: 16877
Using checkpoint: checkpoints/b3_fold0_best.pth


  model = create_fn(
  state = torch.load(ckpt_path, map_location='cpu')
