# PetFinder Pawpularity: Plan

Objectives:
- Win a medal (RMSE ≤ 17.097).
- Build a strong, GPU-accelerated image model with robust CV and fast iteration.

Milestones:
1) Environment + Data Check
   - Verify GPU availability, install PyTorch/torchvision, check image counts and CSVs.
   - Lock folds (StratifiedKFold on Pawpularity bins). Save folds.

2) Baseline
   - Simple image-only CNN (timm pretrained, e.g., convnext_tiny or efficientnet_v2_s).
   - Input size 384, light augs, MSE loss; predict Pawpularity scaled to 0-1 then rescale.
   - 5-fold CV, early stopping, mixed precision, EMA, cosine LR.

3) Add Metadata
   - Use train.csv binary attributes (Subject Focus, Eyes, etc.).
   - Tabular head + image backbone (late fusion).

4) Improve
   - Resolution sweep (384→512), TTA, better augs, CutMix/TrivialAugment optional.
   - Seeds x2, model variants, simple weighted blend.

5) Error Analysis
   - OOF diagnostics, bins by target, per-fold checks.

6) Submission
   - Inference with TTA, generate submission.csv.

Validation:
- 5-fold Stratified by binned Pawpularity (e.g., 10 bins), deterministic seeds.
- All transforms fitted per-fold. Cache OOF/test preds.

Logging/Speed:
- Print progress/time per epoch/fold, early stop patience, checkpoint best RMSE.
- Subsample for smoke tests before full runs.

Next:
- Run env check cell: GPU, package install, data sanity; then lock folds and request expert review.

In [3]:
# Environment and data sanity check
import sys, subprocess, time, os, glob, platform, json
print(f"Python: {sys.version.split()[0]} | Platform: {platform.platform()}")

def pip_run(args):
    print(f"\n[pip] {' '.join(args)}", flush=True)
    cmd = [sys.executable, '-m', 'pip'] + args
    subprocess.run(cmd, check=True)

# 1) Install GPU builds of torch/torchvision for CUDA 12.1 (pin to avoid surprises)
pip_run(['install', '--index-url', 'https://download.pytorch.org/whl/cu121', 'torch==2.5.1+cu121', 'torchvision==0.20.1+cu121'])

# 2) Install timm without deps to avoid touching torch/torchvision
pip_run(['install', 'timm', '--no-deps', '--upgrade-strategy', 'only-if-needed'])

# 3) Install remaining deps without upgrading torch
other_pkgs = ['albumentations>=1.4.0', 'opencv-python-headless', 'pandas', 'numpy', 'scikit-learn', 'tqdm', 'matplotlib']
pip_run(['install', '--upgrade-strategy', 'only-if-needed'] + other_pkgs)

import torch, torchvision
import pandas as pd, numpy as np

print(f"Torch: {torch.__version__} | TorchVision: {torchvision.__version__}")
print(f"GPU Available: {torch.cuda.is_available()}")
print(f"GPU Count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    props = torch.cuda.get_device_properties(0)
    print(f"GPU Memory: {props.total_memory/1024**3:.1f} GB")

# Data sanity
train_csv = 'train.csv'; test_csv = 'test.csv'
train_dir = 'train'; test_dir = 'test'
assert os.path.exists(train_csv) and os.path.exists(test_csv), 'CSV files missing'
assert os.path.isdir(train_dir) and os.path.isdir(test_dir), 'Image dirs missing'
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)
print('train.csv shape:', train_df.shape, '| test.csv shape:', test_df.shape)
print('Columns (train):', list(train_df.columns))
print('Columns (test):', list(test_df.columns))

# Check image counts and missing files
train_imgs = set(os.path.splitext(os.path.basename(p))[0] for p in glob.glob(os.path.join(train_dir, '*.jpg')))
test_imgs = set(os.path.splitext(os.path.basename(p))[0] for p in glob.glob(os.path.join(test_dir, '*.jpg')))
missing_train = [i for i in train_df['Id'].astype(str).tolist() if i not in train_imgs]
missing_test = [i for i in test_df['Id'].astype(str).tolist() if i not in test_imgs]
print(f"Train images on disk: {len(train_imgs)} | in CSV: {len(train_df)} | missing: {len(missing_train)}")
print(f"Test images on disk: {len(test_imgs)} | in CSV: {len(test_df)} | missing: {len(missing_test)}")
if missing_train[:5]: print('Sample missing train ids:', missing_train[:5])
if missing_test[:5]: print('Sample missing test ids:', missing_test[:5])

# Target distribution quick stats
if 'Pawpularity' in train_df.columns:
    y = train_df['Pawpularity'].values
    print('Pawpularity stats:', {'min': int(np.min(y)), 'max': int(np.max(y)), 'mean': float(np.mean(y)), 'std': float(np.std(y))})
    # propose bins for stratification (20 bins using floor/5 capped to 19)
    bins = np.clip((y // 5).astype(int), 0, 19)
    uniq, counts = np.unique(bins, return_counts=True)
    print('Strat bins (floor/5) distribution sample:', dict(zip(uniq.tolist()[:10], counts.tolist()[:10])))

print('\nEnv & data check complete.')

Python: 3.11.0rc1 | Platform: Linux-6.8.0-1031-azure-x86_64-with-glibc2.35

[pip] install --index-url https://download.pytorch.org/whl/cu121 torch==2.5.1+cu121 torchvision==0.20.1+cu121


Looking in indexes: https://download.pytorch.org/whl/cu121


Collecting torch==2.5.1+cu121
  Downloading https://download.pytorch.org/whl/cu121/torch-2.5.1%2Bcu121-cp311-cp311-linux_x86_64.whl (780.5 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 780.5/780.5 MB 566.0 MB/s eta 0:00:00


Collecting torchvision==0.20.1+cu121
  Downloading https://download.pytorch.org/whl/cu121/torchvision-0.20.1%2Bcu121-cp311-cp311-linux_x86_64.whl (7.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.3/7.3 MB 371.7 MB/s eta 0:00:00
Collecting sympy==1.13.1
  Downloading https://download.pytorch.org/whl/sympy-1.13.1-py3-none-any.whl (6.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.2/6.2 MB 549.4 MB/s eta 0:00:00


Collecting nvidia-cufft-cu12==11.0.2.54
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 121.6/121.6 MB 559.6 MB/s eta 0:00:00
Collecting nvidia-nccl-cu12==2.21.5
  Downloading https://download.pytorch.org/whl/nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl (188.7 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 188.7/188.7 MB 561.8 MB/s eta 0:00:00


Collecting nvidia-cuda-runtime-cu12==12.1.105
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 823.6/823.6 KB 530.9 MB/s eta 0:00:00
Collecting nvidia-curand-cu12==10.3.2.106
  Downloading https://download.pytorch.org/whl/cu121/nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.5/56.5 MB 316.1 MB/s eta 0:00:00
Collecting nvidia-nvtx-cu12==12.1.105
  Downloading https://download.pytorch.org/whl/cu121/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 99.1/99.1 KB 422.8 MB/s eta 0:00:00


Collecting fsspec
  Downloading https://download.pytorch.org/whl/fsspec-2024.6.1-py3-none-any.whl (177 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 177.6/177.6 KB 479.2 MB/s eta 0:00:00
Collecting nvidia-cuda-cupti-cu12==12.1.105
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.1/14.1 MB 310.1 MB/s eta 0:00:00


Collecting nvidia-cublas-cu12==12.1.3.1
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 410.6/410.6 MB 479.9 MB/s eta 0:00:00


Collecting jinja2
  Downloading https://download.pytorch.org/whl/Jinja2-3.1.4-py3-none-any.whl (133 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 133.3/133.3 KB 443.1 MB/s eta 0:00:00


Collecting nvidia-cuda-nvrtc-cu12==12.1.105
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.7/23.7 MB 290.7 MB/s eta 0:00:00


Collecting nvidia-cudnn-cu12==9.1.0.70
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 664.8/664.8 MB 489.6 MB/s eta 0:00:00


Collecting nvidia-cusolver-cu12==11.4.5.107
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 124.2/124.2 MB 487.6 MB/s eta 0:00:00
Collecting triton==3.1.0
  Downloading https://download.pytorch.org/whl/triton-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.5 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 209.5/209.5 MB 482.8 MB/s eta 0:00:00


Collecting nvidia-cusparse-cu12==12.1.0.106
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 196.0/196.0 MB 494.4 MB/s eta 0:00:00


Collecting networkx
  Downloading https://download.pytorch.org/whl/networkx-3.3-py3-none-any.whl (1.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 446.7 MB/s eta 0:00:00


Collecting filelock
  Downloading https://download.pytorch.org/whl/filelock-3.13.1-py3-none-any.whl (11 kB)


Collecting typing-extensions>=4.8.0
  Downloading https://download.pytorch.org/whl/typing_extensions-4.12.2-py3-none-any.whl (37 kB)


Collecting pillow!=8.3.*,>=5.3.0
  Downloading https://download.pytorch.org/whl/pillow-11.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (4.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.4/4.4 MB 382.5 MB/s eta 0:00:00


Collecting numpy
  Downloading https://download.pytorch.org/whl/numpy-1.26.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 268.0 MB/s eta 0:00:00


Collecting nvidia-nvjitlink-cu12
  Downloading https://download.pytorch.org/whl/nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (39.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 39.7/39.7 MB 331.0 MB/s eta 0:00:00


Collecting mpmath<1.4,>=1.1.0
  Downloading https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 536.2/536.2 KB 535.9 MB/s eta 0:00:00
Collecting MarkupSafe>=2.0
  Downloading https://download.pytorch.org/whl/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (28 kB)


Installing collected packages: mpmath, typing-extensions, sympy, pillow, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numpy, networkx, MarkupSafe, fsspec, filelock, triton, nvidia-cusparse-cu12, nvidia-cudnn-cu12, jinja2, nvidia-cusolver-cu12, torch, torchvision


Successfully installed MarkupSafe-2.1.5 filelock-3.13.1 fsspec-2024.6.1 jinja2-3.1.4 mpmath-1.3.0 networkx-3.3 numpy-1.26.3 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.21.5 nvidia-nvjitlink-cu12-12.9.86 nvidia-nvtx-cu12-12.1.105 pillow-11.0.0 sympy-1.13.1 torch-2.5.1+cu121 torchvision-0.20.1+cu121 triton-3.1.0 typing-extensions-4.12.2





[pip] install timm --no-deps --upgrade-strategy only-if-needed


Collecting timm
  Downloading timm-1.0.20-py3-none-any.whl (2.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.5/2.5 MB 36.5 MB/s eta 0:00:00
Installing collected packages: timm


Successfully installed timm-1.0.20

[pip] install --upgrade-strategy only-if-needed albumentations>=1.4.0 opencv-python-headless pandas numpy scikit-learn tqdm matplotlib


Collecting albumentations>=1.4.0
  Downloading albumentations-2.0.8-py3-none-any.whl (369 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 369.4/369.4 KB 16.1 MB/s eta 0:00:00


Collecting opencv-python-headless
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (54.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 54.0/54.0 MB 177.2 MB/s eta 0:00:00


Collecting pandas
  Downloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.4/12.4 MB 126.3 MB/s eta 0:00:00


Collecting numpy
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 428.2 MB/s eta 0:00:00


Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.7/9.7 MB 106.7 MB/s eta 0:00:00


Collecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.5/78.5 KB 370.8 MB/s eta 0:00:00


Collecting matplotlib
  Downloading matplotlib-3.10.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (8.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.7/8.7 MB 77.6 MB/s eta 0:00:00


Collecting albucore==0.0.24
  Downloading albucore-0.0.24-py3-none-any.whl (15 kB)
Collecting PyYAML
  Downloading PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (762 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 763.0/763.0 KB 495.6 MB/s eta 0:00:00


Collecting pydantic>=2.9.2
  Downloading pydantic-2.11.9-py3-none-any.whl (444 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 444.9/444.9 KB 524.9 MB/s eta 0:00:00


Collecting scipy>=1.10.0
  Downloading scipy-1.16.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (35.9 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 35.9/35.9 MB 94.3 MB/s eta 0:00:00


Collecting simsimd>=5.9.2
  Downloading simsimd-6.5.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (1.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 91.0 MB/s eta 0:00:00


Collecting stringzilla>=3.10.4
  Downloading stringzilla-4.0.13-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (496 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 496.5/496.5 KB 277.8 MB/s eta 0:00:00
Collecting opencv-python-headless
  Downloading opencv_python_headless-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (50.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 50.0/50.0 MB 227.9 MB/s eta 0:00:00
Collecting python-dateutil>=2.8.2
  Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 229.9/229.9 KB 520.5 MB/s eta 0:00:00


Collecting tzdata>=2022.7
  Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 347.8/347.8 KB 518.6 MB/s eta 0:00:00
Collecting pytz>=2020.1
  Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 509.2/509.2 KB 498.9 MB/s eta 0:00:00
Collecting threadpoolctl>=3.1.0
  Downloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)


Collecting joblib>=1.2.0
  Downloading joblib-1.5.2-py3-none-any.whl (308 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 308.4/308.4 KB 547.9 MB/s eta 0:00:00


Collecting cycler>=0.10
  Downloading cycler-0.12.1-py3-none-any.whl (8.3 kB)


Collecting pillow>=8
  Downloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.6/6.6 MB 173.6 MB/s eta 0:00:00
Collecting pyparsing>=2.3.1
  Downloading pyparsing-3.2.5-py3-none-any.whl (113 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 113.9/113.9 KB 472.4 MB/s eta 0:00:00


Collecting kiwisolver>=1.3.1
  Downloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (1.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 444.4 MB/s eta 0:00:00


Collecting contourpy>=1.0.1
  Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (355 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 355.2/355.2 KB 533.8 MB/s eta 0:00:00
Collecting packaging>=20.0
  Downloading packaging-25.0-py3-none-any.whl (66 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 66.5/66.5 KB 25.4 MB/s eta 0:00:00


Collecting fonttools>=4.22.0
  Downloading fonttools-4.60.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (5.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.0/5.0 MB 165.3 MB/s eta 0:00:00


Collecting annotated-types>=0.6.0
  Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB)


Collecting pydantic-core==2.33.2
  Downloading pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 271.7 MB/s eta 0:00:00
Collecting typing-extensions>=4.12.2
  Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.6/44.6 KB 393.9 MB/s eta 0:00:00
Collecting typing-inspection>=0.4.0
  Downloading typing_inspection-0.4.1-py3-none-any.whl (14 kB)
Collecting six>=1.5
  Downloading six-1.17.0-py2.py3-none-any.whl (11 kB)


Installing collected packages: simsimd, pytz, tzdata, typing-extensions, tqdm, threadpoolctl, stringzilla, six, PyYAML, pyparsing, pillow, packaging, numpy, kiwisolver, joblib, fonttools, cycler, annotated-types, typing-inspection, scipy, python-dateutil, pydantic-core, opencv-python-headless, contourpy, scikit-learn, pydantic, pandas, matplotlib, albucore, albumentations


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
timm 1.0.20 requires huggingface_hub, which is not installed.
timm 1.0.20 requires safetensors, which is not installed.


Successfully installed PyYAML-6.0.2 albucore-0.0.24 albumentations-2.0.8 annotated-types-0.7.0 contourpy-1.3.3 cycler-0.12.1 fonttools-4.60.0 joblib-1.5.2 kiwisolver-1.4.9 matplotlib-3.10.6 numpy-1.26.4 opencv-python-headless-4.11.0.86 packaging-25.0 pandas-2.3.2 pillow-11.3.0 pydantic-2.11.9 pydantic-core-2.33.2 pyparsing-3.2.5 python-dateutil-2.9.0.post0 pytz-2025.2 scikit-learn-1.7.2 scipy-1.16.2 simsimd-6.5.3 six-1.17.0 stringzilla-4.0.13 threadpoolctl-3.6.0 tqdm-4.67.1 typing-extensions-4.15.0 typing-inspection-0.4.1 tzdata-2025.2




Torch: 2.5.1+cu121 | TorchVision: 0.20.1+cu121
GPU Available: False
GPU Count: 0
train.csv shape: (8920, 14) | test.csv shape: (992, 13)
Columns (train): ['Id', 'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory', 'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur', 'Pawpularity']
Columns (test): ['Id', 'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory', 'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']
Train images on disk: 8920 | in CSV: 8920 | missing: 0
Test images on disk: 992 | in CSV: 992 | missing: 0
Pawpularity stats: {'min': 1, 'max': 100, 'mean': 38.024775784753366, 'std': 20.646554573509412}
Strat bins (floor/5) distribution sample: {0: 184, 1: 147, 2: 267, 3: 528, 4: 1009, 5: 1369, 6: 1256, 7: 974, 8: 746, 9: 535}

Env & data check complete.




In [4]:
# Create and save stratified 5-fold splits (20 bins)
import os, time
import pandas as pd, numpy as np
from sklearn.model_selection import StratifiedKFold

t0 = time.time()
train_df = pd.read_csv('train.csv')
assert 'Id' in train_df.columns and 'Pawpularity' in train_df.columns, 'Missing Id/Pawpularity'

# 20-bin stratification via floor/5 capped to 19
y = train_df['Pawpularity'].values.astype(int)
bins = np.clip((y // 5).astype(int), 0, 19)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2025)
folds = np.full(len(train_df), -1, dtype=int)
for f, (_, val_idx) in enumerate(skf.split(np.zeros(len(bins)), bins)):
    folds[val_idx] = f

train_df['fold'] = folds
assert (train_df['fold'] >= 0).all(), 'Unassigned fold discovered'

# Save folds mapping for reuse
folds_path = 'folds.csv'
train_df[['Id', 'fold']].to_csv(folds_path, index=False)

# Summary
print('Saved:', folds_path, '| shape:', train_df.shape)
print('Fold distribution (counts):', train_df['fold'].value_counts().sort_index().to_dict())
print('Bins per fold (head):')
print(train_df.groupby('fold')['Pawpularity'].agg(['mean','std','min','max']).round(3))
print(f'Done in {time.time()-t0:.2f}s')

Saved: folds.csv | shape: (8920, 15)
Fold distribution (counts): {0: 1784, 1: 1784, 2: 1784, 3: 1784, 4: 1784}
Bins per fold (head):
        mean     std  min  max
fold                          
0     38.031  20.561    1  100
1     37.999  20.636    2  100
2     38.006  20.725    2  100
3     38.066  20.667    1  100
4     38.022  20.671    2  100
Done in 0.02s


In [5]:
# GPU diagnostics
import os, subprocess, sys, torch

def run_cmd(cmd):
    print(f"\n$ {' '.join(cmd)}", flush=True)
    try:
        out = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, check=False)
        print(out.stdout)
    except Exception as e:
        print(f"FAILED: {e}")

print('Env vars:')
for k in ['CUDA_VISIBLE_DEVICES','NVIDIA_VISIBLE_DEVICES','NVIDIA_DRIVER_CAPABILITIES']:
    print(f"  {k} = {os.getenv(k)}")

print('\nPyTorch CUDA:')
print('  torch_ver =', torch.__version__)
print('  torch.version.cuda =', torch.version.cuda)
print('  is_available =', torch.cuda.is_available())
print('  device_count =', torch.cuda.device_count())

run_cmd(['nvidia-smi'])
run_cmd(['bash','-lc','ls -l /dev/nvidia*'])
run_cmd(['bash','-lc','cat /proc/driver/nvidia/version || true'])
run_cmd(['bash','-lc','ldconfig -p | grep nvidia || true'])

print('\nDiagnostics complete.')

Env vars:
  CUDA_VISIBLE_DEVICES = None
  NVIDIA_VISIBLE_DEVICES = None
  NVIDIA_DRIVER_CAPABILITIES = None

PyTorch CUDA:
  torch_ver = 2.5.1+cu121
  torch.version.cuda = 12.1
  is_available = False
  device_count = 0

$ nvidia-smi


Failed to initialize NVML: Unknown Error


$ bash -lc ls -l /dev/nvidia*


crw-rw-rw- 1 root root 234,   0 Sep 22 04:48 /dev/nvidia-uvm
crw-rw-rw- 1 root root 234,   1 Sep 22 04:48 /dev/nvidia-uvm-tools
crw-rw-rw- 1 root root 195,   0 Sep 22 04:47 /dev/nvidia0
crw-rw-rw- 1 root root 195, 255 Sep 22 04:47 /dev/nvidiactl


$ bash -lc cat /proc/driver/nvidia/version || true


NVRM version: NVIDIA UNIX x86_64 Kernel Module  550.144.06  Mon Apr 14 05:50:23 UTC 2025
GCC version:  gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04.2) 


$ bash -lc ldconfig -p | grep nvidia || true


	libnvidia-ptxjitcompiler.so.1 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
	libnvidia-pkcs11.so.550.144.06 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libnvidia-pkcs11.so.550.144.06
	libnvidia-pkcs11-openssl3.so.550.144.06 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libnvidia-pkcs11-openssl3.so.550.144.06
	libnvidia-opencl.so.1 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.1
	libnvidia-nvvm.so.4 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libnvidia-nvvm.so.4
	libnvidia-ml.so.1 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1
	libnvidia-gpucomp.so.550.144.06 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libnvidia-gpucomp.so.550.144.06
	libnvidia-cfg.so.1 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libnvidia-cfg.so.1
	libnvidia-allocator.so.1 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libnvidia-allocator.so.1


Diagnostics complete.




In [6]:
# CPU fallback: install extras for embeddings + boosted trees
import sys, subprocess
def pip_run(args):
    print(f"[pip] {' '.join(args)}", flush=True)
    subprocess.run([sys.executable, '-m', 'pip'] + args, check=True)

pkgs1 = ['install', '--upgrade-strategy', 'only-if-needed', 'huggingface_hub', 'safetensors']
pkgs2 = ['install', '--upgrade-strategy', 'only-if-needed', 'lightgbm', 'xgboost']
pip_run(pkgs1)
pip_run(pkgs2)
print('Installed huggingface_hub, safetensors, lightgbm, xgboost')

[pip] install --upgrade-strategy only-if-needed huggingface_hub safetensors


Collecting huggingface_hub


  Downloading huggingface_hub-0.35.0-py3-none-any.whl (563 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 563.4/563.4 KB 21.8 MB/s eta 0:00:00


Collecting safetensors
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (485 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 485.8/485.8 KB 50.1 MB/s eta 0:00:00
Collecting typing-extensions>=3.7.4.3
  Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.6/44.6 KB 397.7 MB/s eta 0:00:00
Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl (64 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 64.7/64.7 KB 418.4 MB/s eta 0:00:00


Collecting hf-xet<2.0.0,>=1.1.3
  Downloading hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.2/3.2 MB 14.7 MB/s eta 0:00:00
Collecting filelock
  Downloading filelock-3.19.1-py3-none-any.whl (15 kB)
Collecting packaging>=20.9
  Downloading packaging-25.0-py3-none-any.whl (66 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 66.5/66.5 KB 435.0 MB/s eta 0:00:00
Collecting tqdm>=4.42.1
  Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.5/78.5 KB 422.1 MB/s eta 0:00:00
Collecting fsspec>=2023.5.0
  Downloading fsspec-2025.9.0-py3-none-any.whl (199 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 199.3/199.3 KB 417.3 MB/s eta 0:00:00


Collecting pyyaml>=5.1
  Downloading PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (762 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 763.0/763.0 KB 264.9 MB/s eta 0:00:00
Collecting idna<4,>=2.5
  Downloading idna-3.10-py3-none-any.whl (70 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 70.4/70.4 KB 450.3 MB/s eta 0:00:00
Collecting certifi>=2017.4.17
  Downloading certifi-2025.8.3-py3-none-any.whl (161 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 161.2/161.2 KB 477.8 MB/s eta 0:00:00
Collecting urllib3<3,>=1.21.1
  Downloading urllib3-2.5.0-py3-none-any.whl (129 kB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 129.8/129.8 KB 443.3 MB/s eta 0:00:00
Collecting charset_normalizer<4,>=2
  Downloading charset_normalizer-3.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (150 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 150.3/150.3 KB 482.4 MB/s eta 0:00:00


Installing collected packages: urllib3, typing-extensions, tqdm, safetensors, pyyaml, packaging, idna, hf-xet, fsspec, filelock, charset_normalizer, certifi, requests, huggingface_hub


Successfully installed certifi-2025.8.3 charset_normalizer-3.4.3 filelock-3.19.1 fsspec-2025.9.0 hf-xet-1.1.10 huggingface_hub-0.35.0 idna-3.10 packaging-25.0 pyyaml-6.0.2 requests-2.32.5 safetensors-0.6.2 tqdm-4.67.1 typing-extensions-4.15.0 urllib3-2.5.0
[pip] install --upgrade-strategy only-if-needed lightgbm xgboost




Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-manylinux_2_28_x86_64.whl (3.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.6/3.6 MB 7.4 MB/s eta 0:00:00
Collecting xgboost
  Downloading xgboost-3.0.5-py3-none-manylinux_2_28_x86_64.whl (94.9 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 94.9/94.9 MB 330.3 MB/s eta 0:00:00


Collecting numpy>=1.17.0
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 440.8 MB/s eta 0:00:00


Collecting scipy
  Downloading scipy-1.16.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (35.9 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 35.9/35.9 MB 18.6 MB/s eta 0:00:00
Collecting nvidia-nccl-cu12
  Downloading nvidia_nccl_cu12-2.28.3-py3-none-manylinux_2_18_x86_64.whl (295.9 MB)


     ━━━━━━━━━━━━━━━━━━━━━╸                 170.9/295.9 MB 22.5 MB/s eta 0:00:06


ERROR: Exception:
Traceback (most recent call last):
  File "/usr/lib/python3/dist-packages/pip/_vendor/urllib3/response.py", line 438, in _error_catcher
    yield
  File "/usr/lib/python3/dist-packages/pip/_vendor/urllib3/response.py", line 519, in read
    data = self._fp.read(amt) if not fp_closed else b""
           ^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/http/client.py", line 465, in read
    s = self.fp.read(amt)
        ^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/socket.py", line 705, in readinto
    return self._sock.recv_into(b)
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/ssl.py", line 1278, in recv_into
    return self.read(nbytes, buffer)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/ssl.py", line 1134, in read
    return self._sslobj.read(len, buffer)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TimeoutError: The read operation timed out

During handling of the above exception, another exception occurred:

Traceback (most recent c

CalledProcessError: Command '['/usr/bin/python3.11', '-m', 'pip', 'install', '--upgrade-strategy', 'only-if-needed', 'lightgbm', 'xgboost']' returned non-zero exit status 2.

In [7]:
# CPU fallback: extract 224px frozen embeddings for two timm models
import os, time, gc, sys, math, glob
import numpy as np, pandas as pd
from PIL import Image
import torch
import timm
from torch.utils.data import Dataset, DataLoader
from timm.data import resolve_data_config, create_transform

torch.set_num_threads(8)

class ImageDataset(Dataset):
    def __init__(self, ids, folder, transform):
        self.ids = ids
        self.folder = folder
        self.transform = transform
    def __len__(self):
        return len(self.ids)
    def __getitem__(self, idx):
        img_id = self.ids[idx]
        path = os.path.join(self.folder, f"{img_id}.jpg")
        img = Image.open(path).convert('RGB')
        img = self.transform(img)
        return img

def extract_embeddings(model_name, img_size=224, batch_size=128, num_workers=8):
    t0 = time.time()
    print(f"\n[Emb] {model_name} @ {img_size}")
    train_df = pd.read_csv('train.csv')
    test_df = pd.read_csv('test.csv')
    train_ids = train_df['Id'].tolist()
    test_ids = test_df['Id'].tolist()

    # Model
    model = timm.create_model(model_name, pretrained=True, num_classes=0, global_pool='avg')
    model.eval()
    device = torch.device('cpu')
    model.to(device)

    # Transforms based on timm config
    cfg = resolve_data_config({}, model=model)
    cfg['input_size'] = (3, img_size, img_size)
    transform = create_transform(**cfg, is_training=False)

    # Datasets/DataLoaders
    train_ds = ImageDataset(train_ids, 'train', transform)
    test_ds = ImageDataset(test_ids, 'test', transform)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=False)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=False)

    # Infer embedding dimension
    with torch.no_grad():
        sample = next(iter(train_loader))[:1].to(device)
        emb_dim = model(sample).shape[1]
    print(f"Embedding dim: {emb_dim}")

    def run_loader(loader, n_items):
        X = np.zeros((n_items, emb_dim), dtype=np.float32)
        i0 = 0
        with torch.no_grad():
            for i, xb in enumerate(loader):
                xb = xb.to(device)
                feats = model(xb).cpu().numpy().astype(np.float32)
                X[i0:i0+feats.shape[0]] = feats
                i0 += feats.shape[0]
                if (i+1) % 20 == 0:
                    print(f"  Batches {i+1}/{math.ceil(n_items/loader.batch_size)} | rows {i0}/{n_items} | elapsed {time.time()-t0:.1f}s", flush=True)
        return X

    X_tr = run_loader(train_loader, len(train_ds))
    X_te = run_loader(test_loader, len(test_ds))

    tr_out = f"X_img_train_{model_name.replace('/', '_')}_{img_size}.npy"
    te_out = f"X_img_test_{model_name.replace('/', '_')}_{img_size}.npy"
    np.save(tr_out, X_tr)
    np.save(te_out, X_te)
    print(f"Saved {tr_out} {X_tr.shape}, {te_out} {X_te.shape} | time {time.time()-t0:.1f}s")
    del model, X_tr, X_te, train_loader, test_loader, train_ds, test_ds
    gc.collect()

models = [
    ('tf_efficientnet_b0', 224),
    ('swin_tiny_patch4_window7_224', 224),
]

for name, sz in models:
    extract_embeddings(name, img_size=sz, batch_size=128, num_workers=8)
print('Embeddings extraction complete.')

  from .autonotebook import tqdm as notebook_tqdm



[Emb] tf_efficientnet_b0 @ 224


Embedding dim: 1280


  Batches 20/70 | rows 2560/8920 | elapsed 19.2s


  Batches 40/70 | rows 5120/8920 | elapsed 34.0s


  Batches 60/70 | rows 7680/8920 | elapsed 48.5s


Saved X_img_train_tf_efficientnet_b0_224.npy (8920, 1280), X_img_test_tf_efficientnet_b0_224.npy (992, 1280) | time 62.3s

[Emb] swin_tiny_patch4_window7_224 @ 224


Embedding dim: 768


  Batches 20/70 | rows 2560/8920 | elapsed 54.0s


  Batches 40/70 | rows 5120/8920 | elapsed 105.1s


  Batches 60/70 | rows 7680/8920 | elapsed 155.9s


Saved X_img_train_swin_tiny_patch4_window7_224_224.npy (8920, 768), X_img_test_swin_tiny_patch4_window7_224_224.npy (992, 768) | time 201.3s
Embeddings extraction complete.


In [8]:
# CPU fallback: build metadata + simple image stats and save aligned arrays
import os, time, cv2, numpy as np, pandas as pd

def compute_img_stats(img_path):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        return [0,0,0.0, 0.0,0.0,0.0, 0.0,0.0,0.0]
    h, w = img.shape[:2]
    aspect = (w / max(h,1)) if h>0 else 0.0
    # per-channel means/std in BGR
    ch_means = img.reshape(-1,3).mean(axis=0).tolist()
    ch_stds = img.reshape(-1,3).std(axis=0).tolist()
    # blur metric: Laplacian variance (grayscale)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    lap = cv2.Laplacian(gray, cv2.CV_64F)
    lap_var = float(lap.var())
    return [w, h, aspect] + ch_means + ch_stds + [lap_var]

t0 = time.time()
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

meta_cols = ['Subject Focus','Eyes','Face','Near','Action','Accessory','Group','Collage','Human','Occlusion','Info','Blur']
assert all(c in train_df.columns for c in meta_cols), 'Meta columns missing'

def build_features(df, folder):
    ids = df['Id'].tolist()
    X_meta = df[meta_cols].astype(np.float32).values
    stats = []
    for i, id_ in enumerate(ids):
        if (i+1) % 1000 == 0:
            print(f'  Stats {i+1}/{len(ids)} ...', flush=True)
        stats.append(compute_img_stats(os.path.join(folder, f'{id_}.jpg')))
    X_stats = np.asarray(stats, dtype=np.float32)
    X = np.concatenate([X_meta, X_stats], axis=1)
    return ids, X

print('Building train features...')
tr_ids, X_tr = build_features(train_df, 'train')
print('Building test features...')
te_ids, X_te = build_features(test_df, 'test')

np.save('X_meta_stats_train.npy', X_tr)
np.save('X_meta_stats_test.npy', X_te)
pd.Series(tr_ids, name='Id').to_csv('ids_train_order.csv', index=False)
pd.Series(te_ids, name='Id').to_csv('ids_test_order.csv', index=False)
print('Saved X_meta_stats_train.npy', X_tr.shape, '| X_meta_stats_test.npy', X_te.shape)
print(f'Done in {time.time()-t0:.1f}s')

Building train features...


  Stats 1000/8920 ...


  Stats 2000/8920 ...


  Stats 3000/8920 ...


  Stats 4000/8920 ...


  Stats 5000/8920 ...


  Stats 6000/8920 ...


  Stats 7000/8920 ...


  Stats 8000/8920 ...


Building test features...


Saved X_meta_stats_train.npy (8920, 22) | X_meta_stats_test.npy (992, 22)
Done in 781.4s


In [10]:
# CPU fallback: 5-fold CV training on embeddings + meta-stats; blend and create submission
import os, glob, time, math, json
import numpy as np, pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV

def rmse(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

def load_feature_set(name):
    if name == 'meta_stats':
        X_tr = np.load('X_meta_stats_train.npy') if os.path.exists('X_meta_stats_train.npy') else None
        X_te = np.load('X_meta_stats_test.npy') if os.path.exists('X_meta_stats_test.npy') else None
        return X_tr, X_te
    elif name.startswith('emb:'):
        tag = name.split(':',1)[1]
        tr_path = f"X_img_train_{tag}.npy"
        te_path = f"X_img_test_{tag}.npy"
        X_tr = np.load(tr_path) if os.path.exists(tr_path) else None
        X_te = np.load(te_path) if os.path.exists(te_path) else None
        return X_tr, X_te
    else:
        return None, None

def discover_embeddings():
    feats = []
    for tr_path in sorted(glob.glob('X_img_train_*.npy')):
        tag = tr_path[len('X_img_train_'):-len('.npy')]
        te_path = f"X_img_test_{tag}.npy"
        if os.path.exists(te_path):
            feats.append(f"emb:{tag}")
    return feats

# Try importing boosted trees; fallback to sklearn if unavailable
lgbm_ok = False; xgb_ok = False
try:
    import lightgbm as lgb
    lgbm_ok = True
except Exception as e:
    print('LightGBM not available:', e)
try:
    from xgboost import XGBRegressor
    xgb_ok = True
except Exception as e:
    print('XGBoost not available:', e)

train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
folds_df = pd.read_csv('folds.csv')
id2fold = dict(zip(folds_df['Id'], folds_df['fold']))
train_df['fold'] = train_df['Id'].map(id2fold)
y = train_df['Pawpularity'].values.astype(float)

# Gather feature sets available
feature_sets = []
if os.path.exists('X_meta_stats_train.npy') and os.path.exists('X_meta_stats_test.npy'):
    feature_sets.append('meta_stats')
feature_sets += discover_embeddings()
print('Feature sets found:', feature_sets)
assert len(feature_sets) > 0, 'No feature sets found yet. Run extraction cells first.'

results = {}
oof_blend = np.zeros(len(train_df), dtype=float)
test_preds_for_blend = []
weights = []

for fs in feature_sets:
    X_tr, X_te = load_feature_set(fs)
    if X_tr is None or X_te is None:
        print(f'Skipping {fs}: files missing')
        continue
    print(f'Feature set {fs}: train {X_tr.shape}, test {X_te.shape}')

    oof = np.zeros(len(train_df), dtype=float)
    te_pred_accum = np.zeros(len(test_df), dtype=float)
    fold_rmses = []

    for fold in range(5):
        tr_idx = np.where(train_df['fold'].values != fold)[0]
        va_idx = np.where(train_df['fold'].values == fold)[0]
        X_tr_fold, y_tr_fold = X_tr[tr_idx], y[tr_idx]
        X_va_fold, y_va_fold = X_tr[va_idx], y[va_idx]

        model_name = None
        if lgbm_ok:
            model_name = 'lgbm'
            dtrain = lgb.Dataset(X_tr_fold, label=y_tr_fold)
            dvalid = lgb.Dataset(X_va_fold, label=y_va_fold)
            params = dict(objective='regression', metric='rmse', learning_rate=0.03,
                          num_leaves=64, min_data_in_leaf=20, feature_fraction=0.8,
                          bagging_fraction=0.8, bagging_freq=1, verbosity=-1)
            gbm = lgb.train(params, dtrain, num_boost_round=5000, valid_sets=[dvalid],
                            valid_names=['valid'], callbacks=[lgb.early_stopping(200), lgb.log_evaluation(100)])
            va_pred = gbm.predict(X_va_fold, num_iteration=gbm.best_iteration)
            te_pred = gbm.predict(X_te, num_iteration=gbm.best_iteration)
        elif xgb_ok:
            model_name = 'xgb'
            xgb = XGBRegressor(n_estimators=5000, learning_rate=0.03, max_depth=7, subsample=0.8, colsample_bytree=0.8,
                               reg_alpha=0.0, reg_lambda=1.0, tree_method='hist', random_state=42)
            xgb.fit(X_tr_fold, y_tr_fold,
                    eval_set=[(X_va_fold, y_va_fold)], eval_metric='rmse', verbose=100,
                    callbacks=[])
            va_pred = xgb.predict(X_va_fold)
            te_pred = xgb.predict(X_te)
        else:
            model_name = 'ridge'
            scaler = StandardScaler(with_mean=True, with_std=True)
            X_tr_s = scaler.fit_transform(X_tr_fold)
            X_va_s = scaler.transform(X_va_fold)
            X_te_s = scaler.transform(X_te)
            ridge = RidgeCV(alphas=[0.1, 0.3, 1.0, 3.0, 10.0], cv=5, scoring='neg_root_mean_squared_error')
            ridge.fit(X_tr_s, y_tr_fold)
            va_pred = ridge.predict(X_va_s)
            te_pred = ridge.predict(X_te_s)

        oof[va_idx] = va_pred
        fold_rmse = rmse(y_va_fold, va_pred)
        fold_rmses.append(fold_rmse)
        te_pred_accum += te_pred
        print(f'  {fs} fold {fold} [{model_name}] RMSE: {fold_rmse:.4f}')

    te_pred_mean = te_pred_accum / 5.0
    fs_oof_rmse = rmse(y, oof)
    results[fs] = {'oof_rmse': fs_oof_rmse, 'fold_rmses': fold_rmses}
    np.save(f'oof_{fs}.npy', oof)
    np.save(f'test_pred_{fs}.npy', te_pred_mean)
    print(f'{fs} OOF RMSE: {fs_oof_rmse:.5f}')

    test_preds_for_blend.append(te_pred_mean)
    # inverse-variance style weight; avoid div by zero
    w = 1.0 / max(fs_oof_rmse**2, 1e-6)
    weights.append(w)

# Blend available models
if len(test_preds_for_blend) == 0:
    raise RuntimeError('No successful models to blend.')
weights = np.array(weights, dtype=float)
weights = weights / weights.sum()
print('Blend weights:', {fs: float(w) for fs, w in zip(results.keys(), weights)})
stacked = np.vstack(test_preds_for_blend)
blend_test = (weights.reshape(-1,1) * stacked).sum(axis=0)

# Clip predictions and save submission
blend_test = np.clip(blend_test, 1.0, 100.0)
sub = pd.DataFrame({'Id': test_df['Id'], 'Pawpularity': blend_test})
sub.to_csv('submission.csv', index=False)
print('Saved submission.csv. Head:')
print(sub.head())
print('Results summary:', json.dumps(results, indent=2))

Feature sets found: ['meta_stats', 'emb:swin_tiny_patch4_window7_224_224', 'emb:tf_efficientnet_b0_224']
Feature set meta_stats: train (8920, 22), test (992, 22)
Training until validation scores don't improve for 200 rounds
[100]	valid's rmse: 20.4865


[200]	valid's rmse: 20.6486
Early stopping, best iteration is:
[62]	valid's rmse: 20.4324
  meta_stats fold 0 [lgbm] RMSE: 20.4324
Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 20.842
[200]	valid's rmse: 21.1366
Early stopping, best iteration is:
[5]	valid's rmse: 20.6156
  meta_stats fold 1 [lgbm] RMSE: 20.6156
Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 20.8778
[200]	valid's rmse: 21.0774


Early stopping, best iteration is:
[16]	valid's rmse: 20.695
  meta_stats fold 2 [lgbm] RMSE: 20.6950
Training until validation scores don't improve for 200 rounds
[100]	valid's rmse: 20.7922


[200]	valid's rmse: 20.9894
Early stopping, best iteration is:
[32]	valid's rmse: 20.6158
  meta_stats fold 3 [lgbm] RMSE: 20.6158
Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 20.8897
[200]	valid's rmse: 21.0811
Early stopping, best iteration is:
[13]	valid's rmse: 20.6591
  meta_stats fold 4 [lgbm] RMSE: 20.6591
meta_stats OOF RMSE: 20.60378
Feature set emb:swin_tiny_patch4_window7_224_224: train (8920, 768), test (992, 768)


Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 17.8075


[200]	valid's rmse: 17.6133


[300]	valid's rmse: 17.6113


[400]	valid's rmse: 17.6335


Early stopping, best iteration is:
[238]	valid's rmse: 17.5942
  emb:swin_tiny_patch4_window7_224_224 fold 0 [lgbm] RMSE: 17.5942


Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 18.5057


[200]	valid's rmse: 18.4667


[300]	valid's rmse: 18.4493


[400]	valid's rmse: 18.4496


Early stopping, best iteration is:
[273]	valid's rmse: 18.4447
  emb:swin_tiny_patch4_window7_224_224 fold 1 [lgbm] RMSE: 18.4447


Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 18.5767


[200]	valid's rmse: 18.4363


[300]	valid's rmse: 18.4152


[400]	valid's rmse: 18.4215


[500]	valid's rmse: 18.4099


[600]	valid's rmse: 18.4084


Early stopping, best iteration is:
[474]	valid's rmse: 18.4051
  emb:swin_tiny_patch4_window7_224_224 fold 2 [lgbm] RMSE: 18.4051


Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 18.3535


[200]	valid's rmse: 18.2623


[300]	valid's rmse: 18.2307


[400]	valid's rmse: 18.2382


[500]	valid's rmse: 18.2368


Early stopping, best iteration is:
[326]	valid's rmse: 18.2243
  emb:swin_tiny_patch4_window7_224_224 fold 3 [lgbm] RMSE: 18.2243


Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 18.7809


[200]	valid's rmse: 18.642


[300]	valid's rmse: 18.6101


[400]	valid's rmse: 18.5916


[500]	valid's rmse: 18.5965


[600]	valid's rmse: 18.5969
Early stopping, best iteration is:
[410]	valid's rmse: 18.5834
  emb:swin_tiny_patch4_window7_224_224 fold 4 [lgbm] RMSE: 18.5834
emb:swin_tiny_patch4_window7_224_224 OOF RMSE: 18.25365
Feature set emb:tf_efficientnet_b0_224: train (8920, 1280), test (992, 1280)


Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 17.5537


[200]	valid's rmse: 17.5098


[300]	valid's rmse: 17.5496


Early stopping, best iteration is:
[183]	valid's rmse: 17.4973
  emb:tf_efficientnet_b0_224 fold 0 [lgbm] RMSE: 17.4973


Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 18.1929


[200]	valid's rmse: 18.1584


[300]	valid's rmse: 18.1706


Early stopping, best iteration is:
[156]	valid's rmse: 18.1494
  emb:tf_efficientnet_b0_224 fold 1 [lgbm] RMSE: 18.1494


Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 18.4794


[200]	valid's rmse: 18.4558


[300]	valid's rmse: 18.4649


Early stopping, best iteration is:
[129]	valid's rmse: 18.4391
  emb:tf_efficientnet_b0_224 fold 2 [lgbm] RMSE: 18.4391


Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 18.0803


[200]	valid's rmse: 18.0345


[300]	valid's rmse: 18.0611


Early stopping, best iteration is:
[172]	valid's rmse: 18.015
  emb:tf_efficientnet_b0_224 fold 3 [lgbm] RMSE: 18.0150


Training until validation scores don't improve for 200 rounds


[100]	valid's rmse: 18.3521


[200]	valid's rmse: 18.31


[300]	valid's rmse: 18.3303


Early stopping, best iteration is:
[145]	valid's rmse: 18.298
  emb:tf_efficientnet_b0_224 fold 4 [lgbm] RMSE: 18.2980
emb:tf_efficientnet_b0_224 OOF RMSE: 18.08268
Blend weights: {'meta_stats': 0.27992766980776335, 'emb:swin_tiny_patch4_window7_224_224': 0.3566479893738934, 'emb:tf_efficientnet_b0_224': 0.3634243408183432}
Saved submission.csv. Head:
                                 Id  Pawpularity
0  ee51b99832f1ba868f646df93d2b6b81    49.616345
1  caddfb3f8bff9c4b95dbe022018eea21    33.025846
2  582eeabd4a448a53ebb79995888a4b0b    35.647525
3  afc1ad7f0c5eea880759d09e77f7deee    32.913339
4  d5bdf3446e86ce4ec67ce7a00f1cccc2    32.701493
Results summary: {
  "meta_stats": {
    "oof_rmse": 20.603775323132393,
    "fold_rmses": [
      20.432436381299492,
      20.61560497149953,
      20.694954573731803,
      20.615822958224896,
      20.65906246566223
    ]
  },
  "emb:swin_tiny_patch4_window7_224_224": {
    "oof_rmse": 18.25365466088792,
    "fold_rmses": [
      17.5942140123543

In [16]:
# Extract additional CPU-friendly embeddings (per expert ROI list) incl. OpenCLIP
import os, time, gc, math, subprocess, sys
import numpy as np, pandas as pd
import torch
import timm
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from timm.data import resolve_data_config, create_transform

torch.set_num_threads(8)

def ensure_openclip():
    try:
        import open_clip  # noqa
    except Exception:
        print("Installing open-clip-torch (no-deps)...", flush=True)
        subprocess.run([sys.executable, '-m', 'pip', 'install', '--no-deps', '--upgrade-strategy', 'only-if-needed', 'open-clip-torch'], check=True)
    # ensure runtime deps without touching torch
    try:
        import ftfy  # noqa
    except Exception:
        print("Installing ftfy...", flush=True)
        subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade-strategy', 'only-if-needed', 'ftfy'], check=True)
    try:
        import regex  # noqa
    except Exception:
        print("Installing regex...", flush=True)
        subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade-strategy', 'only-if-needed', 'regex'], check=True)
    return True

class ImageDataset(Dataset):
    def __init__(self, ids, folder, transform):
        self.ids = ids
        self.folder = folder
        self.transform = transform
    def __len__(self):
        return len(self.ids)
    def __getitem__(self, idx):
        img_id = self.ids[idx]
        path = os.path.join(self.folder, f"{img_id}.jpg")
        img = Image.open(path).convert('RGB')
        img = self.transform(img)
        return img

def extract_timm_embeddings(model_name, img_size=224, batch_size=128, num_workers=8):
    tr_out = f"X_img_train_{model_name.replace('/', '_')}_{img_size}.npy"
    te_out = f"X_img_test_{model_name.replace('/', '_')}_{img_size}.npy"
    if os.path.exists(tr_out) and os.path.exists(te_out):
        print(f"[Skip] {model_name} exists: {tr_out}, {te_out}")
        return
    t0 = time.time()
    print(f"\n[Emb] {model_name} @ {img_size}", flush=True)
    train_df = pd.read_csv('train.csv')
    test_df = pd.read_csv('test.csv')
    train_ids = train_df['Id'].tolist()
    test_ids = test_df['Id'].tolist()

    model = timm.create_model(model_name, pretrained=True, num_classes=0, global_pool='avg')
    model.eval().to('cpu')

    cfg = resolve_data_config({}, model=model)
    cfg['input_size'] = (3, img_size, img_size)
    transform = create_transform(**cfg, is_training=False)

    train_ds = ImageDataset(train_ids, 'train', transform)
    test_ds = ImageDataset(test_ids, 'test', transform)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=False)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=False)

    with torch.no_grad():
        sample = next(iter(train_loader))[:1]
        emb_dim = model(sample).shape[1]
    print(f"Embedding dim: {emb_dim}")

    def run_loader(loader, n_items):
        X = np.zeros((n_items, emb_dim), dtype=np.float32)
        i0 = 0
        with torch.no_grad():
            for i, xb in enumerate(loader):
                feats = model(xb).cpu().numpy().astype(np.float32)
                X[i0:i0+feats.shape[0]] = feats
                i0 += feats.shape[0]
                if (i+1) % 20 == 0:
                    print(f"  Batches {i+1}/{math.ceil(n_items/loader.batch_size)} | rows {i0}/{n_items} | elapsed {time.time()-t0:.1f}s", flush=True)
        return X

    X_tr = run_loader(train_loader, len(train_ds))
    X_te = run_loader(test_loader, len(test_ds))
    np.save(tr_out, X_tr); np.save(te_out, X_te)
    print(f"Saved {tr_out} {X_tr.shape}, {te_out} {X_te.shape} | time {time.time()-t0:.1f}s")
    del model, X_tr, X_te, train_loader, test_loader, train_ds, test_ds
    gc.collect()

def extract_openclip_vitb32(img_size=224, batch_size=128, num_workers=8):
    tag = f"openclip_vit_b32_{img_size}"
    tr_out = f"X_img_train_{tag}.npy"
    te_out = f"X_img_test_{tag}.npy"
    if os.path.exists(tr_out) and os.path.exists(te_out):
        print(f"[Skip] OpenCLIP ViT-B/32 exists: {tr_out}, {te_out}")
        return
    ensure_openclip()
    import open_clip
    from torchvision import transforms as T

    t0 = time.time()
    print(f"\n[Emb] OpenCLIP ViT-B/32 @ {img_size}", flush=True)
    model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k', device='cpu')
    model.eval()
    preprocess = T.Compose([
        T.Resize(img_size, interpolation=T.InterpolationMode.BICUBIC),
        T.CenterCrop(img_size),
        T.ToTensor(),
        T.Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711)),
    ])

    class OpenClipDS(Dataset):
        def __init__(self, ids, folder):
            self.ids = ids; self.folder = folder
        def __len__(self): return len(self.ids)
        def __getitem__(self, idx):
            img = Image.open(os.path.join(self.folder, f"{self.ids[idx]}.jpg")).convert('RGB')
            return preprocess(img)

    train_df = pd.read_csv('train.csv'); test_df = pd.read_csv('test.csv')
    train_ids = train_df['Id'].tolist(); test_ids = test_df['Id'].tolist()
    train_loader = DataLoader(OpenClipDS(train_ids, 'train'), batch_size=batch_size, shuffle=False, num_workers=num_workers)
    test_loader = DataLoader(OpenClipDS(test_ids, 'test'), batch_size=batch_size, shuffle=False, num_workers=num_workers)

    with torch.no_grad():
        sample = next(iter(train_loader))[:1]
        feats = model.encode_image(sample)
        emb_dim = feats.shape[1]
    print(f"Embedding dim: {emb_dim}")

    def run_loader(loader, n_items):
        X = np.zeros((n_items, emb_dim), dtype=np.float32)
        i0 = 0
        with torch.no_grad():
            for i, xb in enumerate(loader):
                feats = model.encode_image(xb).float().cpu().numpy().astype(np.float32)
                X[i0:i0+feats.shape[0]] = feats
                i0 += feats.shape[0]
                if (i+1) % 20 == 0:
                    print(f"  Batches {i+1}/{math.ceil(n_items/loader.batch_size)} | rows {i0}/{n_items} | elapsed {time.time()-t0:.1f}s", flush=True)
        return X

    X_tr = run_loader(train_loader, len(train_ids))
    X_te = run_loader(test_loader, len(test_ids))
    np.save(tr_out, X_tr); np.save(te_out, X_te)
    print(f"Saved {tr_out} {X_tr.shape}, {te_out} {X_te.shape} | time {time.time()-t0:.1f}s")
    del model, X_tr, X_te, train_loader, test_loader
    gc.collect()

# Queue models (fast ROI on CPU)
models = [
    ('convnext_tiny_in22k', 224),
    ('tf_efficientnetv2_s_in21k', 224),
    ('swin_small_patch4_window7_224', 224),
    ('vit_base_patch16_224', 224),
    ('deit3_small_patch16_224', 224),
]

for name, sz in models:
    try:
        extract_timm_embeddings(name, img_size=sz, batch_size=128, num_workers=8)
    except Exception as e:
        print(f"[Warn] Failed {name}: {e}")

# OpenCLIP ViT-B/32
try:
    extract_openclip_vitb32(img_size=224, batch_size=128, num_workers=8)
except Exception as e:
    print(f"[Warn] Failed OpenCLIP ViT-B/32: {e}")
print('Additional embeddings extraction done.')

[Skip] convnext_tiny_in22k exists: X_img_train_convnext_tiny_in22k_224.npy, X_img_test_convnext_tiny_in22k_224.npy
[Skip] tf_efficientnetv2_s_in21k exists: X_img_train_tf_efficientnetv2_s_in21k_224.npy, X_img_test_tf_efficientnetv2_s_in21k_224.npy
[Skip] swin_small_patch4_window7_224 exists: X_img_train_swin_small_patch4_window7_224_224.npy, X_img_test_swin_small_patch4_window7_224_224.npy
[Skip] vit_base_patch16_224 exists: X_img_train_vit_base_patch16_224_224.npy, X_img_test_vit_base_patch16_224_224.npy
[Skip] deit3_small_patch16_224 exists: X_img_train_deit3_small_patch16_224_224.npy, X_img_test_deit3_small_patch16_224_224.npy
Installing open-clip-torch (no-deps)...


Collecting open-clip-torch
  Downloading open_clip_torch-3.2.0-py3-none-any.whl (1.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.5/1.5 MB 42.0 MB/s eta 0:00:00
Installing collected packages: open-clip-torch
Successfully installed open-clip-torch-3.2.0
Installing ftfy...




Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.8/44.8 KB 3.4 MB/s eta 0:00:00
Collecting wcwidth
  Downloading wcwidth-0.2.13-py2.py3-none-any.whl (34 kB)


Installing collected packages: wcwidth, ftfy
Successfully installed ftfy-6.3.1 wcwidth-0.2.13
Installing regex...


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
open-clip-torch 3.2.0 requires regex, which is not installed.


Collecting regex
  Downloading regex-2025.9.18-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (798 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 799.0/799.0 KB 29.6 MB/s eta 0:00:00


Installing collected packages: regex
Successfully installed regex-2025.9.18



[Emb] OpenCLIP ViT-B/32 @ 224


Embedding dim: 512


  Batches 20/70 | rows 2560/8920 | elapsed 37.9s


  Batches 40/70 | rows 5120/8920 | elapsed 67.5s


  Batches 60/70 | rows 7680/8920 | elapsed 97.1s


Saved X_img_train_openclip_vit_b32_224.npy (8920, 512), X_img_test_openclip_vit_b32_224.npy (992, 512) | time 124.5s
Additional embeddings extraction done.


In [20]:
# Improved training: L2+PCA for embeddings, LightGBM per set, try raw vs sqrt target per-set, NNLS blend
import os, glob, time, math, json
import numpy as np, pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression

def rmse(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

def load_feature_set(name):
    if name == 'meta_stats':
        X_tr = np.load('X_meta_stats_train.npy') if os.path.exists('X_meta_stats_train.npy') else None
        X_te = np.load('X_meta_stats_test.npy') if os.path.exists('X_meta_stats_test.npy') else None
        return X_tr, X_te
    elif name.startswith('emb:'):
        tag = name.split(':',1)[1]
        tr_path = f"X_img_train_{tag}.npy"
        te_path = f"X_img_test_{tag}.npy"
        X_tr = np.load(tr_path) if os.path.exists(tr_path) else None
        X_te = np.load(te_path) if os.path.exists(te_path) else None
        return X_tr, X_te
    else:
        return None, None

def discover_feature_sets():
    feats = []
    if os.path.exists('X_meta_stats_train.npy') and os.path.exists('X_meta_stats_test.npy'):
        feats.append('meta_stats')
    for tr_path in sorted(glob.glob('X_img_train_*.npy')):
        tag = tr_path[len('X_img_train_'):-len('.npy')]
        te_path = f"X_img_test_{tag}.npy"
        if os.path.exists(te_path):
            feats.append(f"emb:{tag}")
    return feats

# Import LightGBM if available
lgbm_ok = False
try:
    import lightgbm as lgb
    lgbm_ok = True
except Exception as e:
    print('LightGBM not available:', e)

train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
folds_df = pd.read_csv('folds.csv')
id2fold = dict(zip(folds_df['Id'], folds_df['fold']))
train_df['fold'] = train_df['Id'].map(id2fold)
y = train_df['Pawpularity'].values.astype(float)
y_sqrt = np.sqrt(np.clip(y, 0, None))

feature_sets = discover_feature_sets()
print('Feature sets found:', feature_sets)
assert len(feature_sets) > 0, 'No feature sets found.'

def train_feature_set(fs, X_tr, X_te, target_mode='raw'):
    use_pca = fs.startswith('emb:') and X_tr.shape[1] >= 512
    n_comp = 512 if X_tr.shape[1] >= 512 else X_tr.shape[1]
    oof = np.zeros(len(train_df), dtype=float)
    te_pred_accum = np.zeros(len(test_df), dtype=float)
    fold_rmses = []

    for fold in range(5):
        tr_idx = np.where(train_df['fold'].values != fold)[0]
        va_idx = np.where(train_df['fold'].values == fold)[0]
        X_tr_fold = X_tr[tr_idx]
        X_va_fold = X_tr[va_idx]
        X_te_fold = X_te
        y_tr_fold_raw = y[tr_idx]
        y_va_fold_raw = y[va_idx]
        if target_mode == 'sqrt':
            y_tr_fold = y_sqrt[tr_idx]
            y_va_fold_eval = y_va_fold_raw  # evaluate in raw space after inverse-transform
        else:
            y_tr_fold = y_tr_fold_raw
            y_va_fold_eval = y_va_fold_raw

        if use_pca:
            def l2norm(a):
                n = np.linalg.norm(a, axis=1, keepdims=True) + 1e-12
                return a / n
            X_tr_fold = l2norm(X_tr_fold)
            X_va_fold = l2norm(X_va_fold)
            X_te_fold = l2norm(X_te_fold)
            pca = PCA(n_components=min(n_comp, X_tr.shape[1]), whiten=False, random_state=42)
            X_tr_fold = pca.fit_transform(X_tr_fold)
            X_va_fold = pca.transform(X_va_fold)
            X_te_fold = pca.transform(X_te_fold)
            print(f'  Fold {fold}: PCA -> {X_tr_fold.shape[1]} dims')

        if lgbm_ok:
            dtrain = lgb.Dataset(X_tr_fold, label=y_tr_fold)
            if target_mode == 'sqrt':
                dvalid = lgb.Dataset(X_va_fold, label=np.sqrt(np.clip(y_va_fold_raw, 0, None)))
            else:
                dvalid = lgb.Dataset(X_va_fold, label=y_va_fold_raw)
            params = dict(objective='regression', metric='rmse', learning_rate=0.03,
                          num_leaves=64, min_data_in_leaf=40, feature_fraction=0.8,
                          bagging_fraction=0.8, bagging_freq=1, lambda_l2=1.0, verbosity=-1)
            gbm = lgb.train(params, dtrain, num_boost_round=10000, valid_sets=[dvalid],
                            valid_names=['valid'], callbacks=[lgb.early_stopping(300), lgb.log_evaluation(100)])
            va_pred = gbm.predict(X_va_fold, num_iteration=gbm.best_iteration)
            te_pred = gbm.predict(X_te_fold, num_iteration=gbm.best_iteration)
        else:
            scaler = StandardScaler(with_mean=True, with_std=True)
            X_tr_s = scaler.fit_transform(X_tr_fold)
            X_va_s = scaler.transform(X_va_fold)
            X_te_s = scaler.transform(X_te_fold)
            from sklearn.linear_model import RidgeCV
            ridge = RidgeCV(alphas=[0.1, 0.3, 1.0, 3.0, 10.0], cv=5, scoring='neg_root_mean_squared_error')
            ridge.fit(X_tr_s, y_tr_fold)
            va_pred = ridge.predict(X_va_s)
            te_pred = ridge.predict(X_te_s)

        if target_mode == 'sqrt':
            va_pred = np.clip(va_pred, 0, None) ** 2
            te_pred = np.clip(te_pred, 0, None) ** 2
        va_pred = np.clip(va_pred, 1.0, 100.0)
        te_pred = np.clip(te_pred, 1.0, 100.0)

        oof[va_idx] = va_pred
        fold_rmses.append(rmse(y_va_fold_eval, va_pred))
        te_pred_accum += te_pred
        print(f'  {fs} [{target_mode}] fold {fold} RMSE: {fold_rmses[-1]:.4f}')

    te_pred_mean = te_pred_accum / 5.0
    fs_oof_rmse = rmse(y, oof)
    return {'oof': oof, 'test_pred': te_pred_mean, 'oof_rmse': fs_oof_rmse, 'fold_rmses': fold_rmses, 'used_pca': use_pca, 'n_comp': int(n_comp) if use_pca else int(X_tr.shape[1]), 'target': target_mode}

results = {}
oof_matrix = []
test_matrix = []
names_used = []

for fs in feature_sets:
    X_tr, X_te = load_feature_set(fs)
    if X_tr is None or X_te is None:
        print(f'Skipping {fs}: files missing')
        continue
    print(f'Feature set {fs}: train {X_tr.shape}, test {X_te.shape}')

    res_raw = train_feature_set(fs, X_tr, X_te, target_mode='raw')
    res_sqrt = train_feature_set(fs, X_tr, X_te, target_mode='sqrt') if fs.startswith('emb:') else None
    cand = [res_raw] + ([res_sqrt] if res_sqrt is not None else [])
    best = min(cand, key=lambda d: d['oof_rmse'])
    print(f"{fs} best target: {best['target']} | OOF RMSE: {best['oof_rmse']:.5f}")

    # Save best
    np.save(f'oof_{fs}.npy', best['oof'])
    np.save(f'test_pred_{fs}.npy', best['test_pred'])
    results[fs] = {k: best[k] for k in ['oof_rmse','fold_rmses','used_pca','n_comp','target']}

    oof_matrix.append(best['oof'])
    test_matrix.append(best['test_pred'])
    names_used.append(fs)

# Blend via non-negative least squares (LinearRegression positive=True, no intercept)
if len(oof_matrix) == 0:
    raise RuntimeError('No successful models to blend.')
P = np.vstack(oof_matrix).T
reg = LinearRegression(fit_intercept=False, positive=True)
reg.fit(P, y)
w = reg.coef_.copy()
if w.sum() == 0:
    w = np.ones_like(w)
w = w / w.sum()
print('NNLS-like blend weights:', {n: float(wi) for n, wi in zip(names_used, w)})
Tstack = np.vstack(test_matrix)
blend_test = (w.reshape(-1,1) * Tstack).sum(axis=0)
blend_test = np.clip(blend_test, 1.0, 100.0)
sub = pd.DataFrame({'Id': test_df['Id'], 'Pawpularity': blend_test})
sub.to_csv('submission.csv', index=False)
print('Saved submission.csv. Head:')
print(sub.head())
print('Results summary:', json.dumps(results, indent=2))

Feature sets found: ['meta_stats', 'emb:convnext_tiny_in22k_224', 'emb:deit3_small_patch16_224_224', 'emb:openclip_vit_b32_224', 'emb:swin_small_patch4_window7_224_224', 'emb:swin_tiny_patch4_window7_224_224', 'emb:tf_efficientnet_b0_224', 'emb:tf_efficientnetv2_s_in21k_224', 'emb:vit_base_patch16_224_224']
Feature set meta_stats: train (8920, 22), test (992, 22)
Training until validation scores don't improve for 300 rounds
[100]	valid's rmse: 20.5344


[200]	valid's rmse: 20.6577
[300]	valid's rmse: 20.8641


Early stopping, best iteration is:
[40]	valid's rmse: 20.4286
  meta_stats [raw] fold 0 RMSE: 20.4286
Training until validation scores don't improve for 300 rounds
[100]	valid's rmse: 20.9231


[200]	valid's rmse: 21.2493
[300]	valid's rmse: 21.4906
Early stopping, best iteration is:
[2]	valid's rmse: 20.6147
  meta_stats [raw] fold 1 RMSE: 20.6147
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 20.8299
[200]	valid's rmse: 21.0454


[300]	valid's rmse: 21.3105
Early stopping, best iteration is:
[16]	valid's rmse: 20.6994
  meta_stats [raw] fold 2 RMSE: 20.6994
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 20.8168
[200]	valid's rmse: 21.028


[300]	valid's rmse: 21.2813
Early stopping, best iteration is:
[9]	valid's rmse: 20.6354
  meta_stats [raw] fold 3 RMSE: 20.6354
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 20.9255
[200]	valid's rmse: 21.1956


[300]	valid's rmse: 21.3889
Early stopping, best iteration is:
[1]	valid's rmse: 20.6571
  meta_stats [raw] fold 4 RMSE: 20.6571
meta_stats best target: raw | OOF RMSE: 20.60728
Feature set emb:convnext_tiny_in22k_224: train (8920, 768), test (992, 768)


  Fold 0: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.1726


[200]	valid's rmse: 18.0188


[300]	valid's rmse: 18.001


[400]	valid's rmse: 18.0244


[500]	valid's rmse: 18.0223


Early stopping, best iteration is:
[290]	valid's rmse: 17.995
  emb:convnext_tiny_in22k_224 [raw] fold 0 RMSE: 17.9950


  Fold 1: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.6761


[200]	valid's rmse: 18.5465


[300]	valid's rmse: 18.5566


[400]	valid's rmse: 18.5669


[500]	valid's rmse: 18.5856
Early stopping, best iteration is:
[200]	valid's rmse: 18.5465
  emb:convnext_tiny_in22k_224 [raw] fold 1 RMSE: 18.5465


  Fold 2: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.6438


[200]	valid's rmse: 18.5369


[300]	valid's rmse: 18.5684


[400]	valid's rmse: 18.5389


[500]	valid's rmse: 18.5407


Early stopping, best iteration is:
[230]	valid's rmse: 18.5235
  emb:convnext_tiny_in22k_224 [raw] fold 2 RMSE: 18.5235


  Fold 3: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.2821


[200]	valid's rmse: 18.1077


[300]	valid's rmse: 18.0539


[400]	valid's rmse: 18.0293


[500]	valid's rmse: 18.0282


[600]	valid's rmse: 18.0444


[700]	valid's rmse: 18.046
Early stopping, best iteration is:
[406]	valid's rmse: 18.0252
  emb:convnext_tiny_in22k_224 [raw] fold 3 RMSE: 18.0252


  Fold 4: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.6686


[200]	valid's rmse: 18.5234


[300]	valid's rmse: 18.5002


[400]	valid's rmse: 18.504


[500]	valid's rmse: 18.5081


Early stopping, best iteration is:
[286]	valid's rmse: 18.4917
  emb:convnext_tiny_in22k_224 [raw] fold 4 RMSE: 18.4917


  Fold 0: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.47438


[200]	valid's rmse: 1.46572


[300]	valid's rmse: 1.46834


[400]	valid's rmse: 1.46941


[500]	valid's rmse: 1.47102


Early stopping, best iteration is:
[244]	valid's rmse: 1.46471
  emb:convnext_tiny_in22k_224 [sqrt] fold 0 RMSE: 18.2395


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.52887


[200]	valid's rmse: 1.52601


[300]	valid's rmse: 1.52808


[400]	valid's rmse: 1.52574


Early stopping, best iteration is:
[152]	valid's rmse: 1.52335
  emb:convnext_tiny_in22k_224 [sqrt] fold 1 RMSE: 18.9617


  Fold 2: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.52207


[200]	valid's rmse: 1.51384


[300]	valid's rmse: 1.51715


[400]	valid's rmse: 1.5154


[500]	valid's rmse: 1.51605


Early stopping, best iteration is:
[223]	valid's rmse: 1.51331
  emb:convnext_tiny_in22k_224 [sqrt] fold 2 RMSE: 18.8214


  Fold 3: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.48744


[200]	valid's rmse: 1.48316


[300]	valid's rmse: 1.48199


[400]	valid's rmse: 1.48525


[500]	valid's rmse: 1.48457


Early stopping, best iteration is:
[251]	valid's rmse: 1.48091
  emb:convnext_tiny_in22k_224 [sqrt] fold 3 RMSE: 18.4233


  Fold 4: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.5188


[200]	valid's rmse: 1.51358


[300]	valid's rmse: 1.51283


[400]	valid's rmse: 1.51427


[500]	valid's rmse: 1.51445


[600]	valid's rmse: 1.51466
Early stopping, best iteration is:
[319]	valid's rmse: 1.51205


  emb:convnext_tiny_in22k_224 [sqrt] fold 4 RMSE: 18.7885
emb:convnext_tiny_in22k_224 best target: raw | OOF RMSE: 18.31812
Feature set emb:deit3_small_patch16_224_224: train (8920, 384), test (992, 384)
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.1561


[200]	valid's rmse: 17.9559


[300]	valid's rmse: 17.9152


[400]	valid's rmse: 17.9297


[500]	valid's rmse: 17.9516


[600]	valid's rmse: 17.9632
Early stopping, best iteration is:
[323]	valid's rmse: 17.9042
  emb:deit3_small_patch16_224_224 [raw] fold 0 RMSE: 17.9042


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.7727


[200]	valid's rmse: 18.7385


[300]	valid's rmse: 18.7384


[400]	valid's rmse: 18.7632


[500]	valid's rmse: 18.7898
Early stopping, best iteration is:
[225]	valid's rmse: 18.724


  emb:deit3_small_patch16_224_224 [raw] fold 1 RMSE: 18.7240
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.8604


[200]	valid's rmse: 18.8436


[300]	valid's rmse: 18.8483


[400]	valid's rmse: 18.8754


[500]	valid's rmse: 18.8895


Early stopping, best iteration is:
[247]	valid's rmse: 18.8348
  emb:deit3_small_patch16_224_224 [raw] fold 2 RMSE: 18.8348
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.5087


[200]	valid's rmse: 18.3635


[300]	valid's rmse: 18.3268


[400]	valid's rmse: 18.3272


[500]	valid's rmse: 18.339


[600]	valid's rmse: 18.339


Early stopping, best iteration is:
[355]	valid's rmse: 18.3042
  emb:deit3_small_patch16_224_224 [raw] fold 3 RMSE: 18.3042
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.7675


[200]	valid's rmse: 18.73


[300]	valid's rmse: 18.7798


[400]	valid's rmse: 18.8048
Early stopping, best iteration is:
[123]	valid's rmse: 18.7185
  emb:deit3_small_patch16_224_224 [raw] fold 4 RMSE: 18.7185


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.48507


[200]	valid's rmse: 1.47139


[300]	valid's rmse: 1.47147


[400]	valid's rmse: 1.47137


[500]	valid's rmse: 1.47175


Early stopping, best iteration is:
[246]	valid's rmse: 1.4704
  emb:deit3_small_patch16_224_224 [sqrt] fold 0 RMSE: 18.2957
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.51704


[200]	valid's rmse: 1.51097


[300]	valid's rmse: 1.51571


[400]	valid's rmse: 1.51564


Early stopping, best iteration is:
[153]	valid's rmse: 1.51023
  emb:deit3_small_patch16_224_224 [sqrt] fold 1 RMSE: 18.7560
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.5359


[200]	valid's rmse: 1.53372


[300]	valid's rmse: 1.5327


[400]	valid's rmse: 1.53393


[500]	valid's rmse: 1.53451


Early stopping, best iteration is:
[256]	valid's rmse: 1.5313
  emb:deit3_small_patch16_224_224 [sqrt] fold 2 RMSE: 19.0372
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.50639


[200]	valid's rmse: 1.49606


[300]	valid's rmse: 1.49832


[400]	valid's rmse: 1.5008


[500]	valid's rmse: 1.50087
Early stopping, best iteration is:
[200]	valid's rmse: 1.49606
  emb:deit3_small_patch16_224_224 [sqrt] fold 3 RMSE: 18.5550
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.52264


[200]	valid's rmse: 1.52357


[300]	valid's rmse: 1.52394


[400]	valid's rmse: 1.52648


Early stopping, best iteration is:
[127]	valid's rmse: 1.51933
  emb:deit3_small_patch16_224_224 [sqrt] fold 4 RMSE: 18.9123
emb:deit3_small_patch16_224_224 best target: raw | OOF RMSE: 18.50040
Feature set emb:openclip_vit_b32_224: train (8920, 512), test (992, 512)
  Fold 0: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 17.9719


[200]	valid's rmse: 17.7511


[300]	valid's rmse: 17.756


[400]	valid's rmse: 17.7378


[500]	valid's rmse: 17.7634


[600]	valid's rmse: 17.7684


Early stopping, best iteration is:
[357]	valid's rmse: 17.7307
  emb:openclip_vit_b32_224 [raw] fold 0 RMSE: 17.7307
  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.4306


[200]	valid's rmse: 18.304


[300]	valid's rmse: 18.3061


[400]	valid's rmse: 18.3353


[500]	valid's rmse: 18.3497


Early stopping, best iteration is:
[256]	valid's rmse: 18.2982
  emb:openclip_vit_b32_224 [raw] fold 1 RMSE: 18.2982
  Fold 2: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.6622


[200]	valid's rmse: 18.5667


[300]	valid's rmse: 18.5498


[400]	valid's rmse: 18.5658


[500]	valid's rmse: 18.5633


Early stopping, best iteration is:
[296]	valid's rmse: 18.5434
  emb:openclip_vit_b32_224 [raw] fold 2 RMSE: 18.5434
  Fold 3: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.4948


[200]	valid's rmse: 18.3388


[300]	valid's rmse: 18.2791


[400]	valid's rmse: 18.2653


[500]	valid's rmse: 18.2935


[600]	valid's rmse: 18.294


Early stopping, best iteration is:
[351]	valid's rmse: 18.259
  emb:openclip_vit_b32_224 [raw] fold 3 RMSE: 18.2590
  Fold 4: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.6512


[200]	valid's rmse: 18.477


[300]	valid's rmse: 18.4297


[400]	valid's rmse: 18.4101


[500]	valid's rmse: 18.4145


[600]	valid's rmse: 18.4229


Early stopping, best iteration is:
[387]	valid's rmse: 18.4009
  emb:openclip_vit_b32_224 [raw] fold 4 RMSE: 18.4009
  Fold 0: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.46079


[200]	valid's rmse: 1.44739


[300]	valid's rmse: 1.44192


[400]	valid's rmse: 1.44086


[500]	valid's rmse: 1.44079


[600]	valid's rmse: 1.44125


[700]	valid's rmse: 1.44149
Early stopping, best iteration is:
[416]	valid's rmse: 1.43998
  emb:openclip_vit_b32_224 [sqrt] fold 0 RMSE: 17.9372


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.50615


[200]	valid's rmse: 1.49747


[300]	valid's rmse: 1.49611


[400]	valid's rmse: 1.49832


[500]	valid's rmse: 1.49851


Early stopping, best iteration is:
[271]	valid's rmse: 1.49548
  emb:openclip_vit_b32_224 [sqrt] fold 1 RMSE: 18.5937
  Fold 2: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.52462


[200]	valid's rmse: 1.52135


[300]	valid's rmse: 1.52214


[400]	valid's rmse: 1.52472


Early stopping, best iteration is:
[168]	valid's rmse: 1.51901
  emb:openclip_vit_b32_224 [sqrt] fold 2 RMSE: 18.9227
  Fold 3: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.49853


[200]	valid's rmse: 1.48911


[300]	valid's rmse: 1.4841


[400]	valid's rmse: 1.48321


[500]	valid's rmse: 1.48442


[600]	valid's rmse: 1.4845


[700]	valid's rmse: 1.48518


Early stopping, best iteration is:
[437]	valid's rmse: 1.48279
  emb:openclip_vit_b32_224 [sqrt] fold 3 RMSE: 18.4133
  Fold 4: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.51886


[200]	valid's rmse: 1.51332


[300]	valid's rmse: 1.51455


[400]	valid's rmse: 1.51425


[500]	valid's rmse: 1.51489
Early stopping, best iteration is:
[206]	valid's rmse: 1.51282
  emb:openclip_vit_b32_224 [sqrt] fold 4 RMSE: 18.7817
emb:openclip_vit_b32_224 best target: raw | OOF RMSE: 18.24854
Feature set emb:swin_small_patch4_window7_224_224: train (8920, 768), test (992, 768)


  Fold 0: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 17.8066


[200]	valid's rmse: 17.5853


[300]	valid's rmse: 17.5978


[400]	valid's rmse: 17.5732


[500]	valid's rmse: 17.5818


[600]	valid's rmse: 17.5998


[700]	valid's rmse: 17.609
Early stopping, best iteration is:
[406]	valid's rmse: 17.572
  emb:swin_small_patch4_window7_224_224 [raw] fold 0 RMSE: 17.5720


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.6107


[200]	valid's rmse: 18.5136


[300]	valid's rmse: 18.5286


[400]	valid's rmse: 18.5161


[500]	valid's rmse: 18.5116


[600]	valid's rmse: 18.5112


[700]	valid's rmse: 18.5166


[800]	valid's rmse: 18.5173


Early stopping, best iteration is:
[535]	valid's rmse: 18.4969
  emb:swin_small_patch4_window7_224_224 [raw] fold 1 RMSE: 18.4969


  Fold 2: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.6887


[200]	valid's rmse: 18.6311


[300]	valid's rmse: 18.636


[400]	valid's rmse: 18.63


Early stopping, best iteration is:
[167]	valid's rmse: 18.6176
  emb:swin_small_patch4_window7_224_224 [raw] fold 2 RMSE: 18.6176


  Fold 3: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.2503


[200]	valid's rmse: 18.0857


[300]	valid's rmse: 18.0509


[400]	valid's rmse: 18.0529


[500]	valid's rmse: 18.061


[600]	valid's rmse: 18.0645


Early stopping, best iteration is:
[377]	valid's rmse: 18.0416
  emb:swin_small_patch4_window7_224_224 [raw] fold 3 RMSE: 18.0416


  Fold 4: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.5697


[200]	valid's rmse: 18.3897


[300]	valid's rmse: 18.3461


[400]	valid's rmse: 18.3092


[500]	valid's rmse: 18.3063


[600]	valid's rmse: 18.3057


[700]	valid's rmse: 18.3012


[800]	valid's rmse: 18.3008


[900]	valid's rmse: 18.3006


Early stopping, best iteration is:
[642]	valid's rmse: 18.2968
  emb:swin_small_patch4_window7_224_224 [raw] fold 4 RMSE: 18.2968


  Fold 0: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.44949


[200]	valid's rmse: 1.43836


[300]	valid's rmse: 1.4405


[400]	valid's rmse: 1.44313


[500]	valid's rmse: 1.44467
Early stopping, best iteration is:
[210]	valid's rmse: 1.43801
  emb:swin_small_patch4_window7_224_224 [sqrt] fold 0 RMSE: 17.8697


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.51342


[200]	valid's rmse: 1.50632


[300]	valid's rmse: 1.50657


[400]	valid's rmse: 1.50593


[500]	valid's rmse: 1.50628


Early stopping, best iteration is:
[284]	valid's rmse: 1.5052
  emb:swin_small_patch4_window7_224_224 [sqrt] fold 1 RMSE: 18.6491


  Fold 2: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.51616


[200]	valid's rmse: 1.51136


[300]	valid's rmse: 1.5127


[400]	valid's rmse: 1.51338


Early stopping, best iteration is:
[172]	valid's rmse: 1.51081
  emb:swin_small_patch4_window7_224_224 [sqrt] fold 2 RMSE: 18.7897


  Fold 3: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.48779


[200]	valid's rmse: 1.47886


[300]	valid's rmse: 1.47918


[400]	valid's rmse: 1.48018


[500]	valid's rmse: 1.47962


Early stopping, best iteration is:
[239]	valid's rmse: 1.47802
  emb:swin_small_patch4_window7_224_224 [sqrt] fold 3 RMSE: 18.3576


  Fold 4: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.51104


[200]	valid's rmse: 1.50281


[300]	valid's rmse: 1.50135


[400]	valid's rmse: 1.49816


[500]	valid's rmse: 1.49636


[600]	valid's rmse: 1.49542


[700]	valid's rmse: 1.495


[800]	valid's rmse: 1.49482


[900]	valid's rmse: 1.49455


[1000]	valid's rmse: 1.49425


[1100]	valid's rmse: 1.49419


[1200]	valid's rmse: 1.49434


[1300]	valid's rmse: 1.49431


Early stopping, best iteration is:
[1075]	valid's rmse: 1.49416
  emb:swin_small_patch4_window7_224_224 [sqrt] fold 4 RMSE: 18.4783
emb:swin_small_patch4_window7_224_224 best target: raw | OOF RMSE: 18.20880
Feature set emb:swin_tiny_patch4_window7_224_224: train (8920, 768), test (992, 768)


  Fold 0: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.0222


[200]	valid's rmse: 17.8285


[300]	valid's rmse: 17.8307


[400]	valid's rmse: 17.8358


[500]	valid's rmse: 17.8122


[600]	valid's rmse: 17.8225


[700]	valid's rmse: 17.8185


[800]	valid's rmse: 17.8193
Early stopping, best iteration is:
[519]	valid's rmse: 17.8098


  emb:swin_tiny_patch4_window7_224_224 [raw] fold 0 RMSE: 17.8098


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.6553


[200]	valid's rmse: 18.5693


[300]	valid's rmse: 18.6005


[400]	valid's rmse: 18.5794


Early stopping, best iteration is:
[181]	valid's rmse: 18.548
  emb:swin_tiny_patch4_window7_224_224 [raw] fold 1 RMSE: 18.5480


  Fold 2: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.715


[200]	valid's rmse: 18.6201


[300]	valid's rmse: 18.5951


[400]	valid's rmse: 18.5925


[500]	valid's rmse: 18.5946


[600]	valid's rmse: 18.598


Early stopping, best iteration is:
[363]	valid's rmse: 18.5831
  emb:swin_tiny_patch4_window7_224_224 [raw] fold 2 RMSE: 18.5831


  Fold 3: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.498


[200]	valid's rmse: 18.3998


[300]	valid's rmse: 18.4006


[400]	valid's rmse: 18.4282


[500]	valid's rmse: 18.4443


Early stopping, best iteration is:
[266]	valid's rmse: 18.3701
  emb:swin_tiny_patch4_window7_224_224 [raw] fold 3 RMSE: 18.3701


  Fold 4: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.6335


[200]	valid's rmse: 18.4939


[300]	valid's rmse: 18.5123


[400]	valid's rmse: 18.5048


Early stopping, best iteration is:
[180]	valid's rmse: 18.4854
  emb:swin_tiny_patch4_window7_224_224 [raw] fold 4 RMSE: 18.4854


  Fold 0: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.46314


[200]	valid's rmse: 1.45458


[300]	valid's rmse: 1.45489


[400]	valid's rmse: 1.4576


Early stopping, best iteration is:
[194]	valid's rmse: 1.45355
  emb:swin_tiny_patch4_window7_224_224 [sqrt] fold 0 RMSE: 18.0481


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.52531


[200]	valid's rmse: 1.52129


[300]	valid's rmse: 1.51991


[400]	valid's rmse: 1.52071


[500]	valid's rmse: 1.52192


Early stopping, best iteration is:
[255]	valid's rmse: 1.51924
  emb:swin_tiny_patch4_window7_224_224 [sqrt] fold 1 RMSE: 18.7961


  Fold 2: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.52038


[200]	valid's rmse: 1.51562


[300]	valid's rmse: 1.51473


[400]	valid's rmse: 1.51583


[500]	valid's rmse: 1.51685


Early stopping, best iteration is:
[271]	valid's rmse: 1.51334
  emb:swin_tiny_patch4_window7_224_224 [sqrt] fold 2 RMSE: 18.8520


  Fold 3: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.49557


[200]	valid's rmse: 1.48675


[300]	valid's rmse: 1.48798


[400]	valid's rmse: 1.48862


Early stopping, best iteration is:
[189]	valid's rmse: 1.48579
  emb:swin_tiny_patch4_window7_224_224 [sqrt] fold 3 RMSE: 18.4564


  Fold 4: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.52474


[200]	valid's rmse: 1.52035


[300]	valid's rmse: 1.52022


[400]	valid's rmse: 1.51926


[500]	valid's rmse: 1.51922


[600]	valid's rmse: 1.51979


Early stopping, best iteration is:
[375]	valid's rmse: 1.51846
  emb:swin_tiny_patch4_window7_224_224 [sqrt] fold 4 RMSE: 18.8469
emb:swin_tiny_patch4_window7_224_224 best target: raw | OOF RMSE: 18.36148
Feature set emb:tf_efficientnet_b0_224: train (8920, 1280), test (992, 1280)


  Fold 0: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 17.9588


[200]	valid's rmse: 17.8509


[300]	valid's rmse: 17.8387


[400]	valid's rmse: 17.8555


[500]	valid's rmse: 17.8699


Early stopping, best iteration is:
[254]	valid's rmse: 17.821
  emb:tf_efficientnet_b0_224 [raw] fold 0 RMSE: 17.8210


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.4655


[200]	valid's rmse: 18.3753


[300]	valid's rmse: 18.3915


[400]	valid's rmse: 18.4137


[500]	valid's rmse: 18.43


Early stopping, best iteration is:
[255]	valid's rmse: 18.362
  emb:tf_efficientnet_b0_224 [raw] fold 1 RMSE: 18.3620


  Fold 2: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.587


[200]	valid's rmse: 18.5273


[300]	valid's rmse: 18.5044


[400]	valid's rmse: 18.5134


[500]	valid's rmse: 18.5404


[600]	valid's rmse: 18.5415
Early stopping, best iteration is:
[315]	valid's rmse: 18.4944
  emb:tf_efficientnet_b0_224 [raw] fold 2 RMSE: 18.4944


  Fold 3: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.1459


[200]	valid's rmse: 18.0407


[300]	valid's rmse: 18.0446


[400]	valid's rmse: 18.0584


[500]	valid's rmse: 18.0738
Early stopping, best iteration is:
[213]	valid's rmse: 18.0186
  emb:tf_efficientnet_b0_224 [raw] fold 3 RMSE: 18.0186


  Fold 4: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.4745


[200]	valid's rmse: 18.3828


[300]	valid's rmse: 18.405


[400]	valid's rmse: 18.4316


[500]	valid's rmse: 18.4254


Early stopping, best iteration is:
[226]	valid's rmse: 18.3778
  emb:tf_efficientnet_b0_224 [raw] fold 4 RMSE: 18.3778


  Fold 0: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.46265


[200]	valid's rmse: 1.46266


[300]	valid's rmse: 1.4605


[400]	valid's rmse: 1.46308


Early stopping, best iteration is:
[143]	valid's rmse: 1.4602
  emb:tf_efficientnet_b0_224 [sqrt] fold 0 RMSE: 18.2256


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.51676


[200]	valid's rmse: 1.51094


[300]	valid's rmse: 1.51233


[400]	valid's rmse: 1.51258


[500]	valid's rmse: 1.51328


Early stopping, best iteration is:
[252]	valid's rmse: 1.51067
  emb:tf_efficientnet_b0_224 [sqrt] fold 1 RMSE: 18.6730


  Fold 2: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.50844


[200]	valid's rmse: 1.50305


[300]	valid's rmse: 1.50324


[400]	valid's rmse: 1.50417


[500]	valid's rmse: 1.50595


Early stopping, best iteration is:
[241]	valid's rmse: 1.50109
  emb:tf_efficientnet_b0_224 [sqrt] fold 2 RMSE: 18.6341


  Fold 3: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.49605


[200]	valid's rmse: 1.48945


[300]	valid's rmse: 1.48789


[400]	valid's rmse: 1.4898


[500]	valid's rmse: 1.49026


Early stopping, best iteration is:
[239]	valid's rmse: 1.486
  emb:tf_efficientnet_b0_224 [sqrt] fold 3 RMSE: 18.4238


  Fold 4: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.5076


[200]	valid's rmse: 1.50793


[300]	valid's rmse: 1.51052


[400]	valid's rmse: 1.5099
Early stopping, best iteration is:
[103]	valid's rmse: 1.50635
  emb:tf_efficientnet_b0_224 [sqrt] fold 4 RMSE: 18.7689
emb:tf_efficientnet_b0_224 best target: raw | OOF RMSE: 18.21653
Feature set emb:tf_efficientnetv2_s_in21k_224: train (8920, 1280), test (992, 1280)


  Fold 0: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.0962


[200]	valid's rmse: 17.9683


[300]	valid's rmse: 17.9423


[400]	valid's rmse: 17.97


[500]	valid's rmse: 17.9813


Early stopping, best iteration is:
[241]	valid's rmse: 17.9196
  emb:tf_efficientnetv2_s_in21k_224 [raw] fold 0 RMSE: 17.9196


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.4787


[200]	valid's rmse: 18.3528


[300]	valid's rmse: 18.3746


[400]	valid's rmse: 18.3856


[500]	valid's rmse: 18.3626


Early stopping, best iteration is:
[222]	valid's rmse: 18.3469
  emb:tf_efficientnetv2_s_in21k_224 [raw] fold 1 RMSE: 18.3469


  Fold 2: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.6234


[200]	valid's rmse: 18.5408


[300]	valid's rmse: 18.5667


[400]	valid's rmse: 18.5631


[500]	valid's rmse: 18.5589
Early stopping, best iteration is:
[201]	valid's rmse: 18.535
  emb:tf_efficientnetv2_s_in21k_224 [raw] fold 2 RMSE: 18.5350


  Fold 3: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.3598


[200]	valid's rmse: 18.2401


[300]	valid's rmse: 18.1959


[400]	valid's rmse: 18.1954


[500]	valid's rmse: 18.1884


[600]	valid's rmse: 18.1908


Early stopping, best iteration is:
[357]	valid's rmse: 18.1832
  emb:tf_efficientnetv2_s_in21k_224 [raw] fold 3 RMSE: 18.1832


  Fold 4: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.5952


[200]	valid's rmse: 18.4076


[300]	valid's rmse: 18.3644


[400]	valid's rmse: 18.3713


[500]	valid's rmse: 18.3633


[600]	valid's rmse: 18.3535


[700]	valid's rmse: 18.3489


[800]	valid's rmse: 18.3539


[900]	valid's rmse: 18.3517


Early stopping, best iteration is:
[652]	valid's rmse: 18.3432
  emb:tf_efficientnetv2_s_in21k_224 [raw] fold 4 RMSE: 18.3432


  Fold 0: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.47459


[200]	valid's rmse: 1.46862


[300]	valid's rmse: 1.46937


[400]	valid's rmse: 1.4701


[500]	valid's rmse: 1.47122


Early stopping, best iteration is:
[220]	valid's rmse: 1.46798
  emb:tf_efficientnetv2_s_in21k_224 [sqrt] fold 0 RMSE: 18.3495


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.51256


[200]	valid's rmse: 1.50657


[300]	valid's rmse: 1.50627


[400]	valid's rmse: 1.50836


[500]	valid's rmse: 1.50886


Early stopping, best iteration is:
[288]	valid's rmse: 1.5049
  emb:tf_efficientnetv2_s_in21k_224 [sqrt] fold 1 RMSE: 18.6857


  Fold 2: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.51437


[200]	valid's rmse: 1.50609


[300]	valid's rmse: 1.50598


[400]	valid's rmse: 1.50509


[500]	valid's rmse: 1.50476


[600]	valid's rmse: 1.50551


[700]	valid's rmse: 1.50635


Early stopping, best iteration is:
[466]	valid's rmse: 1.50394
  emb:tf_efficientnetv2_s_in21k_224 [sqrt] fold 2 RMSE: 18.6662


  Fold 3: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.49631


[200]	valid's rmse: 1.48736


[300]	valid's rmse: 1.48454


[400]	valid's rmse: 1.48519


[500]	valid's rmse: 1.48619


[600]	valid's rmse: 1.48776


Early stopping, best iteration is:
[349]	valid's rmse: 1.48384
  emb:tf_efficientnetv2_s_in21k_224 [sqrt] fold 3 RMSE: 18.4402


  Fold 4: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.51934


[200]	valid's rmse: 1.51329


[300]	valid's rmse: 1.50752


[400]	valid's rmse: 1.50718


[500]	valid's rmse: 1.50656


[600]	valid's rmse: 1.50565


[700]	valid's rmse: 1.5053


[800]	valid's rmse: 1.50522


[900]	valid's rmse: 1.50525


Early stopping, best iteration is:
[678]	valid's rmse: 1.50485
  emb:tf_efficientnetv2_s_in21k_224 [sqrt] fold 4 RMSE: 18.6282
emb:tf_efficientnetv2_s_in21k_224 best target: raw | OOF RMSE: 18.26675
Feature set emb:vit_base_patch16_224_224: train (8920, 768), test (992, 768)


  Fold 0: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.119


[200]	valid's rmse: 17.9733


[300]	valid's rmse: 17.9328


[400]	valid's rmse: 17.927


[500]	valid's rmse: 17.9355


Early stopping, best iteration is:
[271]	valid's rmse: 17.9207
  emb:vit_base_patch16_224_224 [raw] fold 0 RMSE: 17.9207


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.4208


[200]	valid's rmse: 18.3061


[300]	valid's rmse: 18.2841


[400]	valid's rmse: 18.2622


[500]	valid's rmse: 18.2531


[600]	valid's rmse: 18.2561


[700]	valid's rmse: 18.2551


[800]	valid's rmse: 18.2561


Early stopping, best iteration is:
[564]	valid's rmse: 18.2472
  emb:vit_base_patch16_224_224 [raw] fold 1 RMSE: 18.2472


  Fold 2: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.8406


[200]	valid's rmse: 18.7079


[300]	valid's rmse: 18.6849


[400]	valid's rmse: 18.6634


[500]	valid's rmse: 18.6699


[600]	valid's rmse: 18.6686


[700]	valid's rmse: 18.673


[800]	valid's rmse: 18.6724


Early stopping, best iteration is:
[549]	valid's rmse: 18.6575
  emb:vit_base_patch16_224_224 [raw] fold 2 RMSE: 18.6575


  Fold 3: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.2232


[200]	valid's rmse: 18.0492


[300]	valid's rmse: 18.009


[400]	valid's rmse: 17.9849


[500]	valid's rmse: 17.9819


[600]	valid's rmse: 17.9908


Early stopping, best iteration is:
[368]	valid's rmse: 17.978
  emb:vit_base_patch16_224_224 [raw] fold 3 RMSE: 17.9780


  Fold 4: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 18.6034


[200]	valid's rmse: 18.4966


[300]	valid's rmse: 18.4568


[400]	valid's rmse: 18.4191


[500]	valid's rmse: 18.4306


[600]	valid's rmse: 18.4178


[700]	valid's rmse: 18.4183


Early stopping, best iteration is:
[449]	valid's rmse: 18.4158
  emb:vit_base_patch16_224_224 [raw] fold 4 RMSE: 18.4158


  Fold 0: PCA -> 512 dims
Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.46954


[200]	valid's rmse: 1.45437


[300]	valid's rmse: 1.45362


[400]	valid's rmse: 1.45446


[500]	valid's rmse: 1.45379


Early stopping, best iteration is:
[256]	valid's rmse: 1.4517
  emb:vit_base_patch16_224_224 [sqrt] fold 0 RMSE: 18.1066


  Fold 1: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.50446


[200]	valid's rmse: 1.49843


[300]	valid's rmse: 1.49585


[400]	valid's rmse: 1.49703


[500]	valid's rmse: 1.4978


[600]	valid's rmse: 1.49877


Early stopping, best iteration is:
[336]	valid's rmse: 1.49517
  emb:vit_base_patch16_224_224 [sqrt] fold 1 RMSE: 18.5152


  Fold 2: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.52728


[200]	valid's rmse: 1.52182


[300]	valid's rmse: 1.51693


[400]	valid's rmse: 1.51718


[500]	valid's rmse: 1.51805


[600]	valid's rmse: 1.51852
Early stopping, best iteration is:
[315]	valid's rmse: 1.5159
  emb:vit_base_patch16_224_224 [sqrt] fold 2 RMSE: 18.7998


  Fold 3: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.49329


[200]	valid's rmse: 1.48535


[300]	valid's rmse: 1.4812


[400]	valid's rmse: 1.48047


[500]	valid's rmse: 1.47966


[600]	valid's rmse: 1.47916


[700]	valid's rmse: 1.4793


[800]	valid's rmse: 1.47922


[900]	valid's rmse: 1.47924


[1000]	valid's rmse: 1.47915


Early stopping, best iteration is:
[736]	valid's rmse: 1.47903
  emb:vit_base_patch16_224_224 [sqrt] fold 3 RMSE: 18.3887


  Fold 4: PCA -> 512 dims


Training until validation scores don't improve for 300 rounds


[100]	valid's rmse: 1.50918


[200]	valid's rmse: 1.50462


[300]	valid's rmse: 1.50589


[400]	valid's rmse: 1.50716


[500]	valid's rmse: 1.50666
Early stopping, best iteration is:
[212]	valid's rmse: 1.50284
  emb:vit_base_patch16_224_224 [sqrt] fold 4 RMSE: 18.6707
emb:vit_base_patch16_224_224 best target: raw | OOF RMSE: 18.24591
NNLS-like blend weights: {'meta_stats': 0.0, 'emb:convnext_tiny_in22k_224': 0.032384446586774765, 'emb:deit3_small_patch16_224_224': 0.05806099820648654, 'emb:openclip_vit_b32_224': 0.24248823516498855, 'emb:swin_small_patch4_window7_224_224': 0.16702355124203574, 'emb:swin_tiny_patch4_window7_224_224': 0.028105946831819956, 'emb:tf_efficientnet_b0_224': 0.19094628513304182, 'emb:tf_efficientnetv2_s_in21k_224': 0.10936125649893741, 'emb:vit_base_patch16_224_224': 0.17162928033591524}
Saved submission.csv. Head:
                                 Id  Pawpularity
0  ee51b99832f1ba868f646df93d2b6b81    51.831425
1  caddfb3f8bff9c4b95dbe022018eea21    35.857884
2  582eeabd4a448a53ebb79995888a4b0b    35.430708
3  afc1ad7f0c5eea880759d09e77f7deee    33.104743
4  d5bdf3446e86ce4ec6

In [18]:
# Compute blended OOF RMSE for current saved OOFs
import glob, json
import numpy as np, pandas as pd
from sklearn.linear_model import LinearRegression

def rmse(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

train_df = pd.read_csv('train.csv')
y = train_df['Pawpularity'].values.astype(float)

# discover OOF files
oof_files = sorted(glob.glob('oof_*.npy'))
names = [f[4:-4] for f in oof_files]  # strip 'oof_' and '.npy'
if len(oof_files) == 0:
    raise SystemExit('No oof_*.npy found.')
print('OOF files:', names)

O = np.vstack([np.load(f) for f in oof_files]).T  # (n_samples, n_models)
single_scores = {n: rmse(y, O[:,i]) for i, n in enumerate(names)}
print('Single OOF RMSEs:', json.dumps(single_scores, indent=2))

# NNLS-like blend (non-negative, no intercept)
reg = LinearRegression(fit_intercept=False, positive=True)
reg.fit(O, y)
w = reg.coef_.copy()
w = w / (w.sum() if w.sum() > 0 else 1.0)
blend_oof = O @ w
blend_rmse = rmse(y, blend_oof)
print('NNLS weights:', {n: float(wi) for n, wi in zip(names, w)})
print(f'NNLS blend OOF RMSE: {blend_rmse:.5f}')

# Inverse-variance weighting
invw = np.array([1.0 / max(single_scores[n]**2, 1e-6) for n in names], dtype=float)
invw = invw / invw.sum()
blend_iv = O @ invw
blend_iv_rmse = rmse(y, blend_iv)
print('Inv-var weights:', {n: float(wi) for n, wi in zip(names, invw)})
print(f'Inv-var blend OOF RMSE: {blend_iv_rmse:.5f}')

OOF files: ['emb:convnext_tiny_in22k_224', 'emb:deit3_small_patch16_224_224', 'emb:openclip_vit_b32_224', 'emb:swin_small_patch4_window7_224_224', 'emb:swin_tiny_patch4_window7_224_224', 'emb:tf_efficientnet_b0_224', 'emb:tf_efficientnetv2_s_in21k_224', 'emb:vit_base_patch16_224_224', 'meta_stats']
Single OOF RMSEs: {
  "emb:convnext_tiny_in22k_224": 18.343681791120346,
  "emb:deit3_small_patch16_224_224": 18.519840874368242,
  "emb:openclip_vit_b32_224": 18.275847412336976,
  "emb:swin_small_patch4_window7_224_224": 18.173627077168888,
  "emb:swin_tiny_patch4_window7_224_224": 18.33451618004236,
  "emb:tf_efficientnet_b0_224": 18.212714967463256,
  "emb:tf_efficientnetv2_s_in21k_224": 18.284960028331636,
  "emb:vit_base_patch16_224_224": 18.316501053056516,
  "meta_stats": 20.603775323132393
}
NNLS weights: {'emb:convnext_tiny_in22k_224': 0.011721418495030226, 'emb:deit3_small_patch16_224_224': 0.05101623467959047, 'emb:openclip_vit_b32_224': 0.2438208130537412, 'emb:swin_small_patch4

In [29]:
# Add HGBR base learner per feature set with per-fold L2+PCA; save OOF/test for stacking
import os, glob, time, math, json
import numpy as np, pandas as pd
from sklearn.decomposition import PCA
from sklearn.ensemble import HistGradientBoostingRegressor

def rmse(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

def load_feature_set(name):
    if name == 'meta_stats':
        X_tr = np.load('X_meta_stats_train.npy') if os.path.exists('X_meta_stats_train.npy') else None
        X_te = np.load('X_meta_stats_test.npy') if os.path.exists('X_meta_stats_test.npy') else None
        return X_tr, X_te
    elif name.startswith('emb:'):
        tag = name.split(':',1)[1]
        tr_path = f"X_img_train_{tag}.npy"
        te_path = f"X_img_test_{tag}.npy"
        X_tr = np.load(tr_path) if os.path.exists(tr_path) else None
        X_te = np.load(te_path) if os.path.exists(te_path) else None
        return X_tr, X_te
    else:
        return None, None

def discover_feature_sets():
    feats = []
    if os.path.exists('X_meta_stats_train.npy') and os.path.exists('X_meta_stats_test.npy'):
        feats.append('meta_stats')
    for tr_path in sorted(glob.glob('X_img_train_*.npy')):
        tag = tr_path[len('X_img_train_'):-len('.npy')]
        te_path = f"X_img_test_{tag}.npy"
        if os.path.exists(te_path):
            feats.append(f"emb:{tag}")
    return feats

train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
folds_df = pd.read_csv('folds.csv')
id2fold = dict(zip(folds_df['Id'], folds_df['fold']))
train_df['fold'] = train_df['Id'].map(id2fold)
y = train_df['Pawpularity'].values.astype(float)
y_sqrt = np.sqrt(np.clip(y, 0, None))

feature_sets = discover_feature_sets()
print('Feature sets:', feature_sets)
assert len(feature_sets) > 0, 'No feature sets found.'

def l2norm(a):
    n = np.linalg.norm(a, axis=1, keepdims=True) + 1e-12
    return a / n

def train_hgbr_for_fs(fs, X_tr, X_te, target_mode='raw'):
    use_pca = fs.startswith('emb:') and X_tr.shape[1] >= 512
    n_comp = 512 if X_tr.shape[1] >= 512 else X_tr.shape[1]
    oof = np.zeros(len(train_df), dtype=float)
    te_pred_accum = np.zeros(len(test_df), dtype=float)
    fold_rmses = []
    t0 = time.time()

    for fold in range(5):
        tr_idx = np.where(train_df['fold'].values != fold)[0]
        va_idx = np.where(train_df['fold'].values == fold)[0]
        X_tr_fold = X_tr[tr_idx].copy()
        X_va_fold = X_tr[va_idx].copy()
        X_te_fold = X_te.copy()

        y_tr_fold_raw = y[tr_idx]
        y_va_fold_raw = y[va_idx]

        if target_mode == 'sqrt':
            y_tr_fold = y_sqrt[tr_idx]
            eval_y = y_va_fold_raw
        else:
            y_tr_fold = y_tr_fold_raw
            eval_y = y_va_fold_raw

        if use_pca:
            X_tr_fold = l2norm(X_tr_fold)
            X_va_fold = l2norm(X_va_fold)
            X_te_fold = l2norm(X_te_fold)
            pca = PCA(n_components=min(n_comp, X_tr.shape[1]), whiten=False, random_state=42)
            X_tr_fold = pca.fit_transform(X_tr_fold)
            X_va_fold = pca.transform(X_va_fold)
            X_te_fold = pca.transform(X_te_fold)
            print(f'  {fs} [{target_mode}] fold {fold}: PCA -> {X_tr_fold.shape[1]} dims', flush=True)

        model = HistGradientBoostingRegressor(
            learning_rate=0.04,
            max_iter=2000,
            max_leaf_nodes=31,
            min_samples_leaf=30,
            l2_regularization=1.0,
            validation_fraction=0.12,
            early_stopping=True,
            random_state=42
        )
        model.fit(X_tr_fold, y_tr_fold)
        va_pred = model.predict(X_va_fold)
        te_pred = model.predict(X_te_fold)

        if target_mode == 'sqrt':
            va_pred = np.clip(va_pred, 0, None) ** 2
            te_pred = np.clip(te_pred, 0, None) ** 2

        va_pred = np.clip(va_pred, 1.0, 100.0)
        te_pred = np.clip(te_pred, 1.0, 100.0)

        oof[va_idx] = va_pred
        f_rmse = rmse(eval_y, va_pred)
        fold_rmses.append(f_rmse)
        te_pred_accum += te_pred
        print(f'    {fs} [HGBR|{target_mode}] fold {fold} RMSE: {f_rmse:.4f} | elapsed {time.time()-t0:.1f}s', flush=True)

    te_mean = te_pred_accum / 5.0
    fs_oof_rmse = rmse(y, oof)
    return {'oof': oof, 'test_pred': te_mean, 'oof_rmse': fs_oof_rmse, 'fold_rmses': fold_rmses, 'used_pca': use_pca, 'n_comp': int(n_comp) if use_pca else int(X_tr.shape[1]), 'target': target_mode}

results_hgbr = {}
for fs in feature_sets:
    X_tr, X_te = load_feature_set(fs)
    if X_tr is None or X_te is None:
        print(f'[Skip] {fs}: features missing')
        continue
    print(f'=== HGBR training for {fs}: train {X_tr.shape}, test {X_te.shape} ===', flush=True)
    res_raw = train_hgbr_for_fs(fs, X_tr, X_te, target_mode='raw')
    res_sqrt = train_hgbr_for_fs(fs, X_tr, X_te, target_mode='sqrt') if fs.startswith('emb:') else None
    best = res_raw if (res_sqrt is None or res_raw['oof_rmse'] <= res_sqrt['oof_rmse']) else res_sqrt
    results_hgbr[fs] = {k: best[k] for k in ['oof_rmse','fold_rmses','used_pca','n_comp','target']}
    np.save(f'oof_hgb_{fs}.npy', best['oof'])
    np.save(f'test_pred_hgb_{fs}.npy', best['test_pred'])
    print(f'>>> {fs} [HGBR] best target: {best["target"]} | OOF RMSE: {best["oof_rmse"]:.5f}', flush=True)

print('HGBR results:', json.dumps(results_hgbr, indent=2))

Feature sets: ['meta_stats', 'emb:convnext_tiny_in22k_224', 'emb:deit3_small_patch16_224_224', 'emb:openclip_vit_b16_224', 'emb:openclip_vit_b32_224', 'emb:swin_small_patch4_window7_224_224', 'emb:swin_tiny_patch4_window7_224_224', 'emb:tf_efficientnet_b0_224', 'emb:tf_efficientnetv2_s_in21k_224', 'emb:vit_base_patch16_224_224']
=== HGBR training for meta_stats: train (8920, 39), test (992, 39) ===


    meta_stats [HGBR|raw] fold 0 RMSE: 20.4575 | elapsed 0.1s


    meta_stats [HGBR|raw] fold 1 RMSE: 20.6736 | elapsed 0.2s


    meta_stats [HGBR|raw] fold 2 RMSE: 20.7505 | elapsed 0.3s


    meta_stats [HGBR|raw] fold 3 RMSE: 20.6603 | elapsed 0.4s


    meta_stats [HGBR|raw] fold 4 RMSE: 20.6912 | elapsed 0.5s


>>> meta_stats [HGBR] best target: raw | OOF RMSE: 20.64686


=== HGBR training for emb:convnext_tiny_in22k_224: train (8920, 768), test (992, 768) ===


  emb:convnext_tiny_in22k_224 [raw] fold 0: PCA -> 512 dims


    emb:convnext_tiny_in22k_224 [HGBR|raw] fold 0 RMSE: 18.0900 | elapsed 6.9s


  emb:convnext_tiny_in22k_224 [raw] fold 1: PCA -> 512 dims


    emb:convnext_tiny_in22k_224 [HGBR|raw] fold 1 RMSE: 18.6785 | elapsed 15.1s


  emb:convnext_tiny_in22k_224 [raw] fold 2: PCA -> 512 dims


    emb:convnext_tiny_in22k_224 [HGBR|raw] fold 2 RMSE: 18.6102 | elapsed 19.6s


  emb:convnext_tiny_in22k_224 [raw] fold 3: PCA -> 512 dims


    emb:convnext_tiny_in22k_224 [HGBR|raw] fold 3 RMSE: 18.2815 | elapsed 23.9s


  emb:convnext_tiny_in22k_224 [raw] fold 4: PCA -> 512 dims


    emb:convnext_tiny_in22k_224 [HGBR|raw] fold 4 RMSE: 18.6253 | elapsed 27.8s


  emb:convnext_tiny_in22k_224 [sqrt] fold 0: PCA -> 512 dims


    emb:convnext_tiny_in22k_224 [HGBR|sqrt] fold 0 RMSE: 18.3446 | elapsed 5.0s


  emb:convnext_tiny_in22k_224 [sqrt] fold 1: PCA -> 512 dims


    emb:convnext_tiny_in22k_224 [HGBR|sqrt] fold 1 RMSE: 19.0753 | elapsed 12.4s


  emb:convnext_tiny_in22k_224 [sqrt] fold 2: PCA -> 512 dims


    emb:convnext_tiny_in22k_224 [HGBR|sqrt] fold 2 RMSE: 18.8151 | elapsed 19.2s


  emb:convnext_tiny_in22k_224 [sqrt] fold 3: PCA -> 512 dims


    emb:convnext_tiny_in22k_224 [HGBR|sqrt] fold 3 RMSE: 18.7492 | elapsed 23.1s


  emb:convnext_tiny_in22k_224 [sqrt] fold 4: PCA -> 512 dims


    emb:convnext_tiny_in22k_224 [HGBR|sqrt] fold 4 RMSE: 18.9403 | elapsed 27.0s


>>> emb:convnext_tiny_in22k_224 [HGBR] best target: raw | OOF RMSE: 18.45852


=== HGBR training for emb:deit3_small_patch16_224_224: train (8920, 384), test (992, 384) ===


    emb:deit3_small_patch16_224_224 [HGBR|raw] fold 0 RMSE: 18.2919 | elapsed 0.9s


    emb:deit3_small_patch16_224_224 [HGBR|raw] fold 1 RMSE: 18.8236 | elapsed 2.2s


    emb:deit3_small_patch16_224_224 [HGBR|raw] fold 2 RMSE: 19.0473 | elapsed 3.2s


    emb:deit3_small_patch16_224_224 [HGBR|raw] fold 3 RMSE: 18.5428 | elapsed 4.3s


    emb:deit3_small_patch16_224_224 [HGBR|raw] fold 4 RMSE: 18.7662 | elapsed 5.7s


    emb:deit3_small_patch16_224_224 [HGBR|sqrt] fold 0 RMSE: 18.4276 | elapsed 1.0s


    emb:deit3_small_patch16_224_224 [HGBR|sqrt] fold 1 RMSE: 18.9991 | elapsed 1.9s


    emb:deit3_small_patch16_224_224 [HGBR|sqrt] fold 2 RMSE: 19.1043 | elapsed 3.1s


    emb:deit3_small_patch16_224_224 [HGBR|sqrt] fold 3 RMSE: 18.9386 | elapsed 4.0s


    emb:deit3_small_patch16_224_224 [HGBR|sqrt] fold 4 RMSE: 18.9599 | elapsed 5.2s


>>> emb:deit3_small_patch16_224_224 [HGBR] best target: raw | OOF RMSE: 18.69613


=== HGBR training for emb:openclip_vit_b16_224: train (8920, 512), test (992, 512) ===


  emb:openclip_vit_b16_224 [raw] fold 0: PCA -> 512 dims


    emb:openclip_vit_b16_224 [HGBR|raw] fold 0 RMSE: 17.7838 | elapsed 1.6s


  emb:openclip_vit_b16_224 [raw] fold 1: PCA -> 512 dims


    emb:openclip_vit_b16_224 [HGBR|raw] fold 1 RMSE: 18.3470 | elapsed 3.3s


  emb:openclip_vit_b16_224 [raw] fold 2: PCA -> 512 dims


    emb:openclip_vit_b16_224 [HGBR|raw] fold 2 RMSE: 18.5573 | elapsed 5.0s


  emb:openclip_vit_b16_224 [raw] fold 3: PCA -> 512 dims


    emb:openclip_vit_b16_224 [HGBR|raw] fold 3 RMSE: 18.1927 | elapsed 6.5s


  emb:openclip_vit_b16_224 [raw] fold 4: PCA -> 512 dims


    emb:openclip_vit_b16_224 [HGBR|raw] fold 4 RMSE: 18.7329 | elapsed 8.1s


  emb:openclip_vit_b16_224 [sqrt] fold 0: PCA -> 512 dims


    emb:openclip_vit_b16_224 [HGBR|sqrt] fold 0 RMSE: 18.0295 | elapsed 1.7s


  emb:openclip_vit_b16_224 [sqrt] fold 1: PCA -> 512 dims


    emb:openclip_vit_b16_224 [HGBR|sqrt] fold 1 RMSE: 18.6063 | elapsed 3.2s


  emb:openclip_vit_b16_224 [sqrt] fold 2: PCA -> 512 dims


    emb:openclip_vit_b16_224 [HGBR|sqrt] fold 2 RMSE: 18.8867 | elapsed 4.7s


  emb:openclip_vit_b16_224 [sqrt] fold 3: PCA -> 512 dims


    emb:openclip_vit_b16_224 [HGBR|sqrt] fold 3 RMSE: 18.3887 | elapsed 6.5s


  emb:openclip_vit_b16_224 [sqrt] fold 4: PCA -> 512 dims


    emb:openclip_vit_b16_224 [HGBR|sqrt] fold 4 RMSE: 18.7899 | elapsed 7.9s


>>> emb:openclip_vit_b16_224 [HGBR] best target: raw | OOF RMSE: 18.32564


=== HGBR training for emb:openclip_vit_b32_224: train (8920, 512), test (992, 512) ===


  emb:openclip_vit_b32_224 [raw] fold 0: PCA -> 512 dims


    emb:openclip_vit_b32_224 [HGBR|raw] fold 0 RMSE: 17.8975 | elapsed 1.5s


  emb:openclip_vit_b32_224 [raw] fold 1: PCA -> 512 dims


    emb:openclip_vit_b32_224 [HGBR|raw] fold 1 RMSE: 18.4212 | elapsed 3.0s


  emb:openclip_vit_b32_224 [raw] fold 2: PCA -> 512 dims


    emb:openclip_vit_b32_224 [HGBR|raw] fold 2 RMSE: 18.8548 | elapsed 4.4s


  emb:openclip_vit_b32_224 [raw] fold 3: PCA -> 512 dims


    emb:openclip_vit_b32_224 [HGBR|raw] fold 3 RMSE: 18.5235 | elapsed 5.8s


  emb:openclip_vit_b32_224 [raw] fold 4: PCA -> 512 dims


    emb:openclip_vit_b32_224 [HGBR|raw] fold 4 RMSE: 18.7496 | elapsed 7.0s


  emb:openclip_vit_b32_224 [sqrt] fold 0: PCA -> 512 dims


    emb:openclip_vit_b32_224 [HGBR|sqrt] fold 0 RMSE: 18.1973 | elapsed 1.7s


  emb:openclip_vit_b32_224 [sqrt] fold 1: PCA -> 512 dims


    emb:openclip_vit_b32_224 [HGBR|sqrt] fold 1 RMSE: 18.7197 | elapsed 3.2s


  emb:openclip_vit_b32_224 [sqrt] fold 2: PCA -> 512 dims


    emb:openclip_vit_b32_224 [HGBR|sqrt] fold 2 RMSE: 19.1572 | elapsed 4.6s


  emb:openclip_vit_b32_224 [sqrt] fold 3: PCA -> 512 dims


    emb:openclip_vit_b32_224 [HGBR|sqrt] fold 3 RMSE: 18.7546 | elapsed 5.8s


  emb:openclip_vit_b32_224 [sqrt] fold 4: PCA -> 512 dims


    emb:openclip_vit_b32_224 [HGBR|sqrt] fold 4 RMSE: 18.9996 | elapsed 7.3s


>>> emb:openclip_vit_b32_224 [HGBR] best target: raw | OOF RMSE: 18.49235


=== HGBR training for emb:swin_small_patch4_window7_224_224: train (8920, 768), test (992, 768) ===


  emb:swin_small_patch4_window7_224_224 [raw] fold 0: PCA -> 512 dims


    emb:swin_small_patch4_window7_224_224 [HGBR|raw] fold 0 RMSE: 17.6175 | elapsed 3.5s


  emb:swin_small_patch4_window7_224_224 [raw] fold 1: PCA -> 512 dims


    emb:swin_small_patch4_window7_224_224 [HGBR|raw] fold 1 RMSE: 18.6928 | elapsed 10.1s


  emb:swin_small_patch4_window7_224_224 [raw] fold 2: PCA -> 512 dims


    emb:swin_small_patch4_window7_224_224 [HGBR|raw] fold 2 RMSE: 18.5159 | elapsed 15.2s


  emb:swin_small_patch4_window7_224_224 [raw] fold 3: PCA -> 512 dims


    emb:swin_small_patch4_window7_224_224 [HGBR|raw] fold 3 RMSE: 18.1963 | elapsed 19.6s


  emb:swin_small_patch4_window7_224_224 [raw] fold 4: PCA -> 512 dims


    emb:swin_small_patch4_window7_224_224 [HGBR|raw] fold 4 RMSE: 18.4639 | elapsed 24.7s


  emb:swin_small_patch4_window7_224_224 [sqrt] fold 0: PCA -> 512 dims


    emb:swin_small_patch4_window7_224_224 [HGBR|sqrt] fold 0 RMSE: 18.0737 | elapsed 4.6s


  emb:swin_small_patch4_window7_224_224 [sqrt] fold 1: PCA -> 512 dims


    emb:swin_small_patch4_window7_224_224 [HGBR|sqrt] fold 1 RMSE: 18.8576 | elapsed 9.4s


  emb:swin_small_patch4_window7_224_224 [sqrt] fold 2: PCA -> 512 dims


    emb:swin_small_patch4_window7_224_224 [HGBR|sqrt] fold 2 RMSE: 19.0208 | elapsed 14.4s


  emb:swin_small_patch4_window7_224_224 [sqrt] fold 3: PCA -> 512 dims


    emb:swin_small_patch4_window7_224_224 [HGBR|sqrt] fold 3 RMSE: 18.6574 | elapsed 17.9s


  emb:swin_small_patch4_window7_224_224 [sqrt] fold 4: PCA -> 512 dims


    emb:swin_small_patch4_window7_224_224 [HGBR|sqrt] fold 4 RMSE: 18.8841 | elapsed 24.5s


>>> emb:swin_small_patch4_window7_224_224 [HGBR] best target: raw | OOF RMSE: 18.30112


=== HGBR training for emb:swin_tiny_patch4_window7_224_224: train (8920, 768), test (992, 768) ===


  emb:swin_tiny_patch4_window7_224_224 [raw] fold 0: PCA -> 512 dims


    emb:swin_tiny_patch4_window7_224_224 [HGBR|raw] fold 0 RMSE: 17.8739 | elapsed 7.5s


  emb:swin_tiny_patch4_window7_224_224 [raw] fold 1: PCA -> 512 dims


    emb:swin_tiny_patch4_window7_224_224 [HGBR|raw] fold 1 RMSE: 18.6021 | elapsed 15.5s


  emb:swin_tiny_patch4_window7_224_224 [raw] fold 2: PCA -> 512 dims


    emb:swin_tiny_patch4_window7_224_224 [HGBR|raw] fold 2 RMSE: 18.6202 | elapsed 18.7s


  emb:swin_tiny_patch4_window7_224_224 [raw] fold 3: PCA -> 512 dims


    emb:swin_tiny_patch4_window7_224_224 [HGBR|raw] fold 3 RMSE: 18.4655 | elapsed 23.6s


  emb:swin_tiny_patch4_window7_224_224 [raw] fold 4: PCA -> 512 dims


    emb:swin_tiny_patch4_window7_224_224 [HGBR|raw] fold 4 RMSE: 18.6933 | elapsed 31.1s


  emb:swin_tiny_patch4_window7_224_224 [sqrt] fold 0: PCA -> 512 dims


    emb:swin_tiny_patch4_window7_224_224 [HGBR|sqrt] fold 0 RMSE: 18.2598 | elapsed 4.3s


  emb:swin_tiny_patch4_window7_224_224 [sqrt] fold 1: PCA -> 512 dims


    emb:swin_tiny_patch4_window7_224_224 [HGBR|sqrt] fold 1 RMSE: 18.9485 | elapsed 8.4s


  emb:swin_tiny_patch4_window7_224_224 [sqrt] fold 2: PCA -> 512 dims


    emb:swin_tiny_patch4_window7_224_224 [HGBR|sqrt] fold 2 RMSE: 18.8192 | elapsed 12.2s


  emb:swin_tiny_patch4_window7_224_224 [sqrt] fold 3: PCA -> 512 dims


    emb:swin_tiny_patch4_window7_224_224 [HGBR|sqrt] fold 3 RMSE: 18.8308 | elapsed 16.0s


  emb:swin_tiny_patch4_window7_224_224 [sqrt] fold 4: PCA -> 512 dims


    emb:swin_tiny_patch4_window7_224_224 [HGBR|sqrt] fold 4 RMSE: 18.8900 | elapsed 20.7s


>>> emb:swin_tiny_patch4_window7_224_224 [HGBR] best target: raw | OOF RMSE: 18.45341


=== HGBR training for emb:tf_efficientnet_b0_224: train (8920, 1280), test (992, 1280) ===


  emb:tf_efficientnet_b0_224 [raw] fold 0: PCA -> 512 dims


    emb:tf_efficientnet_b0_224 [HGBR|raw] fold 0 RMSE: 18.0223 | elapsed 7.4s


  emb:tf_efficientnet_b0_224 [raw] fold 1: PCA -> 512 dims


    emb:tf_efficientnet_b0_224 [HGBR|raw] fold 1 RMSE: 18.4258 | elapsed 12.3s


  emb:tf_efficientnet_b0_224 [raw] fold 2: PCA -> 512 dims


    emb:tf_efficientnet_b0_224 [HGBR|raw] fold 2 RMSE: 18.6001 | elapsed 17.9s


  emb:tf_efficientnet_b0_224 [raw] fold 3: PCA -> 512 dims


    emb:tf_efficientnet_b0_224 [HGBR|raw] fold 3 RMSE: 18.1866 | elapsed 24.4s


  emb:tf_efficientnet_b0_224 [raw] fold 4: PCA -> 512 dims


    emb:tf_efficientnet_b0_224 [HGBR|raw] fold 4 RMSE: 18.6178 | elapsed 30.1s


  emb:tf_efficientnet_b0_224 [sqrt] fold 0: PCA -> 512 dims


    emb:tf_efficientnet_b0_224 [HGBR|sqrt] fold 0 RMSE: 18.2260 | elapsed 5.9s


  emb:tf_efficientnet_b0_224 [sqrt] fold 1: PCA -> 512 dims


    emb:tf_efficientnet_b0_224 [HGBR|sqrt] fold 1 RMSE: 18.8042 | elapsed 10.5s


  emb:tf_efficientnet_b0_224 [sqrt] fold 2: PCA -> 512 dims


    emb:tf_efficientnet_b0_224 [HGBR|sqrt] fold 2 RMSE: 18.9228 | elapsed 15.1s


  emb:tf_efficientnet_b0_224 [sqrt] fold 3: PCA -> 512 dims


    emb:tf_efficientnet_b0_224 [HGBR|sqrt] fold 3 RMSE: 18.4445 | elapsed 21.0s


  emb:tf_efficientnet_b0_224 [sqrt] fold 4: PCA -> 512 dims


    emb:tf_efficientnet_b0_224 [HGBR|sqrt] fold 4 RMSE: 18.7099 | elapsed 26.6s


>>> emb:tf_efficientnet_b0_224 [HGBR] best target: raw | OOF RMSE: 18.37200


=== HGBR training for emb:tf_efficientnetv2_s_in21k_224: train (8920, 1280), test (992, 1280) ===


  emb:tf_efficientnetv2_s_in21k_224 [raw] fold 0: PCA -> 512 dims


    emb:tf_efficientnetv2_s_in21k_224 [HGBR|raw] fold 0 RMSE: 18.0869 | elapsed 4.3s


  emb:tf_efficientnetv2_s_in21k_224 [raw] fold 1: PCA -> 512 dims


    emb:tf_efficientnetv2_s_in21k_224 [HGBR|raw] fold 1 RMSE: 18.5339 | elapsed 8.6s


  emb:tf_efficientnetv2_s_in21k_224 [raw] fold 2: PCA -> 512 dims


    emb:tf_efficientnetv2_s_in21k_224 [HGBR|raw] fold 2 RMSE: 18.7813 | elapsed 13.7s


  emb:tf_efficientnetv2_s_in21k_224 [raw] fold 3: PCA -> 512 dims


    emb:tf_efficientnetv2_s_in21k_224 [HGBR|raw] fold 3 RMSE: 18.3667 | elapsed 19.8s


  emb:tf_efficientnetv2_s_in21k_224 [raw] fold 4: PCA -> 512 dims


    emb:tf_efficientnetv2_s_in21k_224 [HGBR|raw] fold 4 RMSE: 18.6896 | elapsed 25.6s


  emb:tf_efficientnetv2_s_in21k_224 [sqrt] fold 0: PCA -> 512 dims


    emb:tf_efficientnetv2_s_in21k_224 [HGBR|sqrt] fold 0 RMSE: 18.4709 | elapsed 5.2s


  emb:tf_efficientnetv2_s_in21k_224 [sqrt] fold 1: PCA -> 512 dims


    emb:tf_efficientnetv2_s_in21k_224 [HGBR|sqrt] fold 1 RMSE: 18.9131 | elapsed 9.4s


  emb:tf_efficientnetv2_s_in21k_224 [sqrt] fold 2: PCA -> 512 dims


    emb:tf_efficientnetv2_s_in21k_224 [HGBR|sqrt] fold 2 RMSE: 19.0946 | elapsed 17.4s


  emb:tf_efficientnetv2_s_in21k_224 [sqrt] fold 3: PCA -> 512 dims


    emb:tf_efficientnetv2_s_in21k_224 [HGBR|sqrt] fold 3 RMSE: 18.6486 | elapsed 22.2s


  emb:tf_efficientnetv2_s_in21k_224 [sqrt] fold 4: PCA -> 512 dims


    emb:tf_efficientnetv2_s_in21k_224 [HGBR|sqrt] fold 4 RMSE: 18.9927 | elapsed 28.1s


>>> emb:tf_efficientnetv2_s_in21k_224 [HGBR] best target: raw | OOF RMSE: 18.49333


=== HGBR training for emb:vit_base_patch16_224_224: train (8920, 768), test (992, 768) ===


  emb:vit_base_patch16_224_224 [raw] fold 0: PCA -> 512 dims


    emb:vit_base_patch16_224_224 [HGBR|raw] fold 0 RMSE: 17.9077 | elapsed 5.3s


  emb:vit_base_patch16_224_224 [raw] fold 1: PCA -> 512 dims


    emb:vit_base_patch16_224_224 [HGBR|raw] fold 1 RMSE: 18.6358 | elapsed 8.7s


  emb:vit_base_patch16_224_224 [raw] fold 2: PCA -> 512 dims


    emb:vit_base_patch16_224_224 [HGBR|raw] fold 2 RMSE: 18.8713 | elapsed 15.6s


  emb:vit_base_patch16_224_224 [raw] fold 3: PCA -> 512 dims


    emb:vit_base_patch16_224_224 [HGBR|raw] fold 3 RMSE: 18.3547 | elapsed 22.8s


  emb:vit_base_patch16_224_224 [raw] fold 4: PCA -> 512 dims


    emb:vit_base_patch16_224_224 [HGBR|raw] fold 4 RMSE: 18.5086 | elapsed 29.1s


  emb:vit_base_patch16_224_224 [sqrt] fold 0: PCA -> 512 dims


    emb:vit_base_patch16_224_224 [HGBR|sqrt] fold 0 RMSE: 18.3510 | elapsed 6.4s


  emb:vit_base_patch16_224_224 [sqrt] fold 1: PCA -> 512 dims


    emb:vit_base_patch16_224_224 [HGBR|sqrt] fold 1 RMSE: 18.7841 | elapsed 10.8s


  emb:vit_base_patch16_224_224 [sqrt] fold 2: PCA -> 512 dims


    emb:vit_base_patch16_224_224 [HGBR|sqrt] fold 2 RMSE: 19.1652 | elapsed 15.0s


  emb:vit_base_patch16_224_224 [sqrt] fold 3: PCA -> 512 dims


    emb:vit_base_patch16_224_224 [HGBR|sqrt] fold 3 RMSE: 18.6520 | elapsed 19.7s


  emb:vit_base_patch16_224_224 [sqrt] fold 4: PCA -> 512 dims


    emb:vit_base_patch16_224_224 [HGBR|sqrt] fold 4 RMSE: 18.9193 | elapsed 25.1s


>>> emb:vit_base_patch16_224_224 [HGBR] best target: raw | OOF RMSE: 18.45844


HGBR results: {
  "meta_stats": {
    "oof_rmse": 20.646863559898133,
    "fold_rmses": [
      20.45747953581059,
      20.67361744259402,
      20.750477198553718,
      20.6603480073514,
      20.691197728305713
    ],
    "used_pca": false,
    "n_comp": 39,
    "target": "raw"
  },
  "emb:convnext_tiny_in22k_224": {
    "oof_rmse": 18.45852255637422,
    "fold_rmses": [
      18.089962761292096,
      18.678476519274646,
      18.610188004644606,
      18.281480274905174,
      18.625289754118874
    ],
    "used_pca": true,
    "n_comp": 512,
    "target": "raw"
  },
  "emb:deit3_small_patch16_224_224": {
    "oof_rmse": 18.696130405766517,
    "fold_rmses": [
      18.29185240600516,
      18.823628435869978,
      19.047284746421806,
      18.542782911689756,
      18.76623975363846
    ],
    "used_pca": false,
    "n_comp": 384,
    "target": "raw"
  },
  "emb:openclip_vit_b16_224": {
    "oof_rmse": 18.325641171999695,
    "fold_rmses": [
      17.783822615909695,
      18.3

In [30]:
# Level-2 RidgeCV stacker with nested CV over LGBM + HGBR OOFs
import glob, json, time, os
import numpy as np, pandas as pd
from sklearn.linear_model import RidgeCV

def rmse(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

# Load target and folds
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
folds_df = pd.read_csv('folds.csv')
id2fold = dict(zip(folds_df['Id'], folds_df['fold']))
train_df['fold'] = train_df['Id'].map(id2fold)
y = train_df['Pawpularity'].values.astype(float)

# Discover base OOF/test files (exclude previous L2 artifacts to avoid leakage)
all_oof = sorted(glob.glob('oof_*.npy'))
oof_lgb_files = [f for f in all_oof if not os.path.basename(f).startswith('oof_L2_')]
oof_hgb_files = sorted(glob.glob('oof_hgb_*.npy'))
def name_from(path, prefix):
    return path[len(prefix):-4]
names_lgb = [name_from(p, 'oof_') for p in oof_lgb_files]
names_hgb = [name_from(p, 'oof_hgb_') for p in oof_hgb_files]
print('Base models (LGB):', names_lgb)
print('Base models (HGB):', names_hgb)
assert len(oof_lgb_files) > 0 or len(oof_hgb_files) > 0, 'No base OOF files found.'

# Load matrices
cols = []
O_list = []
T_list = []
for p, n in zip(oof_lgb_files, names_lgb):
    O_list.append(np.load(p))
    T_list.append(np.load(f'test_pred_{n}.npy'))
    cols.append(f'lgb:{n}')
for p, n in zip(oof_hgb_files, names_hgb):
    O_list.append(np.load(p))
    T_list.append(np.load(f'test_pred_hgb_{n}.npy'))
    cols.append(f'hgb:{n}')
O = np.vstack(O_list).T  # (n_samples, n_models)
T = np.vstack(T_list).T  # (n_test, n_models)
print('L2 feature matrix shapes:', O.shape, T.shape)

# Row-wise aggregation features (simple and cheap)
def add_row_aggs(M, prefix):
    mean = M.mean(axis=1, keepdims=True)
    std = M.std(axis=1, keepdims=True)
    vmin = M.min(axis=1, keepdims=True)
    vmax = M.max(axis=1, keepdims=True)
    return np.hstack([M, mean, std, vmin, vmax]), [f'{prefix}_mean', f'{prefix}_std', f'{prefix}_min', f'{prefix}_max']

# Use combined columns for aggs
O_ext, agg_names = add_row_aggs(O, 'agg')
T_ext, _ = add_row_aggs(T, 'agg')
cols_ext = cols + agg_names

# Nested CV stacker
alphas = np.logspace(-4, 2, 20)
meta_oof = np.zeros(len(train_df), dtype=float)
meta_test_accum = np.zeros(len(test_df), dtype=float)
coefs = []
t0 = time.time()
for fold in range(5):
    tr_idx = np.where(train_df['fold'].values != fold)[0]
    va_idx = np.where(train_df['fold'].values == fold)[0]
    X_tr, X_va = O_ext[tr_idx], O_ext[va_idx]
    y_tr, y_va = y[tr_idx], y[va_idx]
    ridge = RidgeCV(alphas=alphas, fit_intercept=True, cv=5, scoring='neg_root_mean_squared_error')
    ridge.fit(X_tr, y_tr)
    va_pred = ridge.predict(X_va)
    meta_oof[va_idx] = va_pred
    fold_rmse = rmse(y_va, va_pred)
    meta_test_accum += ridge.predict(T_ext)
    coefs.append(ridge.coef_.copy())
    print(f'L2 Ridge fold {fold} RMSE: {fold_rmse:.5f} | alpha: {ridge.alpha_:.5g} | elapsed {time.time()-t0:.1f}s', flush=True)

meta_rmse = rmse(y, meta_oof)
meta_test = meta_test_accum / 5.0
print(f'L2 Ridge meta OOF RMSE: {meta_rmse:.5f}')

# Save artifacts and submission
np.save('oof_L2_ridge.npy', meta_oof)
np.save('test_pred_L2_ridge.npy', meta_test)
sub = pd.DataFrame({'Id': test_df['Id'], 'Pawpularity': np.clip(meta_test, 1.0, 100.0)})
sub.to_csv('submission.csv', index=False)
print('Saved submission.csv. Head:')
print(sub.head())

# Report average coefficients per feature for inspection
avg_coef = np.mean(np.vstack(coefs), axis=0)
coef_report = {name: float(c) for name, c in zip(cols_ext, avg_coef)}
print('Average L2 coefficients (top 12 by abs):')
top = sorted(coef_report.items(), key=lambda kv: abs(kv[1]), reverse=True)[:12]
print(json.dumps(dict(top), indent=2))

# Also print single-model RMSEs for reference
single_scores = {cols[i]: rmse(y, O[:, i]) for i in range(len(cols))}
print('Single base OOF RMSEs (subset):', json.dumps(dict(list(single_scores.items())[:8]), indent=2))

Base models (LGB): ['emb:convnext_tiny_in22k_224', 'emb:deit3_small_patch16_224_224', 'emb:openclip_vit_b16_224', 'emb:openclip_vit_b32_224', 'emb:swin_small_patch4_window7_224_224', 'emb:swin_tiny_patch4_window7_224_224', 'emb:tf_efficientnet_b0_224', 'emb:tf_efficientnetv2_s_in21k_224', 'emb:vit_base_patch16_224_224', 'hgb_emb:convnext_tiny_in22k_224', 'hgb_emb:deit3_small_patch16_224_224', 'hgb_emb:openclip_vit_b16_224', 'hgb_emb:openclip_vit_b32_224', 'hgb_emb:swin_small_patch4_window7_224_224', 'hgb_emb:swin_tiny_patch4_window7_224_224', 'hgb_emb:tf_efficientnet_b0_224', 'hgb_emb:tf_efficientnetv2_s_in21k_224', 'hgb_emb:vit_base_patch16_224_224', 'hgb_meta_stats', 'meta_stats']
Base models (HGB): ['emb:convnext_tiny_in22k_224', 'emb:deit3_small_patch16_224_224', 'emb:openclip_vit_b16_224', 'emb:openclip_vit_b32_224', 'emb:swin_small_patch4_window7_224_224', 'emb:swin_tiny_patch4_window7_224_224', 'emb:tf_efficientnet_b0_224', 'emb:tf_efficientnetv2_s_in21k_224', 'emb:vit_base_patc

L2 Ridge fold 0 RMSE: 16.93826 | alpha: 100 | elapsed 0.2s


L2 Ridge fold 1 RMSE: 17.69146 | alpha: 100 | elapsed 0.5s


L2 Ridge fold 2 RMSE: 17.89423 | alpha: 100 | elapsed 0.8s


L2 Ridge fold 3 RMSE: 17.24341 | alpha: 100 | elapsed 1.0s


L2 Ridge fold 4 RMSE: 17.69985 | alpha: 100 | elapsed 1.2s


L2 Ridge meta OOF RMSE: 17.49695
Saved submission.csv. Head:
                                 Id  Pawpularity
0  ee51b99832f1ba868f646df93d2b6b81    54.325589
1  caddfb3f8bff9c4b95dbe022018eea21    39.015959
2  582eeabd4a448a53ebb79995888a4b0b    32.744878
3  afc1ad7f0c5eea880759d09e77f7deee    31.604933
4  d5bdf3446e86ce4ec67ce7a00f1cccc2    24.989403
Average L2 coefficients (top 12 by abs):
{
  "lgb:emb:openclip_vit_b16_224": 0.40855216339259615,
  "agg_min": 0.4067674784122034,
  "lgb:meta_stats": 0.24580635827245093,
  "lgb:emb:openclip_vit_b32_224": 0.22175726418799577,
  "lgb:emb:tf_efficientnetv2_s_in21k_224": 0.19633917222157776,
  "lgb:emb:vit_base_patch16_224_224": 0.17476867150676131,
  "lgb:emb:tf_efficientnet_b0_224": 0.16076190735151252,
  "lgb:emb:deit3_small_patch16_224_224": 0.15879837958488446,
  "lgb:emb:swin_small_patch4_window7_224_224": 0.14425655602618073,
  "agg_std": 0.13294185891827248,
  "lgb:hgb_meta_stats": -0.09074945078720045,
  "hgb:meta_stats": -0.09074

In [28]:
# PCA sweep for top embeddings (per-fold L2+PCA) with LightGBM; overwrite oof_/test_pred_ if improved
import os, time, glob, json
import numpy as np, pandas as pd
from sklearn.decomposition import PCA

def rmse(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

try:
    import lightgbm as lgb
    lgbm_ok = True
except Exception as e:
    print('LightGBM not available for PCA sweep:', e)
    lgbm_ok = False

if lgbm_ok:
    train_df = pd.read_csv('train.csv')
    test_df = pd.read_csv('test.csv')
    folds_df = pd.read_csv('folds.csv')
    id2fold = dict(zip(folds_df['Id'], folds_df['fold']))
    train_df['fold'] = train_df['Id'].map(id2fold)
    y = train_df['Pawpularity'].values.astype(float)

    def l2norm(a):
        n = np.linalg.norm(a, axis=1, keepdims=True) + 1e-12
        return a / n

    def load_fs(tag):
        X_tr = np.load(f'X_img_train_{tag}.npy')
        X_te = np.load(f'X_img_test_{tag}.npy')
        return X_tr, X_te

    # Targets to sweep: ViT/CLIP: 256/384/512; ConvNet: 384/512/640
    sweep_plan = {
        'openclip_vit_b32_224': [256, 384, 512],
        'openclip_vit_b16_224': [256, 384, 512],
        'swin_small_patch4_window7_224_224': [256, 384, 512],
        'tf_efficientnet_b0_224': [384, 512, 640],
        'vit_base_patch16_224_224': [256, 384, 512],
    }

    improved = {}
    for tag, ncomps in sweep_plan.items():
        tr_path = f'X_img_train_{tag}.npy'; te_path = f'X_img_test_{tag}.npy'
        if not (os.path.exists(tr_path) and os.path.exists(te_path)):
            print(f'[Skip] Missing arrays for {tag}')
            continue
        X_tr, X_te = load_fs(tag)
        base_oof_path = f'oof_emb:{tag}.npy'
        base_rmse = None
        if os.path.exists(base_oof_path):
            base_rmse = rmse(y, np.load(base_oof_path))
        else:
            print(f'[Info] No existing OOF for emb:{tag}; will treat best found as baseline')

        best = {'rmse': float('inf'), 'oof': None, 'test': None, 'n_comp': None}
        print(f'=== PCA sweep for emb:{tag} | candidates: {ncomps} ===', flush=True)
        for nc in ncomps:
            oof = np.zeros(len(train_df), dtype=float)
            te_pred_accum = np.zeros(len(test_df), dtype=float)
            t0 = time.time()
            for fold in range(5):
                tr_idx = np.where(train_df['fold'].values != fold)[0]
                va_idx = np.where(train_df['fold'].values == fold)[0]
                X_tr_fold = l2norm(X_tr[tr_idx])
                X_va_fold = l2norm(X_tr[va_idx])
                X_te_fold = l2norm(X_te)
                pca = PCA(n_components=min(nc, X_tr.shape[1]), whiten=False, random_state=42)
                X_tr_p = pca.fit_transform(X_tr_fold)
                X_va_p = pca.transform(X_va_fold)
                X_te_p = pca.transform(X_te_fold)
                dtrain = lgb.Dataset(X_tr_p, label=y[tr_idx])
                dvalid = lgb.Dataset(X_va_p, label=y[va_idx])
                params = dict(objective='regression', metric='rmse', learning_rate=0.03,
                              num_leaves=64, min_data_in_leaf=40, feature_fraction=0.8,
                              bagging_fraction=0.8, bagging_freq=1, lambda_l2=1.0, verbosity=-1)
                gbm = lgb.train(params, dtrain, num_boost_round=10000, valid_sets=[dvalid],
                                valid_names=['valid'], callbacks=[lgb.early_stopping(300), lgb.log_evaluation(200)])
                va_pred = np.clip(gbm.predict(X_va_p, num_iteration=gbm.best_iteration), 1.0, 100.0)
                te_pred = np.clip(gbm.predict(X_te_p, num_iteration=gbm.best_iteration), 1.0, 100.0)
                oof[va_idx] = va_pred
                te_pred_accum += te_pred
                print(f'  n_comp={nc} fold {fold} done | elapsed {time.time()-t0:.1f}s', flush=True)
            fs_rmse = rmse(y, oof)
            print(f'  -> n_comp={nc} OOF RMSE: {fs_rmse:.5f}', flush=True)
            if fs_rmse < best['rmse']:
                best = {'rmse': fs_rmse, 'oof': oof.copy(), 'test': (te_pred_accum/5.0).copy(), 'n_comp': nc}

        print(f'Best for emb:{tag}: n_comp={best["n_comp"]} | OOF RMSE: {best["rmse"]:.5f} | prev: {base_rmse}', flush=True)
        # Overwrite only if improvement vs existing (or if no baseline)
        if (base_rmse is None) or (best['rmse'] + 1e-6 < base_rmse):
            np.save(f'oof_emb:{tag}.npy', best['oof'])
            np.save(f'test_pred_emb:{tag}.npy', best['test'])
            improved[f'emb:{tag}'] = {'oof_rmse': best['rmse'], 'n_comp': best['n_comp']}
            print(f'  Saved improved OOF/test for emb:{tag}')
        else:
            print(f'  No improvement for emb:{tag}; kept existing files')

    print('PCA sweep improvements:', json.dumps(improved, indent=2))
else:
    print('Skipping PCA sweep: LightGBM unavailable.')

=== PCA sweep for emb:openclip_vit_b32_224 | candidates: [256, 384, 512] ===


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.7075


[400]	valid's rmse: 17.7214


Early stopping, best iteration is:
[250]	valid's rmse: 17.6792
  n_comp=256 fold 0 done | elapsed 3.4s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.2634


[400]	valid's rmse: 18.3034


Early stopping, best iteration is:
[196]	valid's rmse: 18.2574
  n_comp=256 fold 1 done | elapsed 6.5s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.643


[400]	valid's rmse: 18.6648


Early stopping, best iteration is:
[250]	valid's rmse: 18.6138
  n_comp=256 fold 2 done | elapsed 9.8s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.1074


[400]	valid's rmse: 18.1178


Early stopping, best iteration is:
[226]	valid's rmse: 18.0813
  n_comp=256 fold 3 done | elapsed 13.0s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.4492


[400]	valid's rmse: 18.4104


[600]	valid's rmse: 18.4303
Early stopping, best iteration is:
[314]	valid's rmse: 18.391
  n_comp=256 fold 4 done | elapsed 16.7s


  -> n_comp=256 OOF RMSE: 18.20728


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.7513


[400]	valid's rmse: 17.7263


[600]	valid's rmse: 17.7571
Early stopping, best iteration is:
[322]	valid's rmse: 17.7138
  n_comp=384 fold 0 done | elapsed 5.2s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3766


[400]	valid's rmse: 18.3589


[600]	valid's rmse: 18.3544


Early stopping, best iteration is:
[337]	valid's rmse: 18.3478
  n_comp=384 fold 1 done | elapsed 10.4s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.6832


[400]	valid's rmse: 18.6432


[600]	valid's rmse: 18.6449


Early stopping, best iteration is:
[334]	valid's rmse: 18.6399
  n_comp=384 fold 2 done | elapsed 15.6s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.1919


[400]	valid's rmse: 18.1375


Early stopping, best iteration is:
[280]	valid's rmse: 18.1303
  n_comp=384 fold 3 done | elapsed 20.3s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.4123


[400]	valid's rmse: 18.3831


[600]	valid's rmse: 18.3613


[800]	valid's rmse: 18.3785


Early stopping, best iteration is:
[636]	valid's rmse: 18.3575
  n_comp=384 fold 4 done | elapsed 27.9s


  -> n_comp=384 OOF RMSE: 18.24045


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.7511


[400]	valid's rmse: 17.7378


[600]	valid's rmse: 17.7684


Early stopping, best iteration is:
[357]	valid's rmse: 17.7307
  n_comp=512 fold 0 done | elapsed 7.0s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.304


[400]	valid's rmse: 18.3353


Early stopping, best iteration is:
[256]	valid's rmse: 18.2982
  n_comp=512 fold 1 done | elapsed 13.1s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.5667


[400]	valid's rmse: 18.5658


Early stopping, best iteration is:
[296]	valid's rmse: 18.5434
  n_comp=512 fold 2 done | elapsed 19.6s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3388


[400]	valid's rmse: 18.2653


[600]	valid's rmse: 18.294


Early stopping, best iteration is:
[351]	valid's rmse: 18.259
  n_comp=512 fold 3 done | elapsed 26.6s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.477


[400]	valid's rmse: 18.4101


[600]	valid's rmse: 18.4229


Early stopping, best iteration is:
[387]	valid's rmse: 18.4009
  n_comp=512 fold 4 done | elapsed 34.1s


  -> n_comp=512 OOF RMSE: 18.24854


Best for emb:openclip_vit_b32_224: n_comp=256 | OOF RMSE: 18.20728 | prev: 18.207280286371326


  No improvement for emb:openclip_vit_b32_224; kept existing files
[Info] No existing OOF for emb:openclip_vit_b16_224; will treat best found as baseline
=== PCA sweep for emb:openclip_vit_b16_224 | candidates: [256, 384, 512] ===


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.5574


[400]	valid's rmse: 17.5463


Early stopping, best iteration is:
[257]	valid's rmse: 17.5235
  n_comp=256 fold 0 done | elapsed 3.3s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.0246


[400]	valid's rmse: 18.0084


[600]	valid's rmse: 18.0334


Early stopping, best iteration is:
[412]	valid's rmse: 18.0035
  n_comp=256 fold 1 done | elapsed 7.6s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.4248


[400]	valid's rmse: 18.4463


Early stopping, best iteration is:
[183]	valid's rmse: 18.4118
  n_comp=256 fold 2 done | elapsed 10.6s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.7777


[400]	valid's rmse: 17.7735


Early stopping, best iteration is:
[254]	valid's rmse: 17.7416
  n_comp=256 fold 3 done | elapsed 13.9s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.2827


[400]	valid's rmse: 18.312


Early stopping, best iteration is:
[210]	valid's rmse: 18.2691
  n_comp=256 fold 4 done | elapsed 17.0s


  -> n_comp=256 OOF RMSE: 17.99287


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.5014


[400]	valid's rmse: 17.4763


[600]	valid's rmse: 17.4831


Early stopping, best iteration is:
[329]	valid's rmse: 17.4669
  n_comp=384 fold 0 done | elapsed 5.1s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.1087


[400]	valid's rmse: 18.0488


[600]	valid's rmse: 18.0648


Early stopping, best iteration is:
[412]	valid's rmse: 18.045
  n_comp=384 fold 1 done | elapsed 10.9s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3029


[400]	valid's rmse: 18.3392


Early stopping, best iteration is:
[236]	valid's rmse: 18.2949
  n_comp=384 fold 2 done | elapsed 15.2s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.9246


[400]	valid's rmse: 17.9216


[600]	valid's rmse: 17.9197
Early stopping, best iteration is:
[305]	valid's rmse: 17.8988
  n_comp=384 fold 3 done | elapsed 20.1s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3339


[400]	valid's rmse: 18.3573


Early stopping, best iteration is:
[236]	valid's rmse: 18.3238
  n_comp=384 fold 4 done | elapsed 24.5s


  -> n_comp=384 OOF RMSE: 18.00860


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.5513


[400]	valid's rmse: 17.539


Early stopping, best iteration is:
[238]	valid's rmse: 17.5133
  n_comp=512 fold 0 done | elapsed 5.7s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.2388


[400]	valid's rmse: 18.2454


[600]	valid's rmse: 18.2519
Early stopping, best iteration is:
[306]	valid's rmse: 18.193
  n_comp=512 fold 1 done | elapsed 12.3s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.4699


[400]	valid's rmse: 18.4593


Early stopping, best iteration is:
[287]	valid's rmse: 18.4553
  n_comp=512 fold 2 done | elapsed 18.7s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.9655


[400]	valid's rmse: 17.9435


[600]	valid's rmse: 17.955


Early stopping, best iteration is:
[346]	valid's rmse: 17.936
  n_comp=512 fold 3 done | elapsed 25.6s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.2974


[400]	valid's rmse: 18.2969


[600]	valid's rmse: 18.2881


Early stopping, best iteration is:
[333]	valid's rmse: 18.28
  n_comp=512 fold 4 done | elapsed 32.5s


  -> n_comp=512 OOF RMSE: 18.07847


Best for emb:openclip_vit_b16_224: n_comp=256 | OOF RMSE: 17.99287 | prev: None


  Saved improved OOF/test for emb:openclip_vit_b16_224
=== PCA sweep for emb:swin_small_patch4_window7_224_224 | candidates: [256, 384, 512] ===


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.4771


[400]	valid's rmse: 17.4546


[600]	valid's rmse: 17.4513
Early stopping, best iteration is:
[306]	valid's rmse: 17.4268
  n_comp=256 fold 0 done | elapsed 5.0s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3685


[400]	valid's rmse: 18.3485


[600]	valid's rmse: 18.343


[800]	valid's rmse: 18.3528


Early stopping, best iteration is:
[604]	valid's rmse: 18.3417
  n_comp=256 fold 1 done | elapsed 11.9s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.5449


[400]	valid's rmse: 18.5715


Early stopping, best iteration is:
[179]	valid's rmse: 18.5304
  n_comp=256 fold 2 done | elapsed 16.2s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.0568


[400]	valid's rmse: 18.0561


[600]	valid's rmse: 18.0979
Early stopping, best iteration is:
[323]	valid's rmse: 18.0505
  n_comp=256 fold 3 done | elapsed 21.3s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.218


[400]	valid's rmse: 18.1993


[600]	valid's rmse: 18.2157


Early stopping, best iteration is:
[373]	valid's rmse: 18.1831
  n_comp=256 fold 4 done | elapsed 26.8s


  -> n_comp=256 OOF RMSE: 18.11041


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.4742


[400]	valid's rmse: 17.4355


Early stopping, best iteration is:
[292]	valid's rmse: 17.4202
  n_comp=384 fold 0 done | elapsed 6.7s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3448


[400]	valid's rmse: 18.3467


Early stopping, best iteration is:
[294]	valid's rmse: 18.3159
  n_comp=384 fold 1 done | elapsed 13.3s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.5108


[400]	valid's rmse: 18.4993


Early stopping, best iteration is:
[273]	valid's rmse: 18.4736
  n_comp=384 fold 2 done | elapsed 19.6s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.0519


[400]	valid's rmse: 18.0539


Early stopping, best iteration is:
[183]	valid's rmse: 18.0401
  n_comp=384 fold 3 done | elapsed 25.5s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3559


[400]	valid's rmse: 18.3043


[600]	valid's rmse: 18.2957


[800]	valid's rmse: 18.2865


[1000]	valid's rmse: 18.2812


[1200]	valid's rmse: 18.2801


[1400]	valid's rmse: 18.28


Early stopping, best iteration is:
[1266]	valid's rmse: 18.2795
  n_comp=384 fold 4 done | elapsed 39.5s


  -> n_comp=384 OOF RMSE: 18.10964


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.5853


[400]	valid's rmse: 17.5732


[600]	valid's rmse: 17.5998


Early stopping, best iteration is:
[406]	valid's rmse: 17.572
  n_comp=512 fold 0 done | elapsed 9.5s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.5136


[400]	valid's rmse: 18.5161


[600]	valid's rmse: 18.5112


[800]	valid's rmse: 18.5173


Early stopping, best iteration is:
[535]	valid's rmse: 18.4969
  n_comp=512 fold 1 done | elapsed 21.3s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.6311


[400]	valid's rmse: 18.63


Early stopping, best iteration is:
[167]	valid's rmse: 18.6176
  n_comp=512 fold 2 done | elapsed 28.8s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.0857


[400]	valid's rmse: 18.0529


[600]	valid's rmse: 18.0645


Early stopping, best iteration is:
[377]	valid's rmse: 18.0416
  n_comp=512 fold 3 done | elapsed 41.0s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3897


[400]	valid's rmse: 18.3092


[600]	valid's rmse: 18.3057


[800]	valid's rmse: 18.3008


Early stopping, best iteration is:
[642]	valid's rmse: 18.2968
  n_comp=512 fold 4 done | elapsed 54.1s


  -> n_comp=512 OOF RMSE: 18.20880


Best for emb:swin_small_patch4_window7_224_224: n_comp=384 | OOF RMSE: 18.10964 | prev: 18.109638128206022


  No improvement for emb:swin_small_patch4_window7_224_224; kept existing files
=== PCA sweep for emb:tf_efficientnet_b0_224 | candidates: [384, 512, 640] ===


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.8597


[400]	valid's rmse: 17.8955


Early stopping, best iteration is:
[198]	valid's rmse: 17.8566
  n_comp=384 fold 0 done | elapsed 6.0s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.4146


[400]	valid's rmse: 18.4552


Early stopping, best iteration is:
[205]	valid's rmse: 18.4098
  n_comp=384 fold 1 done | elapsed 13.2s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3492


[400]	valid's rmse: 18.387


Early stopping, best iteration is:
[263]	valid's rmse: 18.3433
  n_comp=384 fold 2 done | elapsed 21.1s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.9957


[400]	valid's rmse: 17.9786


[600]	valid's rmse: 17.9948


Early stopping, best iteration is:
[360]	valid's rmse: 17.9582
  n_comp=384 fold 3 done | elapsed 33.9s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.4098


[400]	valid's rmse: 18.4531


Early stopping, best iteration is:
[213]	valid's rmse: 18.3934
  n_comp=384 fold 4 done | elapsed 41.4s


  -> n_comp=384 OOF RMSE: 18.19377


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.8509


[400]	valid's rmse: 17.8555


Early stopping, best iteration is:
[254]	valid's rmse: 17.821
  n_comp=512 fold 0 done | elapsed 9.5s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3753


[400]	valid's rmse: 18.4137


Early stopping, best iteration is:
[255]	valid's rmse: 18.362
  n_comp=512 fold 1 done | elapsed 17.9s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.5273


[400]	valid's rmse: 18.5134


[600]	valid's rmse: 18.5415
Early stopping, best iteration is:
[315]	valid's rmse: 18.4944
  n_comp=512 fold 2 done | elapsed 29.5s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.0407


[400]	valid's rmse: 18.0584


Early stopping, best iteration is:
[213]	valid's rmse: 18.0186
  n_comp=512 fold 3 done | elapsed 38.7s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3828


[400]	valid's rmse: 18.4316


Early stopping, best iteration is:
[226]	valid's rmse: 18.3778
  n_comp=512 fold 4 done | elapsed 47.7s


  -> n_comp=512 OOF RMSE: 18.21653


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.8313


[400]	valid's rmse: 17.857


Early stopping, best iteration is:
[220]	valid's rmse: 17.8252
  n_comp=640 fold 0 done | elapsed 9.2s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3758


[400]	valid's rmse: 18.3389


[600]	valid's rmse: 18.3551


Early stopping, best iteration is:
[414]	valid's rmse: 18.3332
  n_comp=640 fold 1 done | elapsed 23.4s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.5485


[400]	valid's rmse: 18.5733


Early stopping, best iteration is:
[230]	valid's rmse: 18.5282
  n_comp=640 fold 2 done | elapsed 33.4s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.1504


[400]	valid's rmse: 18.1349


[600]	valid's rmse: 18.1576


Early stopping, best iteration is:
[362]	valid's rmse: 18.1215
  n_comp=640 fold 3 done | elapsed 51.7s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.4397


[400]	valid's rmse: 18.4786


Early stopping, best iteration is:
[197]	valid's rmse: 18.4288
  n_comp=640 fold 4 done | elapsed 60.5s


  -> n_comp=640 OOF RMSE: 18.24910


Best for emb:tf_efficientnet_b0_224: n_comp=384 | OOF RMSE: 18.19377 | prev: 18.193774521161647


  No improvement for emb:tf_efficientnet_b0_224; kept existing files
=== PCA sweep for emb:vit_base_patch16_224_224 | candidates: [256, 384, 512] ===


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.8036


[400]	valid's rmse: 17.8215


Early stopping, best iteration is:
[232]	valid's rmse: 17.7872
  n_comp=256 fold 0 done | elapsed 4.6s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3253


[400]	valid's rmse: 18.3278


Early stopping, best iteration is:
[243]	valid's rmse: 18.3008
  n_comp=256 fold 1 done | elapsed 9.7s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.6123


[400]	valid's rmse: 18.6048


Early stopping, best iteration is:
[282]	valid's rmse: 18.5874
  n_comp=256 fold 2 done | elapsed 14.8s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.0177


[400]	valid's rmse: 17.9955


[600]	valid's rmse: 18.0137


Early stopping, best iteration is:
[376]	valid's rmse: 17.9903
  n_comp=256 fold 3 done | elapsed 20.6s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3495


[400]	valid's rmse: 18.319


Early stopping, best iteration is:
[251]	valid's rmse: 18.3115
  n_comp=256 fold 4 done | elapsed 25.3s


  -> n_comp=256 OOF RMSE: 18.19758


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.9112


[400]	valid's rmse: 17.925


Early stopping, best iteration is:
[238]	valid's rmse: 17.8833
  n_comp=384 fold 0 done | elapsed 8.3s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.2839


[400]	valid's rmse: 18.2318


[600]	valid's rmse: 18.2309


Early stopping, best iteration is:
[431]	valid's rmse: 18.2198
  n_comp=384 fold 1 done | elapsed 17.2s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.7266


[400]	valid's rmse: 18.7163


Early stopping, best iteration is:
[296]	valid's rmse: 18.6804
  n_comp=384 fold 2 done | elapsed 23.8s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.053


[400]	valid's rmse: 18.0189


[600]	valid's rmse: 18.0124


[800]	valid's rmse: 18.0057


Early stopping, best iteration is:
[648]	valid's rmse: 18.0021
  n_comp=384 fold 3 done | elapsed 35.0s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3699


[400]	valid's rmse: 18.3263


[600]	valid's rmse: 18.3274


[800]	valid's rmse: 18.3247


Early stopping, best iteration is:
[563]	valid's rmse: 18.3152
  n_comp=384 fold 4 done | elapsed 43.9s


  -> n_comp=384 OOF RMSE: 18.22226


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 17.9733


[400]	valid's rmse: 17.927


Early stopping, best iteration is:
[271]	valid's rmse: 17.9207
  n_comp=512 fold 0 done | elapsed 8.2s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.3061


[400]	valid's rmse: 18.2622


[600]	valid's rmse: 18.2561


[800]	valid's rmse: 18.2561


Early stopping, best iteration is:
[564]	valid's rmse: 18.2472
  n_comp=512 fold 1 done | elapsed 19.8s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.7079


[400]	valid's rmse: 18.6634


[600]	valid's rmse: 18.6686


[800]	valid's rmse: 18.6724


Early stopping, best iteration is:
[549]	valid's rmse: 18.6575
  n_comp=512 fold 2 done | elapsed 31.9s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.0492


[400]	valid's rmse: 17.9849


[600]	valid's rmse: 17.9908


Early stopping, best iteration is:
[368]	valid's rmse: 17.978
  n_comp=512 fold 3 done | elapsed 42.5s


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 18.4966


[400]	valid's rmse: 18.4191


[600]	valid's rmse: 18.4178


Early stopping, best iteration is:
[449]	valid's rmse: 18.4158
  n_comp=512 fold 4 done | elapsed 52.9s


  -> n_comp=512 OOF RMSE: 18.24591


Best for emb:vit_base_patch16_224_224: n_comp=256 | OOF RMSE: 18.19758 | prev: 18.197577009031072


  No improvement for emb:vit_base_patch16_224_224; kept existing files
PCA sweep improvements: {
  "emb:openclip_vit_b16_224": {
    "oof_rmse": 17.992867001924974,
    "n_comp": 256
  }
}


In [25]:
# Level-2 LightGBM stacker (nested CV) over LGBM + HGBR OOFs
import glob, json, time
import numpy as np, pandas as pd

def rmse(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

try:
    import lightgbm as lgb
    lgbm_ok = True
except Exception as e:
    print('LightGBM not available for L2 stacker:', e)
    lgbm_ok = False

train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
folds_df = pd.read_csv('folds.csv')
id2fold = dict(zip(folds_df['Id'], folds_df['fold']))
train_df['fold'] = train_df['Id'].map(id2fold)
y = train_df['Pawpularity'].values.astype(float)

# Load base OOF/Test matrices
oof_lgb_files = sorted(glob.glob('oof_*.npy'))
oof_hgb_files = sorted(glob.glob('oof_hgb_*.npy'))
def name_from(path, prefix):
    return path[len(prefix):-4]
names_lgb = [name_from(p, 'oof_') for p in oof_lgb_files]
names_hgb = [name_from(p, 'oof_hgb_') for p in oof_hgb_files]
cols = []
O_list, T_list = [], []
for p, n in zip(oof_lgb_files, names_lgb):
    O_list.append(np.load(p))
    T_list.append(np.load(f'test_pred_{n}.npy'))
    cols.append(f'lgb:{n}')
for p, n in zip(oof_hgb_files, names_hgb):
    O_list.append(np.load(p))
    T_list.append(np.load(f'test_pred_hgb_{n}.npy'))
    cols.append(f'hgb:{n}')
if len(O_list) == 0:
    raise SystemExit('No base models found for L2 stacker')
O = np.vstack(O_list).T
T = np.vstack(T_list).T
print('L2 LightGBM: base features:', len(cols), '| shapes:', O.shape, T.shape)

# Row-wise aggregation features
def add_row_aggs(M):
    mean = M.mean(axis=1, keepdims=True)
    std = M.std(axis=1, keepdims=True)
    vmin = M.min(axis=1, keepdims=True)
    vmax = M.max(axis=1, keepdims=True)
    return np.hstack([M, mean, std, vmin, vmax])
O_ext = add_row_aggs(O)
T_ext = add_row_aggs(T)

if lgbm_ok:
    meta_oof = np.zeros(len(train_df), dtype=float)
    meta_test_accum = np.zeros(len(test_df), dtype=float)
    t0 = time.time()
    for fold in range(5):
        tr_idx = np.where(train_df['fold'].values != fold)[0]
        va_idx = np.where(train_df['fold'].values == fold)[0]
        X_tr, X_va = O_ext[tr_idx], O_ext[va_idx]
        y_tr, y_va = y[tr_idx], y[va_idx]
        dtrain = lgb.Dataset(X_tr, label=y_tr)
        dvalid = lgb.Dataset(X_va, label=y_va)
        params = dict(
            objective='regression', metric='rmse', learning_rate=0.03,
            num_leaves=31, min_data_in_leaf=60, feature_fraction=0.8,
            bagging_fraction=0.8, bagging_freq=1, lambda_l2=2.0, verbosity=-1
        )
        gbm = lgb.train(params, dtrain, num_boost_round=20000, valid_sets=[dvalid], valid_names=['valid'],
                        callbacks=[lgb.early_stopping(150), lgb.log_evaluation(200)])
        va_pred = gbm.predict(X_va, num_iteration=gbm.best_iteration)
        meta_oof[va_idx] = va_pred
        meta_test_accum += gbm.predict(T_ext, num_iteration=gbm.best_iteration)
        print(f'L2 LGBM fold {fold} RMSE: {rmse(y_va, va_pred):.5f} | iters: {gbm.best_iteration} | elapsed {time.time()-t0:.1f}s', flush=True)

    meta_rmse = rmse(y, meta_oof)
    meta_test = meta_test_accum / 5.0
    print(f'L2 LGBM meta OOF RMSE: {meta_rmse:.5f}')
    np.save('oof_L2_lgbm.npy', meta_oof)
    np.save('test_pred_L2_lgbm.npy', meta_test)
    sub = pd.DataFrame({'Id': test_df['Id'], 'Pawpularity': np.clip(meta_test, 1.0, 100.0)})
    sub.to_csv('submission.csv', index=False)
    print('Saved submission.csv (L2 LGBM). Head:')
    print(sub.head())
else:
    print('Skipping L2 LightGBM: LightGBM not available')

L2 LightGBM: base features: 28 | shapes: (8920, 28) (992, 28)
Training until validation scores don't improve for 150 rounds
[200]	valid's rmse: 17.3565


Early stopping, best iteration is:
[90]	valid's rmse: 17.199
L2 LGBM fold 0 RMSE: 17.19895 | iters: 90 | elapsed 0.2s


Training until validation scores don't improve for 150 rounds
[200]	valid's rmse: 18.0293


Early stopping, best iteration is:
[66]	valid's rmse: 17.9094
L2 LGBM fold 1 RMSE: 17.90943 | iters: 66 | elapsed 0.4s


Training until validation scores don't improve for 150 rounds
[200]	valid's rmse: 18.1887


Early stopping, best iteration is:
[78]	valid's rmse: 18.0441
L2 LGBM fold 2 RMSE: 18.04406 | iters: 78 | elapsed 0.6s


Training until validation scores don't improve for 150 rounds
[200]	valid's rmse: 17.5849


Early stopping, best iteration is:
[99]	valid's rmse: 17.5192
L2 LGBM fold 3 RMSE: 17.51919 | iters: 99 | elapsed 0.9s


Training until validation scores don't improve for 150 rounds
[200]	valid's rmse: 18.157


Early stopping, best iteration is:
[75]	valid's rmse: 17.963


L2 LGBM fold 4 RMSE: 17.96302 | iters: 75 | elapsed 1.1s


L2 LGBM meta OOF RMSE: 17.72981
Saved submission.csv (L2 LGBM). Head:
                                 Id  Pawpularity
0  ee51b99832f1ba868f646df93d2b6b81    61.325353
1  caddfb3f8bff9c4b95dbe022018eea21    35.590551
2  582eeabd4a448a53ebb79995888a4b0b    33.021473
3  afc1ad7f0c5eea880759d09e77f7deee    30.657687
4  d5bdf3446e86ce4ec67ce7a00f1cccc2    28.041714


In [26]:
# Enrich meta features (cheap image stats) and retrain meta models (LGBM + HGBR); overwrite OOF/test for meta_stats
import os, time, json, math
import numpy as np, pandas as pd, cv2
from sklearn.ensemble import HistGradientBoostingRegressor

def rmse(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

def colorfulness_bgr(img):
    # img BGR uint8
    B, G, R = cv2.split(img.astype(np.float32))
    rg = R - G
    yb = 0.5 * (R + G) - B
    std_rg, mean_rg = np.std(rg), np.mean(rg)
    std_yb, mean_yb = np.std(yb), np.mean(yb)
    return float(np.sqrt(std_rg**2 + std_yb**2) + 0.3 * np.sqrt(mean_rg**2 + mean_yb**2))

def entropy_channel(ch):
    # ch uint8 0..255
    hist = cv2.calcHist([ch], [0], None, [256], [0,256]).ravel()
    p = hist / (np.sum(hist) + 1e-12)
    p = p[p > 0]
    return float(-np.sum(p * np.log2(p)))

def center_brightness_ratio(gray):
    h, w = gray.shape
    ch0, ch1 = int(h*0.25), int(h*0.75)
    cw0, cw1 = int(w*0.25), int(w*0.75)
    center = gray[ch0:ch1, cw0:cw1]
    g_mean = float(np.mean(gray)) + 1e-6
    c_mean = float(np.mean(center))
    return float(c_mean / g_mean)

def otsu_foreground_fraction(gray):
    _, th = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    return float(np.mean(th > 0))

def compute_features_row(img_path):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        return None
    h, w = img.shape[:2]
    aspect = (w / max(h,1.0)) if h>0 else 0.0
    # original stats
    bgr_means = img.reshape(-1,3).mean(axis=0).tolist()
    bgr_stds = img.reshape(-1,3).std(axis=0).tolist()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    lap_var = float(cv2.Laplacian(gray, cv2.CV_64F).var())
    # enrichments
    gray_mean = float(np.mean(gray))
    gray_std = float(np.std(gray))
    p10 = float(np.percentile(gray, 10))
    p50 = float(np.percentile(gray, 50))
    p90 = float(np.percentile(gray, 90))
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    H, S, V = cv2.split(hsv)
    hsv_means = [float(np.mean(H)), float(np.mean(S)), float(np.mean(V))]
    hsv_stds = [float(np.std(H)), float(np.std(S)), float(np.std(V))]
    ent_s = entropy_channel(S)
    ent_v = entropy_channel(V)
    edges = cv2.Canny(gray, 100, 200)
    edge_density = float(np.mean(edges > 0))
    colorful = colorfulness_bgr(img)
    center_ratio = center_brightness_ratio(gray)
    otsu_frac = otsu_foreground_fraction(gray)
    feats = [w, h, aspect] + bgr_means + bgr_stds + [lap_var,
             gray_mean, gray_std, p10, p50, p90] + hsv_means + hsv_stds + [ent_s, ent_v,
             edge_density, colorful, center_ratio, otsu_frac]
    return np.array(feats, dtype=np.float32)

meta_cols = ['Subject Focus','Eyes','Face','Near','Action','Accessory','Group','Collage','Human','Occlusion','Info','Blur']
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
y = train_df['Pawpularity'].values.astype(float)

def build_meta_matrix(df, folder, log_every=1000):
    Xm = df[meta_cols].astype(np.float32).values
    feats = []
    ids = df['Id'].tolist()
    t0 = time.time()
    for i, id_ in enumerate(ids):
        f = compute_features_row(os.path.join(folder, f'{id_}.jpg'))
        if f is None:
            f = np.zeros(3+6+1+5+6+2+1+1+1, dtype=np.float32)  # fallback to zeros for enrichments+orig stats
        feats.append(f)
        if (i+1) % log_every == 0:
            print(f'  {folder}: {i+1}/{len(ids)} | elapsed {time.time()-t0:.1f}s', flush=True)
    Xf = np.vstack(feats)
    X = np.concatenate([Xm, Xf], axis=1)
    return X

print('Building enriched meta features...')
t0_all = time.time()
X_tr = build_meta_matrix(train_df, 'train')
X_te = build_meta_matrix(test_df, 'test')
np.save('X_meta_stats_train.npy', X_tr)
np.save('X_meta_stats_test.npy', X_te)
print('Saved enriched X_meta_stats_train.npy', X_tr.shape, '| X_meta_stats_test.npy', X_te.shape, '| time', f'{time.time()-t0_all:.1f}s')

# Retrain meta_stats models: LightGBM and HGBR
folds_df = pd.read_csv('folds.csv')
train_df = train_df.merge(folds_df, on='Id', how='left')

# Train LGBM for meta_stats
try:
    import lightgbm as lgb
    lgb_ok = True
except Exception as e:
    print('LightGBM not available for meta retrain:', e); lgb_ok = False

if lgb_ok:
    oof = np.zeros(len(train_df), dtype=float)
    te_acc = np.zeros(len(test_df), dtype=float)
    for fold in range(5):
        tr_idx = np.where(train_df['fold'].values != fold)[0]
        va_idx = np.where(train_df['fold'].values == fold)[0]
        dtrain = lgb.Dataset(X_tr[tr_idx], label=y[tr_idx])
        dvalid = lgb.Dataset(X_tr[va_idx], label=y[va_idx])
        params = dict(objective='regression', metric='rmse', learning_rate=0.03,
                      num_leaves=64, min_data_in_leaf=40, feature_fraction=0.8,
                      bagging_fraction=0.8, bagging_freq=1, lambda_l2=1.0, verbosity=-1)
        gbm = lgb.train(params, dtrain, num_boost_round=10000, valid_sets=[dvalid],
                        valid_names=['valid'], callbacks=[lgb.early_stopping(300), lgb.log_evaluation(200)])
        va_pred = gbm.predict(X_tr[va_idx], num_iteration=gbm.best_iteration)
        te_pred = gbm.predict(X_te, num_iteration=gbm.best_iteration)
        oof[va_idx] = np.clip(va_pred, 1.0, 100.0)
        te_acc += np.clip(te_pred, 1.0, 100.0)
        print(f'  meta_stats [LGBM] fold {fold} RMSE: {rmse(y[va_idx], oof[va_idx]):.4f}', flush=True)
    te_mean = te_acc / 5.0
    print('meta_stats [LGBM] OOF RMSE:', f'{rmse(y, oof):.5f}')
    np.save('oof_meta_stats.npy', oof)
    np.save('test_pred_meta_stats.npy', te_mean)

# Train HGBR for meta_stats
oof_h = np.zeros(len(train_df), dtype=float)
te_acc_h = np.zeros(len(test_df), dtype=float)
hgbr = HistGradientBoostingRegressor(learning_rate=0.05, max_iter=2500, max_leaf_nodes=31,
                                     min_samples_leaf=30, l2_regularization=1.0,
                                     validation_fraction=0.12, early_stopping=True, random_state=42)
for fold in range(5):
    tr_idx = np.where(train_df['fold'].values != fold)[0]
    va_idx = np.where(train_df['fold'].values == fold)[0]
    hgbr.fit(X_tr[tr_idx], y[tr_idx])
    va_pred = np.clip(hgbr.predict(X_tr[va_idx]), 1.0, 100.0)
    te_pred = np.clip(hgbr.predict(X_te), 1.0, 100.0)
    oof_h[va_idx] = va_pred
    te_acc_h += te_pred
    print(f'  meta_stats [HGBR] fold {fold} RMSE: {rmse(y[va_idx], va_pred):.4f}', flush=True)
te_mean_h = te_acc_h / 5.0
print('meta_stats [HGBR] OOF RMSE:', f'{rmse(y, oof_h):.5f}')
np.save('oof_hgb_meta_stats.npy', oof_h)
np.save('test_pred_hgb_meta_stats.npy', te_mean_h)
print('Meta retrain complete.')

Building enriched meta features...


  train: 1000/8920 | elapsed 95.9s


  train: 2000/8920 | elapsed 183.3s


  train: 3000/8920 | elapsed 277.4s


  train: 4000/8920 | elapsed 376.7s


  train: 5000/8920 | elapsed 482.6s


  train: 6000/8920 | elapsed 588.2s


  train: 7000/8920 | elapsed 693.3s


  train: 8000/8920 | elapsed 798.3s


Saved enriched X_meta_stats_train.npy (8920, 39) | X_meta_stats_test.npy (992, 39) | time 997.0s
Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 20.7459


Early stopping, best iteration is:
[43]	valid's rmse: 20.4775
  meta_stats [LGBM] fold 0 RMSE: 20.4775


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 21.0927


Early stopping, best iteration is:
[9]	valid's rmse: 20.6157
  meta_stats [LGBM] fold 1 RMSE: 20.6157


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 21.0516


Early stopping, best iteration is:
[3]	valid's rmse: 20.7187
  meta_stats [LGBM] fold 2 RMSE: 20.7187


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 20.9148


Early stopping, best iteration is:
[40]	valid's rmse: 20.639
  meta_stats [LGBM] fold 3 RMSE: 20.6390


Training until validation scores don't improve for 300 rounds


[200]	valid's rmse: 20.9158


Early stopping, best iteration is:
[26]	valid's rmse: 20.6338
  meta_stats [LGBM] fold 4 RMSE: 20.6338


meta_stats [LGBM] OOF RMSE: 20.61709
  meta_stats [HGBR] fold 0 RMSE: 20.4368


  meta_stats [HGBR] fold 1 RMSE: 20.6704


  meta_stats [HGBR] fold 2 RMSE: 20.7867


  meta_stats [HGBR] fold 3 RMSE: 20.6797


  meta_stats [HGBR] fold 4 RMSE: 20.6813


meta_stats [HGBR] OOF RMSE: 20.65130
Meta retrain complete.


In [27]:
# Extract extra embeddings: OpenCLIP ViT-B/16 and BEiT-Base (in22k) @224 on CPU
import os, time, gc, math, subprocess, sys
import numpy as np, pandas as pd
import torch
import timm
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from timm.data import resolve_data_config, create_transform

torch.set_num_threads(8)

def ensure_openclip():
    try:
        import open_clip  # noqa
    except Exception:
        print("Installing open-clip-torch (no-deps)...", flush=True)
        subprocess.run([sys.executable, '-m', 'pip', 'install', '--no-deps', '--upgrade-strategy', 'only-if-needed', 'open-clip-torch'], check=True)
    for pkg in ['ftfy', 'regex']:
        try:
            __import__(pkg)
        except Exception:
            print(f"Installing {pkg}...", flush=True)
            subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade-strategy', 'only-if-needed', pkg], check=True)
    return True

class ImageDataset(Dataset):
    def __init__(self, ids, folder, transform):
        self.ids = ids
        self.folder = folder
        self.transform = transform
    def __len__(self):
        return len(self.ids)
    def __getitem__(self, idx):
        img_id = self.ids[idx]
        path = os.path.join(self.folder, f"{img_id}.jpg")
        img = Image.open(path).convert('RGB')
        img = self.transform(img)
        return img

def extract_timm_embeddings(model_name, img_size=224, batch_size=128, num_workers=8):
    tr_out = f"X_img_train_{model_name.replace('/', '_')}_{img_size}.npy"
    te_out = f"X_img_test_{model_name.replace('/', '_')}_{img_size}.npy"
    if os.path.exists(tr_out) and os.path.exists(te_out):
        print(f"[Skip] {model_name} exists: {tr_out}, {te_out}")
        return
    t0 = time.time()
    print(f"\n[Emb] {model_name} @ {img_size}", flush=True)
    train_df = pd.read_csv('train.csv')
    test_df = pd.read_csv('test.csv')
    train_ids = train_df['Id'].tolist()
    test_ids = test_df['Id'].tolist()

    model = timm.create_model(model_name, pretrained=True, num_classes=0, global_pool='avg')
    model.eval().to('cpu')

    cfg = resolve_data_config({}, model=model)
    cfg['input_size'] = (3, img_size, img_size)
    transform = create_transform(**cfg, is_training=False)

    train_ds = ImageDataset(train_ids, 'train', transform)
    test_ds = ImageDataset(test_ids, 'test', transform)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=False)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=False)

    with torch.no_grad():
        sample = next(iter(train_loader))[:1]
        emb_dim = model(sample).shape[1]
    print(f"Embedding dim: {emb_dim}")

    def run_loader(loader, n_items):
        X = np.zeros((n_items, emb_dim), dtype=np.float32)
        i0 = 0
        with torch.no_grad():
            for i, xb in enumerate(loader):
                feats = model(xb).cpu().numpy().astype(np.float32)
                X[i0:i0+feats.shape[0]] = feats
                i0 += feats.shape[0]
                if (i+1) % 20 == 0:
                    print(f"  Batches {i+1}/{math.ceil(n_items/loader.batch_size)} | rows {i0}/{n_items} | elapsed {time.time()-t0:.1f}s", flush=True)
        return X

    X_tr = run_loader(train_loader, len(train_ds))
    X_te = run_loader(test_loader, len(test_ds))
    np.save(tr_out, X_tr); np.save(te_out, X_te)
    print(f"Saved {tr_out} {X_tr.shape}, {te_out} {X_te.shape} | time {time.time()-t0:.1f}s")
    del model, X_tr, X_te, train_loader, test_loader, train_ds, test_ds
    gc.collect()

def extract_openclip_vitb16(img_size=224, batch_size=128, num_workers=8):
    tag = f"openclip_vit_b16_{img_size}"
    tr_out = f"X_img_train_{tag}.npy"
    te_out = f"X_img_test_{tag}.npy"
    if os.path.exists(tr_out) and os.path.exists(te_out):
        print(f"[Skip] OpenCLIP ViT-B/16 exists: {tr_out}, {te_out}")
        return
    ensure_openclip()
    import open_clip
    from torchvision import transforms as T
    t0 = time.time()
    print(f"\n[Emb] OpenCLIP ViT-B/16 @ {img_size}", flush=True)
    model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained='laion2b_s34b_b88k', device='cpu')
    model.eval()
    preprocess = T.Compose([
        T.Resize(img_size, interpolation=T.InterpolationMode.BICUBIC),
        T.CenterCrop(img_size),
        T.ToTensor(),
        T.Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711)),
    ])

    class OpenClipDS(Dataset):
        def __init__(self, ids, folder):
            self.ids = ids; self.folder = folder
        def __len__(self): return len(self.ids)
        def __getitem__(self, idx):
            img = Image.open(os.path.join(self.folder, f"{self.ids[idx]}.jpg")).convert('RGB')
            return preprocess(img)

    train_df = pd.read_csv('train.csv'); test_df = pd.read_csv('test.csv')
    train_ids = train_df['Id'].tolist(); test_ids = test_df['Id'].tolist()
    train_loader = DataLoader(OpenClipDS(train_ids, 'train'), batch_size=batch_size, shuffle=False, num_workers=num_workers)
    test_loader = DataLoader(OpenClipDS(test_ids, 'test'), batch_size=batch_size, shuffle=False, num_workers=num_workers)

    with torch.no_grad():
        sample = next(iter(train_loader))[:1]
        emb_dim = model.encode_image(sample).shape[1]
    print(f"Embedding dim: {emb_dim}")

    def run_loader(loader, n_items):
        X = np.zeros((n_items, emb_dim), dtype=np.float32)
        i0 = 0
        with torch.no_grad():
            for i, xb in enumerate(loader):
                feats = model.encode_image(xb).float().cpu().numpy().astype(np.float32)
                X[i0:i0+feats.shape[0]] = feats
                i0 += feats.shape[0]
                if (i+1) % 20 == 0:
                    print(f"  Batches {i+1}/{math.ceil(n_items/loader.batch_size)} | rows {i0}/{n_items} | elapsed {time.time()-t0:.1f}s", flush=True)
        return X

    X_tr = run_loader(train_loader, len(train_ids))
    X_te = run_loader(test_loader, len(test_ids))
    np.save(tr_out, X_tr); np.save(te_out, X_te)
    print(f"Saved {tr_out} {X_tr.shape}, {te_out} {X_te.shape} | time {time.time()-t0:.1f}s")
    del model, X_tr, X_te, train_loader, test_loader
    gc.collect()

# Execute extractions
try:
    extract_openclip_vitb16(img_size=224, batch_size=128, num_workers=8)
except Exception as e:
    print(f"[Warn] OpenCLIP ViT-B/16 extraction failed: {e}")

try:
    extract_timm_embeddings('beit_base_patch16_224_in22k', img_size=224, batch_size=128, num_workers=8)
except Exception as e:
    print(f"[Warn] BEiT-Base extraction failed: {e}")

print('Extra embeddings extraction complete.')


[Emb] OpenCLIP ViT-B/16 @ 224


  Batches 20/70 | rows 2560/8920 | elapsed 127.7s


  Batches 40/70 | rows 5120/8920 | elapsed 248.6s


  Batches 60/70 | rows 7680/8920 | elapsed 370.1s


Saved X_img_train_openclip_vit_b16_224.npy (8920, 512), X_img_test_openclip_vit_b16_224.npy (992, 512) | time 477.3s



[Emb] beit_base_patch16_224_in22k @ 224


[Warn] BEiT-Base extraction failed: Unknown model (beit_base_patch16_224_in22k)
Extra embeddings extraction complete.
