In [None]:
# Kaggle GPU pivot: reassemble cache768, set paths, verify GPU
import os, sys, glob, subprocess, shutil, time
from pathlib import Path

IN_BASE = Path('/kaggle/input')
WK = Path('/kaggle/working')
WK.mkdir(parents=True, exist_ok=True)

# 1) Locate cache768.tar.part** files from added Dataset
_PART_PREFIX = 'cache768.tar.part'


def _part_order(p):
    """Sort key for a part file: directory first, then numeric-aware suffix.

    A plain lexical sort would put ``part10`` before ``part2`` when the
    suffixes are not zero-padded, which would corrupt the reassembled tar.
    Alphabetic suffixes (e.g. ``aa``, ``ab``) keep their lexical order.
    """
    suffix = p.name[len(_PART_PREFIX):]
    if suffix.isdecimal():
        return (str(p.parent), 0, int(suffix), suffix)
    return (str(p.parent), 1, 0, suffix)


part_paths = sorted(
    (p for p in IN_BASE.rglob(_PART_PREFIX + '*') if p.is_file()),
    key=_part_order,
)
assert len(part_paths) >= 1, 'Add the aptos-cache768 dataset with cache768.tar.part** files to this notebook (Add Data).'
print('Found parts:', len(part_paths))
for p in part_paths[:5]:
    print('  ', p)

# 2) Reassemble into /kaggle/working/cache768.tar
tar_path = WK / 'cache768.tar'
if not tar_path.exists():
    # Write to a temporary name and rename once complete, so an interrupted
    # run never leaves a truncated cache768.tar that a re-run would accept.
    tmp_tar_path = tar_path.with_name(tar_path.name + '.tmp')
    with open(tmp_tar_path, 'wb') as fout:
        for p in part_paths:
            with open(p, 'rb') as fin:
                shutil.copyfileobj(fin, fout)
    os.replace(tmp_tar_path, tar_path)
    print('Wrote', tar_path, 'size:', round(tar_path.stat().st_size / 1024**3, 2), 'GB')
else:
    print('Exists:', tar_path)

# 3) Extract tar into /kaggle/working
cache_dir = WK / 'cache768'
if not cache_dir.exists():
    import tarfile
    t0 = time.time()
    try:
        with tarfile.open(tar_path, mode='r') as tar:
            tar.extractall(path=WK)
    except BaseException:
        # cache_dir did not exist before this attempt, so anything there now is
        # a partial extraction; remove it so the next run extracts again.
        shutil.rmtree(cache_dir, ignore_errors=True)
        raise
    print('Extracted to', cache_dir, 'in', f'{time.time()-t0:.1f}s')
else:
    print('Cache dir already exists:', cache_dir)

assert cache_dir.exists(), 'cache768 directory missing after extract'
print('CACHE_DIR ready:', cache_dir)

# 4) Verify GPU availability
try:
    import torch
    print('torch:', torch.__version__, 'cuda:', getattr(torch.version, 'cuda', None))
    assert torch.cuda.is_available(), 'GPU not available. Enable GPU accelerator in Notebook Settings.'
    print('CUDA device count:', torch.cuda.device_count())
    print('GPU name:', torch.cuda.get_device_name(0))
except Exception as e:
    # Any failure here (missing torch, no GPU, driver error) stops the cell
    # with a single readable message; the original error stays chained.
    raise SystemExit(f'GPU check failed: {e}') from e

# 5) Paths for training
CACHE_DIR = str(cache_dir)
OUTPUT_DIR = str(WK)
print('Set CACHE_DIR=', CACHE_DIR)
print('Set OUTPUT_DIR=', OUTPUT_DIR)

# Next steps:
# - Add the competition dataset in Add Data (APTOS 2019), ensure /kaggle/input/aptos2019-blindness-detection has train/test csv if needed.
# - Copy/paste training pipeline from next24h_plan.ipynb: tf_efficientnetv2_l @768px, AMP on, batch_size 8-12.
# - Save OOF/test preds to /kaggle/working and build submission with CDF5 logic if desired.

# Kaggle GPU Pivot Checklist

1) Notebook settings
- Accelerator: GPU (T4/P100).
- Internet: Off.
- High-RAM: On (if available).

2) Add Data
- Competition: APTOS 2019 Blindness Detection.
- Dataset: aptos-cache768 (the one you uploaded with cache768.tar.part**).

3) Reassemble cache
- Run the first code cell in this notebook to locate parts, reassemble, and extract to /kaggle/working/cache768.
- Confirm the GPU check passes (the cell asserts `torch.cuda.is_available()`) and that the GPU name is printed.

4) Paths
- Set `CACHE_DIR = "/kaggle/working/cache768"`.
- Set `OUTPUT_DIR = "/kaggle/working"`.

5) Train (tf_efficientnetv2_l @768px)
- Library: timm + PyTorch AMP.
- Batch size: 8–12 (reduce if OOM).
- num_workers: 4–6, pin_memory=True, persistent_workers=True.
- Optim: AdamW, wd≈1e-5, cosine with warmup.
- Loss heads: train both regression (SmoothL1/Huber) and ordinal (cumulative BCE).
- Folds: Stratified 5-fold, 8–15 epochs; consider 2 seeds.
- Progressive resize optional: 640 → 768 (lower LR for the upsize).

6) Inference
- TTA: 4–8 (flips/rotations).
- Save OOF and test EVs per model/fold to /kaggle/working.
- Calibrate per model with fold-aware isotonic on OOF, apply to test.
- Blend EVs (simple average or weight by OOF QWK).

7) Submission
- Use your CDF5 postprocessing on test:
  - Base: 0.7*iso + 0.3*spline.
  - CDF-align alpha: 0.85 (map test to OOF quantiles).
  - Add 0.01 * rank nudge.
  - Tie-breaker: rank-average z-score of [l2xgb_te_ev, test_reg_preds, test_ev_b5_ordinal] (use what’s available).
  - Counts (V5): [178, 47, 86, 44, 12] (auto-adjusted so they sum to the test-set size M; the class-4 count is clipped to 10–15).
- Write submission.csv and submit from the notebook.

8) If OOM or slow
- Lower batch size first; keep AMP on.
- Use gradient accumulation (e.g., accum=2).
- Reduce TTA to 4.

9) Targets
- Aim for OOF QWK > 0.92 before submitting to the leaderboard (LB).
- If borderline on LB, add 1–2 more epochs at 768px or an additional strong backbone (e.g., resnet200d or seresnext101_32x8d) and re-blend.

10) Repro tips
- Save checkpoints and logs to /kaggle/working.
- Print fold times and progress.
- Verify submission.csv head/tail and class counts before submitting.

In [None]:
# CDF5 post-processing utility (alpha=0.85, V5 counts) for use after training on Kaggle
import numpy as np, pandas as pd
from pathlib import Path

def _fit_counts_to_total(target_counts, total):
    """Adjust five per-class counts so that they sum to ``total`` when possible.

    Class 4 is clipped to the range 10..15 and is never adjusted afterwards;
    classes 0-3 are raised to at least 1. Any surplus or deficit is then spread
    one unit at a time over classes 2, 0, 3, 1 (in that repeating order).
    When shrinking, no class drops below 1, so for very small ``total`` the
    result may still sum to more than ``total``.
    """
    counts = np.array(target_counts, dtype=int)
    counts[4] = int(min(max(counts[4], 10), 15))
    counts[:4] = np.maximum(counts[:4], 1)

    prio = (2, 0, 3, 1)
    diff = int(counts.sum() - total)
    if diff < 0:
        # Round-robin growth in closed form: every class gets `base`, and the
        # first `extra` classes in priority order get one more.
        base, extra = divmod(-diff, len(prio))
        for k, j in enumerate(prio):
            counts[j] += base + (1 if k < extra else 0)
    elif diff > 0:
        # Only remove what can actually be removed, so the loop always ends.
        to_remove = min(diff, int(sum(counts[j] - 1 for j in prio)))
        i = 0
        while to_remove > 0:
            j = prio[i % len(prio)]
            if counts[j] > 1:
                counts[j] -= 1
                to_remove -= 1
            i += 1
    return counts


def cdf5_build_submission(oof_ev_path, te_ev_path, ids_csv='test.csv', out_csv='submission.csv',
                          tie_paths=('l2xgb_te_ev.npy','test_reg_preds.npy','test_ev_b5_ordinal.npy'),
                          target_counts=(178,47,86,44,12), alpha=0.85):
    """Build a 5-class submission by assigning fixed class counts along a score order.

    Args:
        oof_ev_path: Path to a ``.npy`` array of out-of-fold scores (flattened on load).
        te_ev_path: Path to a ``.npy`` array of test scores, one per row of ``ids_csv``.
        ids_csv: CSV file with an ``id_code`` column defining the test rows.
        out_csv: Destination path of the written submission CSV.
        tie_paths: Optional ``.npy`` arrays used as a secondary sort key. Files
            that are missing or whose length differs from the test set are skipped.
        target_counts: Desired number of rows for classes 0..4.
        alpha: Weight of the OOF-quantile-mapped score versus the raw test score.

    Returns:
        The submission ``DataFrame`` with columns ``id_code`` and ``diagnosis``.

    Raises:
        AssertionError: If an input array is missing or the test length does not
            match the number of ids.
    """
    assert Path(oof_ev_path).exists() and Path(te_ev_path).exists(), 'Missing EV arrays'
    oof_ev = np.load(oof_ev_path).astype('float64').ravel()
    te_ev = np.load(te_ev_path).astype('float64').ravel()
    ids = pd.read_csv(ids_csv)['id_code'].values
    M = len(ids)
    assert te_ev.shape[0] == M, f'test len mismatch: {te_ev.shape[0]} vs {M}'

    # CDF alignment: map each test score's rank (scaled to 0..1) to the OOF
    # quantile at that level, then blend with the raw test score.
    ranks = te_ev.argsort().argsort() / max(1, len(te_ev) - 1)
    # Linear interpolation is np.quantile's default; the keyword is omitted so
    # the call also works on NumPy releases that predate `method=`.
    ref_q = np.quantile(oof_ev, ranks)
    s = (alpha * ref_q + (1.0 - alpha) * te_ev).astype('float64')
    # small monotonic rank nudge
    s = s + 0.01 * ranks

    # Tie-breaker: mean of the z-scored arrays that exist and match the test length.
    arrs = []
    for p in tie_paths:
        if Path(p).exists():
            a = np.load(p).astype('float64').ravel()
            if a.shape[0] == M:
                mu = float(a.mean()); sd = float(a.std() + 1e-9)
                arrs.append((a - mu)/sd)
    # With fewer than two usable arrays the raw test score is the secondary key.
    tie = np.mean(np.stack(arrs, 1), 1) if len(arrs) >= 2 else te_ev

    # Counts adjustment with guard on class 4
    tgt = _fit_counts_to_total(target_counts, M)

    # Assign classes along the sort order: `s` is the primary key, `tie` the secondary.
    order = np.lexsort((tie, s))
    edges = np.concatenate(([0], np.cumsum(tgt[:4])))
    # Everything past the first four segments is class 4.
    cls = np.full(M, 4, dtype=np.int64)
    for k in range(4):
        cls[order[edges[k]:edges[k + 1]]] = k

    sub = pd.DataFrame({'id_code': ids, 'diagnosis': cls})
    sub.to_csv(out_csv, index=False)
    print('Wrote', out_csv, 'counts:', sub['diagnosis'].value_counts().sort_index().to_dict())
    return sub

# Example usage on Kaggle after saving EV arrays:
# cdf5_build_submission('/kaggle/working/oof_ev_tfefnv2l_768.npy',
#                       '/kaggle/working/test_ev_tfefnv2l_768.npy',
#                       ids_csv='/kaggle/input/aptos2019-blindness-detection/test.csv',
#                       out_csv='/kaggle/working/submission.csv')

In [None]:
# Quick visual sanity-check: verify circle-cropped cache images (train/test)
import os, random, glob, cv2
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

# Working directory used by this notebook.
WK = Path('/kaggle/working')
# Image cache root; the helpers below read PNGs from CACHE_DIR/<split>/.
CACHE_DIR = WK / 'cache768'
# Stop early with a clear hint if the cache directory has not been created yet.
assert CACHE_DIR.exists(), 'Run reassembly first to create /kaggle/working/cache768'

def show_samples(split='train', n=8):
    """Plot up to ``n`` randomly chosen PNGs from ``CACHE_DIR/<split>`` in a 4-column grid.

    Each tile is titled with the first 12 characters of the file name; a file
    that OpenCV cannot read is shown as an empty tile labelled 'read error'.
    Raises AssertionError when the split folder contains no PNG files.
    """
    paths = sorted(glob.glob(str(CACHE_DIR / split / '*.png')))
    assert len(paths) > 0, f'No images found in {CACHE_DIR/split}'

    chosen = random.sample(paths, min(n, len(paths)))
    n_cols = 4
    n_rows = int(np.ceil(len(chosen) / n_cols))
    plt.figure(figsize=(3*n_cols, 3*n_rows))

    for tile_no, img_path in enumerate(chosen, start=1):
        bgr = cv2.imread(img_path, cv2.IMREAD_COLOR)
        ax = plt.subplot(n_rows, n_cols, tile_no)
        ax.axis('off')
        ax.set_title(Path(img_path).name[:12])
        if bgr is None:
            ax.text(0.5, 0.5, 'read error', ha='center', va='center')
            continue
        # OpenCV loads BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))

    plt.tight_layout()
    plt.show()

def border_black_ratio(img, border_px=16, thr=10):
    """Return the fraction of near-black pixels in the outer frame of an image.

    Args:
        img: 2-D grayscale array, or 3-D array that is converted with
            ``cv2.COLOR_RGB2GRAY`` first.
        border_px: Width of the frame in pixels. A value of 0 or less describes
            an empty frame, for which 0.0 is returned.
        thr: A pixel counts as near-black when its gray value is below ``thr``.

    Returns:
        A float in [0, 1]; 0.0 when the frame contains no pixels.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if img.ndim == 3 else img

    # Guard the zero-width case explicitly: the slice `[-0:]` means "everything",
    # so without this check border_px=0 would measure the whole image.
    if border_px <= 0:
        return 0.0

    frame = np.zeros(gray.shape, dtype=bool)
    frame[:border_px, :] = True
    frame[-border_px:, :] = True
    frame[:, :border_px] = True
    frame[:, -border_px:] = True

    border = gray[frame]
    return float((border < thr).mean()) if border.size else 0.0

def audit_black_borders(split='train', k=32, border_px=16, thr=10):
    """Print and return border-black ratios for a random sample of cached images.

    Args:
        split: Sub-folder of ``CACHE_DIR`` whose ``*.png`` files are sampled.
        k: Maximum number of images to sample.
        border_px: Frame width passed to ``border_black_ratio``.
        thr: Near-black threshold passed to ``border_black_ratio``.

    Returns:
        A NumPy array with one ratio per image that could be read. When no image
        could be read the array is ``[0.0]`` so that mean/max stay defined.
    """
    paths = sorted(glob.glob(str(CACHE_DIR / split / '*.png')))
    sel = random.sample(paths, min(k, len(paths)))
    ratios = []
    for p in sel:
        img = cv2.imread(p, cv2.IMREAD_COLOR)
        if img is None:
            # Unreadable file: skip it and leave it out of the statistics.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        ratios.append(border_black_ratio(img, border_px=border_px, thr=thr))
    # Count before substituting the placeholder, so the report does not claim
    # one sample when nothing was actually measured.
    n_measured = len(ratios)
    ratios = np.array(ratios) if ratios else np.array([0.0])
    print(f'{split}: mean border-black ratio={ratios.mean():.4f}, max={ratios.max():.4f}, samples={n_measured}')
    return ratios

# Visual check: plot 8 random cached images from each split.
print('Showing random train samples...')
show_samples('train', n=8)
print('Showing random test samples...')
show_samples('test', n=8)
# Numeric check: sample 48 images per split and report how much of a
# 16-pixel outer frame is near-black (gray value below 10).
print('Auditing border black ratios (lower is better; expect small if circle-cropped)')
audit_black_borders('train', k=48, border_px=16, thr=10)
audit_black_borders('test', k=48, border_px=16, thr=10)

# Quick run sequence (after reassembly)

1) Open your training notebook (kaggle_train_tfefnv2l_768.ipynb) in the same Kaggle session, add the same Datasets, and run:
- Verify GPU in Cell 1 and confirm CACHE_DIR=/kaggle/working/cache768
- Then execute:
```python
fold_scores, oof_all, y_all, te_mean = run_all_folds()
```

2) Build submission (CDF5, alpha=0.85, auto-adjust counts to test size, rankavg-z tie-break):
```python
build_and_save_submission_from_artifacts(
    OUTPUT_DIR,
    alpha=0.85,
    target_counts=(178,47,86,44,12),
    tie_break='rankavgz',
    out_name='submission.csv'
)
```

3) If the LB score is noticeably lower than the CV score, re-run step 2 with alpha=0.80:
```python
build_and_save_submission_from_artifacts(OUTPUT_DIR, alpha=0.80, target_counts=(178,47,86,44,12), tie_break='rankavgz')
```

Tips
- If DataLoader stalls, set persistent_workers=False and/or prefetch_factor=1.
- If OOM, set batch_size=6 and grad_accum=3 in CFG, then re-run the fold.
- Ensure competition dataset slug is present: /kaggle/input/aptos2019-blindness-detection (or aptos-2019-blindness-detection).
- Optional: add a timm-pretrained-models dataset and set hub dir (already guarded in the training notebook).

## Optional: Threshold-based submission variant

- After training in `kaggle_train_tfefnv2l_768.ipynb`, also build a threshold-optimized submission based on OOF EV.
- In that notebook, run:
```python
sub_thr, edges, oof_qwk_thr = build_and_save_submission_thresholds(OUTPUT_DIR, out_name='submission_thr.csv')
```
- Submit both:
  - CDF5: `submission.csv` with alpha in [0.80, 0.85] (start 0.85).
  - Thresholds: `submission_thr.csv`.
- Pick the better LB score. If LB underperforms OOF by >0.01–0.015, rebuild CDF5 with alpha=0.80.