In [2]:
# Export assets for Kaggle GPU pivot: package cache768 and small metadata bundle
import os, sys, tarfile, hashlib, math, time, shutil, json
from pathlib import Path

# The current working directory is used as the project root; all paths below hang off it.
BASE = Path('.').resolve()
# Directory that is packaged into the tar archive.
CACHE_DIR = BASE / 'cache768'
# Destination for every artifact this cell generates (tar parts, manifest, small bundle).
OUT_DIR = BASE / 'kaggle_export'
# Create the output directory up front; harmless if it already exists.
OUT_DIR.mkdir(parents=True, exist_ok=True)

def dir_size_bytes(path: Path) -> int:
    """Return the combined size in bytes of all regular files under ``path``.

    The walk is recursive. Entries that cannot be inspected (for example a
    file removed while the walk is in progress, or one that is not readable)
    are skipped, so the result is a best-effort total rather than a guarantee.

    Args:
        path: Directory to measure.

    Returns:
        Sum of ``st_size`` over every file found below ``path``.
    """
    total = 0
    for entry in path.rglob('*'):
        try:
            if entry.is_file():
                total += entry.stat().st_size
        except OSError:
            # Deliberate best-effort: only filesystem errors are ignored, so
            # unrelated failures are no longer silently swallowed.
            continue
    return total

def human(n):
    """Format a byte count as a two-decimal string with a B/KB/MB/GB/TB suffix.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached; values beyond that are still reported in TB.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    last = len(units) - 1
    value = n
    step = 0
    # `not value < 1024` (rather than `value >= 1024`) keeps the comparison
    # exactly as the unit-by-unit check it replaces.
    while step < last and not value < 1024:
        value /= 1024
        step += 1
    return f"{value:.2f} {units[step]}"

def sha256_file(p: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``p``.

    The file is read in 1 MiB blocks so large files never have to fit in
    memory at once.
    """
    block_size = 1024 * 1024
    digest = hashlib.sha256()
    with p.open('rb') as stream:
        while True:
            block = stream.read(block_size)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()

print('== Kaggle Export Start ==')
print('CWD:', BASE)
assert CACHE_DIR.exists(), f'Missing {CACHE_DIR}'

# 1) Report sizes
size_cache = dir_size_bytes(CACHE_DIR)
print('cache768 size:', human(size_cache))

# 2) Create a tar (uncompressed) to split safely in Kaggle UI
tar_path = OUT_DIR / 'cache768.tar'
if not tar_path.exists():
    t0 = time.time()
    # Build the archive under a temporary name and rename it only once it is
    # complete. Writing directly to tar_path would leave a truncated file behind
    # if the run is interrupted, and the next run would take the "Exists" branch
    # and split that broken archive.
    tmp_tar_path = OUT_DIR / 'cache768.tar.tmp'
    try:
        with tarfile.open(tmp_tar_path, mode='w') as tar:
            tar.add(CACHE_DIR, arcname='cache768')
        tmp_tar_path.replace(tar_path)
    finally:
        # No-op after a successful rename; removes the partial file on failure.
        tmp_tar_path.unlink(missing_ok=True)
    print('Created', tar_path, 'size:', human(tar_path.stat().st_size), 'in', f'{time.time()-t0:.1f}s')
else:
    # An existing tar is reused as-is; it is not checked against the current
    # contents of CACHE_DIR.
    print('Exists:', tar_path, 'size:', human(tar_path.stat().st_size))

# 3) Split into ~1.9GB parts
PART_BYTES = 1_900_000_000  # ~1.9 GB per part
COPY_BUF_BYTES = 16 * 1024 * 1024  # stream in 16 MiB buffers instead of loading a whole part into RAM
parts = []

# Remove parts left over from an earlier run. If that run produced more parts
# than this one, the extras would otherwise survive and be appended by the
# documented reassembly command (`cat cache768.tar.part*`), corrupting the tar.
for stale_part in sorted(OUT_DIR.glob('cache768.tar.part*')):
    stale_part.unlink()

with tar_path.open('rb') as fin:
    idx = 0
    while True:
        # Read the first buffer before creating the part file so that no empty
        # trailing part is written when the tar size is a multiple of PART_BYTES.
        buf = fin.read(min(COPY_BUF_BYTES, PART_BYTES))
        if not buf:
            break
        part_path = OUT_DIR / f'cache768.tar.part{idx:02d}'
        with part_path.open('wb') as fout:
            remaining = PART_BYTES
            while buf:
                fout.write(buf)
                remaining -= len(buf)
                if remaining <= 0:
                    break
                # Never read past the part boundary.
                buf = fin.read(min(COPY_BUF_BYTES, remaining))
        parts.append(part_path)
        print('Wrote', part_path, 'size:', human(part_path.stat().st_size))
        idx += 1

# Optional: remove original tar to save space after splitting.
# The tar is the only complete copy of the archive, so it is deleted only when
# the parts on disk add up to exactly its size; mere existence of the part
# files does not show that the split finished correctly.
try:
    tar_bytes = tar_path.stat().st_size
    parts_bytes = sum(p.stat().st_size for p in parts)
    if not parts:
        print('Kept original tar: no parts were written')
    elif parts_bytes != tar_bytes:
        print('Kept original tar: parts total', human(parts_bytes), 'but tar is', human(tar_bytes))
    else:
        tar_path.unlink(missing_ok=True)
        print('Removed original tar to save space (', human(tar_bytes), ')')
except OSError as e:
    # Covers a missing part/tar as well as a failed delete; the export continues.
    print('Could not remove tar:', e)

# 4) Compute checksums manifest
# Timestamp is taken before hashing starts, then one record is built per part.
created_at = time.strftime('%Y-%m-%d %H:%M:%S')
part_records = []
for part in parts:
    checksum = sha256_file(part)
    part_records.append({
        'file': part.name,
        'bytes': part.stat().st_size,
        'sha256': checksum,
    })
manifest = {'created_at': created_at, 'parts': part_records}

# Persist the manifest next to the parts as pretty-printed JSON.
man_path = OUT_DIR / 'cache768_parts_manifest.json'
man_path.write_text(json.dumps(manifest, indent=2))
print('Wrote manifest:', man_path)

# 5) Small metadata bundle (upload as a separate small dataset or alongside parts)
# Files copied from BASE into the bundle when present; absent ones are reported and skipped.
small_files = [
    'folds.csv',
    'next24h_plan.ipynb',
    'competition_best_practices.md',
    'requirements.txt',
    'kaggle_train_tfefnv2l_768.ipynb',
    'kaggle_gpu_pivot_checklist.ipynb'
]
SMALL_OUT = OUT_DIR / 'aptos_kaggle_small_bundle'
SMALL_OUT.mkdir(parents=True, exist_ok=True)
# (file name, file contents) of the instructions shipped inside the bundle.
readme = (
    'README.txt',
    'Kaggle GPU Pivot Instructions\n'
    '- Upload all cache768.tar.part** files together as one Kaggle Dataset (aptos-cache768).\n'
    '- In Kaggle Notebook, reassemble: cat cache768.tar.part* > cache768.tar; then: tar -xf cache768.tar -C /kaggle/working\n'
    '- CACHE_DIR = "/kaggle/working/cache768"\n'
    '- Add competition data as input; ensure torch.cuda.is_available() is True.\n'
    '- Open kaggle_train_tfefnv2l_768.ipynb and run: run_all_folds(); then build submissions (CDF5 and thresholds).\n'
    '- See kaggle_gpu_pivot_checklist.ipynb for step-by-step run/submit sequence.\n'
)
# Explicit encoding so the file does not depend on the platform default.
(SMALL_OUT / readme[0]).write_text(readme[1], encoding='utf-8')
for fn in small_files:
    p = BASE / fn
    if p.exists():
        shutil.copy2(p, SMALL_OUT / p.name)
        print('Added to small bundle:', p.name)
    else:
        print('Missing (skipped):', p)

# Zip small bundle
# Archive the bundle directory itself (root_dir=OUT_DIR, base_dir=<bundle name>) so
# entries are stored as 'aptos_kaggle_small_bundle/<file>'. This matches the layout
# written by the later "rebuild zip" cell, which targets the same zip path; before,
# the internal layout of the zip depended on which of the two cells ran last.
small_zip = shutil.make_archive(
    str(OUT_DIR / SMALL_OUT.name),
    'zip',
    root_dir=OUT_DIR,
    base_dir=SMALL_OUT.name,
)
print('Wrote small bundle zip:', small_zip)

print('== Export complete ==')

print('== Export complete ==')

== Kaggle Export Start ==
CWD: /app/agent_run_states/aptos2019-blindness-detection-spray-20250912-181204
cache768 size: 4.01 GB


Created /app/agent_run_states/aptos2019-blindness-detection-spray-20250912-181204/kaggle_export/cache768.tar size: 4.02 GB in 4.8s


Wrote /app/agent_run_states/aptos2019-blindness-detection-spray-20250912-181204/kaggle_export/cache768.tar.part00 size: 1.77 GB


Wrote /app/agent_run_states/aptos2019-blindness-detection-spray-20250912-181204/kaggle_export/cache768.tar.part01 size: 1.77 GB


Wrote /app/agent_run_states/aptos2019-blindness-detection-spray-20250912-181204/kaggle_export/cache768.tar.part02 size: 492.07 MB


Removed original tar to save space ( 4.02 GB )


Wrote manifest: /app/agent_run_states/aptos2019-blindness-detection-spray-20250912-181204/kaggle_export/cache768_parts_manifest.json
Added to small bundle: folds.csv
Added to small bundle: next24h_plan.ipynb
Missing (skipped): /app/agent_run_states/aptos2019-blindness-detection-spray-20250912-181204/competition_best_practices.md
Added to small bundle: requirements.txt
Added to small bundle: kaggle_train_tfefnv2l_768.ipynb
Added to small bundle: kaggle_gpu_pivot_checklist.ipynb
Wrote small bundle zip: /app/agent_run_states/aptos2019-blindness-detection-spray-20250912-181204/kaggle_export/aptos_kaggle_small_bundle.zip
== Export complete ==


In [3]:
# Sync latest Kaggle notebooks into small bundle and rebuild zip
import os, shutil, zipfile
from pathlib import Path

# Resolve the project root and make sure the bundle directory is present.
root = Path('.').resolve()
bundle_dir = root / 'kaggle_export' / 'aptos_kaggle_small_bundle'
bundle_dir.mkdir(parents=True, exist_ok=True)

# Sources that must be refreshed inside the bundle.
src_train_nb = root / 'kaggle_train_tfefnv2l_768.ipynb'
src_check_nb = root / 'kaggle_gpu_pivot_checklist.ipynb'
src_folds    = root / 'folds.csv'
sync_sources = [src_train_nb, src_check_nb, src_folds]

# Validate every source first so nothing is copied if any of them is absent.
for src in sync_sources:
    assert src.exists(), f'Missing {src}'

# Copy updated notebooks and folds.csv into bundle
for src in sync_sources:
    shutil.copy2(src, bundle_dir / src.name)
print('Synced files into bundle:', [src.name for src in sync_sources])

# Rebuild zip
zip_path = root / 'kaggle_export' / 'aptos_kaggle_small_bundle.zip'
if zip_path.exists():
    zip_path.unlink()

# Only these bundle entries go into the archive, each stored under the
# 'aptos_kaggle_small_bundle/' prefix; entries absent from the bundle are reported.
bundle_members = (
    'README.txt',
    'folds.csv',
    'kaggle_train_tfefnv2l_768.ipynb',
    'kaggle_gpu_pivot_checklist.ipynb',
    'requirements.txt',
)
with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
    for member in bundle_members:
        member_path = bundle_dir / member
        if member_path.exists():
            archive.write(member_path, arcname=f'aptos_kaggle_small_bundle/{member}')
        else:
            print('Warning: missing in bundle:', member_path)
size_mb = round(zip_path.stat().st_size/1024**2, 2)
print('Rebuilt:', zip_path, 'size:', size_mb, 'MB')

Synced files into bundle: ['kaggle_train_tfefnv2l_768.ipynb', 'kaggle_gpu_pivot_checklist.ipynb', 'folds.csv']
Rebuilt: /app/agent_run_states/aptos2019-blindness-detection-spray-20250912-181204/kaggle_export/aptos_kaggle_small_bundle.zip size: 0.05 MB
