In [None]:
from pathlib import Path
import os
import glob


def is_repo_root(path: Path) -> bool:
    """Report whether ``path`` holds all three repository marker entries.

    A directory qualifies only when ``requirements.txt``, ``configs`` and
    ``trainers`` all exist directly beneath it. The entries are checked in
    that order and the check stops at the first one that is missing.
    """
    required_entries = ('requirements.txt', 'configs', 'trainers')
    return all((path / entry).exists() for entry in required_entries)


def find_repo_root(
    start: 'Path | None' = None,
    search_patterns: 'list[str] | None' = None,
) -> Path:
    """Locate the repository root directory.

    The lookup runs in two phases:

    1. ``start`` (the current working directory by default) and each of its
       parents are checked, nearest first. This handles running from inside
       a sub-directory such as ``notebooks/``.
    2. Each glob in ``search_patterns`` is expanded recursively and the
       parent directory of every match is checked, shallowest match first.

    Args:
        start: Directory to begin the upward walk from. Defaults to
            ``Path.cwd()``.
        search_patterns: Recursive glob patterns that match candidate
            ``requirements.txt`` files. Defaults to patterns rooted at
            ``/content``, ``/kaggle/working`` and ``/workspace``.

    Returns:
        The first directory for which ``is_repo_root`` returns true.

    Raises:
        FileNotFoundError: If neither phase finds a matching directory.
    """
    origin = Path.cwd() if start is None else Path(start).absolute()

    # 1) Starting directory and its parents (handles running inside notebooks/)
    for candidate in [origin, *origin.parents]:
        if is_repo_root(candidate):
            return candidate

    # 2) Common runtime roots
    if search_patterns is None:
        search_patterns = [
            '/content/**/requirements.txt',
            '/kaggle/working/**/requirements.txt',
            '/workspace/**/requirements.txt',
        ]
    for pattern in search_patterns:
        matches = glob.glob(pattern, recursive=True)
        # glob yields matches in arbitrary order. Sort by depth, then by
        # name, so the same root is picked on every run when more than one
        # copy of the repo sits under a search root.
        for req in sorted(matches, key=lambda m: (len(Path(m).parts), m)):
            candidate = Path(req).parent
            if is_repo_root(candidate):
                return candidate

    raise FileNotFoundError(
        'Repo root not found. Move to repository root, or clone/upload full repo '
        '(requirements.txt + configs + trainers).'
    )


# Resolve the repository root and make it the working directory so that the
# relative paths used below resolve against it.
repo_root = find_repo_root()
os.chdir(repo_root)

# Sanity report: where we ended up and whether the marker entries are visible.
print('repo_root:', repo_root)
print('cwd:', Path.cwd())
print(
    *(
        f'{label}: {Path(entry).exists()}'
        for label, entry in (
            ('has_requirements', 'requirements.txt'),
            ('has_configs', 'configs'),
            ('has_trainers', 'trainers'),
        )
    ),
    sep='\n',
)
# Only the first 20 names (sorted) are shown to keep the output short.
print('top_entries:', sorted(p.name for p in Path.cwd().iterdir())[:20])


In [None]:
from pathlib import Path
import subprocess
import sys
import shlex


def run(cmd):
    """Run ``cmd`` as a subprocess, echoing the command and its output.

    The command line is printed first (shell-quoted, prefixed with ``$``).
    Output is captured and printed after the process has exited: stdout
    first, then stderr. stderr is printed on success as well as on failure
    so that warnings from a command that exits 0 are not lost.

    Args:
        cmd: Sequence of argument strings, passed to ``subprocess.run``
            without a shell.

    Raises:
        RuntimeError: If the process exits with a non-zero return code.
    """
    print('$', ' '.join(shlex.quote(x) for x in cmd))
    proc = subprocess.run(cmd, text=True, capture_output=True)
    if proc.stdout:
        print(proc.stdout)
    # Surface stderr unconditionally; previously it was dropped on exit 0.
    if proc.stderr:
        print(proc.stderr)
    if proc.returncode != 0:
        raise RuntimeError(f"Command failed ({proc.returncode}): {' '.join(cmd)}")


# The install step below refers to requirements.txt relative to the working
# directory, so stop early with a pointer to the fix when it is not there.
if not Path('requirements.txt').exists():
    raise FileNotFoundError(
        f"requirements.txt not found in cwd={Path.cwd()}. Run cell 1 first or fix repo path."
    )

# Upgrade pip itself first, then install the project's requirements, both
# through the interpreter that is running this notebook.
for pip_args in (('-U', 'pip'), ('-r', 'requirements.txt')):
    run([sys.executable, '-m', 'pip', 'install', *pip_args])


In [None]:
import torch

# Report the installed torch version and, when CUDA is usable, how many
# devices are visible and the name of the first one.
print('torch:', torch.__version__)
cuda_ready = torch.cuda.is_available()
print('cuda_available:', cuda_ready)
if cuda_ready:
    print('cuda_device_count:', torch.cuda.device_count())
    print('cuda_device_0:', torch.cuda.get_device_name(0))


In [None]:
import subprocess
import sys

# Run the trainers.train_swinmae_ssl module in a child interpreter with the
# SwinMAE SSL config. Output is not captured by this call, so the child
# inherits this process's standard streams.
cmd = [
    sys.executable,
    '-m', 'trainers.train_swinmae_ssl',
    '--config', 'configs/swinmae_ssl.yaml',
]
print(f"$ {' '.join(cmd)}")
result = subprocess.run(cmd)
# Any non-zero exit code is treated as a failed training run.
if result.returncode:
    raise RuntimeError(f"Training failed with exit code {result.returncode}")


In [None]:
from pathlib import Path

# Verify that the expected checkpoint file is present and report its size.
# NOTE(review): presumably this is the file the training command writes;
# confirm the path against the training config.
checkpoint_path = Path('checkpoints') / 'swinmae_ssl.pt'
checkpoint_found = checkpoint_path.exists()
print('checkpoint_exists:', checkpoint_found, checkpoint_path)
assert checkpoint_found, f'Missing checkpoint: {checkpoint_path}'
print('checkpoint_size_bytes:', checkpoint_path.stat().st_size)
