**Prerequisites**
- A Git repository that contains this project, pushed somewhere the Colab runtime can clone it from (its URL goes in `REPO_URL` below).
- Hugging Face access token with permission to pull SAM3 weights.
- Kaggle API token (`kaggle.json`) if you want the notebook to download the fruits dataset automatically; otherwise, set `DOWNLOAD_WITH_KAGGLE = False`, mount Google Drive (`MOUNT_DRIVE = True`), and point `DRIVE_DATASET_PATH` to an existing copy of `data/fruits`.
- Colab runtime set to GPU (Runtime → Change runtime type → Hardware accelerator → GPU).

In [None]:
# Sanity check: list the NVIDIA GPU(s) visible to this runtime. The prerequisites
# call for a GPU runtime, so an error here means the runtime type needs changing.
!nvidia-smi

In [None]:
#@title Runtime configuration
# Settings read by the later cells. The trailing form annotations on each line
# expose the value as a Colab form field, so keep one assignment per line.

# Git URL handed to `git clone` by the clone cell.
REPO_URL = "https://github.com/<your-account>/readme.git"  #@param {type:"string"}
# Local checkout directory; the clone cell deletes it (if present) and re-clones into it.
PROJECT_DIR = "/content/readme"  #@param {type:"string"}
# When True, the mount cell attaches Google Drive at /content/drive.
MOUNT_DRIVE = False  #@param {type:"boolean"}
# Dataset directory copied into <PROJECT_DIR>/data/fruits when DOWNLOAD_WITH_KAGGLE is False.
DRIVE_DATASET_PATH = "/content/drive/MyDrive/datasets/fruits"  #@param {type:"string"}
# When True, the dataset cell downloads the fruits dataset with the Kaggle CLI.
DOWNLOAD_WITH_KAGGLE = True  #@param {type:"boolean"}
# Passed as --device to scripts/train.py and scripts/evaluate.py.
TRAIN_DEVICE = "cuda"  #@param ["cuda", "cpu"]
# Values > 0 are forwarded as --limit to scripts/train.py; 0 or less omits the flag.
CALIBRATION_LIMIT = 0  #@param {type:"integer"}
# Values > 0 are forwarded as --limit to scripts/evaluate.py; 0 or less omits the flag.
EVAL_LIMIT = 0  #@param {type:"integer"}
# Pipeline names passed as --pipeline; each one is trained and then evaluated, in order.
PIPELINES = ["p2", "p3"]
# NOTE(review): this only rejects an empty value; the "<your-account>" template
# above is a non-empty string and therefore passes the check.
assert REPO_URL, "Set REPO_URL to your Git repository URL."

In [None]:
#@title Enter authentication tokens (input hidden after execution)
import getpass

# Read the token without echoing it, then drop surrounding whitespace so a
# blank or whitespace-only entry counts as "no token".
HF_TOKEN = getpass.getpass("Hugging Face token (leave blank to skip): ")
HF_TOKEN = HF_TOKEN.strip()

# Report only whether something was entered, never the token itself.
print("Token captured?", HF_TOKEN != "")

In [None]:
#@title Optional: mount Google Drive
# Drive is attached only on request; the Colab-only import stays inside the
# branch that needs it so the cell also runs where google.colab is unavailable.
if not MOUNT_DRIVE:
    print('Skipping Google Drive mount.')
else:
    from google.colab import drive
    drive.mount('/content/drive')

In [None]:
#@title Clone (or re-clone) the repository
import pathlib
import shutil
project_path = pathlib.Path(PROJECT_DIR)
if project_path.exists():
    print(f'Removing existing directory: {project_path}')
    shutil.rmtree(project_path)
!git clone {REPO_URL} {PROJECT_DIR}
%cd {PROJECT_DIR}

In [None]:
#@title Install Python dependencies (includes SAM3 + Triton)
# All paths below are relative, so this cell relies on the working directory
# being the cloned project (the clone cell ends with %cd into it).

# Project-level requirements.
%pip install -q -r requirements.txt
# Editable install of the SAM3 sources under external/sam3, with the "train" extra.
# NOTE(review): if external/sam3 is a git submodule, the plain `git clone` used by
# the clone cell would leave it empty — confirm how this directory is populated.
%pip install -q -e external/sam3[train]
# Installed last so this exact Triton version wins over anything pulled in above.
# NOTE(review): presumably pinned to match the torch build SAM3 expects — confirm.
%pip install -q triton==2.3.0

In [None]:
#@title Authenticate with Hugging Face
# A blank token skips login entirely; huggingface_hub is only imported when it
# is actually going to be used.
if not HF_TOKEN:
    print('HF token not provided; assuming checkpoints already cached.')
else:
    from huggingface_hub import login
    login(token=HF_TOKEN, add_to_git_credential=False)

In [None]:
#@title Prepare the fruits dataset
import os
import zipfile
from pathlib import Path
import shutil
project_path = Path(PROJECT_DIR)
data_root = project_path / 'data' / 'fruits'
data_root.mkdir(parents=True, exist_ok=True)
if DOWNLOAD_WITH_KAGGLE:
    %pip install -q kaggle
    kaggle_dir = Path.home() / '.kaggle'
    kaggle_dir.mkdir(exist_ok=True)
    cred_path = kaggle_dir / 'kaggle.json'
    if not cred_path.exists():
        from google.colab import files
        print('Upload kaggle.json (Kaggle API token) when prompted.')
        uploaded = files.upload()
        cred_name = next(iter(uploaded))
        cred_path.write_bytes(uploaded[cred_name])
        !chmod 600 /root/.kaggle/kaggle.json
    !kaggle datasets download afsananadia/fruits-images-dataset-object-detection -p {data_root} -o
    zip_path = data_root / 'fruits-images-dataset-object-detection.zip'
    if zip_path.exists():
        with zipfile.ZipFile(zip_path, 'r') as archive:
            archive.extractall(data_root)
else:
    source_path = Path(DRIVE_DATASET_PATH)
    if source_path.exists():
        print(f'Copying dataset from {source_path} ...')
        if data_root.resolve() != source_path.resolve():
            shutil.copytree(source_path, data_root, dirs_exist_ok=True)
    else:
        print('Drive dataset path missing; ensure data/fruits is populated manually.')

In [None]:
#@title Build train/val/test splits (idempotent)
import pathlib
split_index = pathlib.Path(PROJECT_DIR) / 'data' / 'fruits' / 'splits.json'
if split_index.exists():
    print(f'Splits already exist at {split_index}')
else:
    !python scripts/split_data.py data/fruits --train-ratio 0.7 --val-ratio 0.15

In [None]:
#@title Train + evaluate pipelines
import os
import shlex
import subprocess

# A limit of 0 (or less) means "no limit": the --limit flag is left off entirely.
cal_limit = CALIBRATION_LIMIT if CALIBRATION_LIMIT > 0 else None
eval_limit = EVAL_LIMIT if EVAL_LIMIT > 0 else None

# Put the project root first on PYTHONPATH for the child processes, but keep
# whatever the runtime already had there instead of overwriting it.
env = os.environ.copy()
existing_pythonpath = env.get('PYTHONPATH')
if existing_pythonpath:
    env['PYTHONPATH'] = os.pathsep.join([PROJECT_DIR, existing_pythonpath])
else:
    env['PYTHONPATH'] = PROJECT_DIR


def run_cmd(cmd: str) -> None:
    """Echo ``cmd`` and run it from the project directory.

    Args:
        cmd: Command line to run. It is split into an argument list with
            ``shlex.split`` and executed without a shell.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status, which stops the remaining pipelines from running.
    """
    print(f'\n>>> {cmd}')
    subprocess.run(shlex.split(cmd), cwd=PROJECT_DIR, env=env, check=True)


# Each pipeline is trained first and then evaluated on the validation split.
for pipeline in PIPELINES:
    train_cmd = f'python scripts/train.py --pipeline {pipeline} --device {TRAIN_DEVICE}'
    if cal_limit:
        train_cmd += f' --limit {cal_limit}'
    run_cmd(train_cmd)
    eval_cmd = f'python scripts/evaluate.py --pipeline {pipeline} --device {TRAIN_DEVICE} --split val'
    if eval_limit:
        eval_cmd += f' --limit {eval_limit}'
    run_cmd(eval_cmd)

In [None]:
#@title Inspect generated result artifacts
from pathlib import Path

# Print every JSON file found anywhere under <PROJECT_DIR>/results, in sorted
# order, as a path relative to the project root.
project_root = Path(PROJECT_DIR)
results_dir = project_root / 'results'
if results_dir.exists():
    json_artifacts = sorted(results_dir.rglob('*.json'))
    for artifact in json_artifacts:
        print(artifact.relative_to(project_root))
else:
    print('No results directory yet.')