# BraTS Data Ingestion

Ingests BraTS sample data into a RadiObject. Run this **once** before notebooks 01-04.

- Check whether a RadiObject already exists at the target URI (ingestion is skipped if so)
- Load the 4D NIfTIs and split each one into a separate 3D volume per modality
- Create the RadiObject with subject-level metadata

**Configuration:** Edit `config.py` to change the target URI (S3 or local path).

In [1]:
import json
import shutil
import tempfile
from pathlib import Path

import nibabel as nib
import numpy as np
import pandas as pd

from radiobject.ctx import (
    CompressionConfig,
    Compressor,
    S3Config,
    SliceOrientation,
    TileConfig,
    configure,
)
from radiobject.data import S3_REGION, get_brats_nifti_path, get_brats_uri
from radiobject.radi_object import RadiObject

# Resolve the destination URI once; every later cell reads BRATS_URI.
# The value is a string (it is tested with .startswith("s3://") below).
BRATS_URI = get_brats_uri()
print(f"Target URI: {BRATS_URI}")

Target URI: /Users/samueldsouza/Desktop/Code/RadiObject/data/brats_radiobject


In [2]:
# When the target is an S3 URI, register the S3 region first.
targets_s3 = BRATS_URI.startswith("s3://")
if targets_s3:
    s3_settings = S3Config(region=S3_REGION)
    configure(s3=s3_settings)
    print(f"S3 configured for region: {S3_REGION}")

# TileDB storage settings
# ===========================================================================
# Pick the TILE ORIENTATION that matches your PRIMARY access pattern:
#
#   AXIAL      -> slice-by-slice viewing (radiologist review)
#   SAGITTAL   -> sagittal plane analysis
#   CORONAL    -> coronal plane analysis
#   ISOTROPIC  -> 3D ROI extraction (ML training, tumor crops)
#
# The orientation is IMMUTABLE once data has been ingested. AXIAL suits most
# neuroimaging viewing workflows; ML training pipelines that pull 3D patches
# should consider ISOTROPIC instead.
# ===========================================================================
tile_settings = TileConfig(orientation=SliceOrientation.AXIAL)
compression_settings = CompressionConfig(algorithm=Compressor.ZSTD, level=3)
configure(tile=tile_settings, compression=compression_settings)

In [3]:
def uri_exists(uri: str) -> bool:
    """Report whether a usable RadiObject can be opened at ``uri``.

    The object is opened and its ``collection_names`` attribute is read so
    that the group metadata is actually accessed. Any exception raised while
    doing so is interpreted as "no RadiObject here".

    Args:
        uri: Location to probe.

    Returns:
        True if the open-and-read succeeded, False if anything raised.
    """
    try:
        candidate = RadiObject(uri)
        candidate.collection_names  # reading the attribute forces validation
    except Exception:
        return False
    return True


# Probe the target once; every ingestion cell below is gated on this flag.
SKIP_INGESTION = uri_exists(BRATS_URI)

if SKIP_INGESTION:
    print(f"RadiObject already exists at {BRATS_URI}")
    print("Skipping ingestion. Delete the URI to re-ingest.")
else:
    print(f"No RadiObject found at {BRATS_URI}")
    print("Proceeding with ingestion...")

No RadiObject found at /Users/samueldsouza/Desktop/Code/RadiObject/data/brats_radiobject
Proceeding with ingestion...


In [4]:
if not SKIP_INGESTION:
    # Get BraTS NIfTI data (downloads if not cached)
    NIFTI_DIR = get_brats_nifti_path()

    # Load the manifest that lists the available samples. The encoding is
    # given explicitly so the read does not depend on the platform's default
    # locale encoding. `json` is imported with the other imports at the top
    # of the notebook.
    manifest_path = NIFTI_DIR / "manifest.json"
    with open(manifest_path, encoding="utf-8") as f:
        manifest = json.load(f)

    print(f"Found {len(manifest)} BraTS samples")

Found 484 BraTS samples


In [5]:
if not SKIP_INGESTION:
    N_SUBJECTS = 5
    # A modality's position in this list is used as its index along the last
    # axis of each 4D image, so the order here must match the channel order
    # of the source files.
    MODALITIES = ["FLAIR", "T1w", "T1gd", "T2w"]

    # Slicing tolerates a manifest shorter than N_SUBJECTS; subject_ids then
    # simply has fewer entries.
    subjects = manifest[:N_SUBJECTS]
    subject_ids = [s["sample_id"] for s in subjects]

    # Create temp directory for split NIfTIs
    TEMP_DIR = tempfile.mkdtemp(prefix="brats_ingest_")
    split_dir = Path(TEMP_DIR) / "split_niftis"
    split_dir.mkdir(exist_ok=True)

    # (path, subject id) pairs, one per subject/modality combination.
    nifti_list = []
    try:
        for entry in subjects:
            img = nib.load(NIFTI_DIR / entry["image_path"])
            data_4d = np.asarray(img.dataobj, dtype=np.float32)

            # Fail loudly on unexpected input: indexing the last axis of a 3D
            # array would otherwise silently write 2D slices as "volumes".
            if data_4d.ndim != 4 or data_4d.shape[-1] < len(MODALITIES):
                raise ValueError(
                    f"{entry['sample_id']}: expected a 4D image with at least "
                    f"{len(MODALITIES)} channels on the last axis, "
                    f"got shape {data_4d.shape}"
                )

            for mod_idx, modality in enumerate(MODALITIES):
                nifti_path = split_dir / f"{entry['sample_id']}_{modality}.nii.gz"
                # Each channel is saved as its own 3D image, reusing the
                # source image's affine.
                nib.save(nib.Nifti1Image(data_4d[..., mod_idx], img.affine), nifti_path)
                nifti_list.append((nifti_path, entry["sample_id"]))
    except Exception:
        # Do not leave a half-populated temp directory behind when this cell
        # fails; the dedicated cleanup cell would never be reached.
        shutil.rmtree(TEMP_DIR, ignore_errors=True)
        raise

    print(f"Prepared {len(nifti_list)} NIfTI files")
    print(f"Subjects: {subject_ids}")
    print(f"Modalities: {MODALITIES}")

Prepared 20 NIfTI files
Subjects: ['BRATS_001', 'BRATS_002', 'BRATS_003', 'BRATS_004', 'BRATS_005']
Modalities: ['FLAIR', 'T1w', 'T1gd', 'T2w']


In [6]:
if not SKIP_INGESTION:
    # Size the generated columns from the subjects actually selected rather
    # than from N_SUBJECTS: if the manifest held fewer entries than requested,
    # the id columns would be shorter than the random columns and the
    # DataFrame constructor would raise on the length mismatch.
    n_selected = len(subject_ids)

    # tumor_grade and age are synthetic placeholder values drawn from NumPy's
    # global RNG; the fixed seed keeps them reproducible across runs.
    np.random.seed(42)
    obs_meta_df = pd.DataFrame(
        {
            "obs_subject_id": subject_ids,
            "obs_id": subject_ids,
            "dataset": "BraTS",
            "tumor_grade": np.random.choice(["LGG", "HGG"], n_selected),
            "age": np.random.randint(30, 70, n_selected),
        }
    )
    print("Subject metadata:")
    display(obs_meta_df)

Subject metadata:


Unnamed: 0,obs_subject_id,obs_id,dataset,tumor_grade,age
0,BRATS_001,BRATS_001,BraTS,LGG,37
1,BRATS_002,BRATS_002,BraTS,HGG,50
2,BRATS_003,BRATS_003,BraTS,LGG,68
3,BRATS_004,BRATS_004,BraTS,LGG,48
4,BRATS_005,BRATS_005,BraTS,LGG,52


In [7]:
if not SKIP_INGESTION:
    # Build the RadiObject at the target URI from the split per-modality
    # files and the subject-level metadata table.
    print(f"Creating RadiObject at: {BRATS_URI}")

    radi = RadiObject.from_niftis(
        uri=BRATS_URI,
        niftis=nifti_list,  # list of (NIfTI path, subject id) tuples
        obs_meta=obs_meta_df,  # one row per subject
    )

    print(f"\nCreated: {radi}")

Creating RadiObject at: /Users/samueldsouza/Desktop/Code/RadiObject/data/brats_radiobject



Created: RadiObject(5 subjects, 4 collections: [T2w, T1gd, T1w, FLAIR])


In [8]:
if not SKIP_INGESTION:
    # Run the object's own validation before reporting anything.
    radi.validate()
    print("Validation passed")

    # Summary of what was written
    print(f"\nCollections: {radi.collection_names}")
    print(f"Subjects: {radi.obs_subject_ids}")
    print("\nobs_meta:")
    stored_obs_meta = radi.obs_meta.read()
    display(stored_obs_meta)

Validation passed

Collections: ('T2w', 'T1gd', 'T1w', 'FLAIR')
Subjects: ['BRATS_001', 'BRATS_002', 'BRATS_003', 'BRATS_004', 'BRATS_005']

obs_meta:


Unnamed: 0,obs_subject_id,obs_id,dataset,tumor_grade,age
0,BRATS_001,BRATS_001,BraTS,LGG,37
1,BRATS_002,BRATS_002,BraTS,HGG,50
2,BRATS_003,BRATS_003,BraTS,LGG,68
3,BRATS_004,BRATS_004,BraTS,LGG,48
4,BRATS_005,BRATS_005,BraTS,LGG,52


In [9]:
if not SKIP_INGESTION:
    # Remove the scratch directory holding the split NIfTIs. The existence
    # check keeps this cell idempotent: re-running it after the directory is
    # already gone no longer raises FileNotFoundError, while genuine removal
    # errors still surface.
    if Path(TEMP_DIR).exists():
        shutil.rmtree(TEMP_DIR)
    print(f"Cleaned up temp directory: {TEMP_DIR}")

Cleaned up temp directory: /var/folders/dj/0_0s64j55hn0gk7rrvj09zf80000gn/T/brats_ingest_0vz6ioe5


In [10]:
# Load from URI (works whether we just created it or it already existed)
# This cell is intentionally not gated on SKIP_INGESTION.
radi = RadiObject(BRATS_URI)

print(f"Loaded: {radi}")
print(f"Collections: {radi.collection_names}")
print(f"Subjects: {len(radi)}")

# Quick data check: take the first FLAIR volume by position and read one
# axial slice from it.
vol = radi.FLAIR.iloc[0]
print(f"\nSample volume: {vol}")
# z=77 is a fixed slice index; the recorded output shows a 240x240x155 volume,
# which puts it near mid-depth.
# NOTE(review): presumably out of range for volumes with fewer slices - confirm.
print(f"Axial slice shape: {vol.axial(z=77).shape}")

Loaded: RadiObject(5 subjects, 4 collections: [T2w, T1gd, T1w, FLAIR])
Collections: ('T2w', 'T1gd', 'T1w', 'FLAIR')
Subjects: 5

Sample volume: Volume(shape=240x240x155, dtype=float32, obs_id='BRATS_001_FLAIR')
Axial slice shape: (240, 240)


## Next Steps

The RadiObject is now available at `BRATS_URI`. Proceed to the tutorial notebooks:

- [01_radi_object.ipynb](./01_radi_object.ipynb) - RadiObject operations
- [02_volume_collection.ipynb](./02_volume_collection.ipynb) - Working with volume groups
- [03_volume.ipynb](./03_volume.ipynb) - Single volume operations
- [04_storage_configuration.ipynb](./04_storage_configuration.ipynb) - Tile orientation and compression