# Data loading pipeline examples

The purpose of this notebook is to illustrate reading NIfTI files and to compare the speed of different loading methods.

In [1]:
%matplotlib inline

import os
import shutil
import sys
import tempfile
from glob import glob

import numpy as np
import nibabel as nib


import torch
from torch.utils.data import DataLoader
from torch.multiprocessing import Pool, Process, set_start_method
# Select the 'spawn' start method for worker processes. set_start_method raises
# RuntimeError when a start method has already been fixed (for example when this
# cell is re-run in the same kernel); that case is safe to ignore here.
try:
    set_start_method('spawn')
except RuntimeError:
    pass

import monai
from monai.data import NiftiDataset
from monai.transforms import (Compose, AddChannel, ScaleIntensity, ToTensor, 
                              RandUniformPatch, Rotate, RandAffine)

# Print the MONAI version and the versions of its key dependencies,
# so the timings below can be tied to a specific environment.
monai.config.print_config()

MONAI version: 0.1a1.dev8+6.gb3c5761.dirty
Python version: 3.6.9 |Anaconda, Inc.| (default, Jul 30 2019, 19:07:31)  [GCC 7.3.0]
Numpy version: 1.18.1
Pytorch version: 1.4.0
Ignite version: 0.3.0


### 0. Preparing input data (nifti images)

Create five test NIfTI image/segmentation pairs: 3D single-channel volumes with a spatial size of (256, 256, 256) voxels.

In [2]:
# Scratch directory that holds the synthetic volumes for the whole notebook.
tempdir = tempfile.mkdtemp()

for idx in range(5):
    # Each call yields a synthetic image together with its segmentation.
    volume, labels = monai.data.synthetic.create_test_image_3d(256, 256, 256)

    # Save both arrays as compressed NIfTI files with an identity affine.
    for pattern, array in (('im%i.nii.gz', volume), ('seg%i.nii.gz', labels)):
        target = os.path.join(tempdir, pattern % idx)
        nib.save(nib.Nifti1Image(array, np.eye(4)), target)

In [3]:
# Gather the image and segmentation paths; sorting both lists keeps
# the i-th image aligned with the i-th segmentation.
images, segs = (
    sorted(glob(os.path.join(tempdir, pattern)))
    for pattern in ('im*.nii.gz', 'seg*.nii.gz')
)

### 1. Test image loading with minimal preprocessing

In [4]:
# Minimal preprocessing: add a channel dimension, then convert to a tensor.
# Images and segmentations get separate (but identical) pipelines.
imtrans, segtrans = (Compose([AddChannel(), ToTensor()]) for _ in range(2))

ds = NiftiDataset(images, segs, transform=imtrans, seg_transform=segtrans)
loader = DataLoader(ds, batch_size=3, num_workers=8)

# Fetch a single batch and report the shapes of its image and segmentation tensors.
im, seg = monai.utils.misc.first(loader)
print(im.shape, seg.shape)

torch.Size([3, 1, 256, 256, 256]) torch.Size([3, 1, 256, 256, 256])


In [5]:
# Time fetching one batch. Every run builds a new iterator over `loader`
# first, so iterator start-up is part of the measurement.
%timeit data = next(iter(loader))

3.73 s ± 111 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### 2. Test image-patch loading with CPU multi-processing:

- rotate each (256, 256, 256)-voxel volume in the plane axes=(1, 2)
- extract random (64, 64, 64) patches
- implemented in MONAI using `scipy.ndimage.rotate`

In [6]:
# Re-collect the file lists (same patterns as in the data preparation step).
images, segs = (
    sorted(glob(os.path.join(tempdir, pattern)))
    for pattern in ('im*.nii.gz', 'seg*.nii.gz')
)


def _rotate_and_crop_steps():
    """Return fresh transform instances shared by both pipelines:
    add channel, rotate by angle=45., crop a random (64, 64, 64) patch, to tensor."""
    return [AddChannel(), Rotate(angle=45.), RandUniformPatch((64, 64, 64)), ToTensor()]


# The image pipeline additionally rescales intensities before the shared steps.
# NOTE(review): each pipeline owns its own RandUniformPatch instance -- confirm
# that NiftiDataset keeps the random patch locations of image and segmentation in sync.
imtrans = Compose([ScaleIntensity()] + _rotate_and_crop_steps())
segtrans = Compose(_rotate_and_crop_steps())

ds = NiftiDataset(images, segs, transform=imtrans, seg_transform=segtrans)
# Pinned host memory is only requested when a CUDA device is present.
loader = DataLoader(
    ds,
    batch_size=3,
    num_workers=8,
    pin_memory=torch.cuda.is_available(),
)

# Fetch a single batch and report the patch shapes.
im, seg = monai.utils.misc.first(loader)
print(im.shape, seg.shape)

torch.Size([3, 1, 64, 64, 64]) torch.Size([3, 1, 64, 64, 64])


In [7]:
# Time fetching one batch of patches (3 loops per run). Every loop builds a
# new iterator over `loader`, so iterator start-up is part of the measurement.
%timeit -n 3 data = next(iter(loader))

10.1 s ± 83.1 ms per loop (mean ± std. dev. of 7 runs, 3 loops each)


(The above results were measured on a 2.9 GHz 6-core Intel Core i9.)

### 3. Test image-patch loading with preprocessing on GPU:

- randomly rotate each (256, 256, 256)-voxel volume in the plane axes=(1, 2)
- extract random (64, 64, 64) patches
- implemented in MONAI using native PyTorch resampling

In [8]:
images = sorted(glob(os.path.join(tempdir, 'im*.nii.gz')))
segs = sorted(glob(os.path.join(tempdir, 'seg*.nii.gz')))

# Resample on the first GPU when CUDA is available; otherwise fall back to the
# CPU so the cell still runs (the timings are then not GPU timings).
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if device.type != 'cuda':
    print('CUDA is not available; running RandAffine on the CPU instead.')

# Same parameters for both transforms; only the interpolation mode differs
# between image ('bilinear') and segmentation ('nearest').
affine_kwargs = dict(prob=1.0, rotate_range=np.pi/4, translate_range=(96, 96, 96),
                     spatial_size=(64, 64, 64), as_tensor_output=True, device=device)
# NOTE(review): these are two independent random transforms -- confirm that
# NiftiDataset applies the same random parameters to image and segmentation.
rand_affine_img = RandAffine(mode='bilinear', **affine_kwargs)
rand_affine_seg = RandAffine(mode='nearest', **affine_kwargs)

imtrans = Compose([
    ScaleIntensity(),
    AddChannel(),
    rand_affine_img,
    ToTensor()
])

segtrans = Compose([
    AddChannel(),
    rand_affine_seg,
    ToTensor()
])

ds = NiftiDataset(images, segs, transform=imtrans, seg_transform=segtrans)
# num_workers=0 keeps loading and the transforms in the main process.
loader = DataLoader(ds, batch_size=3, num_workers=0)

# Fetch a single batch and report the patch shapes.
im, seg = monai.utils.misc.first(loader)

print(im.shape, seg.shape)

torch.Size([3, 1, 64, 64, 64]) torch.Size([3, 1, 64, 64, 64])


In [9]:
# Time fetching one batch of patches (3 loops per run); every loop builds a
# new iterator over `loader` before requesting the batch.
%timeit -n 3 data = next(iter(loader))

2.05 s ± 668 µs per loop (mean ± std. dev. of 7 runs, 3 loops each)


In [10]:
# Report the GPU model and its memory statistics; skip the CUDA queries on
# machines without a CUDA device, where they would raise.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
    print(torch.cuda.memory_summary(0, abbreviated=True))
else:
    print('CUDA is not available; no GPU memory statistics to report.')

TITAN Xp COLLECTORS EDITION
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    6144 KB |  157696 KB |   17250 MB |   17244 MB |
|---------------------------------------------------------------------------|
| Active memory         |    6144 KB |  157696 KB |   17250 MB |   17244 MB |
|---------------------------------------------------------------------------|
| GPU reserved memory   |  225280 KB |  225280 KB |  225280 KB |       0 B  |
|---------------------------------------------------------------------------|
| Non-releasable memory |   14336 KB |   77823 KB |   11789 MB |   11775 MB |
|-----------------------------------

In [11]:
# Remove the temporary directory together with the synthetic files in it.
# shutil.rmtree works on every platform and is unaffected by spaces in the path;
# ignore_errors=True mirrors the forgiving behaviour of `rm -rf`.
shutil.rmtree(tempdir, ignore_errors=True)