In [9]:
from worktable.modules import NIFTIDataset
from worktable.utils import *
from torchvision import transforms
from torch.utils.data import DataLoader

## NIFTIDataset Basic Example

### Create a NIFTIDataset
Specify `base_dir`, the folder where all the data is stored. The individual image and label paths can either be passed in as lists or be specified in a CSV file located inside `base_dir`; when a CSV file is used, `images` and `labels` are the names of the corresponding columns.

In [2]:
# Root folder that holds the images, the labels and data.csv. It is defined once
# so that base_dir and the CSV path below cannot drift apart.
LUNG_GTV_DIR = "/home/kshen3778/Desktop/Projects/workbench_notebooks/data/Lung_GTV_small"

dataset = NIFTIDataset(
    base_dir=LUNG_GTV_DIR,
    images="images",  # name of images column in data.csv
    labels="labels",  # name of labels column in data.csv
    file_name_or_dataframe=f"{LUNG_GTV_DIR}/data.csv",  # CSV file located inside base_dir
)

### Calculate dataset statistics and attach transformations

In [3]:
# Compute the dataset statistics, requesting the 10th, 50th and 90th percentiles.
dataset.calculate_statistics(percentiles=[10, 50, 90])

# Build the transformation list one step at a time, then attach it to the dataset.
# NOTE(review): train=False presumably registers these as non-training
# transformations -- confirm against the NIFTIDataset API.
crop_transform = transforms.CenterCrop(10)
tensor_transform = transforms.ToTensor()
transformations = [crop_transform, tensor_transform]
dataset.attach_transformations(transformations, train=False)

# Print the profile to inspect the dataset information gathered so far.
dataset_profile = dataset.get_profile()
print(dataset_profile)

{'base_dir': '/home/kshen3778/Desktop/Projects/workbench_notebooks/data/Lung_GTV_small', 'images': ['./1/data.nii.gz', './2/data.nii.gz', './3/data.nii.gz'], 'labels': ['./1/label.nii.gz', './2/label.nii.gz', './3/label.nii.gz'], 'get_item_as_dict': False, 'get_item_keys': {'image_key': None, 'label_key': None}, 'file_path': 'data.csv', 'statistics': {'foreground_threshold': 0, 'sampling_interval': 1, 'image_statistics': [{'image': './1/data.nii.gz', 'label': './1/label.nii.gz', 'image_shape': [512, 512, 96], 'label_shape': [512, 512, 96], 'max': 3071.0, 'min': -1024.0, 'mean': -247.43211920529802, 'std': 222.7260820915533, 'percentile': [10, 50, 90], 'percentile_values': [-567.1, -230.0, 14.0], 'median': -230.0}, {'image': './2/data.nii.gz', 'label': './2/label.nii.gz', 'image_shape': [512, 512, 80], 'label_shape': [512, 512, 80], 'max': 1453.0, 'min': -1024.0, 'mean': -68.48429188829788, 'std': 173.99407963090667, 'percentile': [10, 50, 90], 'percentile_values': [-333.89999999999986,

### Save dataset as a Worktable dataset

In [4]:
# Save the dataset under the name "first_dataset". The cell output reports the
# location of the saved profile (a .worktable directory under base_dir).
dataset.save(name="first_dataset")

Profile saved at:  /home/kshen3778/Desktop/Projects/workbench_notebooks/data/Lung_GTV_small/.worktable


### Create a new copy/version of the dataset on disk
The new dataset will have all the data properties, transformations, and settings of the original. You can give it a new name.

In [5]:
# Create a new version of the dataset under a separate base directory and name
# it "second_dataset". The cell output reports where the new profile and the
# new dataset version were written.
dataset.create_new_version(new_base_dir="/home/kshen3778/Desktop/Projects/workbench_notebooks/data/Lung_GTV_small_2",
                          name="second_dataset")

Profile saved at:  /home/kshen3778/Desktop/Projects/workbench_notebooks/data/Lung_GTV_small_2/.worktable
New dataset version has been created at:  /home/kshen3778/Desktop/Projects/workbench_notebooks/data/Lung_GTV_small_2


### Load the new version, apply preprocessing, and save it

In [7]:
# Note: the path passed to load() must point either directly at a .worktable
# directory or at a directory that contains a .worktable directory.
dataset_new = NIFTIDataset()
dataset_new.load("/home/kshen3778/Desktop/Projects/workbench_notebooks/data/Lung_GTV_small_2")
# Preprocessing steps handed to apply_changes(): histogram clipping and a 3D center crop.
# NOTE(review): CenterCrop3D(512, 512, 10) presumably keeps 512 x 512 in-plane and
# 10 slices, and apply_changes() presumably rewrites the data on disk -- confirm both.
preprocessing = [HistogramClipping(percent=True), CenterCrop3D(512, 512, 10)]
dataset_new.apply_changes(preprocessing=preprocessing)
dataset_new.save(name="second_dataset") # save the applied changes, keeping the name this version was created with

Preprocessing complete.
Profile saved at:  /home/kshen3778/Desktop/Projects/workbench_notebooks/data/Lung_GTV_small_2/.worktable


### Create a PyTorch DataLoader from a Worktable dataset

In [11]:
# Wrap the preprocessed dataset in a PyTorch DataLoader with a batch size of one.
loader = DataLoader(dataset_new, batch_size=1)