# Generate dataset 9_112_224

### Prepare datasets with shape (9, 112, 224)
The synthetic dataset was produced at (9, 64, 64); its images are interpolated back to (9, 112, 224).
We work and interpolate at (9, 112, 224) partly because the resulting 2D UMAP projections proved easier for humans to interpret.

In [1]:
import sys
import os
import h5py
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.functional import interpolate

from tqdm.notebook import tqdm
from sklearn.metrics import confusion_matrix

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Select the compute device (first GPU if CUDA is available, otherwise CPU)
# and make the project sources under "src" importable.

gpu = torch.cuda.is_available()
if gpu:
    # NOTE(review): this is set after torch has already queried CUDA above, so it
    # may have no effect on device visibility -- confirm whether it is needed.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Move two directory levels up unless the working directory is already 'BDI-imaging'.
if os.path.basename(os.getcwd()) != 'BDI-imaging':  # change dir to ROOT
    os.chdir("../../")
sys.path.insert(0, "src")

print('============ Demo information ============')
# os.path.basename matches the check above and does not depend on '/' as separator.
print('- Working directory: /{}'.format(os.path.basename(os.getcwd())))
print('- Cuda device: {}'.format(device))
print('==========================================')

- Working directory: /src
- Cuda device: cpu


In [3]:
# load batchers for each dataset
from manuscript.Train.restricted.train_dataset import F2305Dataset, spineNet_split
from manuscript.Train.restricted.test_dataset import A2209Dataset
from manuscript.Train.restricted.synthetic_dataset import SynthDataset
from manuscript.Train.batchers import F2305Batcher, A2209Batcher, SynthBatcher

In [4]:
# Hyper parameters
# Batch size used by every DataLoader in this notebook.
batch_size = 1
# Target image shape handed to the datasets; its last two entries are the
# size the synthetic images are interpolated to.
image_shape = (9, 112, 224)

# Train set

In [5]:
# Build the F2305 training loader, restricted to the 'train' entry of the SpineNet split.
image_list = spineNet_split()['train']
print("Training using F2305...")

traindata_f = F2305Dataset(shape=image_shape)
traindata_f.prepare(
    label_by="reader2",
    types=('T1',),
    subset_fraction=1.0,
    image_list=image_list,
)

# Wrap the prepared dataset in its batcher, then in a shuffling DataLoader.
train_batcher = F2305Batcher(traindata_f.dataset, traindata_f.scan_path)
vu_loader_f = DataLoader(
    train_batcher,
    batch_size=batch_size,
    shuffle=True,
    num_workers=1,
)

Training using F2305...


In [6]:
# Initialize the array holding the images and the one holding the region labels.
# Rows are indexed per loader batch, so this relies on batch_size == 1.
n_train = len(vu_loader_f)
train_dataset = np.zeros((n_train,) + tuple(image_shape))
train_region = np.zeros((n_train, 1))

# Pass the total explicitly: enumerate() hides the loader length from tqdm,
# which would otherwise show a progress bar without an end.
for index, sample in tqdm(enumerate(vu_loader_f), total=n_train):
    im = sample['im']
    region = sample['region']
    train_dataset[index] = im
    train_region[index] = region

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




In [7]:
# Save the training set in HDF5 format so it can be sliced without loading everything into memory.
# The context manager closes the file even if writing one of the datasets fails.
with h5py.File('./manuscript/Diversity/diversity_saves/train_set.h5', 'w') as h5f:
    h5f.create_dataset('images', data=train_dataset)
    h5f.create_dataset('regions', data=train_region)

# Val set

In [14]:
# Build the F2305 validation loader from the 'val' and 'test' entries of the SpineNet split.
# The split is queried once so both entries come from the same call.
split = spineNet_split()
image_list = split['val'] + split['test']
print("Val using F2305...")
valdata = F2305Dataset(shape=image_shape)
valdata.prepare(label_by="reader2", types=('T1',), subset_fraction=1.0, image_list=image_list)

vu_loader_val = DataLoader(F2305Batcher(valdata.dataset, valdata.scan_path), batch_size=batch_size,
                           shuffle=True, num_workers=1)

Val using F2305...


In [15]:
# Initialize the array holding the images and the one holding the region labels.
# Rows are indexed per loader batch, so this relies on batch_size == 1.
n_val = len(vu_loader_val)
val_dataset = np.zeros((n_val,) + tuple(image_shape))
val_region = np.zeros((n_val, 1))

# Pass the total explicitly: enumerate() hides the loader length from tqdm,
# which would otherwise show a progress bar without an end.
for index, sample in tqdm(enumerate(vu_loader_val), total=n_val):
    im = sample['im']
    region = sample['region']
    val_dataset[index] = im
    val_region[index] = region

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




In [16]:
# Save the validation set in HDF5 format so it can be sliced without loading everything into memory.
# The context manager closes the file even if writing one of the datasets fails.
with h5py.File('./manuscript/Diversity/diversity_saves/val_set.h5', 'w') as h5f:
    h5f.create_dataset('images', data=val_dataset)
    h5f.create_dataset('regions', data=val_region)

<HDF5 dataset "regions": shape (2205, 1), type "<f8">

# Test set

In [7]:
# Build the A2209 test loader; labels are taken from "berlin_clinical".
testdata = A2209Dataset(shape=image_shape)
print("Test using A2209...")
testdata.prepare(
    label_by="berlin_clinical",
    types=('T1',),
    subset_fraction=1.0,
)

# Wrap the prepared dataset in its batcher, then in a shuffling DataLoader.
test_batcher = A2209Batcher(testdata.dataset, testdata.scan_path)
vu_loader_test = DataLoader(
    test_batcher,
    batch_size=batch_size,
    shuffle=True,
    num_workers=1,
)

Test using A2209...


In [8]:
# Initialize the array holding the images and the one holding the region labels.
# Rows are indexed per loader batch, so this relies on batch_size == 1.
n_test = len(vu_loader_test)
test_dataset = np.zeros((n_test,) + tuple(image_shape))
test_region = np.zeros((n_test, 1))

# Pass the total explicitly: enumerate() hides the loader length from tqdm,
# which would otherwise show a progress bar without an end.
for index, sample in tqdm(enumerate(vu_loader_test), total=n_test):
    im = sample['im']
    region = sample['region']
    test_dataset[index] = im
    test_region[index] = region

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




In [9]:
# Save the test set in HDF5 format so it can be sliced without loading everything into memory.
# The context manager closes the file even if writing one of the datasets fails.
with h5py.File('./manuscript/Diversity/diversity_saves/test_set.h5', 'w') as h5f:
    h5f.create_dataset('images', data=test_dataset)
    h5f.create_dataset('regions', data=test_region)

# Synth set

In [7]:
# Build the loader for the synthetic dataset.
print("Synth dataset...")
traindata_synth = SynthDataset()

# Wrap the dataset in its batcher, then in a shuffling DataLoader.
synth_batcher = SynthBatcher(traindata_synth.dataset, traindata_synth.scan_path)
vu_loader_synth = DataLoader(
    synth_batcher,
    batch_size=batch_size,
    shuffle=True,
    num_workers=1,
)

Synth dataset...


In [8]:
# Load the synthetic dataset, resizing every image to the spatial size in image_shape.
# Rows are indexed per loader batch, so this relies on batch_size == 1.
n_synth = len(vu_loader_synth)
synth_dataset = np.zeros((n_synth,) + tuple(image_shape))
synth_region = np.zeros((n_synth, 1))

# Pass the total explicitly: enumerate() hides the loader length from tqdm,
# which would otherwise show a progress bar without an end.
for index, sample in tqdm(enumerate(vu_loader_synth), total=n_synth):
    im = sample['im']
    # align_corners=False is what interpolate() uses when the argument is omitted;
    # spelling it out makes the bicubic sampling convention explicit.
    im = interpolate(im, size=image_shape[1:], mode='bicubic', align_corners=False)
    region = sample['region']
    synth_dataset[index] = im
    synth_region[index] = region

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




In [9]:
# Save the synthetic set in HDF5 format, with the same 'images' / 'regions' layout as the other sets.
# The context manager closes the file even if writing one of the datasets fails.
with h5py.File('./manuscript/Diversity/diversity_saves/synth_set.h5', 'w') as h5f:
    h5f.create_dataset('images', data=synth_dataset)
    h5f.create_dataset('regions', data=synth_region)