In [None]:
%load_ext autoreload
%autoreload 2

from context import uncertify

In [None]:
import logging

from uncertify.log import setup_logging

# Run the project's logging setup first, then fetch a logger for this notebook.
setup_logging()
LOG = logging.getLogger(__name__)

# Matplotlib is extremely verbose at DEBUG level; only let warnings and above through.
mpl_logger = logging.getLogger('matplotlib')
mpl_logger.setLevel(logging.WARNING)

In [None]:
import h5py

from uncertify.data.hdf5 import print_dataset_information
from uncertify.common import DATA_DIR_PATH

# Explore HDF5 dataset
**As created by scripts/preprocess_brats.py**  
An HDF5 file consists of two major types of objects: datasets and groups.

- **Datasets**: multidimensional arrays of a homogeneous type.
- **Groups**: hierarchical structures (file-system-like).

In [None]:
# All HDF5 files referenced in this notebook live below <data dir>/processed.
PROCESSED_DIR_PATH = DATA_DIR_PATH / 'processed'

# BraTS file.
brats_test_path = PROCESSED_DIR_PATH / 'brats17_t1_hm_bc_scale_l3.hdf5'

# CamCAN files; note that the "test" path points at the file named `camcan_val_...`.
camcan_train_path = PROCESSED_DIR_PATH / 'camcan_train_t2_hm_scale_l10_xe.hdf5'
camcan_test_path = PROCESSED_DIR_PATH / 'camcan_val_t2_hm_scale_l10_xe.hdf5'

# Files to summarise below; append further paths when needed.
dataset_paths = [camcan_train_path, camcan_test_path]

print_dataset_information(dataset_paths=dataset_paths)

In [None]:
from uncertify.visualization.datasets import plot_samples

n_samples = 5

# Open the file in a `with` block so the HDF5 handle is released as soon as plotting is done
# instead of staying open for the lifetime of the kernel.
# To inspect another file, swap in its path (e.g. `brats_test_path` or `camcan_test_path`).
with h5py.File(camcan_train_path, 'r') as camcan_train_file:
    # NOTE(review): dataset_length=187 is carried over unchanged; confirm it matches this file.
    plot_samples(camcan_train_file, n_samples=n_samples, cmap='viridis',
                 dataset_length=187, vmin=0, vmax=1)

# Explore Dataset using Dataset and Dataloader in PyTorch

In [None]:
from uncertify.data.dataloaders import dataloader_factory, DatasetType
from uncertify.visualization.datasets import  plot_camcan_batches
from uncertify.visualization.datasets import plot_patient_histograms

## Using the dataset factory

In [None]:
batch_size = 32
plot_n_batches = 5
nrow = 8

# Build the CamCAN train / validation loaders; shuffling is requested for the training loader.
camcan_train_dataloader, camcan_val_dataloader = dataloader_factory(
    DatasetType.CAMCAN,
    batch_size=batch_size,
    train_set_path=camcan_train_path,
    val_set_path=camcan_test_path,
    shuffle_train=True,
)

# Plot batches from the training loader first, then from the validation loader, with identical settings.
for camcan_dataloader in (camcan_train_dataloader, camcan_val_dataloader):
    plot_camcan_batches(camcan_dataloader, plot_n_batches, cmap='viridis',
                        figsize=(18, 12), nrow=nrow, vmin=0, vmax=1)

# Histograms on the training loader: once without accumulating batches, once accumulated over 20 batches.
plot_patient_histograms(camcan_train_dataloader, n_batches=plot_n_batches, accumulate_batches=False, bins=30)
plot_patient_histograms(camcan_train_dataloader, n_batches=20, accumulate_batches=True, bins=30)

# Analyze normal / abnormal pixel distribution

In [None]:
from uncertify.evaluation.datasets import get_samples_without_lesions
from uncertify.visualization.datasets import plot_fraction_of_abnormal_pixels
from uncertify.visualization.datasets import plot_abnormal_pixel_distribution
from uncertify.visualization.datasets import boxplot_abnormal_pixel_fraction

In [None]:
# `brats_val_dataloader` is consumed by every cell of this section but is not created in any
# cell visible in this notebook, so a fresh "Restart & Run All" fails here with a NameError.
# Build it from the BraTS file declared alongside the other dataset paths.
# NOTE(review): assumes `DatasetType.BRATS17` exists and that `dataloader_factory` accepts a
# call without `train_set_path` — confirm against uncertify.data.dataloaders.
_, brats_val_dataloader = dataloader_factory(DatasetType.BRATS17, batch_size=batch_size,
                                             val_set_path=brats_test_path)

# Make sure the output folder exists; savefig does not create missing directories.
plots_dir_path = DATA_DIR_PATH / 'plots'
plots_dir_path.mkdir(parents=True, exist_ok=True)

fig, _ = plot_abnormal_pixel_distribution(brats_val_dataloader, figsize=(12, 5),
                                          hist_kwargs=dict(bins=30, density=True))
fig.savefig(plots_dir_path / 'normal_abnormal_n_pixel_distribution.png')

In [None]:
# Expects `brats_val_dataloader` to have been created by an earlier cell.
# Make sure the output folder exists; savefig does not create missing directories.
plots_dir_path = DATA_DIR_PATH / 'plots'
plots_dir_path.mkdir(parents=True, exist_ok=True)

fig, _ = plot_fraction_of_abnormal_pixels(brats_val_dataloader, figsize=(12, 5),
                                          hist_kwargs=dict(bins=80, density=True))
fig.savefig(plots_dir_path / 'abnormal_pixel_fraction.png')

In [None]:
# Expects `brats_val_dataloader` to have been created by an earlier cell.
# Make sure the output folder exists; savefig does not create missing directories.
plots_dir_path = DATA_DIR_PATH / 'plots'
plots_dir_path.mkdir(parents=True, exist_ok=True)

fig, ax = boxplot_abnormal_pixel_fraction(data_loader=brats_val_dataloader, figsize=(2.5, 5))
fig.savefig(plots_dir_path / 'boxplot_abnormal_pixel_fraction.png')

In [None]:
# Expects `brats_val_dataloader` to have been created by an earlier cell.
# Keep the threshold in one place so the call and the printed summary cannot drift apart.
pixel_ratio_threshold = 0.01
n_samples_without_lesions, n_higher_ratio_threshold, n_samples_total = get_samples_without_lesions(
    brats_val_dataloader, pixel_ratio_threshold=pixel_ratio_threshold)

# The previous message reported the "without lesions" count as samples that *have* lesional
# pixels and printed the 0.01 ratio as "0.01%".
# NOTE(review): the wording below follows the names of the returned values; confirm the exact
# meaning of each count against get_samples_without_lesions.
print(f'{n_samples_without_lesions} / {n_samples_total} samples have no lesional pixels. '
      f'{n_higher_ratio_threshold} / {n_samples_total} samples have a lesional pixel ratio above '
      f'{pixel_ratio_threshold} within the brain mask.')