# Creating a custom datablock for 3D images and fastai

In [1]:
import SimpleITK as sitk
import re
import pathlib
import torchvision

from fastai.basics import *
from fastai.medical.imaging import *
from fastai.vision.all import *
from fastai.callback.all import *
import fastai
from faimed3d import *

## Create Pytorch Dataloaders

### Get data paths and labels

Data is divided into a train, valid and test dataset of different patients, which either have prostate cancer or are healthy. 
Each patient has three MRI sequences: T2, T1map and ADC. These sequences differ in number of slices and resolution. 

At first the paths to the data are specified: 

In [2]:
train = pathlib.Path('../../data/train')
valid = pathlib.Path('../../data/valid')
test = pathlib.Path('../../data/test')

At the moment, I am only interested into the DICOM data, not in the NIfTI segmentations. 

In [3]:
train_files = list(train.rglob('DICOM'))
valid_files = list(valid.rglob('DICOM'))
test_files = list(test.rglob('DICOM'))

To reduce complexity of the data, only the T2 map will be used for the first runs. 

In [4]:
# take only T2 and T1 images for noe
subset_train =[]
for f in train_files: 
    m = re.search(r'T2', str(f)) 
    if hasattr(m, 'string'): subset_train.append(Path(m.string))
        
subset_valid =[]
for f in valid_files: 
    m = re.search(r'T2', str(f)) 
    if hasattr(m, 'string'): subset_valid.append(Path(m.string))
        
subset_test = []
for f in test_files: 
    m = re.search(r'T2', str(f)) 
    if hasattr(m, 'string'): subset_test.append(Path(m.string))

If the patient has prostate cancer or not, can be extracted from the file path. 

In [5]:
labels = ['Gesund', 'ProstataCa']
def label_func(fn): return re.findall(r'(Gesund|ProstataCa)', str(fn))[0]

## Adapt fastai dataloaders



In [6]:
im = read_medical_3d_image(subset_train[0])

In [7]:
pc3d = ResizeCrop3D(crop_by = (0., 0.1, 0.1), resize_to = (10, 50, 50), perc_crop = True),

In [8]:
pc3d

(ResizeCrop3D -- {'crop_by': (0.0, 0.1, 0.1), 'resize_to': (10, 50, 50), 'perc_crop': True, 'p': 1}:
 encodes: (TensorDicom3D,object) -> encodes
 decodes: ,)

In [9]:
mris = DataBlock(
    blocks = (ImageBlock(cls=TensorDicom3D), 
              CategoryBlock),
    get_x = lambda x: x,
    get_y = label_func, 
    item_tfms = ResizeCrop3D(crop_by = (0., 0.1, 0.1), resize_to = (10, 50, 50), perc_crop = True),
    splitter = RandomSplitter())

In [10]:
mris.summary(subset_test)

Setting-up type transforms pipelines
Collecting items from [Path('../../data/test/Gesund/A0041126717/T2/DICOM'), Path('../../data/test/Gesund/A0041720206/T2/DICOM'), Path('../../data/test/Gesund/A0041983224/T2/DICOM'), Path('../../data/test/Gesund/A0041886483/T2/DICOM'), Path('../../data/test/Gesund/A0040860335/T2/DICOM'), Path('../../data/test/Gesund/A0001504907/T2/DICOM'), Path('../../data/test/Gesund/A0041839275/T2/DICOM'), Path('../../data/test/Gesund/A0040169930/T2/DICOM'), Path('../../data/test/Gesund/A0001047141/T2/DICOM'), Path('../../data/test/Gesund/A0042256547/T2/DICOM'), Path('../../data/test/ProstataCa/A0071496463/T2/DICOM'), Path('../../data/test/ProstataCa/A0041794296/T2/DICOM'), Path('../../data/test/ProstataCa/A0041945145/T2/DICOM'), Path('../../data/test/ProstataCa/A0041324313/T2/DICOM'), Path('../../data/test/ProstataCa/A0041877685/T2/DICOM'), Path('../../data/test/ProstataCa/A0042184416/T2/DICOM'), Path('../../data/test/ProstataCa/A0001436600/T2/DICOM'), Path('../..

In [11]:
dls = dls.cuda()
def loss_func(out, targ):
    return CrossEntropyLossFlat()(out, targ.long())

NameError: name 'dls' is not defined

In [None]:
roc = RocAucBinary()

In [None]:
learn = Learner(dls, AlexNet_3D(), opt_func = SGD, loss_func = loss_func, metrics = [error_rate, roc])
learn = learn.to_fp16()
#learn = learn.to_parallel()

In [None]:
#learn.lr_find()

In [None]:
learn.fit_one_cycle(n_epoch = 1, lr_max = 0.1)

In [None]:
p,t = learn.tta()

In [None]:
preds, target = learn.get_preds()

In [None]:
preds = F.softmax(preds, dim = 1)[:, 1].numpy()

It's usually because your network is not complex enough to find a pattern between your input vectors and your output vectors, and therefore, your last output layer is converging towards the average vector of all the outputs in your dataset.

To overcome this there are a few techniques:

1. Try to do some more preprocessing to your inputs, perhaps a PCA on your attributes.
2. Visualize your layers, try to add random vectors as your input and check the outputs of each layer. There must be just one layer which would be outputting almost the same vector everytime, causing problems for your higher level neurons.
3. Reduce your learning rate.
4. Reduce your batch size.
5. Stack more layers.
6. Check if your model is actually learning : send random noise as your data, and the network loss should not be decreasing.

In [None]:
from sklearn.metrics import roc_curve
ns_fpr, ns_tpr, _ = roc_curve(target.numpy(), preds)
plt.plot(ns_fpr, ns_tpr, linestyle='--', label='ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()

In [None]:
preds

In [None]:
target

In [None]:
learn.fit_one_cycle(n_epoch = 100, lr_max = 0.001)

In [None]:
11520/24