# Training a 3D CNN

In [1]:
import SimpleITK as sitk
import re
import pathlib
import torchvision

from fastai.basics import *
from fastai.medical.imaging import *
from fastai.vision.all import *
from fastai.callback.all import *
import fastai
from faimed3d import *

```python
# Report the SimpleITK build and the version of every module visible in the
# notebook namespace, so the environment of a run can be reconstructed.
print(sitk.Version())
# Iterate over a snapshot: `name` and `val` are themselves globals, so looping
# over the live dict can fail with "dictionary changed size during iteration"
# the first time this cell runs in a fresh kernel.
for name, val in list(globals().items()):
    if not isinstance(val, types.ModuleType):
        continue
    try:
        version = val.__version__
    except Exception:
        # Best effort: modules that expose no usable __version__ are skipped.
        continue
    # Only string versions are printed (non-string values were skipped before too).
    if isinstance(version, str):
        print(val.__name__ + ': ' + version)
            
SimpleITK Version: 2.0.0rc3 (ITK 5.1)
Compiled: Aug 25 2020 15:43:37

re: 2.2.1
torchvision: 0.7.0
platform: 1.0.8
numpy: 1.19.1
csv: 1.0
json: 2.0.9
matplotlib: 3.3.1
requests: 2.24.0
yaml: 5.3.1
pandas: 1.1.1
scipy: 1.5.2
scipy.ndimage: 2.0
torch: 1.6.0
PIL.Image: 7.2.0
fastai: 2.0.10
```

## Designing a 3D CNN

In [2]:
# https://github.com/Ontheroad123/ImageNet/blob/master/torch-alexnet-3D.py

class AlexNet_3D(nn.Module):
    """AlexNet-style 3D CNN: a convolutional feature extractor followed by an MLP head.

    The feature extractor consists of six `Conv3d` + `LeakyReLU` stages with
    interleaved `MaxPool3d`, `Dropout` and `BatchNorm3d` layers. All pooling
    layers use a kernel and stride of 1 along the last axis, so that axis is
    never down-sampled by pooling.

    Args:
        num_classes (int): number of output logits of the classifier head.
        in_channels (int): number of channels (size of dim 1) of the input
            batch. Defaults to 7, the value that used to be hard-coded.
        flat_features (int): number of elements per sample after flattening
            the feature-extractor output, i.e. the input size of the first
            linear layer. Defaults to 11520 (= 256 channels x 45 positions),
            the value that used to be hard-coded. It must match the spatial
            size of the volumes fed to the network.
    """

    def __init__(self, num_classes=2, in_channels=7, flat_features=11520):
        super().__init__()
        # NOTE: the layer order is kept exactly as before so that the indices
        # of the Sequential (and therefore state_dict keys) do not change.
        self.features = nn.Sequential(
            nn.Conv3d(in_channels, 512, kernel_size=(5, 5, 1), stride=(2, 2, 1), padding=(2, 2, 1)),
            nn.LeakyReLU(inplace=True),
            nn.MaxPool3d(kernel_size=(3, 3, 1), stride=(2, 2, 1)),

            nn.Conv3d(512, 256, kernel_size=(5, 5, 3), padding=(2, 2, 1)),
            nn.LeakyReLU(inplace=True),
            nn.Dropout(p=0.8),
            nn.MaxPool3d(kernel_size=(3, 3, 1), stride=(2, 2, 1)),

            nn.BatchNorm3d(256),
            nn.Conv3d(256, 128, kernel_size=(5, 5, 3), padding=(2, 2, 1)),
            nn.LeakyReLU(inplace=True),
            nn.Dropout(p=0.8),
            nn.MaxPool3d(kernel_size=(3, 3, 1), stride=(2, 2, 1)),

            nn.BatchNorm3d(128),
            nn.Conv3d(128, 384, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.LeakyReLU(inplace=True),

            nn.BatchNorm3d(384),
            nn.Conv3d(384, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.LeakyReLU(inplace=True),

            nn.BatchNorm3d(256),
            nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.LeakyReLU(inplace=True),
            nn.MaxPool3d(kernel_size=(3, 3, 1), stride=(2, 2, 1)),
        )
        self.classifier = nn.Sequential(
            nn.Linear(flat_features, 4096),
            nn.LeakyReLU(inplace=True),
            nn.Linear(4096, 512),
            nn.LeakyReLU(inplace=True),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a 5D input batch `x`.

        The feature maps are flattened to (batch, features) before the
        classifier; the flattened size must equal `flat_features`.
        """
        x = self.features(x)
        # Flatten everything except the batch dimension (also works for
        # non-contiguous tensors, unlike `view`).
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
    
    
def alexnet_3d(pretrained=False, progress=True, **kwargs):
    r"""Build a 3D AlexNet, adapted from https://github.com/Ontheroad123/ImageNet/blob/master/torch-alexnet-3D.py

    Args:
        pretrained (bool): No pretrained weights exist for this architecture.
            If True, a warning is emitted and a randomly initialised model is
            returned.
        progress (bool): Currently unused.
        **kwargs: forwarded unchanged to the `AlexNet_3D` constructor.

    Returns:
        A freshly constructed `AlexNet_3D` instance.
    """
    model = AlexNet_3D(**kwargs)
    if pretrained:
        # Previously this request was ignored silently; tell the caller instead.
        import warnings
        warnings.warn('currently no pretrained weights for 3D AlexNet available, '
                      'returning a randomly initialised model')
    return model

def make_pseudo_color(t):
    '''
    Turn a 3D tensor (indexed as t[slice, :, :]) into a 4D float tensor with a trailing axis of size 3.

    If the size of the first axis is divisible by 3, every three consecutive slices become the
    three "color" channels of one output slice, giving shape (n/3, H, W, 3). Otherwise a warning
    is issued and the whole tensor is repeated three times, giving shape (n, H, W, 3).
    '''
    n_slices = t.size(0)
    if n_slices % 3 != 0:
        import warnings
        warnings.warn('slice number is not divisible by 3, will stack the same tensor three times to create the color channels,')
        channels = (t, t, t)
    else:
        # channel k takes slices k, k+3, k+6, ...
        channels = tuple(t[offset:n_slices - 2 + offset:3, :, :] for offset in range(3))
    # stack puts the channel axis first; the permute moves it to the end
    stacked = torch.stack(channels)
    return stacked.permute(1, 2, 3, 0).float()

## Create Pytorch Dataloaders

### Get data paths and labels

In [3]:
train = pathlib.Path('../../data/train')
valid = pathlib.Path('../../data/valid')
test = pathlib.Path('../../data/test')

# every entry named 'DICOM' below the respective split directory
train_files = list(train.rglob('DICOM'))
valid_files = list(valid.rglob('DICOM'))
test_files = list(test.rglob('DICOM'))


def filter_t2(files):
    """Return, in order, the entries of `files` whose path string contains 'T2'.

    Args:
        files: iterable of paths (or path strings).

    Returns:
        list of `pathlib.Path` objects for the matching entries.
    """
    return [pathlib.Path(str(f)) for f in files if re.search(r'T2', str(f))]


# take only T2 images for now
subset_train = filter_t2(train_files)
subset_valid = filter_t2(valid_files)
subset_test = filter_t2(test_files)
        
        
# class names, in the order used as vocabulary
labels = ['Gesund', 'ProstataCa']

def label_func(fn):
    """Return the first occurrence of 'Gesund' or 'ProstataCa' in `str(fn)`.

    Raises IndexError when neither name occurs in the path.
    """
    found = re.findall(r'(Gesund|ProstataCa)', str(fn))
    return found[0]

In [4]:
# Oversample to balance the classes: every training file is repeated 10 times and
# 230 additional files are drawn (with replacement) from the first 33 entries of
# subset_train, which are taken to be the healthy patients (assumption from the
# original note; verify against the file ordering).
oversampled_train = 10 * subset_train + random.choices(subset_train[:33], k=230)

In [5]:
# Shuffle the oversampled file list in place so that the repeated files are not
# adjacent. NOTE(review): no random seed is set in the visible cells, so the
# resulting order differs between runs.
random.shuffle(oversampled_train)

In [6]:
# Deterministic preprocessing applied to every file, in this order.
# (The descriptions are inferred from the function names; see faimed3d for details.)
presize_steps = [
    # read the volume, requesting scaled and normalized values
    partial(read_medical_3d_image, return_scaled=True, return_normalized=True),
    # crop with margins (0.0, 0.15, 0.15), interpreted as percentages
    partial(crop_3d_tensor, margins=(0.0, 0.15, 0.15), perc_margins=True),
    # resize to a common shape of (27, 200, 200)
    partial(resize_3d_tensor, new_shape=(27, 200, 200)),
]
load_and_presize = Pipeline(presize_steps)

In [7]:
# One shared random cropper, appended to every augmentation pipeline.
Cropper = RandomCrop3D()
# The original note states that the final size after cropping is (21, 100, 100), stacked
# to (7, 100, 100, 3) by make_pseudo_color.
# NOTE(review): the classifier input size of 11520 (= 256 * 3 * 3 * 5) appears to match
# 150 x 150 in-plane inputs rather than 100 x 100 - confirm the actual crop size.
Cropper.setup(items = ((3,25,25), (2,10,10)))

# One pipeline per augmentation; RandomDihedral3D is listed three times, so it is three
# times as likely as the others when a pipeline is picked uniformly at random.
augmentations = (
    RandomBrightness3D,
    RandomContrast3D,
    RandomWarp3D,
    RandomDihedral3D,
    RandomDihedral3D,
    RandomDihedral3D,
    RandomNoise3D,
    RandomRotate3DBy,
)
tfms = [Pipeline([aug(p=0.9), Cropper], split_idx = 0) for aug in augmentations]

In [8]:
class ProstateDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding (pseudo-color volume, class index) pairs.

    Each item is produced by preprocessing the file, applying one randomly
    chosen transform pipeline and converting the result with
    `make_pseudo_color`.

    By default the module-level `load_and_presize` and `tfms` are used (looked
    up at access time). `set_preprocessing` and `set_tfms` override them for a
    single instance.

    NOTE(review): unless `set_tfms` is called, every instance - including ones
    built from validation or test files - draws from the same random
    augmentation pipelines.

    Args:
        files: sequence of paths; the label is derived from each path with `label_func`.
    """

    def __init__(self, files):
        self.files = files
        self.labels = [label_func(f) for f in files]
        self.tcat = Categorize(vocab=labels)
        # None means "fall back to the module-level object".
        self.tfms = None
        self.preprocessing = None

    def __getitem__(self, i):
        """Return `(volume, label)` for index `i`; the label is a 0-dim float tensor."""
        file = self.files[i]
        cls = self.tcat(self.labels[i])
        preprocess = load_and_presize if self.preprocessing is None else self.preprocessing
        vol = preprocess(file)
        vol = self.get_tfm()(vol)
        return (make_pseudo_color(vol), torch.Tensor([cls]).squeeze())

    def set_tfms(self, tfms):
        """Use `tfms` (a sequence of callables) instead of the module-level `tfms`."""
        self.tfms = tfms

    def set_preprocessing(self, func):
        """Use `func` instead of the module-level `load_and_presize`."""
        self.preprocessing = func

    def get_tfm(self):
        """Pick one transform uniformly at random from the active pool.

        The pool is the instance's own list when one was set, otherwise the
        module-level `tfms`. (Previously the index was drawn from `self.tfms`
        but applied to the global list.)
        """
        pool = tfms if self.tfms is None else self.tfms
        return pool[random.randint(0, len(pool) - 1)]

    def __len__(self): return len(self.files)
    
# One dataset per split. The training set uses the oversampled file list, which means
# fewer epochs and less clutter on the screen during training; a significant increase
# in accuracy appeared after 100-150 epochs with the single training subset.
train_ds, valid_ds, test_ds = map(ProstateDataset, (oversampled_train, subset_valid, subset_test))

#train_ds.set_preprocessing(load_and_presize)
#train_ds.set_tfms(tfms)

#valid_ds.set_preprocessing(load_and_presize)
#valid_ds.set_tfms(Pipeline([Cropper])

#test_ds.set_preprocessing(load_and_presize)
#test_ds.set_tfms(Pipeline([Cropper]))


In [9]:
# Settings shared by all three loaders; only the batch size differs per split.
loader_kwargs = dict(pin_memory = False, num_workers = 48)

train_dl = DataLoader(train_ds, batch_size = 24, **loader_kwargs)
valid_dl = DataLoader(valid_ds, batch_size = 20, **loader_kwargs)
test_dl = DataLoader(test_ds, batch_size = 20, **loader_kwargs)

# bundle the loaders in the order train, valid, test
dls = DataLoaders(train_dl, valid_dl, test_dl)

In [10]:
# Call .cuda() on the DataLoaders and rebind `dls` to the result.
dls = dls.cuda()
def loss_func(out, targ):
    """Flattened cross-entropy between `out` and `targ`, with `targ` cast to int64 first.

    The cast is needed because the dataset returns its labels as float tensors.
    """
    criterion = CrossEntropyLossFlat()
    int_targets = targ.long()
    return criterion(out, int_targets)

In [11]:
# Binary ROC-AUC metric object; passed to the Learner's `metrics` list below.
roc = RocAucBinary()

In [12]:
# Learner with a freshly initialised 3D AlexNet, plain SGD and the custom
# cross-entropy wrapper, switched to mixed precision via to_fp16().
learn = Learner(
    dls,
    AlexNet_3D(),
    opt_func = SGD,
    loss_func = loss_func,
    metrics = [error_rate, roc],
).to_fp16()
#learn = learn.to_parallel()

In [13]:
#learn.lr_find()

In [14]:
# Single epoch at a high peak learning rate (0.1) as a first sanity check; the logged
# result below shows an error rate of 0.5.
learn.fit_one_cycle(n_epoch = 1, lr_max = 0.1)

epoch,train_loss,valid_loss,error_rate,roc_auc_score,time
0,0.70771,0.693286,0.5,0.605,03:17


In [None]:
# Collect model outputs and targets from the learner.
# NOTE(review): presumably evaluated on the validation loader (get_preds default) - confirm.
preds, target = learn.get_preds()

In [None]:
# Turn the two-column model output into probabilities and keep column 1 as a NumPy array.
# This cell rebinds `preds`, so it must be run exactly once after get_preds().
class_probs = F.softmax(preds, dim = 1)
preds = class_probs[:, 1].numpy()

When a network predicts (almost) the same output for every input, it is usually because the network is not complex enough to find a pattern between the input vectors and the output vectors; the last output layer then converges towards the average of all the outputs in the dataset.

To overcome this there are a few techniques:

1. Try to do some more preprocessing to your inputs, perhaps a PCA on your attributes.
2. Visualize your layers: feed random vectors as input and check the outputs of each layer. There is probably one layer that outputs almost the same vector every time, which causes problems for the higher-level neurons.
3. Reduce your learning rate.
4. Reduce your batch size.
5. Stack more layers.
6. Check whether your model is actually learning: if you send random noise as your data, the network loss should not decrease.

In [None]:
from sklearn.metrics import roc_curve

# false/true positive rates over all thresholds of the column-1 probabilities
ns_fpr, ns_tpr, _ = roc_curve(target.numpy(), preds)

fig, ax = plt.subplots()
ax.plot(ns_fpr, ns_tpr, linestyle='--', label='ROC Curve')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.legend()
plt.show()

In [None]:
# Display the column-1 probabilities computed above.
preds

In [None]:
# Display the targets returned by get_preds() for comparison with `preds`.
target

In [None]:
# Long training run: 100 epochs with a much lower peak learning rate (0.001) than the
# first one-epoch run above.
learn.fit_one_cycle(n_epoch = 100, lr_max = 0.001)

In [None]:
# Scratch arithmetic: the classifier input size (11520) divided by 24.
# NOTE(review): 24 is presumably the training batch size - confirm the intent or remove.
11520/24