# Processed LIDC data can be found at: https://drive.google.com/drive/folders/1TLpPvR_9hfNdUbD9dFIXNpJ7m50VmD19?usp=sharing

In [None]:
!pip install -r requirements.txt

In [14]:
#!/usr/bin/env python
# coding: utf-8
from __future__ import print_function
import warnings
warnings.filterwarnings('ignore')
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'  # or any {'0', '1', '2'}
import keras
print("keras = {}".format(keras.__version__))
import tensorflow as tf
print("tensorflow-gpu = {}".format(tf.__version__))
try:
    # Limit TensorFlow's Python-side logger to ERROR and above.
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
except AttributeError:
    # Best effort: TensorFlow builds that do not expose `tf.compat.v1` keep
    # their default verbosity. Only this expected failure is ignored, so
    # unrelated errors (and Ctrl-C) are no longer silently swallowed.
    pass
import random
import shutil
import argparse
import sklearn
from pathlib import Path
from utils import *
from unet3d import *
from config import *
from ncs_data import *

class set_args:
    """Notebook stand-in for parsed command-line arguments.

    The attributes are plain class-level defaults; an instance is handed to
    ``ncs_config`` and individual fields (e.g. ``suffix``) are overwritten
    by later cells before the configuration is rebuilt.
    """

    # --- hardware -------------------------------------------------------
    # GPU index; exported as CUDA_VISIBLE_DEVICES later in this cell
    # whenever it is not None.
    gpu = 0

    # --- experiment selection (consumed by ncs_config; exact meaning is
    # --- defined there — TODO confirm against config.py) ------------------
    data = None
    apps = 'ncs'
    run = 4
    cv = None
    subsetting = None

    # 'genesis' or 'random' in this notebook; reassigned before each
    # experiment section.
    suffix = 'genesis'

    # Forwarded to model_setup(..., task=args.task) by the training cells.
    task = 'segmentation'
    
# Instantiate the argument namespace used by every later cell.
args = set_args()

# Restrict CUDA to the requested device; setting gpu to None leaves the
# environment variable untouched.
if args.gpu is not None:
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    

# Build the experiment configuration from the arguments (ncs_config comes
# from the project's star imports).
conf = ncs_config(args)

keras = 2.2.4
tensorflow-gpu = 1.15.0


In [2]:
# Name of the crop variant folder; '32x64x64-10-shift-8' is the alternative
# that was previously toggled here.
key = '32x64x64-10'

# Pre-processed cohort directories (machine-specific absolute paths).
_COHORT_DIRS = (
    r'C:\Users\test\Desktop\Leon\Datasets\ASUS_Nodules-preprocess\ASUS-Malignant',
    r'C:\Users\test\Desktop\Leon\Datasets\ASUS_Nodules-preprocess\ASUS-Benign',
)


def _crop_roots(leaf):
    """Return <cohort>/crop/<key>/<label>/<leaf> for each label and cohort.

    Ordering is: positive (Malignant, Benign), then negative (Malignant,
    Benign), so the 'Image' and 'Mask' lists stay index-aligned.
    """
    return [
        os.path.join(cohort_dir, 'crop', key, label, leaf)
        for label in ('positive', 'negative')
        for cohort_dir in _COHORT_DIRS
    ]


input_roots = _crop_roots('Image')
target_roots = _crop_roots('Mask')


In [17]:
def _case_keys(prefix, first, last):
    """Return the IDs ``<prefix><n:04d>`` for every n with first <= n <= last."""
    return [f'{prefix}{number:04d}' for number in range(first, last + 1)]


# Case IDs per split: '1m' cases followed by '1B' cases.
# NOTE(review): every validation key (1m0037-1m0038, 1B0021-1B0022) is also
# part of the test keys, so the two splits are not disjoint — confirm this
# is intended.
train_file_keys = _case_keys('1m', 1, 36) + _case_keys('1B', 1, 20)
valid_file_keys = _case_keys('1m', 37, 38) + _case_keys('1B', 21, 22)
test_file_keys = _case_keys('1m', 37, 44) + _case_keys('1B', 21, 25)


def _with_channel_axis(volumes, masks):
    """Insert a new axis at position 1 of both arrays and return the pair."""
    return volumes[:, np.newaxis], masks[:, np.newaxis]


# NOTE(review): train and test hard-code remove_zeros=True, whereas the
# validation split follows conf.remove_zeros — confirm the difference is
# intended.

# --- training split ---
train_input_samples = get_samples(input_roots, train_file_keys)
train_target_samples = get_samples(target_roots, train_file_keys)
x_train, y_train = _with_channel_axis(
    *load_data(train_input_samples, train_target_samples, remove_zeros=True))

# --- validation split ---
valid_input_samples = get_samples(input_roots, valid_file_keys)
valid_target_samples = get_samples(target_roots, valid_file_keys)
x_valid, y_valid = _with_channel_axis(
    *load_data(valid_input_samples, valid_target_samples, remove_zeros=conf.remove_zeros))

# --- test split ---
test_input_samples = get_samples(input_roots, test_file_keys)
test_target_samples = get_samples(target_roots, test_file_keys)
x_test, y_test = _with_channel_axis(
    *load_data(test_input_samples, test_target_samples, remove_zeros=True))

In [18]:
# For each split, report the array shape followed by its min ~ max value.
for template, volume in (
    ('x_train: {} | {} ~ {}', x_train),
    ('y_train: {} | {} ~ {}', y_train),
    ('x_valid: {} | {} ~ {}', x_valid),
    ('y_valid: {} | {} ~ {}', y_valid),
    ('x_test: {} | {} ~ {}', x_test),
    ('y_test: {} | {} ~ {}', y_test),
):
    print(template.format(volume.shape, np.min(volume), np.max(volume)))

x_train: (830, 1, 64, 64, 32) | 0.0 ~ 1.0
y_train: (830, 1, 64, 64, 32) | 0 ~ 1
x_valid: (44, 1, 64, 64, 32) | 0.0 ~ 1.0
y_valid: (44, 1, 64, 64, 32) | 0 ~ 1
x_test: (19, 1, 64, 64, 32) | 0.0 ~ 1.0
y_test: (19, 1, 64, 64, 32) | 0 ~ 1


### Inspect zeros

In [5]:
def _count_empty_masks(masks):
    """Count the entries of `masks` whose element sum is not positive."""
    return sum(1 for mask in masks if np.sum(mask) <= 0)


# Number of samples per split whose target mask sums to zero or less.
train_zeros = _count_empty_masks(y_train)
valid_zeros = _count_empty_masks(y_valid)
test_zeros = _count_empty_masks(y_test)

print('Zeros sample')
print(f'Train {train_zeros} Valid {valid_zeros} Test {test_zeros}')

Zeros sample
Train 762 Valid 37 Test 163


# Fine-tune Models Genesis

In [6]:
# Select the 'genesis' experiment variant and rebuild the configuration so
# that the suffix-dependent fields are refreshed (presumably exp_name and
# the pre-trained weights path — confirm in ncs_config).
args.suffix = 'genesis'
conf = ncs_config(args)
# Print the resolved settings, then persist them via the config's own
# save() method.
conf.display()
conf.save()


Configurations:
arch                           Vnet
batch_size                     16
data                           data/ncs
exp_name                       Vnet-genesis
input_cols                     64
input_deps                     32
input_rows                     64
logs_path                      models/ncs\run_5\logs
lr                             0.001
max_queue_size                 1
model_path                     models/ncs\run_5
nb_epoch                       10000
optimizer                      adam
patience                       50
remove_zeros                   False
verbose                        1
weights                        pretrained_weights/Genesis_Chest_CT.h5
workers                        1




### Train

In [7]:
# Build the 3D U-Net for inputs of shape (1, rows, cols, deps) and, when the
# configuration names a weights file, initialise the network from it.
model = unet_model_3d((1,conf.input_rows,conf.input_cols,conf.input_deps), batch_normalization=True)
if conf.weights is not None:
    print("[INFO] Load pre-trained weights from {}".format(conf.weights))
    model.load_weights(conf.weights)
model, callbacks = model_setup(model, conf, task=args.task)

# Find the largest batch size that fits into GPU memory: on an out-of-memory
# error, shrink the batch size by 2 (never below 1) and retry.
#
# Unlike a `while conf.batch_size > 1` loop, this always attempts training at
# least once (even when the configured batch size is already 1), also tries
# batch size 1, and re-raises the OOM error instead of silently finishing
# without having trained and leaving conf.batch_size at 0.
while True:
    try:
        model.fit(x_train, y_train,
                  validation_data=(x_valid, y_valid),
                  batch_size=conf.batch_size,
                  epochs=conf.nb_epoch,
                  verbose=conf.verbose,
                  shuffle=True,
                  callbacks=callbacks)
        break
    except tf.errors.ResourceExhaustedError:
        if conf.batch_size <= 1:
            # Nothing smaller left to try; surface the failure to the user.
            raise
        conf.batch_size = max(1, int(conf.batch_size - 2))
        print("\n> Batch size = {}".format(conf.batch_size))

[INFO] Load pre-trained weights from pretrained_weights/Genesis_Chest_CT.h5
Train on 830 samples, validate on 44 samples
Epoch 1/10000

> Batch size = 14
Train on 830 samples, validate on 44 samples
Epoch 1/10000

> Batch size = 12
Train on 830 samples, validate on 44 samples
Epoch 1/10000

> Batch size = 10
Train on 830 samples, validate on 44 samples
Epoch 1/10000

> Batch size = 8
Train on 830 samples, validate on 44 samples
Epoch 1/10000
  8/830 [..............................] - ETA: 19:22 - loss: 1.0000 - mean_iou: 0.4958 - dice_coef: 7.5583e-06
> Batch size = 6
Train on 830 samples, validate on 44 samples
Epoch 1/10000

Epoch 00001: val_loss improved from inf to 0.98512, saving model to models/ncs\run_5\Vnet-genesis.h5
Epoch 2/10000

Epoch 00002: val_loss improved from 0.98512 to 0.49298, saving model to models/ncs\run_5\Vnet-genesis.h5
Epoch 3/10000

Epoch 00003: val_loss improved from 0.49298 to 0.27302, saving model to models/ncs\run_5\Vnet-genesis.h5
Epoch 4/10000

Epoch 000

KeyboardInterrupt: 

### Test

In [15]:
# Evaluate on the held-out test split. The predictions are stored in
# `p_test` and the visualization cell pairs p_test[i] with x_test[i] and
# y_test[i], so the evaluated arrays must be the test arrays (evaluating the
# training arrays here would misalign that pairing).
x_data, y_data = x_test, y_test

# Rebuild the architecture and restore the checkpoint written during
# training at <model_path>/<exp_name>.h5.
model = unet_model_3d((1,conf.input_rows,conf.input_cols,conf.input_deps), batch_normalization=True)
weights_file = os.path.join(conf.model_path, conf.exp_name+".h5")
print("[INFO] Load trained model from {}".format(weights_file))
model.load_weights(weights_file)

p_test = segmentation_model_evaluation(model=model, config=conf, x=x_data, y=y_data, note=conf.exp_name)

[INFO] Load trained model from models/ncs\run_4\Vnet-genesis.h5
Dice 51.11 %
[INFO] Vnet-genesis
x:  (19, 1, 64, 64, 32) | 0.0 ~ 1.0
y:  (19, 1, 64, 64, 32) | 0.0 ~ 1.0
p:  (19, 1, 64, 64, 32) | 0.0 ~ 0.9
[EVAL] Dice = 65.10%
[EVAL] IoU  = 11.46%


### Visualization

In [None]:
import os

# Show where relative output paths (e.g. 'figures/...') will resolve.
print('CWD', os.getcwd())

# Drop singleton axes from the predictions before indexing per sample.
p_test = np.squeeze(p_test)

num_cases = x_test.shape[0]
for i in range(num_cases):
    figure_name = f'figures/tmh/img{i:03d}.png'
    plot_image_truth_prediction(x_test[i], y_test[i], p_test[i], rows=5, cols=5, name=figure_name)

# Train from scratch

In [None]:
# Switch to the 'random' experiment variant (the "train from scratch"
# section) and rebuild the configuration from the updated arguments.
args.suffix = 'random'
conf = ncs_config(args)
# Print the resolved settings for this run.
conf.display()

### Train

In [None]:
# Build the 3D U-Net for inputs of shape (1, rows, cols, deps); weights are
# loaded only when the configuration names a weights file.
model = unet_model_3d((1,conf.input_rows,conf.input_cols,conf.input_deps), batch_normalization=True)
if conf.weights is not None:
    print("[INFO] Load pre-trained weights from {}".format(conf.weights))
    model.load_weights(conf.weights)
model, callbacks = model_setup(model, conf, task=args.task)

# Find the largest batch size that fits into GPU memory: on an out-of-memory
# error, shrink the batch size by 2 (never below 1) and retry.
#
# Unlike a `while conf.batch_size > 1` loop, this always attempts training at
# least once (even when the configured batch size is already 1), also tries
# batch size 1, and re-raises the OOM error instead of silently finishing
# without having trained and leaving conf.batch_size at 0.
while True:
    try:
        model.fit(x_train, y_train,
                  validation_data=(x_valid, y_valid),
                  batch_size=conf.batch_size,
                  epochs=conf.nb_epoch,
                  verbose=conf.verbose,
                  shuffle=True,
                  callbacks=callbacks)
        break
    except tf.errors.ResourceExhaustedError:
        if conf.batch_size <= 1:
            # Nothing smaller left to try; surface the failure to the user.
            raise
        conf.batch_size = max(1, int(conf.batch_size - 2))
        print("\n> Batch size = {}".format(conf.batch_size))

### Test

In [None]:
# Rebuild the network and restore the checkpoint stored at
# <model_path>/<exp_name>.h5 for the current configuration.
input_shape = (1, conf.input_rows, conf.input_cols, conf.input_deps)
model = unet_model_3d(input_shape, batch_normalization=True)

checkpoint_file = os.path.join(conf.model_path, conf.exp_name + ".h5")
print("[INFO] Load trained model from {}".format(checkpoint_file))
model.load_weights(checkpoint_file)

# Evaluate on the test split and keep the predictions for visualization.
p_test = segmentation_model_evaluation(
    model=model,
    config=conf,
    x=x_test,
    y=y_test,
    note=conf.exp_name,
)

### Visualization

In [None]:
# Drop singleton axes from the predictions before indexing per sample.
p_test = np.squeeze(p_test)

# Plot image, ground truth and prediction for each test sample.
num_cases = x_test.shape[0]
for i in range(num_cases):
    plot_image_truth_prediction(x_test[i], y_test[i], p_test[i], rows=5, cols=5)