In [None]:
import cv2
import glob
import ntpath
import random
import numpy

from keras.models import Model
from keras.optimizers import SGD 
from keras.metrics import binary_accuracy, binary_crossentropy
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.layers import Input, AveragePooling3D, Convolution3D, MaxPooling3D, Flatten

In [None]:
# Training driver: build the data streams, the network and the callbacks, then train and save.
# NOTE(review): load_data, data_generator, step_decay and get_net are defined in cells
# further down the notebook, so those cells have to be executed before this one.
batch_size = 32

trainset, testset = load_data()

# Only the training stream is augmented.
train_gen = data_generator(batch_size, trainset, need_augment=True)
test_gen = data_generator(batch_size, testset, need_augment=False)

learnrate_scheduler = LearningRateScheduler(step_decay)

model = get_net(load_weight_path=None)

# Checkpoint files are named after the epoch number and the validation loss.
checkpoint = ModelCheckpoint(
    "G:/LungCancerPredict/model/" + "{epoch:02d}-{val_loss:.4f}.hd5",
    monitor='val_loss',
    period=1,
)

# NOTE(review): the per-epoch counts are passed as "records // batch_size"; how Keras
# interprets them (samples vs. batches) depends on the installed version -- confirm.
model.fit_generator(
    train_gen,
    int(len(trainset) / batch_size),
    10,
    validation_data=test_gen,
    nb_val_samples=int(len(testset) / batch_size),
    callbacks=[checkpoint, learnrate_scheduler],
)

model.save("G:/LungCancerPredict/model/the_end.hd5")

In [None]:
def load_data(test_ratio=20, neg_per_pos=20):
    """Build shuffled, non-overlapping train/test record lists from the cube images.

    Every record is a ``(path, class_label, size_label)`` tuple. Negative cubes are
    stored as ``(path, 0, 0)``. One positive cube is inserted for every
    ``neg_per_pos`` negatives, cycling through the positives when they run out. The
    labels of a positive cube are parsed from its file name split on ``_``: the
    second-to-last field is the class label, the third-to-last the size label.

    The "candidates" negatives are added three times, and positives may repeat, so
    the same file can legitimately occur more than once in the mixed list (and
    therefore on both sides of the split); the two returned slices themselves never
    overlap.

    Args:
        test_ratio: Percentage (0-100) of the mixed records held out for testing.
        neg_per_pos: Number of negative records per inserted positive record.

    Returns:
        ``(dataset_train, dataset_test)``: the first ``100 - test_ratio`` percent of
        the shuffled records and the remaining records.

    Raises:
        ValueError: If negative cubes were found but no positive cube exists.
    """
    # load positive
    pos_samples = glob.glob("G:/LungCancerPredict/generated/luna16_positive_cube/*.png")
    random.shuffle(pos_samples)
    print(len(pos_samples))

    # load negative
    neg_samples_tn = glob.glob("G:/LungCancerPredict/generated/luna16_negative_cube/*_tn.png")
    print(len(neg_samples_tn))
    neg_samples_fp = glob.glob("G:/LungCancerPredict/generated/luna16_negative_cube/*_fp.png")
    print(len(neg_samples_fp))
    neg_samples_candidates = glob.glob("G:/LungCancerPredict/generated/luna16_negative_cube/*_candidates.png")
    print(len(neg_samples_candidates))
    # The candidate negatives are deliberately listed three times (oversampling).
    neg_samples = neg_samples_fp + neg_samples_tn + neg_samples_candidates * 3
    random.shuffle(neg_samples)
    print(len(neg_samples))

    if neg_samples and not pos_samples:
        # Previously this surfaced as a bare IndexError on the first positive lookup.
        raise ValueError("No positive cube images found; cannot mix positives into the dataset")

    # Mix positive and negative samples.
    pos_index = 0
    dataset = []
    for index, neg_sample in enumerate(neg_samples):
        dataset.append((neg_sample, 0, 0))

        if index % neg_per_pos == 0:
            pos_sample = pos_samples[pos_index]

            parts = ntpath.basename(pos_sample).split("_")
            class_label = int(parts[-2])
            size_label = int(parts[-3])

            dataset.append((pos_sample, class_label, size_label))
            # Wrap around so the positives are reused once exhausted.
            pos_index = (pos_index + 1) % len(pos_samples)

    random.shuffle(dataset)
    counter = int(len(dataset) * ((100 - test_ratio) / 100.0))
    # The training set is only the head of the list; using the whole list here
    # would leak every test record into training.
    dataset_train = dataset[:counter]
    dataset_test = dataset[counter:]

    print("Train count: ", len(dataset_train), ", Test count: ", len(dataset_test))

    return dataset_train, dataset_test

In [None]:
def data_generator(batch_size, record_list, need_augment, loop_forever=True):
    """Yield batches of cropped 32x32x32 cubes together with their labels.

    Records with class label 0 are loaded as 48-sized cubes and cropped at a random
    offset. Records with class label 1 are loaded as 64-sized cubes and cropped
    around the centre, with a random offset when ``need_augment`` is true. With
    ``need_augment`` the records are shuffled before every pass and each crop is
    randomly flipped.

    Keras' ``fit_generator`` expects its generators to loop over the data
    indefinitely, so by default this generator restarts after every pass. Samples
    left over at the end of a pass are carried into the first batch of the next
    pass; in single-pass mode an incomplete final batch is dropped.

    Args:
        batch_size: Number of samples per yielded batch.
        record_list: Sequence of ``(path, class_label, size_label)`` records. The
            sequence is copied and never modified.
        need_augment: Enables shuffling, random crop offsets for positives and
            random flips.
        loop_forever: When true (default) iterate over the records endlessly;
            when false stop after a single pass.

    Yields:
        ``(x, {"out_class": y_class, "out_malignancy": y_size})`` where ``x`` is
        the vertical stack of the normalized cubes and the label arrays are
        column vectors with one row per sample.

    Raises:
        ValueError: If a record carries a class label other than 0 or 1.
    """
    crop_size = 32
    neg_cube_size, pos_cube_size = 48, 64

    # Copy so that shuffling does not reorder the caller's list.
    records = list(record_list)
    if not records:
        # Nothing to iterate over; an endless loop would never yield.
        return

    # Generate the data dynamically, batch_size samples at a time.
    img_list, class_list, size_list = [], [], []

    while True:
        if need_augment:
            random.shuffle(records)

        for record in records:
            class_label, size_label = record[1], record[2]

            if class_label == 0:
                cube_image = load_cube_img(record[0], 6, 8, neg_cube_size)
                wiggle = neg_cube_size - crop_size - 1
                indent_x, indent_y, indent_z = (random.randint(0, wiggle) for _ in range(3))
            elif class_label == 1:
                cube_image = load_cube_img(record[0], 8, 8, pos_cube_size)
                if need_augment:
                    # Integer division keeps the randint bounds integral;
                    # random.randint rejects float bounds on Python 3.12+.
                    wiggle_indent = crop_size // 4
                    wiggle = pos_cube_size - crop_size - crop_size // 2 - 1
                    indent_x = wiggle_indent + random.randint(0, wiggle)
                    indent_y = wiggle_indent + random.randint(0, wiggle)
                    indent_z = wiggle_indent + random.randint(0, wiggle)
                else:
                    indent_x = indent_y = indent_z = (pos_cube_size - crop_size) // 2
            else:
                # Without this check the previous sample's cube would be reused.
                raise ValueError("Unsupported class label {!r} for record {!r}".format(class_label, record[0]))

            cube_image = cube_image[indent_z:indent_z + crop_size,
                                    indent_y:indent_y + crop_size,
                                    indent_x:indent_x + crop_size]

            if need_augment:
                cube_image = _random_flips(cube_image)

            img_list.append(normalize_and_expand(cube_image))
            class_list.append(class_label)
            size_list.append(size_label)

            if len(img_list) >= batch_size:
                x = numpy.vstack(img_list)
                y_class = numpy.vstack(class_list)
                y_size = numpy.vstack(size_list)

                yield x, {"out_class": y_class, "out_malignancy": y_size}

                img_list, class_list, size_list = [], [], []

        if not loop_forever:
            break


def _random_flips(cube_image):
    """Apply four independent random flips to a cube (augmentation helper)."""
    if random.randint(0, 100) > 50: cube_image = numpy.fliplr(cube_image)
    if random.randint(0, 100) > 50: cube_image = numpy.flipud(cube_image)
    if random.randint(0, 100) > 50: cube_image = cube_image[:, :, ::-1]
    if random.randint(0, 100) > 50: cube_image = cube_image[:, ::-1, :]
    return cube_image

def load_cube_img(src_path, rows, cols, size):
    """Read a tiled grayscale mosaic image and unpack it into a stack of tiles.

    The image is treated as a grid of ``rows`` x ``cols`` square tiles of
    ``size`` x ``size`` pixels. Tiles are read row by row, left to right, and
    tile ``(row, col)`` is stored at index ``row * cols + col`` of the result.

    Args:
        src_path: Path of the mosaic image.
        rows: Number of tile rows in the mosaic.
        cols: Number of tile columns in the mosaic.
        size: Edge length of one tile in pixels.

    Returns:
        A float array of shape ``(rows * cols, size, size)``.

    Raises:
        IOError: If the image cannot be read.
        ValueError: If the image is smaller than ``rows * size`` by ``cols * size``.
    """
    img = cv2.imread(src_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        raise IOError("Could not read cube image: {}".format(src_path))

    needed_height, needed_width = rows * size, cols * size
    if img.shape[0] < needed_height or img.shape[1] < needed_width:
        raise ValueError(
            "Cube image {} has shape {}, expected at least {}x{}".format(
                src_path, img.shape, needed_height, needed_width))

    res = numpy.zeros((rows * cols, size, size))

    for row in range(rows):
        src_y = row * size
        for col in range(cols):
            src_x = col * size
            res[row * cols + col] = img[src_y:src_y + size, src_x:src_x + size]

    return res

def normalize_and_expand(img):
    """Centre and scale a 3-D cube and wrap it with batch and channel axes.

    The cube is converted to float32, 41 (the mean pixel value) is subtracted
    and the result is divided by 255. The input array is not modified.

    Args:
        img: Array with three axes.

    Returns:
        A float32 array of shape ``(1, d0, d1, d2, 1)`` where ``(d0, d1, d2)`` is
        the shape of ``img``.
    """
    mean_pixel_value = 41
    scaled = (img.astype(numpy.float32) - mean_pixel_value) / 255.
    dim0, dim1, dim2 = scaled.shape[0], scaled.shape[1], scaled.shape[2]
    return scaled.reshape(1, dim0, dim1, dim2, 1)

In [None]:
def step_decay(epoch):
    """Return the learning rate for an epoch and print it.

    Args:
        epoch: Epoch number handed in by the caller.

    Returns:
        0.0001 when ``epoch`` is greater than 5, otherwise 0.001.
    """
    learning_rate = 0.0001 if epoch > 5 else 0.001
    print("learnrate: ", learning_rate, " epoch: ", epoch)
    return learning_rate

In [None]:
def get_net(input_shape=(32, 32, 32, 1), load_weight_path=None) -> Model:
    """Build and compile the 3-D convolutional classification network.

    The network is one average-pooling layer, four convolution/max-pooling
    groups, a 64-filter convolution and a single-filter sigmoid convolution
    that is flattened into the output named ``out_class``. It is compiled with
    Nesterov SGD and binary cross-entropy, and its summary is printed.

    Args:
        input_shape: Shape of one input sample.
        load_weight_path: Optional weights file loaded into the model before
            compiling; skipped when ``None``.

    Returns:
        The compiled model.
    """
    def conv_relu(filters, layer_name):
        # 3x3x3 ReLU convolution with "same" padding and unit stride.
        return Convolution3D(filters, 3, 3, 3, activation='relu', border_mode='same',
                             name=layer_name, subsample=(1, 1, 1))

    def max_pool(window, layer_name):
        # Max pooling whose stride equals its window.
        return MaxPooling3D(pool_size=window, strides=window, border_mode='valid',
                            name=layer_name)

    inputs = Input(shape=input_shape, name="input_1")
    net = AveragePooling3D(pool_size=(2, 1, 1), strides=(2, 1, 1), border_mode="same")(inputs)

    # 1st layer group
    net = conv_relu(64, 'conv1')(net)
    net = max_pool((1, 2, 2), 'pool1')(net)

    # 2nd layer group
    net = conv_relu(128, 'conv2')(net)
    net = max_pool((2, 2, 2), 'pool2')(net)

    # 3rd layer group
    net = conv_relu(256, 'conv3a')(net)
    net = conv_relu(256, 'conv3b')(net)
    net = max_pool((2, 2, 2), 'pool3')(net)

    # 4th layer group
    net = conv_relu(512, 'conv4a')(net)
    net = conv_relu(512, 'conv4b')(net)
    net = max_pool((2, 2, 2), 'pool4')(net)

    # Output head
    last64 = Convolution3D(64, 2, 2, 2, activation="relu", name="last_64")(net)
    class_map = Convolution3D(1, 1, 1, 1, activation="sigmoid", name="out_class_last")(last64)
    out_class = Flatten(name="out_class")(class_map)

    model = Model(input=inputs, output=out_class)

    if load_weight_path is not None:
        model.load_weights(load_weight_path, by_name=False)

    model.compile(
        optimizer=SGD(lr=0.001, momentum=0.9, nesterov=True),
        loss={"out_class": "binary_crossentropy"},
        metrics={"out_class": [binary_accuracy, binary_crossentropy]},
    )
    model.summary(line_length=140)

    return model