In [None]:
import cv2
import glob
import ntpath
import random
import numpy

from keras.models import Model
from keras.optimizers import SGD 
from keras.metrics import mean_absolute_error
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.layers import Input, AveragePooling3D, Convolution3D, MaxPooling3D, Flatten

In [None]:
# Training driver.
# NOTE: this cell calls load_data, data_generator, step_decay and get_net,
# which are defined in cells further down the notebook -- run those first.
batch_size = 16

trainset, testset = load_data()

# Augmented crops for training, fixed centre crops for validation.
train_gen = data_generator(batch_size, trainset, need_augment=True)
test_gen = data_generator(batch_size, testset, need_augment=False)

learnrate_scheduler = LearningRateScheduler(step_decay)

model = get_net(load_weight_path=None)

# A checkpoint is written after every epoch; epoch number and validation
# loss are substituted into the file name.
checkpoint = ModelCheckpoint(
    "G:/LungCancerPredict/model/" + "{epoch:02d}-{val_loss:.4f}.hd5",
    monitor='val_loss',
    period=1,
)

model.fit_generator(
    train_gen,
    len(trainset) // batch_size,   # whole batches available in the train set
    10,                            # number of epochs
    validation_data=test_gen,
    nb_val_samples=len(testset) // batch_size,
    callbacks=[checkpoint, learnrate_scheduler],
)

model.save("G:/LungCancerPredict/model/the_end.hd5")

In [None]:
def load_data(test_ratio=20):
    """List the extracted nodule images and split them into train/test sets.

    Every ``*.png`` in the hard-coded extraction directory is collected and
    shuffled. The ten annotation values are parsed from each file name, which
    is split on ``_``; the fields are addressed from the end of the name
    (index -12 is the diameter, index -3 the subtlety).

    Args:
        test_ratio: Percentage (0-100) of the samples held out for testing.

    Returns:
        ``(dataset_train, dataset_test)`` -- two disjoint lists of 11-tuples
        ``(path, diameter, malignacy, sphericiy, margin, spiculation, texture,
        calcification, internal_structure, lobulation, subtlety)``.

    Raises:
        IndexError: if a file name has fewer than 12 ``_``-separated fields.
        ValueError: if one of the label fields is not a number.
    """
    samples = glob.glob("G:/LungCancerPredict/extracted/lidc_extracted_image/*.png")
    random.shuffle(samples)
    print(len(samples))

    # Parse the labels from the file name.
    dataset = []
    for sample_path in samples:
        parts = ntpath.basename(sample_path).split("_")
        # Fields -12 .. -3 are already in the order used by the record tuple.
        # Explicit indexing (not a slice) keeps the IndexError on short names.
        labels = tuple(float(parts[position]) for position in range(-12, -2))
        dataset.append((sample_path,) + labels)

    # The first (100 - test_ratio)% go to training, the remainder to testing.
    # The training slice must stop at the split point: handing the whole
    # dataset to training would leak every test sample into the training set.
    split_at = int(len(dataset) * ((100 - test_ratio) / 100.0))
    dataset_train = dataset[:split_at]
    dataset_test = dataset[split_at:]

    print("Train count: ", len(dataset_train), ", Test count: ", len(dataset_test))

    return dataset_train, dataset_test

In [None]:
def data_generator(batch_size, record_list, need_augment):
    """Endlessly yield ``(x, labels)`` batches of cropped 32x32x32 cubes.

    Keras' ``fit_generator`` expects a generator that loops over its data
    indefinitely, so the records are cycled pass after pass. Records left
    over at the end of a pass (fewer than ``batch_size``) are dropped.

    Args:
        batch_size: Number of samples per yielded batch.
        record_list: Records shaped like the tuples built by ``load_data``:
            index 0 is the image path, indices 1-10 are the labels. When
            ``need_augment`` is true the list is shuffled IN PLACE before
            every pass.
        need_augment: If true, use a random crop position and random flips;
            otherwise take the centre crop.

    Yields:
        ``(x, y)`` where ``x`` stacks the normalised cubes along axis 0
        (``(batch_size, 32, 32, 32, 1)``) and ``y`` maps each output name to
        a ``(batch_size, 1)`` array of labels.

    Note:
        If ``record_list`` cannot fill a single batch, the generator ends
        without yielding instead of looping forever.
    """
    crop_size = 32
    cube_size = 64

    # (output name, index of the label inside a record)
    label_fields = (
        ("out_diamter", 1),
        ("out_malignancy", 2),
        ("out_sphericiy", 3),
        ("out_margin", 4),
        ("out_spiculation", 5),
        ("out_texture", 6),
        ("out_calcification", 7),
        ("out_internal_structure", 8),
        ("out_lobulation", 9),
        ("out_subtlety", 10),
    )

    # An endless loop over too few records would spin without ever yielding.
    if len(record_list) < max(batch_size, 1):
        return

    while True:
        if need_augment:
            random.shuffle(record_list)

        # Build batches of batch_size on the fly.
        img_list = []
        label_lists = {key: [] for key, _ in label_fields}

        for record in record_list:
            cube_image = load_cube_img(record[0], 8, 8, cube_size)

            if need_augment:
                # Integer arithmetic: random.randint rejects float bounds on
                # recent Python 3 releases.
                wiggle_indent = crop_size // 4
                wiggle = cube_size - crop_size - crop_size // 2 - 1
                indent_x = wiggle_indent + random.randint(0, wiggle)
                indent_y = wiggle_indent + random.randint(0, wiggle)
                indent_z = wiggle_indent + random.randint(0, wiggle)
            else:
                indent_x = indent_y = indent_z = (cube_size - crop_size) // 2

            cube_image = cube_image[
                indent_z:indent_z + crop_size,
                indent_y:indent_y + crop_size,
                indent_x:indent_x + crop_size,
            ]

            if need_augment:
                # Each flip is applied independently with roughly 50% odds.
                if random.randint(0, 100) > 50:
                    cube_image = numpy.fliplr(cube_image)
                if random.randint(0, 100) > 50:
                    cube_image = numpy.flipud(cube_image)
                if random.randint(0, 100) > 50:
                    cube_image = cube_image[:, :, ::-1]
                if random.randint(0, 100) > 50:
                    cube_image = cube_image[:, ::-1, :]

            img_list.append(normalize_and_expand(cube_image))
            for key, field in label_fields:
                value = record[field]
                # Only the diameter is rounded (to 4 decimals).
                label_lists[key].append(round(value, 4) if field == 1 else value)

            if len(img_list) >= batch_size:
                x = numpy.vstack(img_list)
                y = {key: numpy.vstack(values) for key, values in label_lists.items()}

                yield x, y

                img_list = []
                label_lists = {key: [] for key, _ in label_fields}

def load_cube_img(src_path, rows, cols, size):
    """Read a tiled grayscale image and unstack it into a 3-D volume.

    The image is treated as a grid of ``rows`` x ``cols`` tiles, each
    ``size`` x ``size`` pixels, laid out row by row. Tile ``k`` (row-major)
    becomes slice ``k`` of the result.

    Args:
        src_path: Path of the image file to read.
        rows: Number of tile rows in the image.
        cols: Number of tile columns in the image.
        size: Edge length of one square tile, in pixels.

    Returns:
        A float64 array of shape ``(rows * cols, size, size)``.

    Raises:
        IOError: if the image cannot be read.
    """
    img = cv2.imread(src_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise IOError("Could not read image: %s" % src_path)

    res = numpy.zeros((rows * cols, size, size))

    for tile_index in range(rows * cols):
        row, col = divmod(tile_index, cols)
        src_y = row * size
        src_x = col * size
        res[tile_index] = img[src_y:src_y + size, src_x:src_x + size]

    return res

def normalize_and_expand(img):
    """Centre and scale a 3-D volume, then add batch and channel axes.

    The input is converted to float32 (as a copy, so the caller's array is
    untouched), shifted by the mean pixel value 41, divided by 255 and
    reshaped from ``(d, h, w)`` to ``(1, d, h, w, 1)``.
    """
    centered = img.astype(numpy.float32) - 41  # 41 is MEAN_PIXEL_VALUE
    scaled = centered / 255.

    # Read the three axes by index so a lower-rank input fails with IndexError.
    depth, height, width = (scaled.shape[axis] for axis in range(3))

    return scaled.reshape(1, depth, height, width, 1)

In [None]:
def step_decay(epoch):
    """Learning-rate schedule: 0.001 up to and including epoch 5, then 0.0001.

    The chosen rate and the epoch are printed on every call.
    """
    learnrate = 0.0001 if epoch > 5 else 0.001

    print("learnrate: ", learnrate, " epoch: ", epoch)

    return learnrate

In [None]:
def get_net(input_shape=(32, 32, 32, 1), load_weight_path=None) -> Model:
    """Build and compile the 3-D convolutional regression network.

    The network has four convolution/pooling groups followed by a
    convolutional head. Its single output, the flattened layer named
    ``out_texture``, is trained with mean absolute error.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        load_weight_path: Optional weights file to load before compiling.

    Returns:
        The compiled model; its summary is printed as a side effect.
    """

    def conv_relu(tensor, filters, layer_name):
        # 3x3x3 convolution, 'same' padding, stride 1, ReLU activation.
        layer = Convolution3D(filters, 3, 3, 3, activation='relu', border_mode='same', name=layer_name, subsample=(1, 1, 1))
        return layer(tensor)

    def max_pool(tensor, window, layer_name):
        # Non-overlapping max pooling: the stride equals the pool window.
        layer = MaxPooling3D(pool_size=window, strides=window, border_mode='valid', name=layer_name)
        return layer(tensor)

    inputs = Input(shape=input_shape, name="input_1")

    # 1st layer group (the first axis is halved before any convolution)
    net = AveragePooling3D(pool_size=(2, 1, 1), strides=(2, 1, 1), border_mode="same")(inputs)
    net = conv_relu(net, 64, 'conv1')
    net = max_pool(net, (1, 2, 2), 'pool1')

    # 2nd layer group
    net = conv_relu(net, 128, 'conv2')
    net = max_pool(net, (2, 2, 2), 'pool2')

    # 3rd layer group
    net = conv_relu(net, 256, 'conv3a')
    net = conv_relu(net, 256, 'conv3b')
    net = max_pool(net, (2, 2, 2), 'pool3')

    # 4th layer group
    net = conv_relu(net, 512, 'conv4a')
    net = conv_relu(net, 512, 'conv4b')
    net = max_pool(net, (2, 2, 2), 'pool4')

    # output head
    last64 = Convolution3D(64, 2, 2, 2, activation="relu", name="last_64")(net)
    out_attribute = Convolution3D(1, 1, 1, 1, activation=None, name="out_attribute_last")(last64)
    out_attribute = Flatten(name="out_texture")(out_attribute)

    model = Model(input=inputs, output=out_attribute)

    if load_weight_path is not None:
        model.load_weights(load_weight_path, by_name=False)

    model.compile(
        optimizer=SGD(lr=0.001, momentum=0.9, nesterov=True),
        loss={"out_texture": "mean_absolute_error"},
        metrics={"out_texture": [mean_absolute_error]},
    )
    model.summary(line_length=140)

    return model