In [None]:
import os
import cv2
import glob
import ntpath
import random
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn import over_sampling
from keras.models import Model
from keras.optimizers import SGD
from keras.utils import np_utils 
from keras.layers.advanced_activations import ReLU
from keras.callbacks import LearningRateScheduler,ModelCheckpoint
from keras.metrics import categorical_accuracy, categorical_crossentropy
from keras.layers import Input,AveragePooling2D,Convolution2D,BatchNormalization,MaxPooling2D,Concatenate,GlobalMaxPooling2D,Dense,concatenate

In [None]:
# Train the two-input network, writing one checkpoint per epoch and the final
# model at the end.
batch_size = 16
epochs = 10
model_dir = "G:/LiverCancer/model/"

trainset, testset = load_data()

# Training batches are shuffled and randomly flipped; hold-out batches are not.
train_gen = data_generator(batch_size, trainset, need_augment=True)
test_gen = data_generator(batch_size, testset, need_augment=False)

learnrate_scheduler = LearningRateScheduler(step_decay)

model = get_net(load_weight_path=None)

# Checkpoint files are named after the epoch number and its validation loss.
checkpoint = ModelCheckpoint(model_dir + "{epoch:02d}-{val_loss:.4f}.hd5", monitor='val_loss', period=1)

# Keras 2 argument names are spelled out so the call neither depends on
# positional order nor on the legacy Keras 1 keyword `nb_val_samples`.
# Only whole batches are counted, hence the integer division.
# NOTE: fit_generator keeps drawing from the same two generator objects for
# every epoch.
model.fit_generator(
    train_gen,
    steps_per_epoch=len(trainset) // batch_size,
    epochs=epochs,
    validation_data=test_gen,
    validation_steps=len(testset) // batch_size,
    callbacks=[checkpoint, learnrate_scheduler],
)

model.save(model_dir + "the_end.hd5")

In [None]:
# File-name grade token -> prefix of the biopsy image a nucleus crop came from.
_BIOPSY_PREFIX_BY_GRADE = {
    "grade0": "ImageN.",
    "grade1": "Image1-",
    "grade2": "Image2-",
    "grade3": "Image3-",
    "grade4": "Image4-",
}


def _source_biopsy_name(nucleus_path):
    """Return the biopsy file name encoded in a nucleus crop's file name.

    The base name is split on "_": the first two pieces joined form the grade
    token ("grade0" .. "grade4") and the third piece is the biopsy id.
    Returns None when the name has fewer than three pieces or an unknown grade
    token, so a file that does not follow the pattern can never inherit the
    biopsy name computed for a previous file.
    """
    pieces = ntpath.basename(nucleus_path).split("_")
    if len(pieces) < 3:
        return None
    prefix = _BIOPSY_PREFIX_BY_GRADE.get(pieces[0] + pieces[1])
    if prefix is None:
        return None
    return prefix + pieces[2] + ".jpg"


def _build_records(nucleus_paths, biopsy_masks):
    """Turn nucleus crop paths into ``[nucleus_path, mask_path, label]`` records.

    The mask is the first entry of `biopsy_masks` whose path contains the
    crop's biopsy file name; if none does, the bare biopsy file name is kept.
    The label is taken from the "grade<k>" substring of the crop's full path
    (one record is emitted per matching k). Crops whose biopsy name cannot be
    derived are skipped.
    """
    mask_by_biopsy = {}  # biopsy file name -> resolved mask path (scan once per biopsy)
    records = []
    for nucleus_path in nucleus_paths:
        biopsy_name = _source_biopsy_name(nucleus_path)
        if biopsy_name is None:
            continue
        if biopsy_name not in mask_by_biopsy:
            mask_by_biopsy[biopsy_name] = next(
                (mask for mask in biopsy_masks if biopsy_name in mask), biopsy_name)
        mask_path = mask_by_biopsy[biopsy_name]
        for grade in range(5):
            if "grade%d" % grade in nucleus_path:
                records.append([nucleus_path, mask_path, grade])
    return records


def load_data(split_rule=1, data_root="G:/LiverCancer/"):
    """Build the shuffled training and hold-out record lists.

    Args:
        split_rule: number of the sub-folder of ``resources/split_rule/`` that
            holds ``train.csv`` and ``holdout.csv`` (each with a ``file_path``
            column of biopsy images).
        data_root: folder containing ``resources/``, ``masked_biopsy_resized/``
            and ``nucleus/``.

    Returns:
        ``(train_samples, test_samples)``: two lists of
        ``[nucleus_path, mask_path, label]`` with integer labels 0-4.
        Training records are randomly over-sampled so every label is equally
        frequent; a count plot of the resulting labels is drawn as a side effect.

    Raises:
        ValueError: if no training record could be built.
    """
    if not data_root.endswith("/"):
        data_root += "/"

    # Biopsy file names belonging to each side of the split (sets: O(1) lookup).
    split_dir = data_root + "resources/split_rule/" + str(split_rule) + "/"
    train_biopsy_names = set(pd.read_csv(split_dir + "train.csv")["file_path"].apply(ntpath.basename))
    test_biopsy_names = set(pd.read_csv(split_dir + "holdout.csv")["file_path"].apply(ntpath.basename))

    mask_dir = data_root + "masked_biopsy_resized/"
    biopsy_masks = [path for fold in os.listdir(mask_dir)
                    for path in glob.glob(os.path.join(mask_dir, fold, "*.jpg"))]

    # Training crops come from the filtered set (sub-folder "1" of every folder)
    # and are kept when their source biopsy is listed in train.csv.
    filtered_dir = data_root + "nucleus/image_filtered/"
    filtered_nuclei = [path for fold in os.listdir(filtered_dir)
                       for path in glob.glob(os.path.join(filtered_dir, fold, '1', "*.jpg"))]
    train_nucleus_samples = [path for path in filtered_nuclei
                             if _source_biopsy_name(path) in train_biopsy_names]

    # Hold-out crops come from the unfiltered set; the 9 characters before the
    # first ".jpg" in the path are compared with the biopsy names in holdout.csv.
    image_dir = data_root + "nucleus/image/"
    all_nuclei = [path for fold_1 in os.listdir(image_dir)
                  for fold_2 in os.listdir(os.path.join(image_dir, fold_1))
                  for path in glob.glob(os.path.join(image_dir, fold_1, fold_2, "*.jpg"))]
    test_nucleus_samples = [path for path in all_nuclei
                            if (path.split(".jpg")[0][-9:] + ".jpg") in test_biopsy_names]

    train_samples = _build_records(train_nucleus_samples, biopsy_masks)
    test_samples = _build_records(test_nucleus_samples, biopsy_masks)

    if not train_samples:
        raise ValueError("no training samples found under " + data_root)

    # Over-sample row indices instead of the records themselves: the sampler
    # only sees numeric data and the labels stay integers (stacking paths and
    # labels into one ndarray would turn the labels into strings).
    labels = np.array([record[2] for record in train_samples])
    row_ids = np.arange(len(train_samples)).reshape(-1, 1)
    ros = over_sampling.RandomOverSampler(random_state=0)
    row_ids, labels = ros.fit_resample(row_ids, labels)

    sns.countplot(x=labels)

    train_samples = [list(train_samples[row]) for row in row_ids.ravel()]

    random.shuffle(train_samples)
    random.shuffle(test_samples)

    return train_samples, test_samples

In [None]:
def _read_image(path):
    """Read an image with OpenCV, raising instead of handing back None."""
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None.
        raise IOError("cannot read image: %s" % path)
    return image


def _standardize(image):
    """Shift an image to zero mean and scale it by its standard deviation."""
    spread = np.std(image)
    if spread == 0:
        # A constant image (e.g. an all-black crop) would otherwise compute
        # 0 / 0 and put NaN into the batch; it standardizes to all zeros.
        spread = 1.0
    return (image - np.average(image)) / spread


def _random_flip(image):
    """Flip a (rows, cols, channels) image along rows and along columns,
    each independently with probability 1/2."""
    if random.random() < 0.5:
        image = image[::-1]
    if random.random() < 0.5:
        image = image[:, ::-1]
    return image


def data_generator(batch_size, record_list, need_augment, repeat=True):
    """Yield ``({"sample_image": ..., "mask_image": ...}, one_hot_labels)`` batches.

    Args:
        batch_size: number of records per yielded batch.
        record_list: sequence of ``[sample_path, mask_path, label]`` with
            label in 0-4. The sequence itself is never reordered.
        need_augment: when true, records are reshuffled before every pass and
            both images are randomly flipped.
        repeat: when true (default) the generator cycles over the records
            forever, which is what Keras ``fit_generator`` expects; records
            left over at the end of a pass open the next batch. When false it
            stops after one pass and drops an incomplete final batch.

    Yields:
        A dict with two arrays of shape (batch_size, rows, cols, 3) and a
        (batch_size, 5) one-hot label array. The mask image is a window of at
        most 128x128 pixels whose top-left corner is drawn from [0, 128] on
        each axis, so mask files smaller than 256 pixels on a side produce
        smaller windows.

    Raises:
        IOError: if an image file cannot be read.
    """
    records = list(record_list)  # shuffle a private copy, not the caller's list
    if not records:
        # Nothing to read: finish instead of spinning forever in the loop below.
        return

    image_list, mask_list, label_list = [], [], []

    while True:
        if need_augment:
            random.shuffle(records)

        # Build the data batch by batch.
        for record in records:
            sample_path, mask_path, sample_label = record[0], record[1], record[2]

            # Convert the grade into a one-hot label.
            sample_label = np_utils.to_categorical(sample_label, 5)

            # Read and standardize the nucleus image.
            sample_image = _standardize(_read_image(sample_path))

            # Read the mask, cut a random window out of it and standardize that.
            mask_image = _read_image(mask_path)
            indent_x = random.randint(0, 128)
            indent_y = random.randint(0, 128)
            mask_image = _standardize(mask_image[indent_x:indent_x + 128, indent_y:indent_y + 128])

            if need_augment:
                # Flip while the arrays are still (rows, cols, channels); once
                # the batch axis is in front, axis 0 has length 1 and flipping
                # it does nothing.
                sample_image = _random_flip(sample_image)
                mask_image = _random_flip(mask_image)

            image_list.append(sample_image.reshape(1, sample_image.shape[0], sample_image.shape[1], 3))
            mask_list.append(mask_image.reshape(1, mask_image.shape[0], mask_image.shape[1], 3))
            label_list.append(sample_label)

            if len(label_list) >= batch_size:
                x_sample = np.vstack(image_list)
                x_mask = np.vstack(mask_list)
                y = np.vstack(label_list)

                yield {"sample_image": x_sample, "mask_image": x_mask}, y

                image_list, mask_list, label_list = [], [], []

        if not repeat:
            return

In [None]:
def step_decay(epoch):
    """Learning-rate schedule: 0.001 up to and including epoch 10, 0.0001 after.

    Prints the chosen rate together with the epoch number and returns the rate.
    """
    learn_rate = 0.0001 if epoch > 10 else 0.001

    print("learnrate: ", learn_rate, " epoch: ", epoch)

    return learn_rate

In [None]:
def _pyramid_branch(tensor, stages):
    """Run `tensor` through conv/pool stages with average-pooled skip paths.

    `stages` is a sequence of ``(filters, conv_names, pool_name)``. For every
    stage, in this order:
      1. each existing skip path is halved with 2x2 average pooling and a new
         skip path is started from the stage input (also average-pooled);
      2. the stage input goes through one 3x3 conv + batch-norm + ReLU per
         entry of `conv_names`, then 2x2 max pooling named `pool_name`;
      3. the pooled result and all skip paths are concatenated on the channel
         axis (axis 3) and become the next stage's input.

    Returns the concatenated output of the last stage.
    """
    skips = []
    for filters, conv_names, pool_name in stages:
        skips = [AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid')(skip) for skip in skips]
        skips.append(AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid')(tensor))

        for conv_name in conv_names:
            tensor = Convolution2D(filters, (3, 3), activation=None, padding='same', name=conv_name, strides=(1, 1))(tensor)
            tensor = BatchNormalization()(tensor)
            tensor = ReLU()(tensor)
        tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid', name=pool_name)(tensor)

        tensor = Concatenate(axis=3)([tensor] + skips)
    return tensor


def get_net(input_shape=(128, 128, 3), load_weight_path=None) -> Model:
    """Build and compile the two-input, 5-class classifier.

    The model takes a nucleus image ("sample_image") and a biopsy mask window
    ("mask_image"), both of shape `input_shape`. Each goes through its own
    branch (four stages of two convolutions for the image, three stages of one
    convolution for the mask), is reduced by global max pooling and batch
    normalization, and the two feature vectors are concatenated and fed to a
    64-unit ReLU layer and a 5-way softmax ("out_class"). With 3-channel
    inputs the image branch ends with 344 features and the mask branch with 76.

    Args:
        input_shape: shape of one image, channels last.
        load_weight_path: optional weights file, loaded in layer order
            (``by_name=False``) before compilation.

    Returns:
        The model compiled with Nesterov SGD (lr 0.001, momentum 0.9) and
        categorical cross-entropy; its summary is printed as a side effect.
    """
    inputs = Input(shape=input_shape, name="sample_image")
    inputs_mask = Input(shape=input_shape, name="mask_image")

    # Nucleus image branch.
    x = _pyramid_branch(inputs, [
        (16, ('conv1a', 'conv1b'), 'pool1'),
        (32, ('conv2a', 'conv2b'), 'pool2'),
        (64, ('conv3a', 'conv3b'), 'pool3'),
        (128, ('conv4a', 'conv4b'), 'pool4'),
    ])
    x = GlobalMaxPooling2D()(x)
    x = BatchNormalization(name="final_features_344")(x)

    # Biopsy mask branch (shallower and narrower). The layer name below is
    # kept as is even though this branch has 76 features, because saved
    # weights and other code may refer to it.
    x_mask = _pyramid_branch(inputs_mask, [
        (8, ('conv1_mask',), 'pool1_mask'),
        (16, ('conv2_mask',), 'pool2_mask'),
        (32, ('conv3_mask',), 'pool3_mask'),
    ])
    x_mask = GlobalMaxPooling2D()(x_mask)
    x_mask = BatchNormalization(name="final_features_344_mask")(x_mask)

    # Classification head on the joined features.
    x = concatenate([x, x_mask])
    x = Dense(64, activation='relu', name="final_features_64")(x)
    out_class = Dense(5, activation='softmax', name='out_class')(x)

    model = Model(inputs=[inputs, inputs_mask], outputs=out_class)

    if load_weight_path is not None:
        model.load_weights(load_weight_path, by_name=False)

    optimizer = SGD(lr=0.001, momentum=0.9, nesterov=True)
    loss = {"out_class": "categorical_crossentropy"}
    metrics = {"out_class": [categorical_accuracy, categorical_crossentropy]}

    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    model.summary(line_length=140)

    return model