In [3]:
import os
import cv2
import glob
import ntpath
import random
import warnings
import numpy as np
import pandas as pd
import seaborn as sns

from keras.utils import np_utils 
from keras.models import Model
from keras.optimizers import SGD
from keras.callbacks import LearningRateScheduler,ModelCheckpoint
from keras.metrics import categorical_accuracy, categorical_crossentropy
from keras.layers import Input, Convolution2D, MaxPooling2D, Flatten, Dense, Dropout

warnings.filterwarnings("ignore")

In [4]:
USE_DROPOUT = False
LEARN_RATE = 0.001
Height = 128
Weight = 128
Channel= 3

In [5]:
def get_train_holdout_files(current_iteration = 1):
    
    ###############################读取biopsy图像###########################################################
    print("Get train/holdout files.")
        
    src_dir = "D:/jupyter-notebook/LiverCancer/Data_Description/" + str(current_iteration) +"/"

    #分割训练数据和测试数据  
    train_samples = pd.read_csv(src_dir + "train.csv")["file_path"].tolist()
    holdout_samples = pd.read_csv(src_dir + "holdout.csv")["file_path"].tolist()
    print("Train Count: ", len(train_samples), ", Holdout Count: ", len(holdout_samples))
    
    ###############################读取biopsy_mask图像#######################################################
    src_dir = "D:/jupyter-notebook/LiverCancer/Mask_Resized_128/"
    biopsy_masks = []

    for fold in os .listdir(src_dir):
        biopsy_masks = biopsy_masks + [file_path for file_path in glob.glob(src_dir+fold+"/*.jpg")]
        
    print("Mask Full Count: ",len(biopsy_masks))
    
    ##########################################################################################################
    train_mask_samples = []
    holdout_mask_samples = []

    for index, train_sample in enumerate(train_samples):
        temp1 = ntpath.basename(train_sample)
        for index, biopsy_mask in enumerate(biopsy_masks):
            temp2 = ntpath.basename(biopsy_mask)
            if temp1 == temp2:
                train_mask_samples += [biopsy_mask]
                
    for index, holdout_sample in enumerate(holdout_samples):
        temp1 = ntpath.basename(holdout_sample)
        for index, biopsy_mask in enumerate(biopsy_masks):
            temp2 = ntpath.basename(biopsy_mask)
            if temp1 == temp2:
                holdout_mask_samples += [biopsy_mask]
        
    ################################建立描述集合###############################################################
    train_rep = []
    holdout_rep = []
    sets = [[train_rep, train_mask_samples], [holdout_rep, holdout_mask_samples]]

    for set_item in sets:

        rep = set_item[0]
        samples = set_item[1]

        for index, sample_path in enumerate(samples):

            if "grade0" in sample_path:
                sample_label = 0
            elif "grade1" in sample_path:
                sample_label = 1
            elif "grade2" in sample_path:
                sample_label = 2
            elif "grade3" in sample_path:
                sample_label = 3
            elif "grade4" in sample_path:
                sample_label = 4

            rep.append([sample_path, sample_label])

    print("Train Count: ", len(train_rep), ", Holdout Count: ", len(holdout_rep))

    return train_rep, holdout_rep

In [6]:
def data_generator(batch_size, record_list, train_set):
    
    while True:
        
        batch_index = 0
        image_list = []
        label_list = []

        if train_set:
            random.shuffle(record_list)

        #逐一遍历所有数据
        for index, record_item in enumerate(record_list):

            sample_path = record_item[0]
            sample_label = record_item[1]

            #转换成多分类标签
            sample_label = np_utils.to_categorical(sample_label,5)  

            #读取图片、修改尺寸、标准化
            sample_image = cv2.imread(sample_path)
            sample_image = (sample_image - np.average(sample_image)) / np.std(sample_image)
            sample_image = sample_image.reshape(1, sample_image.shape[0], sample_image.shape[1], 3)

            #数据增强
            if train_set:   
                if random.randint(0, 100) > 50:
                    sample_image = np.fliplr(sample_image)
                if random.randint(0, 100) > 50:
                    sample_image = np.flipud(sample_image)
                if random.randint(0, 100) > 50:
                    sample_image = sample_image[:,::-1]
                if random.randint(0, 100) > 50:
                    sample_image = sample_image[::-1, :]

            #添加数据
            image_list.append(sample_image)
            label_list.append(sample_label)
            batch_index += 1

            if batch_index >= batch_size:
                x = np.vstack(image_list)
                y = np.vstack(label_list)
                yield x, y
                image_list = []
                label_list = []
                batch_index = 0

In [7]:
train_files, holdout_files = get_train_holdout_files(current_iteration = 1)

Get train/holdout files.
Train Count:  266 , Holdout Count:  30
Mask Full Count:  296
Train Count:  266 , Holdout Count:  30


In [8]:
def get_net(input_shape=(Height, Weight, Channel), load_weight_path=None) -> Model:  #期待返回类型为model
    
    inputs = Input(shape=input_shape, name="input")
    x = inputs
    x = Convolution2D(32, 3, 3, activation='relu', border_mode='same', name='conv1a', subsample=(1, 1))(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool1')(x)
    if USE_DROPOUT:
        x = Dropout(p=0.3)(x)

    # 2nd layer group
    x = Convolution2D(32, 3, 3, activation='relu', border_mode='same', name='conv2a', subsample=(1, 1))(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool2')(x)
    if USE_DROPOUT:
        x = Dropout(p=0.3)(x)

    # 3rd layer group
    x = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name='conv3b', subsample=(1, 1))(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool3')(x)
    if USE_DROPOUT:
        x = Dropout(p=0.3)(x)

    # 4th layer group
    x = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name='conv4b', subsample=(1, 1),)(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool4')(x)
    if USE_DROPOUT:
        x = Dropout(p=0.3)(x)
        
    # 5th layer group
    x = Convolution2D(128, 3, 3, activation='relu', border_mode='same', name='conv5a', subsample=(1, 1))(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool5')(x)
    if USE_DROPOUT:
        x = Dropout(p=0.3)(x)
        
    # 6th layer group
    x = Convolution2D(128, 3, 3, activation='relu', border_mode='same', name='conv6a', subsample=(1, 1))(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool6')(x)
    if USE_DROPOUT:
        x = Dropout(p=0.3)(x)
        
    # 7th layer group
    x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='conv7a', subsample=(1, 1))(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool7')(x)
    if USE_DROPOUT:
        x = Dropout(p=0.3)(x)
        
#     # 8th layer group
#     x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='conv8a', subsample=(1, 1))(x)
#     x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool8')(x)
#     if USE_DROPOUT:
#         x = Dropout(p=0.3)(x)
    
    #输出
    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    out_class = Dense(5, activation='softmax', name='out_class')(x)

    model = Model(input=inputs, output=out_class)
    
    if load_weight_path is not None:
        model.load_weights(load_weight_path, by_name=False)

    #编译模型
    model.compile(optimizer=SGD(lr=LEARN_RATE, momentum=0.9, nesterov=True), loss={ "out_class": "categorical_crossentropy" }, metrics={"out_class": [categorical_accuracy, categorical_crossentropy] } )
    model.summary(line_length=140)

    return model

In [9]:
get_net()

____________________________________________________________________________________________________________________________________________
Layer (type)                                                   Output Shape                                            Param #              
input (InputLayer)                                             (None, 128, 128, 3)                                     0                    
____________________________________________________________________________________________________________________________________________
conv1a (Conv2D)                                                (None, 128, 128, 32)                                    896                  
____________________________________________________________________________________________________________________________________________
pool1 (MaxPooling2D)                                           (None, 64, 64, 32)                                      0                    
_____________

<keras.engine.training.Model at 0x26e7f86cf28>

In [12]:
model_name = "liver_CNN"
load_weights_path=None
batch_size = 8

for i in range(10):

    model_name_i =model_name + "_" + str(i+1)
    
    #获得训练和测试集合，以：路径、class label的形式保存
    train_files, holdout_files = get_train_holdout_files( current_iteration = (i+1) )

    #训练数据集
    train_gen = data_generator(batch_size, train_files, train_set=True)

    #测试数据集
    holdout_gen = data_generator(batch_size, holdout_files, train_set=False)

    #动态设置学习率
    learnrate_scheduler = LearningRateScheduler(step_decay)

    #获取model
    model = get_net(load_weight_path=load_weights_path)

    checkpoint = ModelCheckpoint("workdir/model_" + model_name_i + "_"  + "_e" + "{epoch:02d}-{val_loss:.4f}.hd5", monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', period=1)

    checkpoint_best = ModelCheckpoint("workdir/model_" + model_name_i + "_"  + "_best.hd5", monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
    
    model.fit_generator(generator=train_gen, samples_per_epoch=len(train_files), nb_epoch=10, verbose=1, validation_data=holdout_gen, nb_val_samples=len(holdout_files), class_weight="auto", callbacks=[checkpoint, checkpoint_best, learnrate_scheduler])

    model.save("workdir/model_" + model_name_i + "_end.hd5")

Get train/holdout files.
Train Count:  266 , Holdout Count:  30
Mask Full Count:  296
Train Count:  266 , Holdout Count:  30
____________________________________________________________________________________________________________________________________________
Layer (type)                                                   Output Shape                                            Param #              
input (InputLayer)                                             (None, 128, 128, 3)                                     0                    
____________________________________________________________________________________________________________________________________________
conv1a (Conv2D)                                                (None, 128, 128, 32)                                    896                  
____________________________________________________________________________________________________________________________________________
pool1 (MaxPooling2D)         

Epoch 6/10
learnrate:  0.001  epoch:  5

Epoch 00006: saving model to workdir/model_liver_CNN_1__e06-0.4963.hd5

Epoch 00006: val_loss did not improve from 0.47900
Epoch 7/10
learnrate:  0.001  epoch:  6

Epoch 00007: saving model to workdir/model_liver_CNN_1__e07-0.3909.hd5

Epoch 00007: val_loss improved from 0.47900 to 0.39093, saving model to workdir/model_liver_CNN_1__best.hd5
Epoch 8/10
learnrate:  0.001  epoch:  7

Epoch 00008: saving model to workdir/model_liver_CNN_1__e08-0.4091.hd5

Epoch 00008: val_loss did not improve from 0.39093
Epoch 9/10
learnrate:  0.001  epoch:  8

Epoch 00009: saving model to workdir/model_liver_CNN_1__e09-0.4378.hd5

Epoch 00009: val_loss did not improve from 0.39093
Epoch 10/10
learnrate:  0.001  epoch:  9

Epoch 00010: saving model to workdir/model_liver_CNN_1__e10-0.2287.hd5

Epoch 00010: val_loss improved from 0.39093 to 0.22866, saving model to workdir/model_liver_CNN_1__best.hd5
Get train/holdout files.
Train Count:  266 , Holdout Count:  30
M


Epoch 00001: saving model to workdir/model_liver_CNN_2__e01-1.2191.hd5

Epoch 00001: val_loss improved from inf to 1.21906, saving model to workdir/model_liver_CNN_2__best.hd5
Epoch 2/10
learnrate:  0.001  epoch:  1

Epoch 00002: saving model to workdir/model_liver_CNN_2__e02-0.9929.hd5

Epoch 00002: val_loss improved from 1.21906 to 0.99293, saving model to workdir/model_liver_CNN_2__best.hd5
Epoch 3/10
learnrate:  0.001  epoch:  2

Epoch 00003: saving model to workdir/model_liver_CNN_2__e03-0.6635.hd5

Epoch 00003: val_loss improved from 0.99293 to 0.66353, saving model to workdir/model_liver_CNN_2__best.hd5
Epoch 4/10
learnrate:  0.001  epoch:  3

Epoch 00004: saving model to workdir/model_liver_CNN_2__e04-0.8550.hd5

Epoch 00004: val_loss did not improve from 0.66353
Epoch 5/10
learnrate:  0.001  epoch:  4

Epoch 00005: saving model to workdir/model_liver_CNN_2__e05-1.1891.hd5

Epoch 00005: val_loss did not improve from 0.66353
Epoch 6/10
learnrate:  0.001  epoch:  5

Epoch 00006:

Epoch 1/10
learnrate:  0.001  epoch:  0

Epoch 00001: saving model to workdir/model_liver_CNN_3__e01-1.2426.hd5

Epoch 00001: val_loss improved from inf to 1.24256, saving model to workdir/model_liver_CNN_3__best.hd5
Epoch 2/10
learnrate:  0.001  epoch:  1

Epoch 00002: saving model to workdir/model_liver_CNN_3__e02-0.8472.hd5

Epoch 00002: val_loss improved from 1.24256 to 0.84724, saving model to workdir/model_liver_CNN_3__best.hd5
Epoch 3/10
learnrate:  0.001  epoch:  2

Epoch 00003: saving model to workdir/model_liver_CNN_3__e03-1.0537.hd5

Epoch 00003: val_loss did not improve from 0.84724
Epoch 4/10
learnrate:  0.001  epoch:  3

KeyboardInterrupt: 

In [11]:
def step_decay(epoch):
    res = 0.001
    if epoch > 100:
        res = 0.0001
    print("learnrate: ", res, " epoch: ", epoch)
    return res