In [20]:
import os
import cv2
import glob
import random
import seaborn as sns
import numpy as np

from keras.utils import np_utils 
from keras.models import Model
from keras.optimizers import SGD
from keras.callbacks import LearningRateScheduler,ModelCheckpoint
from keras.metrics import categorical_accuracy, categorical_crossentropy
from keras.layers import Input, Convolution2D, MaxPooling2D, Flatten, Dense, Dropout

In [11]:
# Notebook-wide configuration read by get_net().
USE_DROPOUT = False  # when True, get_net() adds a Dropout layer after every pooling layer
LEARN_RATE = 0.001  # learning rate passed to the SGD optimizer in get_net()
# (Height, Weight, Channel) is the default input_shape of get_net().
Height = 256
Weight = 256  # NOTE(review): presumably the image width ("Weight" looks like a typo for "Width") - confirm
Channel= 3

In [12]:
def _grade_from_path(sample_path):
    """Return the class label (0-4) encoded in ``sample_path`` as a ``gradeN`` substring.

    Raises:
        ValueError: if the path contains none of ``grade0`` .. ``grade4``.
    """
    for grade in range(5):
        if "grade" + str(grade) in sample_path:
            return grade
    raise ValueError("Cannot infer a grade label (grade0-grade4) from path: " + str(sample_path))


def get_train_holdout_files(train_percentage=90, src_dir="D:/jupyter-notebook/LiverCancer/Image_Resized/"):
    """Collect the .jpg files below ``src_dir`` and split them into train and holdout records.

    Every entry of ``src_dir`` is treated as a sub-folder and searched for ``*.jpg``
    files. The combined list is shuffled with the global ``random`` state and cut
    at ``train_percentage`` percent.

    Args:
        train_percentage: share (0-100) of the files that goes to the training set.
        src_dir: root folder holding one sub-folder per class. The default is the
            location that used to be hard-coded here.

    Returns:
        ``(train_rep, holdout_rep)``: two lists of ``[file_path, label]`` records,
        where ``label`` is the integer N of the ``gradeN`` substring in the path.

    Raises:
        ValueError: if a file path carries no ``gradeN`` marker. Previously such a
            path either crashed on an unbound variable or silently inherited the
            label of the preceding sample.
    """
    print("Get train/holdout files.")

    file_paths = []
    for fold in os.listdir(src_dir):
        file_paths.extend(glob.glob(os.path.join(src_dir, fold, "*.jpg")))

    random.shuffle(file_paths)
    print("Full Count: ", len(file_paths))

    # Split into training and holdout data.
    train_count = int((len(file_paths) * train_percentage) / 100)
    train_samples = file_paths[:train_count]
    holdout_samples = file_paths[train_count:]
    print("Train Count: ", len(train_samples), ", Holdout Count: ", len(holdout_samples))

    # Build the [path, label] description records.
    train_rep = [[sample_path, _grade_from_path(sample_path)] for sample_path in train_samples]
    holdout_rep = [[sample_path, _grade_from_path(sample_path)] for sample_path in holdout_samples]

    print("Train Count: ", len(train_rep), ", Holdout Count: ", len(holdout_rep))

    return train_rep, holdout_rep

In [13]:
def data_generator(batch_size, record_list, train_set):
    """Endlessly yield ``(images, one_hot_labels)`` batches built from ``record_list``.

    Each pass over the records reads every image with ``cv2.imread``, standardizes
    it to zero mean / unit standard deviation over the whole image, and groups the
    samples into batches of ``batch_size``. The samples left over at the end of a
    pass are yielded as a final, smaller batch instead of being dropped.

    Args:
        batch_size: number of samples per yielded batch.
        record_list: sequence of ``[file_path, label]`` records with labels 0-4.
            The sequence itself is never reordered.
        train_set: when true, the records are visited in a fresh random order on
            every pass and each image is randomly flipped horizontally and/or
            vertically.

    Yields:
        ``(x, y)`` where ``x`` stacks the images along a new leading axis
        (``(n, height, width, 3)``) and ``y`` stacks the 5-class one-hot labels
        (``(n, 5)``); ``n`` is ``batch_size`` except for the last batch of a pass.

    Raises:
        ValueError: if ``record_list`` is empty (the loop would otherwise spin
            forever without yielding).
        IOError: if an image cannot be read (``cv2.imread`` returned ``None``).
    """
    if len(record_list) == 0:
        raise ValueError("record_list is empty; there is nothing to generate batches from.")

    while True:

        # Work on a copy so the caller's list keeps its order.
        if train_set:
            epoch_records = random.sample(record_list, len(record_list))
        else:
            epoch_records = list(record_list)

        image_list = []
        label_list = []

        # Walk through every record once per pass.
        for record_item in epoch_records:

            sample_path = record_item[0]
            # Convert the integer label into a 5-class one-hot vector.
            sample_label = np_utils.to_categorical(record_item[1], 5)

            # Read the image and standardize it.
            sample_image = cv2.imread(sample_path)
            if sample_image is None:
                raise IOError("Could not read image: " + str(sample_path))
            spread = np.std(sample_image)
            # A constant image has zero spread; dividing by 1 keeps it finite (all zeros).
            sample_image = (sample_image - np.average(sample_image)) / (spread if spread > 0 else 1.0)

            # Augmentation: flip while the array is still (height, width, 3).
            # Flipping after the batch axis is added would act on the wrong axes
            # (flipud would reverse the size-1 batch axis, fliplr the height).
            if train_set:
                if random.random() < 0.5:
                    sample_image = np.fliplr(sample_image)
                if random.random() < 0.5:
                    sample_image = np.flipud(sample_image)

            # Add the leading batch axis so samples can be stacked with vstack.
            sample_image = sample_image.reshape(1, sample_image.shape[0], sample_image.shape[1], 3)

            image_list.append(sample_image)
            label_list.append(sample_label)

            if len(image_list) >= batch_size:
                yield np.vstack(image_list), np.vstack(label_list)
                image_list = []
                label_list = []

        # Emit the remainder so the tail of the record list is not lost.
        if image_list:
            yield np.vstack(image_list), np.vstack(label_list)

In [14]:
# Build the shuffled train/holdout lists of [path, label] records (90% of the files go to training).
train_files, holdout_files = get_train_holdout_files(train_percentage=90)

Get train/holdout files.
Full Count:  296
Train Count:  266 , Holdout Count:  30
Train Count:  266 , Holdout Count:  30


In [15]:
def get_net(input_shape=(Height, Weight, Channel), load_weight_path=None) -> Model:  # expected return type is Model
    """Build and compile the 5-class CNN classifier.

    The network is eight groups of [3x3 'same' convolution with ReLU -> 2x2 max
    pooling with stride 2], each optionally followed by Dropout(0.3) when the
    module-level ``USE_DROPOUT`` flag is true, then Flatten -> Dense(64, relu) ->
    Dense(5, softmax) named ``out_class``. Every pooling layer halves both spatial
    dimensions, so the input is reduced by a factor of 2**8 before flattening.

    The layers are created with Keras 2 argument names (``padding``, ``strides``,
    tuple kernel size, ``inputs``/``outputs``) instead of the deprecated Keras 1
    spellings; layer names and layer order are unchanged.

    Args:
        input_shape: (height, width, channels) of the input images.
        load_weight_path: optional path of a weights file to load into the model
            before it is compiled.

    Returns:
        The compiled Keras ``Model``. The layer summary is printed as a side effect.
    """
    # (number of filters, convolution layer name) for the eight conv/pool groups.
    conv_groups = [
        (32, 'conv1a'),
        (32, 'conv2a'),
        (64, 'conv3b'),
        (64, 'conv4b'),
        (128, 'conv5a'),
        (128, 'conv6a'),
        (256, 'conv7a'),
        (256, 'conv8a'),
    ]

    inputs = Input(shape=input_shape, name="input")
    x = inputs
    for group_index, (filters, conv_name) in enumerate(conv_groups, start=1):
        x = Convolution2D(filters, (3, 3), activation='relu', padding='same', strides=(1, 1), name=conv_name)(x)
        x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid', name='pool' + str(group_index))(x)
        if USE_DROPOUT:
            x = Dropout(0.3)(x)

    # Classification head.
    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    out_class = Dense(5, activation='softmax', name='out_class')(x)

    model = Model(inputs=inputs, outputs=out_class)

    if load_weight_path is not None:
        model.load_weights(load_weight_path, by_name=False)

    # Compile the model.
    model.compile(
        optimizer=SGD(lr=LEARN_RATE, momentum=0.9, nesterov=True),
        loss={"out_class": "categorical_crossentropy"},
        metrics={"out_class": [categorical_accuracy, categorical_crossentropy]},
    )
    model.summary(line_length=140)

    return model

In [16]:
# Build the network once with default arguments; get_net() prints the layer summary.
get_net()

  """
  
  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':


____________________________________________________________________________________________________________________________________________
Layer (type)                                                   Output Shape                                            Param #              
input (InputLayer)                                             (None, 256, 256, 3)                                     0                    
____________________________________________________________________________________________________________________________________________
conv1a (Conv2D)                                                (None, 256, 256, 32)                                    896                  
____________________________________________________________________________________________________________________________________________
pool1 (MaxPooling2D)                                           (None, 128, 128, 32)                                    0                    
_____________

<keras.engine.training.Model at 0x1b0de202fd0>

In [19]:
# NOTE: this cell uses step_decay, which is defined in a later cell of this
# notebook (In [17]); run that cell first on a fresh kernel.
model_name = "liver_CNN"
load_weights_path = None
batch_size = 8

# Checkpoints and the final model are written below "workdir/"; create it up
# front so saving does not fail on a fresh checkout.
os.makedirs("workdir", exist_ok=True)

# Get the train and holdout sets, stored as [path, class label] records.
train_files, holdout_files = get_train_holdout_files(train_percentage=90)

# Training data generator.
train_gen = data_generator(batch_size, train_files, train_set=True)

# Holdout (validation) data generator.
holdout_gen = data_generator(batch_size, holdout_files, train_set=False)

# Set the learning rate per epoch.
learnrate_scheduler = LearningRateScheduler(step_decay)

# Build the model.
model = get_net(load_weight_path=load_weights_path)

checkpoint = ModelCheckpoint("workdir/model_" + model_name + "_"  + "_e" + "{epoch:02d}-{val_loss:.4f}.hd5", monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', period=1)

# Keras 2 counts generator batches, not samples, per epoch.
train_steps = max(1, len(train_files) // batch_size)
# Round up so that a trailing partial validation batch is included.
holdout_steps = max(1, (len(holdout_files) + batch_size - 1) // batch_size)

# NOTE(review): class_weight="auto" was removed. Keras 2 only applies class
# weights given as a {class_index: weight} dict, so the string appears to have
# had no effect; pass such a dict here if class re-weighting is wanted.
model.fit_generator(
    generator=train_gen,
    steps_per_epoch=train_steps,
    epochs=10,
    verbose=1,
    validation_data=holdout_gen,
    validation_steps=holdout_steps,
    callbacks=[checkpoint, learnrate_scheduler],
)

model.save("workdir/model_" + model_name + "_end.hd5")

Get train/holdout files.
Full Count:  296
Train Count:  266 , Holdout Count:  30
Train Count:  266 , Holdout Count:  30


  """
  
  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':


____________________________________________________________________________________________________________________________________________
Layer (type)                                                   Output Shape                                            Param #              
input (InputLayer)                                             (None, 256, 256, 3)                                     0                    
____________________________________________________________________________________________________________________________________________
conv1a (Conv2D)                                                (None, 256, 256, 32)                                    896                  
____________________________________________________________________________________________________________________________________________
pool1 (MaxPooling2D)                                           (None, 128, 128, 32)                                    0                    
_____________


Epoch 00006: saving model to workdir/model_liver_CNN__e06-0.1653.hd5
Epoch 7/10
learnrate:  0.001  epoch:  6

Epoch 00007: saving model to workdir/model_liver_CNN__e07-0.1425.hd5
Epoch 8/10
learnrate:  0.001  epoch:  7

Epoch 00008: saving model to workdir/model_liver_CNN__e08-0.0653.hd5
Epoch 9/10
learnrate:  0.001  epoch:  8

Epoch 00009: saving model to workdir/model_liver_CNN__e09-0.0099.hd5
Epoch 10/10
learnrate:  0.001  epoch:  9

Epoch 00010: saving model to workdir/model_liver_CNN__e10-0.0066.hd5


In [17]:
def step_decay(epoch):
    """Learning-rate schedule: 0.001 up to and including epoch 100, 0.0001 afterwards.

    Prints the chosen rate together with the epoch index and returns the rate.
    """
    rate = 0.0001 if epoch > 100 else 0.001
    print("learnrate: ", rate, " epoch: ", epoch)
    return rate