# Apert与正常群体间的二分类模型

In [15]:
import os
import numpy as np
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.callbacks import Callback
from tensorflow import keras
import tensorflow as tf
from tensorflow.python.ops import array_ops
from tensorflow.keras import backend as K
from sklearn.metrics import f1_score, recall_score, precision_score

数据集路径

In [16]:
TRAIN_SET_DIR = './dataset/train'
TEST_SET_DIR = './dataset/test'

构造generator，避免一次读取全部图片导致占满内存

In [17]:
image_processor = ImageDataGenerator(validation_split=0.1)
train_generator = image_processor.flow_from_directory(
    directory=TRAIN_SET_DIR,
    target_size=(112, 96),
    batch_size=16,
    subset='training'
)
validation_generator = image_processor.flow_from_directory(
    directory=TRAIN_SET_DIR,
    target_size=(112, 96),
    batch_size=sum([
      len(os.listdir(os.path.join(TEST_SET_DIR, class_name)))
      for class_name in os.listdir(TEST_SET_DIR)
    ]),
    subset='validation'
)
test_generator = image_processor.flow_from_directory(
    directory=TEST_SET_DIR,
    target_size=(112, 96),
    batch_size=16
)

Found 535 images belonging to 4 classes.
Found 59 images belonging to 4 classes.
Found 64 images belonging to 4 classes.


加载经过训练的预训练MobileFaceNet模型

In [18]:
mobile_facenet = load_model('../../mobile_facenet.h5')



改造模型结构使其适应当前问题

In [19]:
top_layer = Dense(
    len(os.listdir(TRAIN_SET_DIR)),
    kernel_initializer='he_normal',
    activation='softmax'
)(mobile_facenet.output)

In [20]:
model = Model(inputs=mobile_facenet.input, outputs=top_layer)

增加checkpoint，保存中间结果

In [21]:
model_checkpoint_callback = ModelCheckpoint(
        'saved_models/top_layer_trained_weights.{epoch:02d}-{val_accuracy:.2f}.h5',
        monitor='val_accuracy',
        mode='max',
        verbose=1,
        save_best_only=True
    )

In [22]:
batches_per_epoch = train_generator.n // train_generator.batch_size

In [23]:
train_generator.n

535

使用tensorboard

In [24]:
tensorboard_callback = TensorBoard(
        log_dir='logs',
        batch_size=16
    )



监控准确率、查准率、查全率、auc

In [25]:
METRICS = [
  # keras.metrics.TruePositives(name='tp'),
  # keras.metrics.FalsePositives(name='fp'),
  # keras.metrics.TrueNegatives(name='tn'),
  # keras.metrics.FalseNegatives(name='fn'), 
  keras.metrics.BinaryAccuracy(name='accuracy'),
  keras.metrics.Precision(name='precision'),
  keras.metrics.Recall(name='recall'),
  keras.metrics.AUC(name='auc'),
]

使用focal loss作为损失函数，来抑制样本不均衡带来的不良影响

In [26]:
# focal loss with multi label
def focal_loss(classes_num, gamma=2., alpha=.25, e=0.1):
    # classes_num contains sample number of each classes
    def focal_loss_fixed(target_tensor, prediction_tensor):
        '''
        prediction_tensor is the output tensor with shape [None, 100], where 100 is the number of classes
        target_tensor is the label tensor, same shape as predcition_tensor
        '''

        #1# get focal loss with no balanced weight which presented in paper function (4)
        zeros = array_ops.zeros_like(prediction_tensor, dtype=prediction_tensor.dtype)
        one_minus_p = array_ops.where(tf.greater(target_tensor,zeros), target_tensor - prediction_tensor, zeros)
        FT = -1 * (one_minus_p ** gamma) * tf.math.log(tf.clip_by_value(prediction_tensor, 1e-8, 1.0))

        #2# get balanced weight alpha
        classes_weight = array_ops.zeros_like(prediction_tensor, dtype=prediction_tensor.dtype)

        total_num = float(sum(classes_num))
        classes_w_t1 = [ total_num / ff for ff in classes_num ]
        sum_ = sum(classes_w_t1)
        classes_w_t2 = [ ff/sum_ for ff in classes_w_t1 ]   #scale
        classes_w_tensor = tf.convert_to_tensor(classes_w_t2, dtype=prediction_tensor.dtype)
        classes_weight += classes_w_tensor

        alpha = array_ops.where(tf.greater(target_tensor, zeros), classes_weight, zeros)

        #3# get balanced focal loss
        balanced_fl = alpha * FT
        balanced_fl = tf.reduce_mean(balanced_fl)

        #4# add other op to prevent overfit
        # reference : https://spaces.ac.cn/archives/4493
        nb_classes = len(classes_num)
        fianal_loss = (1-e) * balanced_fl + e * K.categorical_crossentropy(K.ones_like(prediction_tensor)/nb_classes, prediction_tensor)

        return fianal_loss
    return focal_loss_fixed

编译模型

In [27]:
model.compile(
        loss=focal_loss([len(os.listdir(os.path.join(TRAIN_SET_DIR, class_name))) for class_name in os.listdir(TRAIN_SET_DIR)]),
        optimizer='adam',
        metrics = METRICS
    )

对模型进行训练

In [14]:
history = model.fit(
    train_generator,
    steps_per_epoch=batches_per_epoch,
    epochs=50,
    callbacks=[
            # mae_callback,
            # early_stopping_callback,
            # metrics,
            model_checkpoint_callback,
            tensorboard_callback
        ],
    validation_data=validation_generator
)

Epoch 1/50
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00001: val_accuracy improved from -inf to 0.76071, saving model to saved_models\top_layer_trained_weights.01-0.76.h5
Epoch 2/50
Epoch 00002: val_accuracy did not improve from 0.76071
Epoch 3/50
Epoch 00003: val_accuracy did not improve from 0.76071
Epoch 4/50
Epoch 00004: val_accuracy did not improve from 0.76071
Epoch 5/50
Epoch 00005: val_accuracy improved from 0.76071 to 0.78571, saving model to saved_models\top_layer_trained_weights.05-0.79.h5
Epoch 6/50
Epoch 00006: val_accuracy did not improve from 0.78571
Epoch 7/50
Epoch 00007: val_accuracy did not improve from 0.78571
Epoch 8/50
Epoch 00008: val_accuracy did not improve from 0.78571
Epoch 9/50
Epoch 00009: val_accuracy improved from 0.78571 to 0.78929, saving model to saved_models\top_layer_trained_weights.09-0.79.h5
Epoch 10/50
Epoch 00010: val_accuracy did not improve from 0.78929
Epoch 11/50
Epoch 00011: val_accuracy improved from 0.789

KeyboardInterrupt: 

打印模型结构

In [28]:
model.summary()

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 112, 96, 3)] 0                                            
__________________________________________________________________________________________________
zero_padding2d (ZeroPadding2D)  (None, 114, 98, 3)   0           input_1[0][0]                    
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 56, 48, 64)   1728        zero_padding2d[0][0]             
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 56, 48, 64)   256         conv2d[0][0]                     
_______________________________________________________________________________________

评估模型效果

In [16]:
model.evaluate_generator(test_generator, verbose=1)

Instructions for updating:
Please use Model.evaluate, which supports generators.


[0.786801815032959, 0.7901639342308044, 0.0, 0.0, 0.623353898525238]

保存模型

In [17]:
model.save('./mutil-disease-cnn.h5')