# 猫狗大战

In [1]:
import tensorflow as tf

In [2]:
# set gpu for training
gpu = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpu[0], True)

## 1. 定义相关路径和参数

In [3]:
data = '../data/dogVScat/'
train_path = 'train/'
val_path = 'val/'
test_path = 'test/'

In [4]:
size, train_batch_size, val_batch_size = (150, 150), 32, 8

## 2. 定义图像生成器

In [5]:
img_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1/255) # 对原始像素做像素值归一化

## 3. 使用图像生成器导入数据

### 3.1 定义训练集生成器

In [6]:
train_generator = img_generator.flow_from_directory(
    directory=data+train_path, # 图片路径
    target_size=size, # 训练大小
    class_mode='binary', # 二分类任务
    batch_size=train_batch_size # 生成器一次生成多少个图片
)

Found 20000 images belonging to 2 classes.


### 3.2 定义验证集生成器

In [7]:
val_generator = img_generator.flow_from_directory(
    directory=data+val_path, # 图片路径
    target_size=size, # 生成的尺寸
    class_mode='binary', # 二分类
    batch_size=val_batch_size
)

Found 2500 images belonging to 2 classes.


### 3.3 生成一个展示生成器的结果

In [8]:
res = next(train_generator)
res[0].shape

(32, 150, 150, 3)

In [9]:
res[1], res[1].shape # 取出标签数据

(array([0., 1., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 1.,
        0., 1., 1., 1., 1., 0., 0., 0., 1., 1., 0., 0., 1., 0., 1.],
       dtype=float32),
 (32,))

## 4. 模型搭建

In [10]:
model = tf.keras.Sequential(name='LeNet5')

In [11]:
model.add(tf.keras.layers.Conv2D(filters=6, kernel_size=(5, 5), strides=(1, 1), input_shape=(150, 150, 3), activation='relu'))
model.add(tf.keras.layers.MaxPool2D()) # 最大池化层
model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=(5, 5), strides=(1, 1), activation='relu'))
model.add(tf.keras.layers.MaxPool2D())
model.add(tf.keras.layers.Flatten())

In [12]:
model.add(tf.keras.layers.Dense(units=120, activation='relu'))
model.add(tf.keras.layers.Dense(units=84, activation='relu'))
model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

## 5. 模型的编译

In [13]:
model.compile(optimizer='adam', loss=tf.keras.losses.binary_crossentropy, metrics=tf.keras.metrics.binary_accuracy)

## 6. 模型的训练

In [14]:
history = model.fit(train_generator, steps_per_epoch=100, epochs=10, verbose=1, validation_data=val_generator, validation_steps=8)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## 7. 回调函数

In [15]:
model_check_point = tf.keras.callbacks.ModelCheckpoint(
    filepath='../models/best_model.h5', 
    monitor='val_binary_accuracy', 
    verbose=1, 
    save_best_only=True, # 仅保存最优模型
    save_weights_only=False, # 是否仅保存权重
    mode='auto' # 设置检测参数的最优方向
)

In [16]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_binary_accuracy',
    min_delta=0, # 设置最小增量
    patience=10, # 为改善的时间数
    verbose=1,
    mode='auto'
)

In [17]:
# 设置权重衰减
def scheduler(epoch):
    if epoch % 5 == 0 and epoch != 0:
        lr = tf.keras.backend.get_value(model.optimizer.lr)
        tf.keras.backend.set_value(model.optimizer.lr, lr*0.9)
        print('lr changed to {}'.format(lr*0.9))
    return tf.keras.backend.get_value(model.optimizer.lr)
learning_rate_scheduler = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1)

In [18]:
# 保存训练日志
csv_logger = tf.keras.callbacks.CSVLogger('../logs/training.log', separator=',', append=False)
callbacks = [model_check_point, early_stopping, learning_rate_scheduler, csv_logger]

In [19]:
model = tf.keras.Sequential(name='LeNet5')
model.add(tf.keras.layers.Conv2D(filters=6, kernel_size=(5, 5), strides=(1, 1), input_shape=(150, 150, 3), activation='relu'))
model.add(tf.keras.layers.MaxPool2D()) # 最大池化层
model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=(5, 5), strides=(1, 1), activation='relu'))
model.add(tf.keras.layers.MaxPool2D())
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=120, activation='relu'))
model.add(tf.keras.layers.Dense(units=84, activation='relu'))
model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

In [20]:
model.compile(optimizer='adam', loss=tf.keras.losses.binary_crossentropy, metrics=tf.keras.metrics.binary_accuracy)

In [21]:
history = model.fit(train_generator, steps_per_epoch=100, epochs=10, verbose=1, validation_data=val_generator, validation_steps=8, callbacks=callbacks)


Epoch 00001: LearningRateScheduler reducing learning rate to 0.001.
Epoch 1/10
Epoch 00001: val_binary_accuracy improved from -inf to 0.54688, saving model to ../models\best_model.h5

Epoch 00002: LearningRateScheduler reducing learning rate to 0.001.
Epoch 2/10
Epoch 00002: val_binary_accuracy improved from 0.54688 to 0.75000, saving model to ../models\best_model.h5

Epoch 00003: LearningRateScheduler reducing learning rate to 0.001.
Epoch 3/10
Epoch 00003: val_binary_accuracy did not improve from 0.75000

Epoch 00004: LearningRateScheduler reducing learning rate to 0.001.
Epoch 4/10
Epoch 00004: val_binary_accuracy did not improve from 0.75000

Epoch 00005: LearningRateScheduler reducing learning rate to 0.001.
Epoch 5/10
Epoch 00005: val_binary_accuracy improved from 0.75000 to 0.76562, saving model to ../models\best_model.h5
lr changed to 0.0009000000427477062

Epoch 00006: LearningRateScheduler reducing learning rate to 0.00090000004.
Epoch 6/10
Epoch 00006: val_binary_accuracy d