In [1]:
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model, Model
from keras.layers import Input, Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers.noise import GaussianNoise
from keras.layers.normalization import BatchNormalization
from keras.applications import ResNet50, VGG16, InceptionV3
from keras.applications.vgg16 import preprocess_input, decode_predictions
# from utils import make_parallel
import os
import sys
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import time
from keras.utils import to_categorical
from tqdm import tqdm
%matplotlib inline

Using TensorFlow backend.


## 1.将预训练模型搭配新分类器 在新数据集上训练该分类器 (Transfer Learning)

**两种方式**
- 方式1：载入预训练的整个模型结构 + 权重，搭配新分类器训练
- 方式2：构建与预训练模型结构一致的模型，仅载入预训练的权重，搭配新分类器训练

### 1.0 数据准备
**基本处理流程**
- 读入每张图片：cv2.imread
- 调整图片尺寸：cv2.resize
- 定义图片对应的类别：cat = 0, dog = 1
- 同步打乱图片和分类顺序：shuffle
- 分割为训练集和验证集：train_test_split （可以省略，后面训练时可以通过配置validation_split参数达到同样目的）

In [3]:
shape = 224
label = np.array([0] * 12500 + [1] * 12500)
data = np.zeros((25000, shape, shape, 3), dtype=np.uint8)

for i in tqdm(range(12500)):
    img = cv2.imread('./train/cat.%s.jpg' % str(i))
    img = img[:, :, ::-1]
    data[i] = cv2.resize(img, (shape, shape))
    
for i in tqdm(range(12500)):
    img = cv2.imread('./train/dog.%s.jpg' % str(i))
    img = img[:, :, ::-1]
    data[i + 12500] = cv2.resize(img, (shape, shape))
    
print('Training Data Size = %.2f GB' % (sys.getsizeof(data)/1024**3))

100%|██████████| 12500/12500 [02:09<00:00, 96.27it/s] 
100%|██████████| 12500/12500 [01:54<00:00, 109.38it/s]

Training Data Size = 3.00 GB





In [5]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(data, label, test_size=0.2, random_state=42)

In [6]:
test = np.zeros((12500, shape, shape, 3), dtype=np.uint8)
for i in tqdm(range(12500)):
    img = cv2.imread('./test/%s.jpg' % str(i + 1))
    img = img[:, :, ::-1]
    test[i] = cv2.resize(img, (shape, shape))
print('Testing Data Size = %.2f GB' % (sys.getsizeof(test)/1024**3))

100%|██████████| 12500/12500 [01:30<00:00, 138.42it/s]

Testing Data Size = 1.00 GB





### 1.1 载入预训练模型及权重，训练新分类器

**基本流程**
- 导入预训练模型及相应权重（去除分类器部分）：XXX()
- 将预训练模型所有层锁定以避免被训练修改：layers.trainable = False
- 给预训练模型尾部添加新的分类器：根据分类个数选择 sigmoid 还是 softmax
- 编译预训练模型：根据分类个数选择 binary_crossentropy 还是 categorical_crossentropy
- 查看可训练权重个数：自定义 get_param_count 函数计算模型解锁参数个数
- 训练模型
  - 应使用较小的 batch_size 进行训练，这样即使使用较少的训练代数依然能够收敛到很高的准确率。
  - 训练代数通常不需要太多，5-10代即可，这是因为模型目前可训练参数个数很少（这里只有500多个），因此并不需要太多代来让所有参数都达到最优值。

**下面我们先使用具有22层的VGG16模型进行Transfer Learning.**

In [7]:
from keras import backend as K

def get_params_count(model):
    trainable = int(np.sum([K.count_params(p) for p in set(model.trainable_weights)]))
    non_trainable = int(np.sum([K.count_params(p) for p in set(model.non_trainable_weights)]))
    return trainable, non_trainable

In [8]:
base_model = VGG16(input_shape=(224, 224, 3), weights='imagenet', include_top=False, pooling='avg')

for layers in base_model.layers:
    layers.trainable = False

y = Dropout(0.5)(base_model.output)
y = Dense(1, activation='sigmoid')(y)

model1 = Model(inputs=base_model.input, outputs=y)
model1.compile(loss='binary_crossentropy', optimizer='adadelta', metrics=['accuracy'])
model1.summary()
print('Model has %d layers.' % len(model1.layers))

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584   

In [9]:
model1.fit(x=X_train, y=y_train, batch_size=16, epochs=5, validation_data=(X_val, y_val), shuffle=True)

Train on 20000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f6a84f0ce50>

**下面我们使用含有178层的ResNet50预训练模型进行Transfer Learning**

In [10]:
base_model = ResNet50(input_shape=(224, 224, 3), weights='imagenet', include_top=False, pooling='avg')

for layers in base_model.layers:
    layers.trainable = False

y = Dropout(0.25)(base_model.output)
y = Dense(1, activation='sigmoid')(y)

model2 = Model(inputs=base_model.input, outputs=y)
model2.compile(loss='binary_crossentropy', optimizer='adadelta', metrics=['accuracy'])
model2.summary()
print('Model has %d layers.' % len(model2.layers))

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_2 (InputLayer)             (None, 224, 224, 3)   0                                            
____________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D) (None, 230, 230, 3)   0           input_2[0][0]                    
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, 112, 112, 64)  9472        zero_padding2d_1[0][0]           
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, 112, 112, 64)  256         conv1[0][0]                      
___________________________________________________________________________________________

In [11]:
model2.fit(x=X_train, y=y_train, batch_size=16, epochs=5, validation_data=(X_val, y_val), shuffle=True)

Train on 20000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f69ca887e50>

### 1.2 构建与预训练模型结构一致的模型，仅载入预训练的权重，搭配新分类器训练
- 这种方案显然没有第一种方案简单，因为需要自己手动搭出一个完整的模型，然后再导入预训练模型的权重后才能使用。
- 这种方案要求你能够搭出和预训练模型结构完全一致（所有权重矩阵的尺寸都相同）的模型，否则在 load_weights 的时候就会报错。这种方案主要的好处是自己构建的模型其实可以和预训练模型有所出入，比如输入尺寸、各层的超参配置等。对于图片大小比较小的数据集（例如 CIFAR10），由于图片尺寸根本没达到预训练模型要求的下限，因此没法直接定义 VGG16(input_shape=(32, 32, 3))。要不然放大所有图片，要不然就是这种方案。

In [12]:
x = Input(shape=(32, 32, 3))
y = x
y = Convolution2D(filters=64, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = Convolution2D(filters=64, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = MaxPooling2D(pool_size=2, strides=2, padding='same')(y)

y = Convolution2D(filters=128, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = Convolution2D(filters=128, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = MaxPooling2D(pool_size=2, strides=2, padding='same')(y)

y = Convolution2D(filters=256, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = Convolution2D(filters=256, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = Convolution2D(filters=256, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = MaxPooling2D(pool_size=2, strides=2, padding='same')(y)

y = Convolution2D(filters=512, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = Convolution2D(filters=512, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = Convolution2D(filters=512, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = MaxPooling2D(pool_size=2, strides=2, padding='same')(y)

y = Convolution2D(filters=512, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = Convolution2D(filters=512, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = Convolution2D(filters=512, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')(y)
y = MaxPooling2D(pool_size=2, strides=2, padding='same')(y)

y = GlobalAveragePooling2D()(y)
# y = Dropout(0.5)(y)
# y = Dense(10, activation='softmax')(y)

modelx = Model(inputs=x, outputs=y)

In [14]:
modelx.load_weights('/home/ubuntu/.keras/models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5')
modelx.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 64)        1792      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 16, 16, 128)       73856     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 16, 16, 128)       147584    
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 8, 8, 128)         0         
__________

In [15]:
modelx.layers

[<keras.engine.topology.InputLayer at 0x7f69c8100bd0>,
 <keras.layers.convolutional.Conv2D at 0x7f69cae6b1d0>,
 <keras.layers.convolutional.Conv2D at 0x7f69caeda450>,
 <keras.layers.pooling.MaxPooling2D at 0x7f6a84e4cb50>,
 <keras.layers.convolutional.Conv2D at 0x7f69cad3d290>,
 <keras.layers.convolutional.Conv2D at 0x7f69cac775d0>,
 <keras.layers.pooling.MaxPooling2D at 0x7f69cab75990>,
 <keras.layers.convolutional.Conv2D at 0x7f69caa75ad0>,
 <keras.layers.convolutional.Conv2D at 0x7f69c80a3f10>,
 <keras.layers.convolutional.Conv2D at 0x7f69c80c2850>,
 <keras.layers.pooling.MaxPooling2D at 0x7f69c80d21d0>,
 <keras.layers.convolutional.Conv2D at 0x7f69c8070550>,
 <keras.layers.convolutional.Conv2D at 0x7f69c8081c10>,
 <keras.layers.convolutional.Conv2D at 0x7f69c8091510>,
 <keras.layers.pooling.MaxPooling2D at 0x7f69c8036f90>,
 <keras.layers.convolutional.Conv2D at 0x7f69b87ba950>,
 <keras.layers.convolutional.Conv2D at 0x7f69b87c8a90>,
 <keras.layers.convolutional.Conv2D at 0x7f69b87d

In [16]:
[weights, bias] = modelx.layers[1].get_weights()
print(weights.shape)
print(bias.shape)

(3, 3, 3, 64)
(64,)


In [17]:
weights[:, :, :, 0]

array([[[ 0.42947057,  0.55037946,  0.4800154 ],
        [ 0.373467  ,  0.44007453,  0.4085474 ],
        [-0.06136011, -0.08138704, -0.06514555]],

       [[ 0.27476987,  0.34573907,  0.31047726],
        [ 0.03868078,  0.04063221,  0.05020237],
        [-0.36722335, -0.45350131, -0.40338343]],

       [[-0.05746817, -0.05863491, -0.05087169],
        [-0.26224968, -0.33066967, -0.28522751],
        [-0.35009676, -0.4850302 , -0.41851634]]], dtype=float32)

In [18]:
bias[0]

0.73429835

### 将预训练模型搭配训练好的分类器，在新数据集上微调预训练模型最后几层 (Fine-tuning)

**基本流程**
- 选定模型中较靠后的部分解冻，参与训练
- 由于模型中通常都是串联的几层搭配组合，一般建议属于一个组合的层最好同时冻结或解冻。
- 关于如何选择解锁层的数量：从高（输出端）往低（输入端）一部分一部分的解锁，每解锁一部分，就训练5轮，看看模型的Loss和准确率是否有改善的趋势，如果有就继续训练，如果没有，就接着解锁上一部分，直至模型训练出现改善。

In [19]:
model2.layers[-37:]

[<keras.layers.core.Activation at 0x7f69cb4f8bd0>,
 <keras.layers.convolutional.Conv2D at 0x7f69cb4f8d90>,
 <keras.layers.normalization.BatchNormalization at 0x7f69cb4c7e90>,
 <keras.layers.core.Activation at 0x7f69cb498fd0>,
 <keras.layers.convolutional.Conv2D at 0x7f69cb498590>,
 <keras.layers.normalization.BatchNormalization at 0x7f69cb43c610>,
 <keras.layers.core.Activation at 0x7f69cb370f50>,
 <keras.layers.convolutional.Conv2D at 0x7f69cb3b9f90>,
 <keras.layers.convolutional.Conv2D at 0x7f69cb313ed0>,
 <keras.layers.normalization.BatchNormalization at 0x7f69cb3dd590>,
 <keras.layers.normalization.BatchNormalization at 0x7f69cb35be50>,
 <keras.layers.merge.Add at 0x7f69cb2e6c90>,
 <keras.layers.core.Activation at 0x7f69cb28df50>,
 <keras.layers.convolutional.Conv2D at 0x7f69cb27d950>,
 <keras.layers.normalization.BatchNormalization at 0x7f69cb20ae50>,
 <keras.layers.core.Activation at 0x7f69cb1d7e50>,
 <keras.layers.convolutional.Conv2D at 0x7f69cb220c10>,
 <keras.layers.normaliza

In [20]:
for layers in model2.layers[-35:]:
    layers.trainable = True
    
print('Trainable = %d, Non-Trainable = %d' % (get_params_count(model2)))

Trainable = 14453249, Non-Trainable = 9136512


In [21]:
model2.fit(x=X_train, y=y_train, batch_size=16, epochs=10, validation_data=(X_val, y_val))

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6a874e7b50>

In [22]:
model2.save('ResNet_Finetune_last3_epoch5.h5')