### 本程序参考了下面的ipynb内容
https://github.com/nkmk/tensorflow-keras-examples/blob/ffd4fc68dab9c6f65b750e222447d88c68f81da5/transfer-learning/cifar10_data_with_mobilenet_v2_model_fine_tuning.ipynb

In [57]:
import tensorflow
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Convolution2D, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping

# Load the CIFAR-10 train/test arrays through the `cifar10` module imported above.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [58]:
print(tensorflow.__version__)

2.0.3


In [59]:
tensorflow.random.set_seed(0)

In [60]:
X_train.shape,y_train.shape,X_test.shape,y_test.shape

((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))

In [61]:
X_train.shape[1:],type(X_train)

((32, 32, 3), numpy.ndarray)

In [62]:
X_train.dtype

dtype('uint8')

In [63]:
X_train[0].shape

(32, 32, 3)

In [14]:
# Manual float conversion and [0, 1] scaling, currently disabled.
# Presumably disabled because the model built later applies
# mobilenet_v2.preprocess_input itself inside a Lambda layer, so the raw uint8
# images are passed to the model unchanged — TODO confirm.
# NOTE(review): this cell still carries a saved "X_train dtype= float32" output,
# which looks like it came from an earlier run in which these lines were active.
# X_train = X_train.astype('float32')
# X_test = X_test.astype('float32')
# print("X_train dtype=",X_train.dtype)
# X_train /= 255.0
# X_test /= 255.0

X_train dtype= float32


In [64]:
# Accept RGB images of any spatial size; resizing happens inside the graph.
inputs = tensorflow.keras.Input(shape=(None, None, 3))

# Step 1: resize every image to 160x160.
resize_layer = tensorflow.keras.layers.Lambda(
    lambda img: tensorflow.image.resize(img, (160, 160))
)
# Step 2: apply MobileNetV2's own input preprocessing function.
preprocess_layer = tensorflow.keras.layers.Lambda(
    tensorflow.keras.applications.mobilenet_v2.preprocess_input
)

# `x` is the preprocessed tensor that the backbone is built on.
x = preprocess_layer(resize_layer(inputs))

In [65]:
x

<tf.Tensor 'lambda_3/Identity:0' shape=(None, 160, 160, 3) dtype=float32>

In [66]:
# Build the ImageNet-pretrained MobileNetV2 backbone on top of the preprocessed
# tensor `x`, without the classification head and with global average pooling.
base_model = tensorflow.keras.applications.mobilenet_v2.MobileNetV2(
    input_shape=(160, 160, 3),
    input_tensor=x,
    include_top=False,
    pooling='avg',
    weights='imagenet',
)

In [67]:
base_model.summary()

Model: "mobilenetv2_1.00_160"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 160, 160, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
lambda_3 (Lambda)               (None, 160, 160, 3)  0           lambda_2[0][0]                   
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 161, 161, 3)  0           lambda_3[0][0]                   
_______________________________________________________________________________

In [68]:
# Stack a 10-way softmax classifier on top of the pretrained backbone.
model = Sequential()
model.add(base_model)
model.add(Dense(10, activation='softmax'))

In [69]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Model) (None, 1280)              2257984   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                12810     
Total params: 2,270,794
Trainable params: 2,236,682
Non-trainable params: 34,112
_________________________________________________________________


In [70]:
# Print, one value per line: the wrapper's layer count, the backbone's layer
# count, the names of the two top-level layers, and the backbone's layer count
# again as reached through the wrapper.
top_level_layers = model.layers
for value in (
    len(top_level_layers),
    len(base_model.layers),
    top_level_layers[0].name,
    top_level_layers[1].name,
    len(top_level_layers[0].layers),
):
    print(value)

2
158
mobilenetv2_1.00_160
dense_1
158


In [71]:
# Freeze base_model so that its weights are not trainable.
# It is already pretrained and does not need to be trained again; freezing it
# also speeds up training.
base_model.trainable = False

In [73]:
# Trainable params should be 0 here because base_model has been frozen.
base_model.summary()

Model: "mobilenetv2_1.00_160"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 160, 160, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
lambda_3 (Lambda)               (None, 160, 160, 3)  0           lambda_2[0][0]                   
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 161, 161, 3)  0           lambda_3[0][0]                   
_______________________________________________________________________________

In [74]:
# Only the final fully connected layer added here is trainable; all other layers
# are frozen, which speeds up training.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Model) (None, 1280)              2257984   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                12810     
Total params: 2,270,794
Trainable params: 12,810
Non-trainable params: 2,257,984
_________________________________________________________________


In [75]:
# Compile for training the new head on top of the frozen backbone: RMSprop with
# a small learning rate, sparse categorical cross-entropy, accuracy as metric.
head_optimizer = tensorflow.keras.optimizers.RMSprop(learning_rate=0.0001)
model.compile(
    optimizer=head_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [76]:
# Evaluation of the model before any transfer-learning training has been done;
# the accuracy is very low.
print(model.evaluate(X_test, y_test, verbose=2))

10000/1 - 7s - loss: 2.6251 - accuracy: 0.1132
[2.922475629425049, 0.1132]


In [77]:
# Train for 6 epochs, holding out 20% of the training data for validation.
model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=6,
    validation_split=0.2,
)

Train on 40000 samples, validate on 10000 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7f786bbccfd0>

In [78]:
# Test result of the model after 6 epochs of training; the accuracy has improved
# substantially.
print(model.evaluate(X_test, y_test, verbose=2))

10000/1 - 6s - loss: 0.9878 - accuracy: 0.7198
[0.8419379863739014, 0.7198]


### 下面的是把基本模型的一部分解冻，并再次训练的方法

In [133]:
base_model.summary()

Model: "mobilenetv2_1.00_160"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 160, 160, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
lambda_3 (Lambda)               (None, 160, 160, 3)  0           lambda_2[0][0]                   
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 161, 161, 3)  0           lambda_3[0][0]                   
_______________________________________________________________________________

In [134]:
# Collect the backbone's layer names in order, for looking up a layer index by name.
layer_names = [layer.name for layer in base_model.layers]

In [136]:
# Position of the "block_12_expand" layer within the backbone's layer list.
idx = layer_names.index("block_12_expand")
print(idx)

110


In [137]:
# Mark the whole backbone as trainable first, then set every layer before index
# `idx` back to non-trainable, so only the layers from `idx` onward stay trainable.
base_model.trainable=True
for layer in base_model.layers[:idx]:
    layer.trainable=False

In [138]:
base_model.summary()

Model: "mobilenetv2_1.00_160"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 160, 160, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
lambda_3 (Lambda)               (None, 160, 160, 3)  0           lambda_2[0][0]                   
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 161, 161, 3)  0           lambda_3[0][0]                   
_______________________________________________________________________________

In [140]:
# After changing `trainable`, the model has to be compiled again.
# NOTE(review): the totals in the saved output of this cell do not add up to the
# per-layer parameter counts, which looks like the effect of summarizing before
# recompiling — confirm.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Model) (None, 1280)              2257984   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                12810     
Total params: 471,178
Trainable params: 12,810
Non-trainable params: 458,368
_________________________________________________________________


In [141]:
# Recompile after changing which layers are trainable.
# The keyword must be spelled `optimizer`: with a misspelled keyword the RMSprop
# instance below is never installed as the optimizer, so the deliberately small
# fine-tuning learning rate (1e-5, ten times lower than the head-only run) would
# not be used.
model.compile(
    optimizer=tensorflow.keras.optimizers.RMSprop(learning_rate=0.00001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [142]:
# After recompiling, the problem is gone and the number of parameters to be
# trained is displayed correctly.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Model) (None, 1280)              2257984   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                12810     
Total params: 2,270,794
Trainable params: 1,812,426
Non-trainable params: 458,368
_________________________________________________________________


In [143]:
# Evaluation result of the model as it stands before fine-tuning; it is retrained
# and evaluated again below.
print(model.evaluate(X_test, y_test, verbose=2))

10000/1 - 7s - loss: 11.3522 - accuracy: 0.4383
[10.383626011657714, 0.4383]


In [144]:
# Train again with the partially unfrozen backbone: 6 epochs, 20% validation
# split, batches of 256.
model.fit(X_train, y_train, batch_size=256, epochs=6, validation_split=0.2)

Train on 40000 samples, validate on 10000 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7f786aedadd8>

In [96]:
# The training run above shows obvious overfitting, which suggests the base model
# should not be disturbed, i.e. do not unfreeze it.
# Evaluate again after the retraining.
# NOTE(review): the saved output of this cell is identical to the evaluation
# recorded before the retraining, and this cell's execution count (96) is lower
# than the fit cell's (144), so the output looks stale — re-run before relying
# on the conclusion above.
print(model.evaluate(X_test, y_test, verbose=2))

10000/1 - 6s - loss: 11.3522 - accuracy: 0.4383
[10.383626011657714, 0.4383]


### 不解冻base model，但更新optimizer的优化方式

In [164]:
# Rebuild base_model from scratch so that it is not contaminated by having been
# used (and modified) several times above.
base_model = tensorflow.keras.applications.mobilenet_v2.MobileNetV2(
    weights='imagenet', input_tensor=x, input_shape=(160, 160, 3),
    include_top=False, pooling='avg'
)

In [165]:
base_model.trainable=False

In [166]:
# Same layout as the first model: the backbone followed by a 10-way softmax head.
base_1 = Sequential()
base_1.add(base_model)
base_1.add(Dense(10, activation='softmax'))

In [167]:
base_1.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Model) (None, 1280)              2257984   
_________________________________________________________________
dense_6 (Dense)              (None, 10)                12810     
Total params: 2,270,794
Trainable params: 12,810
Non-trainable params: 2,257,984
_________________________________________________________________


In [168]:
# Compile with Adam (constructed with no arguments) instead of RMSprop; loss and
# metric are the same as for the first model.
adam_optimizer = tensorflow.keras.optimizers.Adam()
base_1.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=adam_optimizer,
)

In [169]:
base_1.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Model) (None, 1280)              2257984   
_________________________________________________________________
dense_6 (Dense)              (None, 10)                12810     
Total params: 2,270,794
Trainable params: 12,810
Non-trainable params: 2,257,984
_________________________________________________________________


In [170]:
%%time
# Train base_1 with the same epochs / validation split / batch size as the
# earlier runs; the %%time cell magic above reports how long the cell takes.
base_1.fit(
    X_train,
    y_train,
    epochs=6,
    validation_split=0.2,
    batch_size=256
)

Train on 40000 samples, validate on 10000 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
CPU times: user 28 s, sys: 7.02 s, total: 35 s
Wall time: 2min 56s


<tensorflow.python.keras.callbacks.History at 0x7f73ec9a3cc0>

In [171]:
print(base_1.evaluate(X_test, y_test, verbose=2))

10000/1 - 6s - loss: 1.3533 - accuracy: 0.7167
[0.9879087584495544, 0.7167]
