### 本程序参考了下面的ipynb内容
https://github.com/nkmk/tensorflow-keras-examples/blob/ffd4fc68dab9c6f65b750e222447d88c68f81da5/transfer-learning/cifar10_data_with_mobilenet_v2_model_fine_tuning.ipynb

In [1]:
import tensorflow
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Convolution2D, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping

(X_train, y_train), (X_test, y_test) = tensorflow.keras.datasets.cifar10.load_data()

In [2]:
print(tensorflow.__version__)

2.3.1


In [3]:
tensorflow.random.set_seed(0)

In [4]:
X_train.shape,y_train.shape,X_test.shape,y_test.shape

((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))

In [5]:
X_train.shape[1:],type(X_train)

((32, 32, 3), numpy.ndarray)

In [6]:
X_train.dtype

dtype('uint8')

In [7]:
X_train[0].shape

(32, 32, 3)

In [8]:
# X_train = X_train.astype('float32')
# X_test = X_test.astype('float32')
# print("X_train dtype=",X_train.dtype)
# X_train /= 255.0
# X_test /= 255.0

In [9]:
inputs = tensorflow.keras.Input(shape=(None, None, 3))
x = tensorflow.keras.layers.Lambda(lambda img: tensorflow.image.resize(img, (160, 160)))(inputs)
x = tensorflow.keras.layers.Lambda(tensorflow.keras.applications.mobilenet_v2.preprocess_input)(x)

In [10]:
x

<tf.Tensor 'lambda_1/sub:0' shape=(None, 160, 160, 3) dtype=float32>

In [11]:
base_model = tensorflow.keras.applications.mobilenet_v2.MobileNetV2(
    weights='imagenet', input_tensor=x, input_shape=(160, 160, 3),
    include_top=False, pooling='avg'
)

In [12]:
base_model.summary()

Model: "mobilenetv2_1.00_160"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 160, 160, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 160, 160, 3)  0           lambda[0][0]                     
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 161, 161, 3)  0           lambda_1[0][0]                   
_______________________________________________________________________________

In [13]:
model = Sequential([
    base_model,
    tensorflow.keras.layers.Dense(10, activation='softmax')
])

In [14]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Functi (None, 1280)              2257984   
_________________________________________________________________
dense (Dense)                (None, 10)                12810     
Total params: 2,270,794
Trainable params: 2,236,682
Non-trainable params: 34,112
_________________________________________________________________


In [15]:
print(len(model.layers))
print(len(base_model.layers))
print(model.layers[0].name)
print(model.layers[1].name)
print(len(model.layers[0].layers))

2
158
mobilenetv2_1.00_160
dense
158


In [16]:
# 冻结base_model，不可训练
# 已经是训练好的不需要在训练，而且可以提高训练速度
base_model.trainable = False

In [17]:
# Trainable params为0 应为已经冻结，不可训练
base_model.summary()

Model: "mobilenetv2_1.00_160"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 160, 160, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 160, 160, 3)  0           lambda[0][0]                     
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 161, 161, 3)  0           lambda_1[0][0]                   
_______________________________________________________________________________

In [18]:
# 只有自己加的最后一个全连接层是可以训练，其他层都冻结了，提高了训练速度
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Functi (None, 1280)              2257984   
_________________________________________________________________
dense (Dense)                (None, 10)                12810     
Total params: 2,270,794
Trainable params: 12,810
Non-trainable params: 2,257,984
_________________________________________________________________


In [19]:
model.compile(optimizer=tensorflow.keras.optimizers.Adam(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [20]:
# 这个是没有进行迁移训练的模型预测结果，准确率非常低
print(model.evaluate(X_test, y_test, verbose=2))

313/313 - 6s - loss: 2.9225 - accuracy: 0.1132
[2.9224750995635986, 0.11320000141859055]


In [21]:
model.fit(X_train, y_train, epochs=6, validation_split=0.2, batch_size=256)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7feef4584b50>

In [22]:
# 现在是通过6次训练后的模型测试结果，准确率大幅度提高
print(model.evaluate(X_test, y_test, verbose=2))

313/313 - 6s - loss: 0.3965 - accuracy: 0.8661
[0.39647266268730164, 0.866100013256073]


### 下面的是把基本模型的一部分解冻，并再次训练的方法

In [23]:
base_model.summary()

Model: "mobilenetv2_1.00_160"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 160, 160, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 160, 160, 3)  0           lambda[0][0]                     
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 161, 161, 3)  0           lambda_1[0][0]                   
_______________________________________________________________________________

In [24]:
layer_names = [l.name for l in base_model.layers]

In [25]:
idx=layer_names.index("block_12_expand")
print(idx)

110


In [26]:
base_model.trainable = True
for layer in base_model.layers[:idx]:
    layer.trainable = False

In [27]:
base_model.summary()

Model: "mobilenetv2_1.00_160"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 160, 160, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 160, 160, 3)  0           lambda[0][0]                     
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 161, 161, 3)  0           lambda_1[0][0]                   
_______________________________________________________________________________

In [28]:
# 在设定了trainable之后需要重新再compile一下
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Functi (None, 1280)              2257984   
_________________________________________________________________
dense (Dense)                (None, 10)                12810     
Total params: 2,270,794
Trainable params: 1,812,426
Non-trainable params: 458,368
_________________________________________________________________


In [29]:
model.compile(
    optimizer=tensorflow.keras.optimizers.Adam(),
    #               optimizer=tensorflow.keras.optimizers.RMSprop(learning_rate=0.00001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

In [30]:
# 重新编译后就不会有错误了，可以正常显示需要训练的参数数量
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Functi (None, 1280)              2257984   
_________________________________________________________________
dense (Dense)                (None, 10)                12810     
Total params: 2,270,794
Trainable params: 1,812,426
Non-trainable params: 458,368
_________________________________________________________________


In [31]:
# 这个是旧的model模型的预测结果，下面将进行重新学习并预测
print(model.evaluate(X_test, y_test, verbose=2))

313/313 - 6s - loss: 0.3965 - accuracy: 0.8661
[0.39647266268730164, 0.866100013256073]


In [32]:
model.fit(X_train, y_train, epochs=6, validation_split=0.2, batch_size=256)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7feeeac563d0>

In [25]:
# 从上面的训练结果看有明细的过拟合现象，说明base model不应该被破坏，不要解冻base model
# 使用Adam的精度明显比SGD要好很多
# 重新训练后，再次进行预测
print(model.evaluate(X_test, y_test, verbose=2))

313/313 - 6s - loss: 3.9097 - accuracy: 0.4660
[3.90969181060791, 0.4659999907016754]


### 不解冻base model，但更新optimizer的优化方式为Adam

In [33]:
# 为了防止base_model被多次使用污染，需要重新做一遍
base_model = tensorflow.keras.applications.mobilenet_v2.MobileNetV2(
    weights='imagenet', input_tensor=x, input_shape=(160, 160, 3),
    include_top=False, pooling='avg'
)

In [34]:
base_model.trainable=False

In [35]:
base_1=Sequential([
    base_model,
    tensorflow.keras.layers.Dense(10,activation='softmax')
]
)

In [36]:
base_1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Functi (None, 1280)              2257984   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                12810     
Total params: 2,270,794
Trainable params: 12,810
Non-trainable params: 2,257,984
_________________________________________________________________


In [37]:
base_1.compile(
    optimizer=tensorflow.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [38]:
base_1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Functi (None, 1280)              2257984   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                12810     
Total params: 2,270,794
Trainable params: 12,810
Non-trainable params: 2,257,984
_________________________________________________________________


In [39]:
%%time
base_1.fit(
    X_train,
    y_train,
    epochs=6,
    validation_split=0.2,
    batch_size=256
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
CPU times: user 21.3 s, sys: 6.9 s, total: 28.2 s
Wall time: 2min 39s


<tensorflow.python.keras.callbacks.History at 0x7feee0730ac0>

In [40]:
print(base_1.evaluate(X_test, y_test, verbose=2))

313/313 - 6s - loss: 0.3969 - accuracy: 0.8659
[0.3968921899795532, 0.8658999800682068]


## 结论
```
最后的结果准确率0.86 损失0.39，这个结果比参考文档的[0.6538689835548401, 0.7845]要高很多。
1.不应该破坏base model的结果，完全使用它的迁移学习结果
2.还有就是应该使用Adam作为优化参数，效果比RMSprop要好
```