# 迁移学习与深度学习结合-在图片识别中的应用
## 2021-6
学号:2201201Z5012 刘莉

## 步骤1：导入相关库

In [7]:
# 导入相关库
import numpy as np
import pandas as pd
import tensorflow as tf
import os

## 步骤2：准备实验数据
实验所用数据是kaggle里面的一个图片数据集，链接如下
https://www.kaggle.com/binhminhs10/food5k

In [12]:
# Build the filename/label table for one dataset split
def dframe(dtype: str, datapath: str) -> pd.DataFrame:
    """Return a table of image file names and labels for one dataset split.

    Every entry of the directory ``<datapath>/<dtype>`` becomes one row.
    The label is the part of the file name before the first underscore
    (e.g. ``'1_995.jpg'`` -> ``'1'``) and is kept as a string.

    Args:
        dtype: Name of the split sub-directory, e.g. ``'training'``.
        datapath: Root directory of the dataset, with or without a
            trailing path separator.

    Returns:
        A DataFrame with the columns ``dataname`` (file name) and
        ``label`` (label string), ordered by file name.

    Raises:
        OSError: If the split directory cannot be listed.
    """
    # os.path.join tolerates a missing trailing separator on datapath
    path = os.path.join(datapath, dtype)
    # os.listdir returns entries in arbitrary order; sort so the table is
    # reproducible across runs and platforms
    names = sorted(os.listdir(path))
    labels = [name.split('_')[0] for name in names]
    return pd.DataFrame({'dataname': names, 'label': labels})
# Root directory of the Food-5K dataset
datapath = './Food-5K/'
# Build one filename/label table per split, in the order
# training -> validation -> evaluation
df_train, df_val, df_test = (
    dframe(split, datapath)
    for split in ('training', 'validation', 'evaluation')
)

In [14]:
# Show the last 5 rows of the training table
df_train.tail(5)

Unnamed: 0,dataname,label
2995,1_995.jpg,1
2996,1_996.jpg,1
2997,1_997.jpg,1
2998,1_998.jpg,1
2999,1_999.jpg,1


In [15]:
# 导入处理图片的包
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [17]:
# Build the Keras input pipelines for the training and validation splits.
#
# Training images are randomly augmented on the fly (rotation, shifts,
# horizontal flip). Validation images are passed through unchanged so the
# validation metrics are measured on the real images, not on random
# distortions of them.
#
# featurewise_center / featurewise_std_normalization are deliberately not
# set: they only take effect after datagen.fit(...) has been called on sample
# data, which this notebook never does, so they would only emit warnings.
#
# NOTE: images are fed as raw 0-255 RGB values (no rescaling), which matches
# what the evaluation cell feeds to model.predict. If input normalisation is
# introduced here, the evaluation cell must apply the same transformation.
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
)
# No augmentation for validation data
val_datagen = ImageDataGenerator()

# Stream batches of (image, label) from the files listed in each DataFrame.
# class_mode='binary' yields a single 0/1 label per image, matching the
# one-unit sigmoid output of the model.
train_generator = train_datagen.flow_from_dataframe(
    df_train,
    directory='Food-5K/training/',
    x_col='dataname',
    y_col='label',
    class_mode='binary',
    target_size=(224, 224),
)
# Use the augmentation-free generator for validation; shuffling is not
# needed when the data is only evaluated.
val_generator = val_datagen.flow_from_dataframe(
    df_val,
    directory='Food-5K/validation/',
    x_col='dataname',
    y_col='label',
    class_mode='binary',
    target_size=(224, 224),
    shuffle=False,
)

Found 3000 validated image filenames belonging to 2 classes.
Found 1000 validated image filenames belonging to 2 classes.


## 步骤3：训练模型
通过迁移学习技术来训练模型，不需要从头开始跑 CNN 模型

In [19]:
# Load the pre-trained base model: ResNet50 with ImageNet weights and without
# its top classification layers (include_top=False), for 224x224 RGB input
from tensorflow.keras.applications import ResNet50

feature_extractor = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [20]:
# Use ResNet50 as a frozen base and put a small trainable head on top of it.

# Freeze the pre-trained weights so that fitting only updates the new head
feature_extractor.trainable = False

# Input layer: 224x224 images with 3 channels
input_ = tf.keras.Input(shape=(224, 224, 3))

# Run the base model in inference mode (training=False)
features = feature_extractor(input_, training=False)

# Collapse the spatial feature map into one vector per image
pooling_layer = tf.keras.layers.GlobalAveragePooling2D()
pooled = pooling_layer(features)

# Single sigmoid unit: the output is a probability for the binary label
classifier = tf.keras.layers.Dense(1, activation='sigmoid')
output_ = classifier(pooled)

# Assemble the new model from input to output
model = tf.keras.Model(inputs=input_, outputs=output_)

# Loss, optimizer and reported metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the model
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
resnet50 (Functional)        (None, 7, 7, 2048)        23587712  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 1)                 2049      
Total params: 23,589,761
Trainable params: 2,049
Non-trainable params: 23,587,712
_________________________________________________________________


In [21]:
# Fit the model on the training batches for 20 epochs, validating on the
# validation batches after each epoch
model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=20,
)



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1b057dcef08>

## 步骤4：测试模型
在测试数据集上验证模型：使用 Pillow 库加载图片并调整其尺寸，使用 scikit-learn 评估模型性能。
利用 scikit-learn 的 classification_report 与 confusion_matrix 生成模型的分类报告和混淆矩阵。

In [23]:
# Notebook shell command: install Pillow, which provides the PIL module
# imported by the evaluation code
!pip install pillow



In [22]:
# 导入相关库（函数）
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix

In [24]:
# Evaluate the fine-tuned model on the evaluation split.
eval_dir = 'Food-5K/evaluation/'
# Number of images sent to model.predict per call
batch_size = 32

# Ground-truth labels, parsed from the "<label>_<index>.jpg" file names
y_true = []
# Predicted labels (1 when the sigmoid output exceeds 0.5, otherwise 0)
y_pred = []

# Sort so the evaluation order does not depend on the file system
eval_files = sorted(os.listdir(eval_dir))

for start in range(0, len(eval_files), batch_size):
    batch_files = eval_files[start:start + batch_size]
    batch_images = []
    for fname in batch_files:
        # The context manager closes the file handle promptly.
        # convert('RGB') guarantees 3 channels even for grayscale, RGBA or
        # CMYK files, so every array has shape (224, 224, 3).
        with Image.open(eval_dir + fname) as img:
            rgb = img.convert('RGB').resize((224, 224))  # resize to model input
        batch_images.append(np.asarray(rgb, dtype=np.float32))
        y_true.append(int(fname.split('_')[0]))

    # One predict call per batch instead of one per image
    probs = model.predict(np.stack(batch_images), verbose=0).ravel()
    y_pred.extend(int(p > 0.5) for p in probs)

# Print the classification report and the confusion matrix
print('='*20+'模型分类测试结果报告'+'='*20)
print(classification_report(y_true, y_pred))
print('-'*50)
print(confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.97      0.98      0.98       500
           1       0.98      0.97      0.98       500

    accuracy                           0.98      1000
   macro avg       0.98      0.98      0.98      1000
weighted avg       0.98      0.98      0.98      1000

--------------------------------------------------
[[490  10]
 [ 13 487]]


## 保存二次训练好的模型

In [25]:
# Save the fine-tuned model under <savepath><modelname>
savepath = './myTrainModel/'
modelname = 'resnet50_food_model'
model.save(f'{savepath}{modelname}')

INFO:tensorflow:Assets written to: ./myTrainModel/resnet50_food_model\assets


## 加载自己训练好的模型

In [None]:
# Load the fine-tuned model back from disk
loadpath = './myTrainModel/'
modelname = 'resnet50_food_model'
# The path is <loadpath><modelname>, the same location the save cell of this
# notebook writes to
model = tf.keras.models.load_model(loadpath + modelname)