Contents
==

[Introduction to The Kernel](#intro)<br/>
[1. Preprocessing Data](#pre) <br/>
&emsp;&emsp; [1.1 Organizing & Splitting Data](#reshape) <br/>
&emsp;&emsp; [1.2 Descriptive Statistics](#features) <br/>
&emsp;&emsp; [1.3 Augmenting Data](#augment)<br/>
[2. Building The Model](#model) <br/>
&emsp;&emsp; [2.1 Model's Callbacks](#call)<br/>
&emsp;&emsp; [2.2 Model Architecture](#arch)<br/>
&emsp;&emsp; [2.3 Hyperparameters Tuning](#params)<br/>
&emsp;&emsp; [2.4 Model Training](#train)<br/>
[3. Model Evaluation](#eval) <br/>
[4. Submission](#submit)

<a id="intro"></a>
# Introduction to The Kernel
在本 kernel 中，我们使用在 ImageNet 上预训练的 InceptionResNetV2 作为编码器（其权重保持冻结），只训练接在编码器之后的解码部分（全连接分类层）

## 步骤
1. 对数据进行预处理，包括重构、归一化和图像增强
2. 构建模型，尝试不同的架构和超参数调优
3. 模型评估和计算模型的性能指标

In [None]:
# For data handling and manipulation
import numpy as np
import pandas as pd
import cv2

# For Visualiztion
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import clear_output


# for model building and trining
from keras import backend as K
from keras.layers import Dense, Activation, Dropout, BatchNormalization, Input, Flatten, Conv2D, MaxPooling2D, Lambda, UpSampling2D, Concatenate
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.initializers import he_normal
from keras.preprocessing.image import ImageDataGenerator

# For organizing data
import os
import shutil

In [None]:
# List the first five entries of the training image directory (shell command)
!ls ../input/dog-breed-identification/train/ -U | head -5 

In [None]:
# Load the label table (columns used later: `id` and `breed`) and preview its first five rows
labels = pd.read_csv('../input/dog-breed-identification/labels.csv')
labels.head(5)

In [None]:
# Distinct breed labels (the original note states there are 120 dog breeds)
classes = np.unique(labels.breed)
classes_num = classes.size
classes_num

In [None]:
train_dir = '../input/dog-breed-identification/train'  # the images directory
images_names = os.listdir(train_dir)  # names of the files in the directory
images_num = len(images_names)
print(f'Number of images: {images_num}')  # original note: 10222 images available for training

<a id="pre"></a>
# 1. 数据处理


<a id="reshape"></a>
## 1.1 拆分训练集/验证集/测试集

In [None]:
new_train_dir = '/root/new_train/'  # parent directory of the training set
new_test_dir = '/root/new_test/'  # parent directory of the test set
new_valid_dir = '/root/new_valid/'  # parent directory of the validation set

# Create the three roots from Python instead of `!mkdir`: exist_ok=True makes the
# cell idempotent when it is re-run, and makedirs also creates missing parents.
for split_root in (new_train_dir, new_test_dir, new_valid_dir):
    os.makedirs(split_root, exist_ok=True)

In [None]:
# 为每个品种创建子目录
for sub_dir in classes:
    os.mkdir(new_train_dir+sub_dir)
    os.mkdir(new_test_dir+sub_dir)
    os.mkdir(new_valid_dir+sub_dir)
!ls $new_train_dir

In [None]:
# Same label table with '.jpg' appended to every id; `labels` itself is left untouched
labels_jpg = labels.assign(id=labels['id'] + '.jpg')
# Map every breed to the list of its image file names
grouped_ids = {breed: ids.tolist() for breed, ids in labels_jpg.groupby('breed')['id']}
print(classes[0], grouped_ids[classes[0]])

In [None]:
# Split fractions for a 7:2:1 train/validation/test split: 10% test, 20% validation, the rest is training
test_split = 0.1
valid_split = 0.2

In [None]:
# counters to track the final sizes of the sets
train_size = 0
valid_size = 0
test_size = 0

# Fixed-seed generator, local to this cell: every run assigns each image to the
# same set. The target directories are never emptied, so with an unseeded draw a
# second run would scatter copies of one image across several sets.
split_rng = np.random.RandomState(0)

# loop on the images of each breed and, using the defined probabilities, assign each image to one of the 3 sets
for breed_idx, (breed, breed_images) in enumerate(grouped_ids.items()):
    for img in breed_images:
        rnd_prob = split_rng.rand()  # random number in the range [0, 1) for the current image
        src_path = os.path.join(train_dir, img)

        if rnd_prob <= test_split:
            # copy to the corresponding breed subdirectory in the test directory
            shutil.copy(src_path, os.path.join(new_test_dir, breed))
            test_size += 1

        elif rnd_prob <= (test_split + valid_split):
            # copy to the corresponding breed subdirectory in the validation directory
            shutil.copy(src_path, os.path.join(new_valid_dir, breed))
            valid_size += 1

        else:
            # copy to the corresponding breed subdirectory in the training directory
            shutil.copy(src_path, os.path.join(new_train_dir, breed))
            train_size += 1

    # progress line, overwritten after each breed
    clear_output(wait=True)
    print(f'Organized {breed_idx+1} out of {classes_num} breeds: {breed}')

In [None]:
# Number of images that ended up in the training, validation and test sets
print(train_size, valid_size, test_size)

In [None]:
# Spot-check: list the first five files in the training folder of the first breed
test_breed = classes[0]
!ls $new_train_dir/$test_breed | head -5

<a id="features"></a>
## 1.2 描述统计

In [None]:
# Size of the preview images shown with plt.imshow
width, height, channels = 512, 512, 3

# pick 4 random positions in the list of training file names
rnd_indexes = np.random.randint(0, images_num, 4)

images_samples = np.zeros((4, height, width, 3), dtype=float)
samples_labels = []
for slot, picked in enumerate(rnd_indexes):
    file_name = images_names[picked]
    bgr = cv2.imread(train_dir + '/' + file_name)  # channels arrive in (blue, green, red) order
    rgb = bgr[:, :, [2, 1, 0]]  # reorder the channels to (red, green, blue)
    images_samples[slot] = cv2.resize(src=rgb, dsize=(width, height)) / 255  # resized, scaled to [0, 1]
    image_id = file_name[:-4]  # drop the 4-character extension to get the id used in `labels`
    samples_labels.append(labels.breed[labels.id == image_id].values[0])

# show the 4 samples side by side, each titled with its breed
fig, axs = plt.subplots(1, 4, figsize=(20, 5))
for ax, (img, label) in zip(axs.flat, zip(images_samples, samples_labels)):
    ax.imshow(img)
    ax.axis('off')
    ax.set_title(f'Class: {label}', size=15);

In [None]:
def img_process(img, radius=20):
    """Split a single-channel image into low- and high-frequency parts.

    The image is taken to the frequency domain with ``cv2.dft``, the spectrum is
    centred, and a square window of side ``2 * radius`` around the centre is
    either kept (low-pass) or removed (high-pass). Each masked spectrum is then
    transformed back with ``cv2.idft``.

    Args:
        img: 2-D array (one channel); it is converted to float32 before the DFT.
        radius: half the side length, in frequency bins, of the square window.
            Defaults to 20.

    Returns:
        Tuple ``(img_back_low, img_back_high)`` holding the magnitudes of the
        inverse DFT of the low-pass and high-pass filtered spectra. ``cv2.idft``
        is called without scaling flags, so the values are not rescaled to the
        input range.
    """
    rows, cols = img.shape
    crow, ccol = rows // 2, cols // 2  # centre of the shifted spectrum

    dft = cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT)
    # Shift only the two spatial axes; the last axis holds the (real, imaginary) pair.
    dft_shift = np.fft.fftshift(dft, axes=(0, 1))

    # Low-pass mask: ones inside the central window, zeros elsewhere.
    # max(..., 0) keeps the slice start valid for images smaller than the window.
    low_mask = np.zeros((rows, cols, 2), np.uint8)
    low_mask[max(crow - radius, 0):crow + radius,
             max(ccol - radius, 0):ccol + radius] = 1
    # High-pass mask is the exact complement of the low-pass mask.
    high_mask = 1 - low_mask

    def _filtered(mask):
        # Mask the centred spectrum, undo the shift and return the IDFT magnitude.
        spectrum = np.fft.ifftshift(dft_shift * mask, axes=(0, 1))
        restored = cv2.idft(spectrum)
        return cv2.magnitude(restored[:, :, 0], restored[:, :, 1])

    return _filtered(low_mask), _filtered(high_mask)

# For each of the 4 samples show, top to bottom: its first channel, the low-pass result, the high-pass result
fig, axs = plt.subplots(3, 4, figsize=(20,10))
fig.suptitle('Fliter Results', size=32)
row_titles = ('Original Image', 'LPF Image', 'HPF Image')
for axs_col, img in enumerate(images_samples):
    img = img[:, :, 0]  # filter a single channel only
    img_back_low, img_back_high = img_process(img)

    panels = (img, img_back_low, img_back_high)
    for axs_row, (panel, title) in enumerate(zip(panels, row_titles)):
        cell = axs[axs_row, axs_col]
        cell.imshow(panel, cmap='gray')
        cell.axis('off')
        cell.set_title(title, size=15)

<a id="augment"></a>
## 1.3 数据增强

注意:数据集中存在少量标签错误的数据

In [None]:
norm_factor = 1 / 255

# Augmentation settings handed to Keras' ImageDataGenerator
transform_params = {
    'featurewise_center': False,
    'featurewise_std_normalization': False,
    'samplewise_center': False,
    'samplewise_std_normalization': False,
    'rotation_range': 30,  # random rotation; the range is given in degrees
    'width_shift_range': 0.15,  # random horizontal shift; a value below 1 is a fraction of the image width
    'height_shift_range': 0.15,  # random vertical shift; a value below 1 is a fraction of the image height
    'horizontal_flip': True,  # randomly flip images horizontally
    'rescale': norm_factor  # multiply pixel values by norm_factor (scales values, does not resize the image)
}

# the generator used for training - gives augmented images
img_gen = ImageDataGenerator(**transform_params) 

In [None]:
# 用于验证的生成器——保持图像不变，以便验证错误成为测试错误的良好指示
# Generator used for validation: it only rescales pixel values and applies no augmentation,
# so that the validation error stays a good indicator of the test error
img_feed = ImageDataGenerator(rescale=1/255)

In [None]:
# Show the 4 samples (top row) next to one randomly augmented version of each (bottom row)
fig, axs = plt.subplots(2, 4, figsize=(20,10))
fig.suptitle('Augmentation Results', size=32)

for axs_col, img in enumerate(images_samples):
    # Let the generator draw the parameters itself, so the preview uses exactly the
    # ranges configured in transform_params. In particular the shifts come back in
    # pixels (fraction * image size) and with a random sign, which is what
    # apply_transform expects; passing the raw fractions would move the image by
    # less than one pixel. Drawn anew on every iteration.
    viz_transoform_params = img_gen.get_random_transform(img.shape)

    # the same image after augmentation (apply_transform does not apply `rescale`,
    # so the already normalised sample stays in the [0, 1] range)
    aug_img = img_gen.apply_transform(img, viz_transoform_params)

    axs[0, axs_col].imshow(img);
    axs[0, axs_col].axis('off')
    axs[0, axs_col].set_title('Original Image', size=15)

    axs[1, axs_col].imshow(aug_img);
    axs[1, axs_col].axis('off')
    axs[1, axs_col].set_title('Augmented Image', size=15)

<a id="model"></a>
# 2. 构建模型

<a id="call"></a>
## 2.1 设置模型回调

In [None]:
# 用于模型训练时绘制训练曲线(准确性，损耗)
class Plotter(Callback):
    """Keras callback that redraws the training curves (loss, accuracy) after every epoch.

    The history is kept in plain lists on the instance: ``losses``, ``val_losses``,
    ``epochs``, ``batch_no``, ``acc`` and ``val_acc``. It can be exported with
    ``get_plot_data`` and restored with ``load_plot_data``, which allows plotting
    curves recorded in another session.
    """

    def __init__(self):
        super().__init__()
        # Start with an empty history so plot() is safe before any training happened.
        self._reset_history()

    def _reset_history(self):
        # (Re)create the lists that store the values gathered during training.
        self.losses = []
        self.val_losses = []
        self.epochs = []
        self.batch_no = []
        self.acc = []
        self.val_acc = []

    def plot(self):
        """Redraw both graphs and print the metrics of the latest recorded epoch."""
        clear_output(wait=True)
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))

        # plot the losses
        ax1.plot(self.epochs, self.losses, label='train_loss')
        ax1.plot(self.epochs, self.val_losses, label='val_loss')

        # plot the accuracies
        ax2.plot(self.epochs, self.acc, label='train_acc')
        ax2.plot(self.epochs, self.val_acc, label='val_acc')

        ax1.set_title('Loss vs Epochs')
        ax1.set_xlabel("Epochs")
        ax1.set_ylabel("Loss")

        ax2.set_title('Accuracy vs Epochs')
        ax2.set_xlabel("Epoches")
        ax2.set_ylabel("Accuracy")

        ax1.legend()
        ax2.legend()
        plt.show()

        # Nothing recorded yet (e.g. training ended before the first epoch finished):
        # there is no "latest epoch" to report. len() is used because loaded
        # histories may be arrays rather than lists.
        if len(self.epochs) == 0:
            return

        # print out the metrics of the latest epoch
        print(f'Epoch #{self.epochs[-1]+1} >> train_acc={self.acc[-1]*100:.3f}%, train_loss={self.losses[-1]:.5f}')
        print(f'Epoch #{self.epochs[-1]+1} >> val_acc={self.val_acc[-1]*100:.3f}%, val_loss={self.val_losses[-1]:.5f}')

    def on_train_begin(self, logs=None):
        """Clear the stored history at the start of a training run."""
        self._reset_history()

    def on_epoch_end(self, epoch, logs=None):
        """Record the metrics of the finished epoch and refresh the graphs."""
        logs = logs or {}  # avoid a shared mutable default argument
        # append values from the last epoch
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.acc.append(logs.get('acc'))
        self.val_acc.append(logs.get('val_acc'))
        self.epochs.append(epoch)
        self.plot()  # update the graph

    def on_train_end(self, logs=None):
        """Draw the final state of the curves."""
        self.plot()

    def load_plot_data(self, data):
        """Restore a history; ``data`` must unpack into the six sequences, in get_plot_data order."""
        self.losses, self.val_losses, self.epochs, self.batch_no, self.acc, self.val_acc = data

    def get_plot_data(self):
        """Return the six history sequences as a list."""
        return [self.losses, self.val_losses, self.epochs, self.batch_no, self.acc, self.val_acc]

plotter = Plotter()

In [None]:
# 如果val_acc没有提升，用来降低学习率
# Lowers the learning rate when the monitored quantity (val_loss) stops improving
# NOTE(review): factor=0.01 with patience=1 shrinks the rate very sharply and very early — confirm this is intended
plateau_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.01,
                              patience=1, min_lr=1e-20)

In [None]:
# 回滚到训练中获得的最佳权重
# Early stopping on val_loss (lower is better); restore_best_weights=True asks for a rollback to the best weights seen during training
# NOTE(review): patience (15) equals the `epochs` value set in the hyperparameter cell, so the stop can hardly trigger within one run — confirm
e_stop = EarlyStopping(monitor='val_loss', patience=15, mode='min', restore_best_weights=True)

In [None]:
# Callbacks for training: live curve plotting, learning-rate reduction and early stopping
callbacks = [plotter, plateau_reduce, e_stop]

<a id="arch"></a>
## 2.2 模型架构

因为从头训练大型模型的计算成本非常高，所以我们采用迁移学习：加载一个已在相似数据集（ImageNet）上预训练好的模型的编码器层，并在其后接上需要训练的新层

In [None]:
# 具有activation, batchnorm和dropout的全连接层
def dense_block(x, neurons, layer_no):
    """Append a fully connected block (Dense -> ReLU -> BatchNorm -> Dropout) to ``x``.

    Args:
        x: tensor the block is applied to.
        neurons: number of units of the Dense layer.
        layer_no: block number; used in the layer names and as the seed of the
            he_normal initializer.

    Returns:
        The output tensor of the Dropout layer.
    """
    stages = (
        Dense(neurons, kernel_initializer=he_normal(layer_no), name=f'topDense{layer_no}'),
        Activation('relu', name=f'Relu{layer_no}'),
        BatchNormalization(name=f'BatchNorm{layer_no}'),
        Dropout(0.5, name=f'Dropout{layer_no}'),  # drops units with probability 0.5
    )
    for stage in stages:
        x = stage(x)
    return x

In [None]:
def create_model(shape):
    """Build the classifier: a frozen InceptionResNetV2 encoder followed by a dense head.

    Args:
        shape: shape of one input image, passed straight to ``Input``.

    Returns:
        An uncompiled ``Model`` whose output is a softmax over ``classes_num`` classes.
    """
    input_layer = Input(shape, name='input_layer')

    # InceptionResNetV2 with ImageNet weights and without its top dense layers;
    # every one of its layers is frozen
    incep_res = InceptionResNetV2(include_top=False, weights='imagenet', input_tensor=input_layer)
    for frozen in incep_res.layers:
        frozen.trainable = False

    # Pooling shrinks each feature map; the result is flattened and batch-normalised
    features = MaxPooling2D(pool_size=[3, 3], strides=[3, 3], padding='same')(incep_res.output)
    features = Flatten(name='Flatten1')(features)
    features = BatchNormalization(name='BatchNormFlat')(features)

    # Three fully connected blocks (Dense + ReLU + BatchNorm + Dropout), numbered 1..3
    for layer_no, neurons in enumerate((512, 512, 1024), start=1):
        features = dense_block(features, neurons=neurons, layer_no=layer_no)

    # Final Dense layer with one unit per class, turned into probabilities by softmax
    logits = Dense(classes_num, name='Dense4')(features)
    output_layer = Activation('softmax', name='Softmax')(logits)

    return Model(inputs=[input_layer], outputs=[output_layer])

<a id="params"></a>
## 2.3 调参

In [None]:
# hyperparameters
height, width, channels_num = 512, 512, 3  # 初始输入512*512*3
learning_rate = 0.004
epochs = 15
batch_size = 32  # 过大会超载GPU - 可以通过减小图片大小来增大batch_size

In [None]:
model = create_model((height, width, channels_num))
optimizer = Adam(learning_rate)  # Adam optimizer using the learning rate chosen above
# Categorical cross-entropy is the loss, accuracy ('acc') the reported metric
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['acc'])
model.summary()

<a id="train"></a>
## 2.4 模型训练
Colab中训练(GPU配额更高)

In [None]:
# 从目录加载后向模型提供增强训练数据
# Training flow: reads images from the training directory and feeds augmented batches to the model
train_gen = img_gen.flow_from_directory(directory=new_train_dir, target_size=(height, width), color_mode='rgb', classes=list(classes), 
                                        class_mode='categorical', batch_size=batch_size, shuffle=True, interpolation='nearest')

# Validation flow: reads images from the validation directory; img_feed only rescales, so these batches are not augmented
valid_gen = img_feed.flow_from_directory(directory=new_valid_dir, target_size=(height, width), color_mode='rgb', classes=list(classes), 
                                        class_mode='categorical', batch_size=batch_size, shuffle=True, interpolation='nearest')


# # Training is disabled here: a model trained on Colab is loaded further below instead
# # Fit the model with the generators defined above
# model.fit_generator(train_gen, validation_data=valid_gen, epochs=epochs, 
#                         steps_per_epoch=train_size//batch_size + 1, 
#                         validation_steps=valid_size//batch_size + 1, callbacks=callbacks)

模型的训练曲线

In [None]:
# Download the saved training curves to /root/training_curves.npy (shell command)
!wget "https://drive.google.com/uc?id=1-3Y-DB5uhOaY69pvVl5rmfirw9aKCYYB&export=download" -O '/root/training_curves.npy'

In [None]:
# NOTE(security): allow_pickle=True unpickles the downloaded file, which can execute arbitrary code — only load files from a trusted source
plot_data = np.load('/root/training_curves.npy', allow_pickle=True)
plotter.load_plot_data(plot_data)  # load the data into the plotter
plotter.plot()

保存特定层的权重（确保图层名称与加载的相同，或者可以按顺序加载）

In [None]:
# Disabled snippet, kept for reference: collects the weights of the last 15 layers of the model, keyed by layer name
# decoder_weights = {}
# for layer in model.layers[-15:]:
#     decoder_weights[layer.name] = layer.get_weights()

加载经过训练的解码器层

In [None]:
# Download the trained decoder weights to /root/decoder_weights.npy (shell command)
!wget "https://drive.google.com/uc?id=1Omp4wFOWc2WslToduWAgBkyRv9poQVKb&export=download" -O '/root/decoder_weights.npy'

In [None]:
# NOTE(security): allow_pickle=True unpickles the downloaded file, which can execute arbitrary code — only load files from a trusted source
# .item() unwraps the stored object, which is then used as a {layer name: weights} dictionary
decoder_weights = np.load('/root/decoder_weights.npy', allow_pickle=True).item()
for layer_name, layer_weights in decoder_weights.items():
    model.get_layer(layer_name).set_weights(layer_weights)  # set each layer of the decoder with its weights

In [None]:
# check the dictionary
# check the dictionary: show the layer names stored in the loaded weights
decoder_weights.keys()

<a id="eval"></a>
# 3. 模型评估

In [None]:
# 定义测试图像生成器用于模型评估
# Test image generator for model evaluation: pixel values are only rescaled, nothing is augmented
test_gen = ImageDataGenerator(rescale=1/255)
# Take the image size and the class list from the same variables as the training and
# validation flows, so the inputs keep matching the model's input shape and the label
# indices keep matching those used in training if the hyperparameters are changed.
# batch_size=1 and shuffle=False: one image per step, in a fixed order.
test_flow = test_gen.flow_from_directory(new_test_dir,
        target_size=(height, width),
        classes=list(classes),
        class_mode='categorical',
        batch_size=1,
        shuffle=False)

In [None]:
# 测试集上评估模型
# Evaluate the model on the test set, running test_size steps of the test flow (which uses batch_size=1)
metrics = model.evaluate_generator(test_flow, steps=test_size)

In [None]:
# Print the first two metric names next to their values (the model was compiled with a loss and the 'acc' metric)
m_names = model.metrics_names
print(f'{m_names[0]} = {metrics[0]}\n{m_names[1]} = {metrics[1]}')

## 模型在该分类问题上达到约 90% 的 top-1 准确率（top-1 准确率：取预测概率向量中概率最大的类别作为预测结果，如果该类别与真实标签一致，则预测正确；否则预测错误），而随机猜测的准确率仅约为 0.83%（1/120）