### 模型融合训练
#### 融合InceptionV3、Xception、DenseNet201

In [2]:
import numpy as np
np.random.seed(19906)
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from keras.preprocessing import image
import cv2
import os
import shutil
import h5py
%matplotlib inline
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.callbacks import EarlyStopping
from keras.models import *
from keras.layers import *
from keras.layers.core import Dropout
from keras.optimizers import Adam, SGD, RMSprop
from keras.regularizers import l2
from keras.applications.inception_v3 import InceptionV3, preprocess_input as inceptionv3_preinput
from keras.applications.xception import Xception, preprocess_input as xception_preinput
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input as inception_resnet_preinput
from keras.applications.densenet import DenseNet201, preprocess_input as densenet_preinput

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


选择作为验证集的司机ID

In [3]:
# Dataset root and the per-image listing used to build the train/validation split.
root_path = "/data/wanlijia/data/distracted_driver_detection/unzip_data/"
drivers_pd = pd.read_csv("{}{}".format(root_path, "drivers_img_nop081_list.csv"))

# The three columns the rest of the notebook works with.
imgs_pd, class_pd, subject_pd = (
    drivers_pd[column] for column in ("img", "classname", "subject")
)

# Driver IDs whose images are held out as the validation set.
choices = ['p021', 'p072']
print("选作验证集的司机:", choices)

选作验证集的司机: ['p021', 'p072']


按选择的司机ID分割训练集和验证集

In [4]:
# Row labels of every image that belongs to a held-out driver, grouped in the
# order the drivers appear in `choices`.
val_index = []
for choice in choices:
    val_index.extend(subject_pd[subject_pd == choice].index.tolist())

# Boolean mask over the rows (True = validation row). `isin` keeps the mask
# aligned row by row, so it does not rely on the index labels being 0..n-1.
test_mask = subject_pd.isin(choices).values

# Every row that is not held out goes to the training set.
train_index = subject_pd[~test_mask].index

# The built-in `len` replaces `np.alen`, which (like the `np.bool` alias the
# mask used to be created with) is no longer available in current NumPy.
print("after split the amount of train set:", len(train_index), "，the amount of validation set:", len(val_index))

after split the amount of train set: 20741 ，the amount of validation set: 1583


创建图像数据处理目录

In [9]:
def rmrf_mkdir(dirname):
    """Replace whatever is at `dirname` with a new, empty directory.

    Parameters
    ----------
    dirname : str
        Path of the directory to (re)create.

    An existing directory is removed together with its contents. A regular
    file or a symlink (including a dangling one) at that path is unlinked.
    Missing parent directories are created.
    """
    if os.path.islink(dirname) or os.path.isfile(dirname):
        # rmtree refuses symlinks and plain files, so unlink those directly.
        os.remove(dirname)
    elif os.path.isdir(dirname):
        shutil.rmtree(dirname)
    # makedirs (unlike mkdir) also works when the parent does not exist yet.
    os.makedirs(dirname)

train_dir = root_path + "imgs/train2"
val_dir = root_path + "imgs/val2"
test_dir = root_path + "imgs/test1"
saved_weights_dir = "/data/wanlijia/code/udacity_homework/machinelearning2/graduation_project/saved_weights"
# exist_ok=True makes this safe to re-run; makedirs also creates missing parents.
os.makedirs(saved_weights_dir, exist_ok=True)


# Loading the test set also requires a sub-directory inside the directory, so
# data/imgs/test is symlinked to data/imgs/test1/test.
os.makedirs(test_dir, exist_ok=True)
# Check the link itself rather than test_dir: if test_dir was left behind
# without the link (e.g. an interrupted earlier run), the link is still created.
if not os.path.lexists(test_dir+"/test"):
    os.symlink(root_path + 'imgs/test', test_dir+"/test")

# In the new training or validation directory, create links back to the
# images' original location.
def link_imgs(target_dir, X, y, src_dir=None):
    """Symlink each image into `target_dir/<class>/<image name>`.

    Parameters
    ----------
    target_dir : str
        Directory that receives one sub-directory per class.
    X : iterable of str
        Image file names.
    y : iterable of str
        Class name of each image, paired element-wise with `X`.
    src_dir : str, optional
        Directory that contains the original `<class>/<image name>` files.
        Defaults to the `imgs/train` directory under the global `root_path`.

    Raises
    ------
    FileExistsError
        If a link with the same name already exists in the class directory.
    """
    if src_dir is None:
        src_dir = root_path + 'imgs/train'

    created_dirs = set()
    for img_name, target in zip(X, y):
        symlink_dir = os.path.join(target_dir, target)
        # Create each class directory once instead of probing the filesystem
        # for every single image.
        if symlink_dir not in created_dirs:
            os.makedirs(symlink_dir, exist_ok=True)
            created_dirs.add(symlink_dir)
        os.symlink(os.path.join(src_dir, target, img_name),
                   os.path.join(symlink_dir, img_name))

数据准备

In [10]:
# Remove the training/validation files left over from the previous split and
# recreate both directories empty.
rmrf_mkdir(train_dir)
rmrf_mkdir(val_dir)

# Select image names and class labels for each side of the split by row label.
X_train = imgs_pd.loc[train_index]
X_val = imgs_pd.loc[val_index]
y_train = class_pd.loc[train_index]
y_val = class_pd.loc[val_index]

# Link the training set into its new directory.
link_imgs(train_dir, X_train, y_train)

# Link the validation set into its new directory.
link_imgs(val_dir, X_val, y_val)

print("split valid data done!")

split valid data done!


定义特征提取方法

In [11]:
batch_size = 128


def _write_h5(path, datasets):
    """Create (or overwrite) the HDF5 file at `path` from (name, array) pairs."""
    # Mode "w" is passed explicitly: it truncates a file left by a previous
    # run, and it does not depend on h5py's default mode, which is read-only
    # in h5py 3.x.
    with h5py.File(path, "w") as h:
        for name, data in datasets:
            h.create_dataset(name, data=data)


def write_bottleneck(MODEL, weight_file, image_size, preprocess_fun=None):
    """Run the train/validation/test images through a network and save the pooled features.

    Parameters
    ----------
    MODEL : callable
        Keras application constructor (e.g. InceptionV3). It is called with
        `input_tensor`, `weights=None` and `include_top=False`.
    weight_file : str
        File name, inside `saved_weights_dir`, of the weights to load.
    image_size : tuple of int
        Two-element size passed to the generators as `target_size`; the model
        input is `(*image_size, 3)`.
    preprocess_fun : callable, optional
        Passed to every ImageDataGenerator as `preprocessing_function`.

    Writes two HDF5 files into `saved_weights_dir`:
    `bottleneck_noaug_<model name>.h5` with datasets `train`, `valid`, `label`
    and `valid_label`, and `bottleneck_test_<model name>.h5` with dataset
    `test`. Existing files with those names are overwritten. Returns None.

    Reads the globals `saved_weights_dir`, `train_dir`, `val_dir`, `test_dir`
    and `batch_size`.
    """
    input_tensor = Input((*image_size, 3))
    # weights=None: only the architecture is built here; the weights are
    # loaded from `weight_file` just below.
    base_model = MODEL(input_tensor=input_tensor, weights=None, include_top=False)

    model = Model(inputs=base_model.input, outputs=GlobalAveragePooling2D()(base_model.output), name=base_model.name)
    weights_path = os.path.join(saved_weights_dir, weight_file)
    model.load_weights(weights_path, by_name=True)
    print("loaded model weights: ", model.name, ", weights path:", weights_path)

    # Image generator for the training set.
    # NOTE(review): random augmentation is configured here although the output
    # file is named "bottleneck_noaug" - confirm this is intended.
    # NOTE(review): `preprocessing_function` and `rescale` are both set on all
    # generators; left unchanged on the assumption that the saved weights were
    # trained with the same input pipeline - TODO confirm.
    train_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_fun,
        rotation_range=10.,
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.1,
        zoom_range=0.1,
        rescale=1./255
    )

    # Validation and test images use the same augmentation-free configuration,
    # so a single generator serves both.
    plain_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_fun,
        rescale=1./255)

    # shuffle=False everywhere so the predicted features come out in the same
    # order as the generator's `classes` / file listing.
    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=image_size,
        batch_size=batch_size,
        shuffle=False,
        class_mode='categorical')

    val_generator = plain_datagen.flow_from_directory(
        val_dir,
        target_size=image_size,
        batch_size=batch_size,
        shuffle=False,
        class_mode='categorical')

    test_generator = plain_datagen.flow_from_directory(
        test_dir,
        image_size,
        shuffle=False,
        batch_size=batch_size,
        class_mode=None)

    train = model.predict_generator(train_generator, verbose=1)
    valid = model.predict_generator(val_generator, verbose=1)

    print("begin create bottleneck file:")
    # Format the file name before joining, so a '%' in `saved_weights_dir`
    # cannot be misread as a format specifier.
    _write_h5(
        os.path.join(saved_weights_dir, "bottleneck_noaug_%s.h5" % model.name),
        [
            ("train", train),
            ("valid", valid),
            ("label", train_generator.classes),
            ("valid_label", val_generator.classes),
        ])
    print("create bottleneck file done for model: ", model.name)

    test = model.predict_generator(test_generator, verbose=1)

    print("begin create test bottleneck file:")
    _write_h5(
        os.path.join(saved_weights_dir, "bottleneck_test_%s.h5" % model.name),
        [("test", test)])
    print("create test bottleneck file done for model: ", model.name)

In [7]:
# Extract InceptionV3 bottleneck features at 299x299 using the weights in inception_v3_model.h5.
write_bottleneck(InceptionV3, "inception_v3_model.h5", (299, 299), inceptionv3_preinput)

loaded model weights:  inception_v3 , weights path: saved_weights/inception_v3_model.h5
Found 20741 images belonging to 10 classes.
Found 1583 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.
begin create bottleneck file:
create bottleneck file done for model:  inception_v3
begin create test bottleneck file:
create test bottleneck file done for model:  inception_v3


In [12]:
# Extract Xception bottleneck features at 299x299 using the weights in xception_model.h5.
# NOTE(review): the recorded output of this cell is an OSError because
# xception_model.h5 was not found in saved_weights_dir; the weights file must
# exist there before this cell can succeed.
write_bottleneck(Xception, "xception_model.h5", (299, 299), xception_preinput)

OSError: Unable to open file (unable to open file: name = '/data/wanlijia/code/udacity_homework/machinelearning2/graduation_project/saved_weights/xception_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [7]:
# Extract InceptionResNetV2 bottleneck features at 299x299 using the weights in inception_resnet_v2_model.h5.
write_bottleneck(InceptionResNetV2, "inception_resnet_v2_model.h5", (299, 299), inception_resnet_preinput)

loaded model weights:  inception_resnet_v2 , weights path: saved_weights/inception_resnet_v2_model.h5
Found 20741 images belonging to 10 classes.
Found 1583 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.
begin create bottleneck file:
create bottleneck file done for model:  inception_resnet_v2
begin create test bottleneck file:
create test bottleneck file done for model:  inception_resnet_v2


In [8]:
# Extract DenseNet201 bottleneck features at 224x224 using the weights in densenet201_model.h5.
write_bottleneck(DenseNet201, "densenet201_model.h5", (224, 224), densenet_preinput)

loaded model weights:  densenet201 , weights path: saved_weights/densenet201_model.h5
Found 20741 images belonging to 10 classes.
Found 1583 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.
begin create bottleneck file:
create bottleneck file done for model:  densenet201
begin create test bottleneck file:
create test bottleneck file done for model:  densenet201


结束