# 使用多模型融合

1 用训练图像分别导出各finetune好了的模型特征，以及对应label

2 构建分类器，并训练，保存权重

3 用test图像导出特征，输入分类器预测

4 输出预测结果到csv

### 1 导出特征和label

In [1]:
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *

import h5py

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
def write_gap(MODEL, image_size, lambda_func=None, weights_file=None, train_imgs_path=None, test_imgs_path=None, model_name=None):
    width = image_size[0]
    height = image_size[1]
    input_tensor = Input((height, width, 3))
    x = input_tensor
    if lambda_func:
        x = Lambda(lambda_func)(x)
    
    # build a train liked network, to reload weights
    load_base_model = MODEL(input_tensor=x, weights=None, include_top=False)
    load_m_out = load_base_model.output
    load_p_out = GlobalAveragePooling2D()(load_m_out)
    load_p_out = Dropout(0.5)(load_p_out)
    load_predictions = Dense(2, activation='softmax')(load_p_out)
    load_model = Model(inputs=load_base_model.input, outputs=load_predictions)
    load_model.load_weights(weights_file)
    
    
    model = Model(load_model.input, load_p_out)
    
    
    gen = ImageDataGenerator()
    train_generator = gen.flow_from_directory(train_imgs_path, image_size, shuffle=False, class_mode="categorical",
                                              batch_size=16)
    test_generator = gen.flow_from_directory(test_imgs_path, image_size, shuffle=False,
                                             batch_size=16, class_mode=None)
    
    train_img_nums = train_generator.samples
    test_img_nums = test_generator.samples
    
    train = model.predict_generator(train_generator, (train_img_nums//16) + 1, verbose=1)
    test = model.predict_generator(test_generator, (test_img_nums//16) + 1, verbose=1)

    print("model %s"%(model_name))
    print(train.shape)
    print(test.shape)
    print((train_generator.classes).shape)
    
    train = train[:train_img_nums]
    test = test[:test_img_nums]
    
    print("model %s"%(model_name))
    print(train.shape)
    print(test.shape)
    print((train_generator.classes).shape)
    
    print(train_generator.classes)
    print("#")

    with h5py.File("gap_%s.h5"%(model_name)) as h:
        h.create_dataset("train", data=train)
        h.create_dataset("test", data=test)
        h.create_dataset("label", data=train_generator.classes)

# 输入所有的训练样本，后续在分割
#write_gap(ResNet50, (224, 224), None,
#          'ResNet50_finetune.h5', '../dataset/chest_xray/train-ready', '../dataset/chest_xray/test', 'ResNet50')
write_gap(InceptionV3, (299, 299), inception_v3.preprocess_input, 
          'InceptionV3_finetune.h5', '../dataset/chest_xray/train-ready', '../dataset/chest_xray/test/', 'InceptionV3')
write_gap(Xception, (299, 299), xception.preprocess_input, 
          'Xception_finetune.h5', '../dataset/chest_xray/train-ready', '../dataset/chest_xray/test', 'Xception')

Found 4708 images belonging to 2 classes.
Found 624 images belonging to 2 classes.
model InceptionV3
(4708, 2048)
(640, 2048)
(4708,)
model InceptionV3
(4708, 2048)
(624, 2048)
(4708,)
[0 0 0 ... 1 1 1]
#
Found 4708 images belonging to 2 classes.
Found 624 images belonging to 2 classes.
model Xception
(4708, 2048)
(640, 2048)
(4708,)
model Xception
(4708, 2048)
(624, 2048)
(4708,)
[0 0 0 ... 1 1 1]
#


### 2 融合特征

In [4]:
import h5py
import numpy as np
from sklearn.utils import shuffle
np.random.seed(2017)

X_train = []
X_test = []

# 特征是需要融合的，但label是一致的（没有使用乱序），即多个特征融合后对应同一个label

#for filename in ["gap_Xception.h5", "gap_InceptionV3.h5", "gap_ResNet50.h5"]:
for filename in ["gap_Xception.h5", "gap_InceptionV3.h5"]:
    with h5py.File(filename, 'r') as h:
        X_train.append(np.array(h['train']))
        X_test.append(np.array(h['test']))
        y_train = np.array(h['label'])
        #X_train = np.array(h['train'])
        #X_test = np.array(h['test'])
        #y_train = np.array(h['label'])

print(np.array(X_train).shape)
print(np.array(X_test).shape)
X_train = np.concatenate(X_train, axis=1)
X_test = np.concatenate(X_test, axis=1)
print(np.array(X_train).shape)
print(np.array(X_test).shape)

X_train, y_train = shuffle(X_train, y_train)

print(y_train)
#print("fusion model")
#print("train.shape %d test.shape %d label.shape %d"%(X_train.shape, X_test.shape, y_train.shape))

(2, 4708, 2048)
(2, 624, 2048)
(4708, 4096)
(624, 4096)
[1 1 1 ... 0 1 1]


In [5]:
from keras import backend as K

y_train = K.one_hot(y_train, 2)
y_train = K.eval(y_train)
print(y_train[0])
print(y_train[1])
print(y_train[2])

[0. 1.]
[0. 1.]
[0. 1.]


### 3 构建分类器

In [6]:
from keras.models import *
from keras.layers import *

np.random.seed(2017)

input_tensor = Input(X_train.shape[1:])
#x = GlobalAveragePooling2D()(input_tensor)
x = Dropout(0.5)(input_tensor)
predictions = Dense(2, activation='softmax')(x)
model = Model(input_tensor, predictions)

model.compile(optimizer='adadelta', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 4096)              0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 4096)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 8194      
Total params: 8,194
Trainable params: 8,194
Non-trainable params: 0
_________________________________________________________________


### 4 训练分类器

In [7]:
batch_size = 128
epochs = 10

model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.05)

Train on 4472 samples, validate on 236 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1c615632080>

### 5 预测测试集

In [23]:
test_predictions = model.predict(X_test, verbose=1)



### 6 写入CSV

In [24]:
print(test_predictions[3])
print(np.argmax(test_predictions[3]))

[4.0267883e-11 1.0625736e-07 6.8008569e-07 9.9999797e-01 3.8216777e-13
 6.9565328e-07 2.9235672e-09 1.4573551e-07 1.7710824e-10 6.6497705e-08
 8.1254939e-08 8.0354567e-08]
3


In [25]:
import csv  

def as_num(x):
    y = '{:.6f}'.format(x) # 6f表示保留6位小数点的float型
    return(y)

class_index = ['Black-grass', 'Charlock', 
               'Cleavers', 'Common Chickweed', 
               'Common wheat', 'Fat Hen', 
               'Loose Silky-bent', 'Maize', 
               'Scentless Mayweed', 'Shepherds Purse', 
               'Small-flowered Cranesbill', 'Sugar beet']

gen = ImageDataGenerator()
test_generator = gen.flow_from_directory("../dataset/test/", (224, 224), shuffle=False, 
                                         batch_size=16, class_mode=None)

with open('output.csv', 'w+', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["file", "species"])
    for index, fname in enumerate(test_generator.filenames):
        fname = fname.split("\\")[1]
        class_name = class_index[np.argmax(test_predictions[index])]
        writer.writerow([fname, class_name])

Found 794 images belonging to 1 classes.
