## 模型构建


In [8]:
import numpy as np
from keras.preprocessing import image
from keras.applications import resnet50, ResNet50
from keras.layers.pooling import GlobalAveragePooling2D
from keras.models import Model, Sequential
from keras.applications import xception, Xception, resnet50, ResNet50, vgg16, VGG16
from keras.layers.core import Dense, Dropout
from tqdm import tqdm
import matplotlib.pyplot as plt
import h5py
from glob2 import glob
from keras.callbacks import ModelCheckpoint  
from sklearn.utils import shuffle
import pandas as pd

def generate_test_prediction(pred, filename):
    """Write test-set predictions into a copy of the sample submission CSV.

    The test images under ``data/test`` are enumerated with a non-shuffling
    Keras directory iterator; each file name (without extension) is parsed as
    an integer image id, and the matching score is stored in the ``label``
    column at row label ``id - 1``.

    Args:
        pred: Array-like with one score per test image, in the order the
            directory iterator lists the files. Shapes ``(N,)`` and ``(N, 1)``
            are both accepted; the array is flattened before use.
        filename: Base name (no extension) of the CSV created under
            ``./prediction/``.

    Raises:
        ValueError: If a test file name is not an integer id.
        IndexError: If ``pred`` holds fewer scores than there are test files.
    """
    import os  # local import: only needed for portable path parsing

    df = pd.read_csv('prediction/sample_submission.csv')
    datagen = image.ImageDataGenerator()
    test_generator = datagen.flow_from_directory(
                'data/test',
                target_size=(224,224),
                batch_size=50,
                shuffle=False,
                class_mode=None)

    # model.predict returns shape (N, 1); flatten so each cell receives a
    # plain scalar instead of a length-1 array.
    scores = np.asarray(pred).reshape(-1)

    for i, path in enumerate(test_generator.filenames):
        # basename/splitext instead of searching for '/', so the id is parsed
        # correctly whatever path separator the platform uses.
        stem = os.path.splitext(os.path.basename(path))[0]
        index = int(stem)
        # NOTE(review): assumes the sample submission keeps the default
        # 0-based index with rows ordered by id starting at 1 - confirm.
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # replacement for label-based scalar assignment.
        df.at[index - 1, 'label'] = float(scores[i])

    df.to_csv('./prediction/{}.csv'.format(filename),index=False)

### 模型1_Resnet50+重新训练分类器


In [3]:
# Read the cached feature arrays and training labels from gap_resnet50.h5
# (presumably pooled ResNet50 bottleneck features, judging by the file name).
with h5py.File('gap_resnet50.h5', 'r') as h:
    x_train1, y_train1, x_test1 = (
        np.array(h[key]) for key in ('train', 'label_train', 'test')
    )

# Shuffle features and labels together so they stay aligned row for row.
x_train1, y_train1 = shuffle(x_train1, y_train1)

In [5]:
# Fully connected classifier over the 2048-dimensional feature vectors:
# two ReLU hidden layers followed by a single sigmoid output unit.
model1 = Sequential([
    Dense(1024, activation='relu', input_shape=(2048,)),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model1.summary()
model1.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


# Train model1; the checkpoint callback only writes the weights file when
# the monitored validation metric improves.
checkpointer = ModelCheckpoint(
    filepath='./saved_models/weights.best.model1.hdf5',
    save_best_only=True,
    verbose=1,
)

model1.fit(
    x_train1,
    y_train1,
    batch_size=50,
    epochs=10,
    validation_split=0.15,
    callbacks=[checkpointer],
    verbose=1,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 1024)              2098176   
_________________________________________________________________
dense_5 (Dense)              (None, 256)               262400    
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 257       
Total params: 2,360,833
Trainable params: 2,360,833
Non-trainable params: 0
_________________________________________________________________
Train on 27460 samples, validate on 4847 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.03128, saving model to ./saved_models/weights.best.model1.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 0.03128 to 0.03120, saving model to ./saved_models/weights.best.model1.hdf5
Epoch 3/10

Epoch 00003: val_loss improved from 0.03120 to 0.03087, saving model to ./saved_models/weig

<keras.callbacks.History at 0x7f83a10edf28>

In [9]:
# Restore the checkpointed weights before scoring the test features.
model1.load_weights(filepath='./saved_models/weights.best.model1.hdf5')

model1_pred = model1.predict(x_test1)

# Keep every probability inside [0.005, 0.995]
# (presumably to bound the log-loss penalty on confident mistakes - confirm).
pred = np.clip(model1_pred, 0.005, 0.995)

generate_test_prediction(pred, 'model1_base')

Found 12500 images belonging to 1 classes.




### 模型2_模型融合+重新训练分类器


In [14]:
# Gather the cached features from the three files and join them along the
# feature axis so every sample gets one long combined vector.
x_train2, x_test2 = [], []
for filename in ('gap_xception.h5', 'gap_resnet50.h5', 'gap_vgg16.h5'):
    with h5py.File(filename, 'r') as h:
        x_train2 += [np.array(h['train'])]
        x_test2 += [np.array(h['test'])]
        # Overwritten on every pass; the labels from the last file are kept.
        # NOTE(review): assumes all three files store identical labels in the
        # same sample order - confirm.
        y_train2 = np.array(h['label_train'])

x_train2 = np.concatenate(x_train2, axis=1)
x_test2 = np.concatenate(x_test2, axis=1)

# Shuffle features and labels together so they stay aligned row for row.
x_train2, y_train2 = shuffle(x_train2, y_train2)

In [15]:
# Build the model: dropout on the concatenated feature vector, then the same
# two ReLU hidden layers and sigmoid output used for model1.
model2 = Sequential([
    Dropout(0.5, input_shape=x_train2.shape[1:]),
    Dense(1024, activation='relu'),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model2.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# The checkpoint callback only writes the weights file when the monitored
# validation metric improves.
checkpointer = ModelCheckpoint(
    filepath='./saved_models/weights.best.comb_model.hdf5',
    save_best_only=True,
    verbose=1,
)


model2.fit(
    x_train2,
    y_train2,
    batch_size=50,
    epochs=10,
    validation_split=0.1,
    callbacks=[checkpointer],
    verbose=1,
)


Train on 29076 samples, validate on 3231 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.01768, saving model to ./saved_models/weights.best.comb_model.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 0.01768 to 0.01530, saving model to ./saved_models/weights.best.comb_model.hdf5
Epoch 3/10

Epoch 00003: val_loss improved from 0.01530 to 0.01271, saving model to ./saved_models/weights.best.comb_model.hdf5
Epoch 4/10

Epoch 00004: val_loss improved from 0.01271 to 0.01186, saving model to ./saved_models/weights.best.comb_model.hdf5
Epoch 5/10

Epoch 00005: val_loss improved from 0.01186 to 0.00708, saving model to ./saved_models/weights.best.comb_model.hdf5
Epoch 6/10

Epoch 00006: val_loss did not improve
Epoch 7/10

Epoch 00007: val_loss did not improve
Epoch 8/10

Epoch 00008: val_loss did not improve
Epoch 9/10

Epoch 00009: val_loss did not improve
Epoch 10/10

Epoch 00010: val_loss improved from 0.00708 to 0.00676, saving model to ./saved_models/weights.best.co

<keras.callbacks.History at 0x7f83875c5978>

In [16]:
# Restore the checkpointed weights before scoring the combined test features.
model2.load_weights(filepath='./saved_models/weights.best.comb_model.hdf5')

model2_pred = model2.predict(x_test2)

# Keep every probability inside [0.005, 0.995]
# (presumably to bound the log-loss penalty on confident mistakes - confirm).
pred = np.clip(model2_pred, 0.005, 0.995)

generate_test_prediction(pred, 'model2_base')

Found 12500 images belonging to 1 classes.


