In [1]:
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *
import numpy as np
from sklearn.utils import shuffle

import h5py

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  (fname, cnt))
  (fname, cnt))


In [2]:
def write_gap(MODEL, image_size, lambda_func=None):
    """Cache global-average-pooled features of ``MODEL`` for the train and test images.

    Builds ``MODEL`` with ImageNet weights and without its classification top,
    appends a ``GlobalAveragePooling2D`` layer, runs the images found under the
    ``train2`` and ``test2`` directories through it, and writes the results to
    ``gap_<MODEL.__name__>.h5`` as the datasets ``train``, ``test`` and ``label``.

    Parameters
    ----------
    MODEL : callable
        Model constructor accepting ``input_tensor``, ``weights`` and
        ``include_top`` keyword arguments.
    image_size : sequence of two ints
        ``(width, height)`` of the images fed to the network.
    lambda_func : callable, optional
        Preprocessing function wrapped in a ``Lambda`` layer and applied to the
        input tensor before the base model. Skipped when falsy.

    Returns
    -------
    None
        The features are written to disk; nothing is returned.
    """
    width = image_size[0]
    height = image_size[1]
    # Keras uses (height, width) ordering for both the input tensor and the
    # generator's target_size; keep the two consistent for non-square sizes.
    target_size = (height, width)

    input_tensor = Input((height, width, 3))
    x = input_tensor
    if lambda_func:
        x = Lambda(lambda_func)(x)
    base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False)
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))

    gen = ImageDataGenerator()
    # shuffle=False keeps a fixed iteration order so the feature rows line up
    # with train_generator.classes saved below.
    train_generator = gen.flow_from_directory("train2", target_size=target_size, shuffle=False,
                                              batch_size=32)
    test_generator = gen.flow_from_directory("test2", target_size=target_size, shuffle=False,
                                             batch_size=32, class_mode=None)

    train = model.predict_generator(train_generator, max_queue_size=10, workers=8, use_multiprocessing=True, verbose=0)
    test = model.predict_generator(test_generator, max_queue_size=10, workers=8, use_multiprocessing=True, verbose=0)

    # Explicit "w": the default mode differs between h5py versions (append vs.
    # read-only), and truncating makes re-running the cell safe.
    with h5py.File("gap_%s.h5" % MODEL.__name__, "w") as h:
        h.create_dataset("train", data=train)
        h.create_dataset("test", data=test)
        h.create_dataset("label", data=train_generator.classes)

In [14]:
# Cache ResNet50 features at 224x224.
# NOTE(review): unlike the other write_gap calls in this notebook, no
# preprocessing function is passed here -- confirm raw pixel input is intended.
write_gap(MODEL=ResNet50, image_size=(224, 224))

Found 24962 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [5]:
# Cache InceptionV3 features at 299x299, applying its own preprocessing in-graph.
write_gap(MODEL=InceptionV3, image_size=(299, 299),
          lambda_func=inception_v3.preprocess_input)

Found 24962 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [6]:
# Cache Xception features at 299x299, applying its own preprocessing in-graph.
write_gap(MODEL=Xception, image_size=(299, 299),
          lambda_func=xception.preprocess_input)

Found 24962 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [3]:
# Cache InceptionResNetV2 features at 299x299, applying its own preprocessing in-graph.
write_gap(MODEL=InceptionResNetV2, image_size=(299, 299),
          lambda_func=inception_resnet_v2.preprocess_input)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.7/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Found 24962 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [7]:
# Sanity-check the cached InceptionResNetV2 features. Dataset.shape is read
# from the file's metadata, so the arrays themselves are never loaded.
with h5py.File("gap_InceptionResNetV2.h5", 'r') as h:
    for key in ("train", "test", "label"):
        print(key + " shape", h[key].shape)

train shape (24962, 1536)
test shape (12500, 1536)
label shape (24962,)


In [16]:
# Seed NumPy's global RNG before the shuffle at the end of this cell.
np.random.seed(2017)

feature_files = ["gap_ResNet50.h5", "gap_Xception.h5", "gap_InceptionV3.h5"]
train_parts, test_parts = [], []

for feature_path in feature_files:
    with h5py.File(feature_path, 'r') as store:
        train_parts.append(np.array(store['train']))
        test_parts.append(np.array(store['test']))
        # Re-read on every pass; the labels from the last file are the ones kept.
        y_train = np.array(store['label'])

# Stack the per-model feature vectors side by side (one row per image).
X_train = np.concatenate(train_parts, axis=1)
X_test = np.concatenate(test_parts, axis=1)

# Shuffle features and labels together so rows stay paired.
X_train, y_train = shuffle(X_train, y_train)

In [17]:
# Classifier head on the concatenated features: dropout followed by a single
# sigmoid unit.
input_tensor = Input(X_train.shape[1:])
dropout_layer = Dropout(0.5)
output_layer = Dense(1, activation='sigmoid')
x = output_layer(dropout_layer(input_tensor))
model = Model(input_tensor, x)

model.compile(loss='binary_crossentropy', optimizer='adadelta', metrics=['accuracy'])

In [20]:
from IPython.display import SVG
from keras.utils.vis_utils import plot_model

# Render the classifier graph, annotated with tensor shapes, to a PNG on disk.
plot_model(model, show_shapes=True, to_file='gap_model.png')
# Inline SVG alternative, left disabled (model_to_dot is not imported by this cell):
# SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))

In [21]:
# Train for 8 epochs, holding out the last 20% of the (already shuffled) rows
# for validation. `epochs` is the Keras 2 name of the deprecated `nb_epoch`.
model.fit(X_train, y_train, batch_size=128, epochs=8, validation_split=0.2)

  if __name__ == '__main__':


Train on 19969 samples, validate on 4993 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f5bb44f4128>

In [22]:
# Persist the trained classifier head to disk.
model.save(filepath='gap_model.h5')

In [23]:
# Score the test-set features with the trained classifier.
y_pred = model.predict(x=X_test, verbose=1)



In [25]:
import os

import pandas as pd
from keras.preprocessing.image import *

image_size = (224, 224)
gen = ImageDataGenerator()
# Only the generator's file listing is used below (no images are read here);
# shuffle=False keeps the listing in a fixed order to pair with y_pred rows.
test_generator = gen.flow_from_directory("test2", image_size, shuffle=False,
                                         batch_size=16, class_mode=None)

# The numeric file stem is the 1-based image id. os.path handles whichever
# path separator the platform uses.
test_ids = np.array([int(os.path.splitext(os.path.basename(fname))[0])
                     for fname in test_generator.filenames])


def write_submission(labels, out_path):
    """Fill the sample submission with ``labels`` and save it to ``out_path``.

    ``labels`` holds one prediction per entry of ``test_generator.filenames``,
    in the same order. The prediction for image id ``k`` is written to the row
    labelled ``k - 1``. Returns the filled DataFrame.
    """
    df = pd.read_csv("sample_submission.csv")
    # Ensure the column can hold probabilities even if the template stores ints.
    df['label'] = df['label'].astype(float)
    # Flatten (n, 1) model output to (n,) so each cell receives a scalar.
    df.loc[test_ids - 1, 'label'] = np.asarray(labels, dtype=float).ravel()
    df.to_csv(out_path, index=None)
    return df


# Raw predictions.
write_submission(y_pred, 'gap_pred.csv')

# Clipped predictions: bounding probabilities away from 0 and 1 limits the
# log-loss penalty of a confident mistake.
y_pred_u = y_pred.clip(min=0.005, max=0.995)
df = write_submission(y_pred_u, 'gap_pred_u.csv')
df.head(10)

Found 12500 images belonging to 1 classes.




Unnamed: 0,id,label
0,1,0.995
1,2,0.995
2,3,0.995
3,4,0.995
4,5,0.005
5,6,0.005
6,7,0.005
7,8,0.005
8,9,0.005
9,10,0.005


在kaggle上的得分是0.03870