In [1]:
import zipfile

zip_ref = zipfile.ZipFile("all.zip", 'r')
zip_ref.extractall(".")
zip_ref.close()

zip_ref = zipfile.ZipFile("train.zip", 'r')
zip_ref.extractall(".")
zip_ref = zipfile.ZipFile("test.zip", 'r')
zip_ref.extractall(".")
zip_ref.close()

import os
import shutil

train_filenames = os.listdir('train')
train_cat = filter(lambda x:x[:3] == 'cat', train_filenames)
train_dog = filter(lambda x:x[:3] == 'dog', train_filenames)

def rmrf_mkdir(dirname):
    if os.path.exists(dirname):
        shutil.rmtree(dirname)
    os.mkdir(dirname)
    
rmrf_mkdir('trainRun')
os.mkdir('trainRun/cat')
os.mkdir('trainRun/dog')

rmrf_mkdir('testRun')
os.symlink('../test/', 'testRun/test')

for filename in train_cat:
    os.symlink('../../train/'+filename, 'trainRun/cat/'+filename)

for filename in train_dog:
    os.symlink('../../train/'+filename, 'trainRun/dog/'+filename)
    

In [1]:
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D

import h5py

# from keras import backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
base_model = InceptionV3(weights='imagenet', include_top=False)

In [3]:
# 添加全局平均池化层
x = base_model.output
x = GlobalAveragePooling2D()(x)
# 添加全连接层
x = Dense(1024, activation='relu')(x)
# 添加一个2元分类器
predictions = Dense(1, activation='softmax')(x)
# 或者下面这个
# predictions = Dense(2, activation='softmax')(x)

# 建立训练使用的模型
model = Model(inputs=base_model.input, outputs=predictions)

In [4]:
# 锁住全部的卷积层
for layer in base_model.layers:
    layer.trainable = False

# 锁层后，进行编译模型
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

In [5]:
train_data_dir = 'trainRun'
train_datagen = image.ImageDataGenerator(rescale=1./255,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True)

In [6]:
img_width, img_height = 299, 299
nm_batch_size = 16
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    shuffle=False,
    batch_size=nm_batch_size,
    class_mode='binary')

Found 25000 images belonging to 2 classes.


In [7]:
# 1
# train_h5py = model.predict_generator(train_generator)
# 2
train_h5py = model.fit_generator(train_generator,
                                samples_per_epoch=2500,
                                epochs=10)


In [None]:
# 2
# 查看层数、名字
for i, layer in enumerate(base_model.layers):
   print(i, layer.name)

In [None]:
# 2
# 根据层数，放开后面的两层。
for layer in model.layers[:249]:
   layer.trainable = False
for layer in model.layers[249:]:
   layer.trainable = True

In [None]:
# 2
from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')

In [None]:
# 2
model.fit_generator(train_generator,
                   samples_per_epoch=2500,
                   epochs=10)

In [None]:
model.save_weights('first_gap_weights.h5')

In [8]:
test_data_dir = 'testRun'
test_datagen = image.ImageDataGenerator(rescale=1./255)

In [9]:
test_generator = test_datagen.flow_from_directory(test_data_dir,
                                                 target_size=(img_height, img_width),
                                                 shuffle=False,
                                                 batch_size=nm_batch_size,
                                                 class_mode=None)

Found 12500 images belonging to 1 classes.


In [11]:
test_h5py = model.predict_generator(test_generator)

In [13]:
with h5py.File("init_weights_InceptionV3.h5") as h:
        h.create_dataset("train", data=train_h5py)
        h.create_dataset("test", data=test_h5py)
        h.create_dataset("label", data=train_generator.classes)

In [15]:
import h5py
import numpy as np
from sklearn.utils import shuffle
np.random.seed(2017)

In [16]:
X_train = []
X_test = []

In [17]:
with h5py.File("init_weights_InceptionV3.h5", 'r') as h:
    X_train.append(np.array(h['train']))
    X_test.append(np.array(h['test']))
    y_train = np.array(h['label'])

In [18]:
X_train = np.concatenate(X_train, axis=1)
X_test = np.concatenate(X_test, axis=1)

In [19]:
X_train, y_train = shuffle(X_train, y_train)

In [20]:
from keras.layers import Input, Dropout

In [21]:
np.random.seed(2017)

In [22]:
input_tensor = Input(X_train.shape[1:])
x = Dropout(0.5)(input_tensor)
x = Dense(1, activation='sigmoid')(x)
model_run = Model(input_tensor, x)

model_run.compile(optimizer='adadelta',
                 loss='binary_crossentropy',
                 metrics=['accuracy'])

In [23]:
model_run.fit(X_train, y_train, batch_size=128, nb_epoch=8, validation_split=0.2)

  if __name__ == '__main__':


Train on 20000 samples, validate on 5000 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7efc468cd278>

In [24]:
y_pred = model_run.predict(X_test, verbose=1)
y_pred = y_pred.clip(min=0.003, max=0.997)



In [25]:
import pandas as pd

df = pd.read_csv("sample_submission.csv")

test_run_datagen = image.ImageDataGenerator(rescale=1./255)
test_run_generator = test_run_datagen.flow_from_directory("testRun", 
                                                          (img_height, img_width), 
                                                         shuffle=False, 
                                                         batch_size=nm_batch_size, 
                                                         class_mode=None)


Found 12500 images belonging to 1 classes.


In [26]:
for i, fname in enumerate(test_run_generator.filenames):
    index = int(fname[fname.rfind('/')+1:fname.rfind('.')])
    df.set_value(index-1, 'label', y_pred[i])

  app.launch_new_instance()


In [28]:
df.to_csv('pred.csv', index=None)
df.head(10)

Unnamed: 0,id,label
0,1,0.50186
1,2,0.506907
2,3,0.50585
3,4,0.505536
4,5,0.494151
5,6,0.493319
6,7,0.494156
7,8,0.495172
8,9,0.49144
9,10,0.493716
