In [1]:
from sklearn.datasets import load_files       
from keras.utils import np_utils
from keras.layers import Lambda
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input as resnet50_pre
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input as inceptionV3_pre
from keras.applications.xception import Xception
from keras.applications.xception import preprocess_input as xception_pre
from keras.preprocessing import image                  
from tqdm import tqdm
from PIL import ImageFile  
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense, Input
from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint  
import numpy as np
import pandas as pd
from glob import glob
import cv2                
import matplotlib.pyplot as plt    
import matplotlib.image as mpimg
%matplotlib inline 
import random
import os
import shutil
import h5py

Using TensorFlow backend.


## 对图片进行分类

In [43]:
def load_dataset(path):
    data = load_files(path)
    files = np.array(data['filenames'])
    targets = np_utils.to_categorical(np.array(data['target']), 2)
    return files, targets

In [44]:

train_path = 'dogs-vs-cats/train1/'
test_path = 'dogs-vs-cats/test1/'

dog_path = train_path + 'dog/'
cat_path = train_path + 'cat/'

if not os.path.exists(dog_path):
    names= os.listdir(train_path)
    os.mkdir(dog_path)
    os.mkdir(cat_path)
    [shutil.move(train_path + name, cat_path + name) for name in names if name.startswith('cat')]
    [shutil.move(train_path + name, dog_path + name) for name in names if name.startswith('dog')]
    

train_files, train_targets = load_dataset(train_path)
test_files, test_targets = load_dataset(test_path)

In [23]:
def path_to_tensor(img_path, image_size):
    # 用PIL加载RGB图像为PIL.Image.Image类型
    img = image.load_img(img_path, target_size=image_size)
    # 将PIL.Image.Image类型转化为格式为(224, 224, 3)的3维张量
    x = image.img_to_array(img)
    # 将3维张量转化为格式为(1, 224, 224, 3)的4维张量并返回
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths, image_size, preprocess):
    list_of_tensors = [preprocess(path_to_tensor(img_path, image_size)) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

In [28]:
def write_gap(MODEL, image_size, lambda_func=None):
    base_model = MODEL(weights='imagenet', include_top=False)
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))

    train_path = "dogs-vs-cats/train1/"
    test_path = 'dogs-vs-cats/test1/'
    train_tensors = paths_to_tensor(train_files, image_size, lambda_func)
    test_tensors = paths_to_tensor(test_files, image_size, lambda_func)
    datagen = ImageDataGenerator(width_shift_range=0.1, 
                            height_shift_range=0.1, 
                            horizontal_flip=True)
    datagen.fit(train_tensors)
    train_generator = datagen.flow(train_tensors, train_targets, batch_size=1, shuffle=False)
    datagen = ImageDataGenerator(width_shift_range=0.1, 
                            height_shift_range=0.1, 
                            horizontal_flip=True)
    datagen.fit(test_tensors)
    test_generator = datagen.flow(test_tensors, test_targets, shuffle=False,
                                             batch_size=1)

    train = model.predict_generator(train_generator, len(train_generator), verbose=1)
    test = model.predict_generator(test_generator, len(test_generator),verbose=1)
    with h5py.File("gap_{0}.h5".format(base_model.name)) as h:
        h.create_dataset("train", data=train)
        h.create_dataset("test", data=test)
        h.create_dataset("label", data=train_generator.y)

In [29]:
write_gap(ResNet50, (224, 224), resnet50_pre)

100%|███████████████████████████████████████████████████████████████████████████████| 200/200 [00:01<00:00, 181.20it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 56.17it/s]




In [33]:
write_gap(InceptionV3, (299, 299), inceptionV3_pre)

100%|███████████████████████████████████████████████████████████████████████████████| 200/200 [00:01<00:00, 125.71it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 140.66it/s]




In [34]:
write_gap(Xception, (299, 299), xception_pre)

100%|███████████████████████████████████████████████████████████████████████████████| 200/200 [00:01<00:00, 122.13it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 139.81it/s]




In [35]:
np.random.seed(2017)

X_train = []
X_test = []
for filenames in ["gap_resnet50.h5", "gap_inception_v3.h5", "gap_xception.h5"]:
    filename = filenames
    with h5py.File(filename, 'r') as h:
        X_train.append(np.array(h['train']))
        X_test.append(np.array(h['test']))
        y_train = np.array(h['label'])
X_train = np.concatenate(X_train, axis=1)
X_test = np.concatenate(X_test, axis=1)

In [47]:
inputs = Input(X_train.shape[1:])#shape=（2048*3，）
x = Dropout(0.5)(inputs)
x = Dense(2, activation='sigmoid')(x)
model = Model(inputs, x)
model.compile(optimizer='adadelta',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [49]:
model.fit(X_train,train_targets, batch_size=128, nb_epoch=1, validation_split=0.2,verbose=2)

y_pred = model.predict(X_test, verbose=1)

  """Entry point for launching an IPython kernel.


Train on 160 samples, validate on 40 samples
Epoch 1/1
 - 22s - loss: 0.8806 - acc: 0.4281 - val_loss: 0.6484 - val_acc: 0.5000


In [52]:
y_pred = [np.argmax(pred) for pred in y_pred]
#y_pred = y_pred.clip(min=0.005, max=0.995)

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1]

In [53]:
df = pd.read_csv("dogs-vs-cats/sample_submission.csv")

gen = ImageDataGenerator()
test_path = 'dogs-vs-cats/test1/'
test_generator = gen.flow_from_directory(test_path, (224, 224), shuffle=False,
                                         batch_size=1, class_mode=None)

for i, fname in enumerate(test_generator.filenames):
    index = int(fname[fname.rfind('\\')+1:fname.rfind('.')])
    df.set_value(index-1, 'label', y_pred[i])

df.to_csv('pred.csv', index=None)
df.head(10)

Found 100 images belonging to 1 classes.


  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,id,label
0,1,1.0
1,2,1.0
2,3,1.0
3,4,1.0
4,5,1.0
5,6,1.0
6,7,1.0
7,8,1.0
8,9,1.0
9,10,1.0
