## 自己构建的网络

In [29]:
from sklearn.datasets import load_files       
from keras.utils import np_utils
from keras.layers import Lambda
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input as resnet50_pre
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input as inceptionV3_pre
from keras.applications.xception import Xception
from keras.applications.xception import preprocess_input as xception_pre
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input as vgg16_pre
from keras.applications.vgg19 import VGG19
from keras.applications.vgg19 import preprocess_input as vgg19_pre
from keras.preprocessing import image   
from keras.optimizers import SGD, Adam
from keras.utils.np_utils import to_categorical
from tqdm import tqdm
from PIL import ImageFile  
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense, Input, Activation
from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint  
import numpy as np
import pandas as pd
from glob import glob
import cv2                
import matplotlib.pyplot as plt    
import matplotlib.image as mpimg
%matplotlib inline 
import random
import os
import shutil
import h5py

In [2]:
def move_data(names, divide_train_path, divide_valid_path):
    """Split ``names`` 80/20 and move the files into train/validation folders.

    A random 20% of ``names`` (rounded down) goes to ``divide_valid_path``;
    everything else goes to ``divide_train_path``. Source files are read from
    the module-level ``train_path``.

    Args:
        names: List of file names located directly under ``train_path``.
        divide_train_path: Destination directory (with trailing separator)
            for the training share.
        divide_valid_path: Destination directory (with trailing separator)
            for the validation share.
    """
    # exist_ok lets a partially completed earlier run be resumed instead of
    # failing on a directory that is already there.
    os.makedirs(divide_train_path, exist_ok=True)
    os.makedirs(divide_valid_path, exist_ok=True)

    # A set makes the per-file membership test O(1) instead of O(n).
    valid_names = set(random.sample(names, int(len(names) * 0.2)))

    for name in names:
        destination = divide_valid_path if name in valid_names else divide_train_path
        shutil.move(train_path + name, destination + name)

    
def divide_images():
    """Arrange the raw image folders into per-class sub-directories.

    Uses the module-level ``train_path``, ``valid_path`` and ``test_path``.
    Files under ``train_path`` whose names start with ``cat`` / ``dog`` are
    handed to ``move_data``, which distributes them between the ``cat/`` and
    ``dog/`` folders of the training and validation directories. Files under
    ``test_path`` are moved into a single ``test/`` sub-directory. Each step
    is skipped when its target directory already exists.
    """
    dog_train_dir = train_path + 'dog/'
    cat_train_dir = train_path + 'cat/'
    dog_valid_dir = valid_path + 'dog/'
    cat_valid_dir = valid_path + 'cat/'

    # Training set: the presence of the dog folder marks the split as done.
    if not os.path.exists(dog_train_dir):
        cats = []
        dogs = []
        for entry in os.listdir(train_path):
            if entry.startswith('cat'):
                cats.append(entry)
            if entry.startswith('dog'):
                dogs.append(entry)
        move_data(cats, cat_train_dir, cat_valid_dir)
        move_data(dogs, dog_train_dir, dog_valid_dir)

    # Test set: list the files first so the new folder is not moved into itself.
    nested_test_dir = test_path + 'test/'
    if not os.path.exists(nested_test_dir):
        entries = os.listdir(test_path)
        os.mkdir(nested_test_dir)
        for entry in entries:
            shutil.move(test_path + entry, nested_test_dir + entry)
    

## 对图片进行分类

In [3]:
# Root folders of the dataset; every path keeps its trailing separator because
# the helpers above build file paths by plain string concatenation.
train_path = 'dogs-vs-cats/train1/'
test_path = 'dogs-vs-cats/test1/'
valid_path = 'dogs-vs-cats/valid1/'

# Create the validation root if needed; exist_ok avoids the separate
# existence check and makes re-running the cell safe.
os.makedirs(valid_path, exist_ok=True)

# Sort the images into the class sub-directories (no-op once already done).
divide_images()
    


In [45]:
def extract_features1(base_model, target_size, preprocess):
    """Run all images through ``base_model`` and cache the outputs in an .npz file.

    Images are read from the module-level ``train_path``, ``valid_path`` and
    ``test_path`` in batches of the module-level ``batch_size``. The result is
    written to ``'<base_model.name>_features.npz'`` with the keys ``train``,
    ``train_label``, ``valid``, ``valid_label``, ``test`` and
    ``test_filename``.

    Args:
        base_model: Model used as feature extractor; must provide
            ``predict_generator`` and ``name``.
        target_size: ``(height, width)`` the images are resized to.
        preprocess: Function applied to every image before prediction.
    """
    datagen = ImageDataGenerator(preprocessing_function=preprocess)

    def _flow(directory, class_mode):
        # shuffle=False for every split (including test): the saved features
        # must line up row by row with generator.classes / generator.filenames.
        return datagen.flow_from_directory(directory, target_size=target_size,
                                           batch_size=batch_size, class_mode=class_mode,
                                           shuffle=False)

    def _predict(generator):
        # Ceiling division so a trailing partial batch is not dropped, which
        # would leave fewer feature rows than labels / file names.
        steps = -(-generator.samples // batch_size)
        return base_model.predict_generator(generator, steps)

    train_generator = _flow(train_path, 'binary')
    valid_generator = _flow(valid_path, 'binary')
    test_generator = _flow(test_path, None)

    train_features = _predict(train_generator)
    valid_features = _predict(valid_generator)
    test_features = _predict(test_generator)

    np.savez('{0}_features.npz'.format(base_model.name),
             train=train_features, train_label=train_generator.classes,
             valid=valid_features, valid_label=valid_generator.classes,
             test=test_features, test_filename=test_generator.filenames)


# Batch size shared by feature extraction and classifier training.
batch_size = 20



In [183]:
def extract_features(base_model, target_size):
    """Cache ``base_model`` outputs for all images, scaling pixels by 1/255.

    Variant of the extraction routine that rescales pixel values instead of
    applying a model-specific preprocessing function. Images are read from the
    module-level ``train_path``, ``valid_path`` and ``test_path`` in batches of
    the module-level ``batch_size``. The result is written to
    ``'<base_model.name>_features.npz'`` with the keys ``train``,
    ``train_label``, ``valid``, ``valid_label``, ``test`` and
    ``test_filename``.

    Args:
        base_model: Model used as feature extractor; must provide
            ``predict_generator`` and ``name``.
        target_size: ``(height, width)`` the images are resized to.
    """
    datagen = ImageDataGenerator(rescale=1./255)

    def _flow(directory, class_mode):
        # shuffle=False for every split (flow_from_directory shuffles by
        # default): features must stay aligned with classes / filenames.
        return datagen.flow_from_directory(directory, target_size=target_size,
                                           batch_size=batch_size, class_mode=class_mode,
                                           shuffle=False)

    def _predict(generator):
        # Ceiling division so a trailing partial batch is not dropped.
        steps = -(-generator.samples // batch_size)
        return base_model.predict_generator(generator, steps)

    train_generator = _flow(train_path, 'binary')
    valid_generator = _flow(valid_path, 'binary')
    test_generator = _flow(test_path, None)

    train_features = _predict(train_generator)
    valid_features = _predict(valid_generator)
    test_features = _predict(test_generator)

    # test_filename is stored as well so predictions can be mapped back to ids.
    np.savez('{0}_features.npz'.format(base_model.name),
             train=train_features, train_label=train_generator.classes,
             valid=valid_features, valid_label=valid_generator.classes,
             test=test_features, test_filename=test_generator.filenames)


# Batch size shared by feature extraction and classifier training.
batch_size = 20

In [47]:
# ResNet50 with ImageNet weights, built without its top layers and with
# average pooling; it is passed to extract_features1 as the feature extractor.
base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

In [48]:
# Extract features from 224x224 inputs using ResNet50's own preprocessing
# function; the result is cached in 'resnet50_features.npz'.
extract_features1(base_model, (224, 224), resnet50_pre)

Found 2000 images belonging to 2 classes.
Found 800 images belonging to 2 classes.
Found 100 images belonging to 1 classes.


In [49]:
# Load the cached ResNet50 features written by extract_features1.
resnet50_features = np.load('resnet50_features.npz')

In [50]:
# Pull the individual arrays out of the ResNet50 feature archive.
(train_features, train_labels,
 valid_features, valid_labels,
 test_features) = (resnet50_features[key]
                   for key in ('train', 'train_label', 'valid', 'valid_label', 'test'))

In [51]:
# Classifier head trained on the cached features: two 2048-unit ReLU layers
# with 50% dropout each, followed by a single sigmoid output unit.
model = Sequential([
    Dense(2048, input_shape=(train_features.shape[1],), activation='relu'),
    Dropout(.5),
    Dense(2048, activation='relu'),
    Dropout(.5),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy'])

In [52]:
# Save the model to 'resnet50.hdf5' only when the monitored validation loss improves.
checkpointer = ModelCheckpoint(filepath='resnet50.hdf5', verbose=1, save_best_only=True)

model.fit(train_features, train_labels, epochs=2, batch_size=batch_size,
          validation_data=(valid_features, valid_labels), verbose=1, callbacks=[checkpointer])

# fit() leaves the model at the last epoch's weights, which may be worse than
# the checkpointed ones; restore the best epoch before predicting.
model.load_weights('resnet50.hdf5')

Train on 2000 samples, validate on 800 samples
Epoch 1/2

Epoch 00001: val_loss improved from inf to 0.19977, saving model to resnet50.hdf5
Epoch 2/2

Epoch 00002: val_loss did not improve from 0.19977


<keras.callbacks.History at 0x1de576ce6d8>

In [79]:
# Score the cached test features with the trained classifier head.
prediction = model.predict(test_features, batch_size=10)

In [80]:
# Keep column 0 (the single sigmoid output) and clamp it to [0.01, 0.99].
# NOTE(review): presumably this bounds the log-loss penalty for confident
# mistakes -- confirm against the competition metric.
prediction = prediction[:, 0].clip(0.01, 0.99)

In [55]:
# File names of the test images as stored by extract_features1.
test_filenames = resnet50_features['test_filename']

In [100]:
# Reduce every test file path to its bare name without directory or extension.
test_fileindex = np.array([os.path.splitext(os.path.basename(path))[0]
                           for path in test_filenames])

In [101]:
# Pair each id with its prediction as the two columns of one array.
# NOTE(review): stacking strings with floats presumably yields a string
# array, so the labels are text from here on -- confirm if numeric dtype matters.
data = np.stack([test_fileindex, prediction], axis=1)

In [104]:
# Wrap the id/prediction pairs in a DataFrame with 'id' and 'label' columns.
data1= pd.DataFrame(data, columns=['id', 'label'])

In [107]:
# Write the ResNet50 predictions to disk without the DataFrame index column.
data1.to_csv('resnet50.csv', index=False)


In [28]:
# VGG16 with ImageNet weights, built without its top layers and with
# average pooling; it replaces the ResNet50 extractor from here on.
base_model = VGG16(weights='imagenet', include_top=False, pooling='avg')

In [30]:
# Extract features from 224x224 inputs using VGG16's own preprocessing
# function; the result is cached in 'vgg16_features.npz'.
extract_features1(base_model, (224, 224), vgg16_pre)

Found 2000 images belonging to 2 classes.
Found 800 images belonging to 2 classes.
Found 100 images belonging to 1 classes.


In [31]:
# Load the cached VGG16 features written by extract_features1.
vgg16_features = np.load('vgg16_features.npz')

In [32]:
# Inspect the shape of a single cached training feature vector.
vgg16_features['train'][0].shape

(512,)

In [33]:
# Pull the individual arrays out of the VGG16 feature archive.
(train_features, train_labels,
 valid_features, valid_labels,
 test_features) = (vgg16_features[key]
                   for key in ('train', 'train_label', 'valid', 'valid_label', 'test'))

In [34]:
# Classifier head trained on the cached features: two 512-unit ReLU layers
# with 50% dropout each, followed by a single sigmoid output unit.
model = Sequential([
    Dense(512, input_shape=(train_features.shape[1],), activation='relu'),
    Dropout(.5),
    Dense(512, activation='relu'),
    Dropout(.5),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy'])

In [35]:
# Save the model to 'vgg16.hdf5' only when the monitored validation loss improves.
checkpointer = ModelCheckpoint(filepath='vgg16.hdf5', verbose=1, save_best_only=True)

model.fit(train_features, train_labels, epochs=2, batch_size=batch_size,
          validation_data=(valid_features, valid_labels), verbose=1, callbacks=[checkpointer])

# fit() leaves the model at the last epoch's weights, which are not guaranteed
# to be the best ones; restore the checkpointed epoch before predicting.
model.load_weights('vgg16.hdf5')

Train on 2000 samples, validate on 800 samples
Epoch 1/2

Epoch 00001: val_loss improved from inf to 0.24033, saving model to vgg16.hdf5
Epoch 2/2

Epoch 00002: val_loss improved from 0.24033 to 0.19430, saving model to vgg16.hdf5


<keras.callbacks.History at 0x1de558b54e0>

In [43]:
# Score the cached test features with the trained classifier head.
prediction = model.predict(test_features, batch_size=10)

In [44]:
# Flatten the (n, 1) sigmoid output to one value per image, as in the ResNet50
# pipeline, and clamp it to [0.01, 0.99]. ravel() keeps the cell re-runnable.
prediction = prediction.ravel().clip(0.01, 0.99)

In [41]:
# Fill the sample submission with the predicted probabilities and save it.
df = pd.read_csv("dogs-vs-cats/sample_submission.csv")
# Make sure the column can hold probabilities even if the template stores ints.
df['label'] = df['label'].astype(float)

gen = ImageDataGenerator()
# shuffle=False so that filenames[i] stays in a fixed, reproducible order.
# NOTE(review): "test2" differs from test_path used for feature extraction;
# confirm both list the same files in the same order as the predictions.
test_generator = gen.flow_from_directory("test2", (224, 224), shuffle=False,
                                         batch_size=16, class_mode=None)

# NOTE(review): this cell originally read an undefined ``y_pred``; the model
# output available in this notebook is ``prediction`` -- confirm the intent.
scores = np.ravel(prediction)

for i, fname in enumerate(test_generator.filenames):
    # os.path parsing works regardless of the platform's path separator.
    index = int(os.path.splitext(os.path.basename(fname))[0])
    # Image ids start at 1 while the DataFrame rows start at 0.
    df.at[index - 1, 'label'] = scores[i]

df.to_csv('pred.csv', index=False)