## 自己构建的网络

In [1]:
from sklearn.datasets import load_files       
from keras.utils import np_utils
from keras.layers import Lambda
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input as resnet50_pre
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input as inceptionV3_pre
from keras.applications.xception import Xception
from keras.applications.xception import preprocess_input as xception_pre
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input as vgg16_pre
from keras.applications.vgg19 import VGG19
from keras.applications.vgg19 import preprocess_input as vgg19_pre
from keras.preprocessing import image   
from keras.optimizers import SGD, Adam
from keras.utils.np_utils import to_categorical
from tqdm import tqdm
from PIL import ImageFile  
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense, Input, Activation
from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint  
import numpy as np
import pandas as pd
from glob import glob
import cv2                
import matplotlib.pyplot as plt    
import matplotlib.image as mpimg
%matplotlib inline 
import random
import os
import shutil
import h5py
import common
import pickle

Using TensorFlow backend.


## 对图片进行分类

In [2]:
# Run the project-local helper from the `common` module before any feature extraction.
# NOTE(review): presumably this sorts the raw images into the per-class folders that the
# extraction helpers read from; the helper's body is not visible here, so confirm in common.py.
common.divide_images()

In [3]:
# Registries keyed by backbone name ('vgg16', 'vgg19', 'resnet50', 'inceptionV3', 'xception').
# The values are whatever common.extract_features / common.extract_test_features return;
# model_train hands them straight to np.load, so they are expected to be .npz file paths.
models_and_features = {}
models_and_test_features = {}

In [4]:
def batch_extract_features():
    """Run every ImageNet backbone over the train/validation images and record the results.

    For each backbone (VGG16, VGG19, ResNet50, InceptionV3, Xception, in that order) a
    headless model with global average pooling is built, handed to
    ``common.extract_features`` together with its input size and preprocessing function,
    and the helper's return value is stored in the module-level ``models_and_features``
    dict under the backbone's key.

    Returns:
        None. The only effect visible here is the update of ``models_and_features``;
        anything ``common.extract_features`` writes to disk is defined in that helper.
    """
    # (registry key, model constructor, input size, preprocessing function), in run order.
    extraction_plan = (
        ('vgg16', VGG16, (224, 224), vgg16_pre),
        ('vgg19', VGG19, (224, 224), vgg19_pre),
        ('resnet50', ResNet50, (224, 224), resnet50_pre),
        ('inceptionV3', InceptionV3, (299, 299), inceptionV3_pre),
        ('xception', Xception, (299, 299), xception_pre),
    )

    for registry_key, build_backbone, input_size, preprocess in extraction_plan:
        # include_top=False + pooling='avg' yields one pooled feature vector per image.
        backbone = build_backbone(weights='imagenet', include_top=False, pooling='avg')
        models_and_features[registry_key] = common.extract_features(backbone, input_size, preprocess)

In [5]:
def batch_extract_test_features():
    """Run every ImageNet backbone over the test images and record the results.

    For each backbone (VGG16, VGG19, ResNet50, InceptionV3, Xception, in that order) a
    headless model with global average pooling is built, handed to
    ``common.extract_test_features`` together with its input size and preprocessing
    function, and the helper's return value is stored in the module-level
    ``models_and_test_features`` dict under the backbone's key.

    Returns:
        None. The only effect visible here is the update of ``models_and_test_features``;
        anything ``common.extract_test_features`` writes to disk is defined in that helper.
    """
    # (registry key, model constructor, input size, preprocessing function), in run order.
    extraction_plan = (
        ('vgg16', VGG16, (224, 224), vgg16_pre),
        ('vgg19', VGG19, (224, 224), vgg19_pre),
        ('resnet50', ResNet50, (224, 224), resnet50_pre),
        ('inceptionV3', InceptionV3, (299, 299), inceptionV3_pre),
        ('xception', Xception, (299, 299), xception_pre),
    )

    for registry_key, build_backbone, input_size, preprocess in extraction_plan:
        # include_top=False + pooling='avg' yields one pooled feature vector per image.
        backbone = build_backbone(weights='imagenet', include_top=False, pooling='avg')
        models_and_test_features[registry_key] = common.extract_test_features(backbone, input_size, preprocess)

In [6]:
# Extract train/validation features with all five backbones and fill models_and_features.
# This builds five pretrained networks and runs each over the image set, so it is the
# expensive step; the fallback cell further down lets a fresh kernel skip it.
batch_extract_features()

Found 19988 images belonging to 2 classes.
Found 4995 images belonging to 2 classes.
Found 19988 images belonging to 2 classes.
Found 4995 images belonging to 2 classes.
Found 19988 images belonging to 2 classes.
Found 4995 images belonging to 2 classes.
Found 19988 images belonging to 2 classes.
Found 4995 images belonging to 2 classes.
Found 19988 images belonging to 2 classes.
Found 4995 images belonging to 2 classes.


In [7]:
# Extract test-set features with all five backbones and fill models_and_test_features.
# Like the train/validation extraction, this is slow; the fallback cell further down
# lets a fresh kernel skip it when the feature files already exist.
batch_extract_test_features()

Found 12500 images belonging to 1 classes.
Found 12500 images belonging to 1 classes.
Found 12500 images belonging to 1 classes.
Found 12500 images belonging to 1 classes.
Found 12500 images belonging to 1 classes.


In [8]:
# Fallback for a kernel where the extraction cell was skipped: point every backbone key
# at a feature file name. NOTE(review): these names are assumed to match what
# common.extract_features saved on an earlier run -- confirm against common.py.
if not models_and_features:
    models_and_features.update({
        'vgg16': 'vgg16_features.npz',
        'vgg19': 'vgg19_features.npz',
        'resnet50': 'resnet50_features.npz',
        'inceptionV3': 'inception_v3_features.npz',
        'xception': 'xception_features.npz',
    })

In [9]:
# Fallback for a kernel where the test extraction cell was skipped: point every backbone
# key at a test feature file name. NOTE(review): these names are assumed to match what
# common.extract_test_features saved on an earlier run -- confirm against common.py.
if not models_and_test_features:
    models_and_test_features.update({
        'vgg16': 'test_vgg16_features.npz',
        'vgg19': 'test_vgg19_features.npz',
        'resnet50': 'test_resnet50_features.npz',
        'inceptionV3': 'test_inception_v3_features.npz',
        'xception': 'test_xception_features.npz',
    })

In [18]:
def model_train(key, dropout, optimizer, file_header, epochs=20):
    """Train a dense classifier head on cached features and write a submission CSV.

    Args:
        key: Backbone key used to look up the feature files in ``models_and_features``
            and ``models_and_test_features``.
        dropout: Dropout rate applied after each of the two hidden layers.
        optimizer: Keras optimizer (instance or name) passed to ``model.compile``.
        file_header: Prefix for every artifact written by this call:
            ``<file_header>.hdf5`` (best checkpoint), ``<file_header>_history.pkl``
            (training history dict) and ``<file_header>_predict.csv`` (submission).
        epochs: Number of training epochs. Defaults to 20, the previously hard-coded value.

    Returns:
        None. Results are written to the three files listed above.

    Raises:
        KeyError: if ``key`` is missing from either registry or an expected array is
            missing from the ``.npz`` archives.
        ValueError: if a test file name's stem is not numeric.
    """
    # NpzFile keeps the archive open until closed; indexing materialises each array, so
    # the handles can be released as soon as the arrays have been read.
    with np.load(models_and_features[key]) as features:
        train_features = features['train']
        # Labels are trimmed to the number of feature rows actually stored
        # (presumably extraction drops a trailing partial batch -- confirm in common.py).
        train_labels = features['train_label'][:len(train_features)]
        valid_features = features['valid']
        valid_labels = features['valid_label'][:len(valid_features)]
    with np.load(models_and_test_features[key]) as features_test:
        test_features = features_test['test']
        test_filenames = features_test['test_filename']

    # Two hidden layers with dropout, then a single sigmoid unit for the binary label.
    model = Sequential()
    model.add(Dense(2048, input_shape=(train_features.shape[1],), activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(2048, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    weights_path = '{0}.hdf5'.format(file_header)
    # save_best_only keeps only the best epoch on disk (the recorded logs show it tracking val_loss).
    checkpointer = ModelCheckpoint(filepath=weights_path, verbose=1, save_best_only=True)
    train_result = model.fit(train_features, train_labels, epochs=epochs, batch_size=common.batch_size,
                             validation_data=(valid_features, valid_labels), verbose=1,
                             callbacks=[checkpointer])

    with open('{0}_history.pkl'.format(file_header), 'wb') as f:
        pickle.dump(train_result.history, f)

    # Predict with the best checkpoint rather than the weights of the final epoch.
    model.load_weights(weights_path)
    prediction = model.predict(test_features, batch_size=common.batch_size)
    # Clip so a confidently wrong answer cannot produce an unbounded log-loss term.
    prediction = prediction[:, 0].clip(0.01, 0.99)

    # Image ids are the numeric stems of the test file names, e.g. 'test/42.jpg' -> 42.
    id_stems = [os.path.splitext(os.path.basename(str(filename)))[0] for filename in test_filenames]
    # Build the frame with typed columns; stacking ids and predictions into one array
    # would coerce the float predictions to strings.
    predictions = pd.DataFrame({'id': pd.to_numeric(id_stems), 'label': prediction})

    # Use only the sample submission's id column so the output follows its row order
    # without a label_x/label_y suffix collision.
    submit_frame = pd.read_csv('dogs-vs-cats/sample_submission.csv')
    result = submit_frame[['id']].merge(predictions, on='id', how='left')
    # Ids present in the sample file but without a prediction are dropped.
    result = result.dropna(axis=0, subset=['label'])
    result[['id', 'label']].to_csv('{0}_predict.csv'.format(file_header), index=False)

## 训练+预测

In [19]:
# VGG16 features: dropout 0.5, SGD with learning rate 0.001.
# Artifacts are written with the 'vgg16_SGD_0.5' prefix (.hdf5, _history.pkl, _predict.csv).
model_train('vgg16', 0.5, SGD(lr=0.001), 'vgg16_SGD_0.5')

Train on 19980 samples, validate on 4980 samples
Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.04754, saving model to vgg16_SGD_0.5.hdf5
Epoch 2/20

Epoch 00002: val_loss did not improve from 0.04754
Epoch 3/20

Epoch 00003: val_loss improved from 0.04754 to 0.04432, saving model to vgg16_SGD_0.5.hdf5
Epoch 4/20

Epoch 00004: val_loss did not improve from 0.04432
Epoch 5/20

Epoch 00005: val_loss improved from 0.04432 to 0.04323, saving model to vgg16_SGD_0.5.hdf5
Epoch 6/20

Epoch 00006: val_loss improved from 0.04323 to 0.04162, saving model to vgg16_SGD_0.5.hdf5
Epoch 7/20

Epoch 00007: val_loss did not improve from 0.04162
Epoch 8/20

Epoch 00008: val_loss improved from 0.04162 to 0.03977, saving model to vgg16_SGD_0.5.hdf5
Epoch 9/20

Epoch 00009: val_loss improved from 0.03977 to 0.03967, saving model to vgg16_SGD_0.5.hdf5
Epoch 10/20

Epoch 00010: val_loss improved from 0.03967 to 0.03826, saving model to vgg16_SGD_0.5.hdf5
Epoch 11/20

Epoch 00011: val_loss improved 

In [20]:
# VGG19 features: dropout 0.5, SGD with learning rate 0.001.
# Artifacts are written with the 'vgg19_SGD_0.5' prefix (.hdf5, _history.pkl, _predict.csv).
model_train('vgg19', 0.5, SGD(lr=0.001), 'vgg19_SGD_0.5')

Train on 19980 samples, validate on 4980 samples
Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.04420, saving model to vgg19_SGD_0.5.hdf5
Epoch 2/20

Epoch 00002: val_loss did not improve from 0.04420
Epoch 3/20

Epoch 00003: val_loss improved from 0.04420 to 0.04337, saving model to vgg19_SGD_0.5.hdf5
Epoch 4/20

Epoch 00004: val_loss improved from 0.04337 to 0.03921, saving model to vgg19_SGD_0.5.hdf5
Epoch 5/20

Epoch 00005: val_loss did not improve from 0.03921
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.03921
Epoch 7/20

Epoch 00007: val_loss improved from 0.03921 to 0.03641, saving model to vgg19_SGD_0.5.hdf5
Epoch 8/20

Epoch 00008: val_loss improved from 0.03641 to 0.03629, saving model to vgg19_SGD_0.5.hdf5
Epoch 9/20

Epoch 00009: val_loss did not improve from 0.03629
Epoch 10/20

Epoch 00010: val_loss improved from 0.03629 to 0.03578, saving model to vgg19_SGD_0.5.hdf5
Epoch 11/20

Epoch 00011: val_loss did not improve from 0.03578
Epoch 12/20

Epoch 0

In [21]:
# ResNet50 features: dropout 0.5, SGD with learning rate 0.001.
# Artifacts are written with the 'resnet50_SGD_0.5' prefix (.hdf5, _history.pkl, _predict.csv).
model_train('resnet50', 0.5, SGD(lr=0.001), 'resnet50_SGD_0.5')

Train on 19980 samples, validate on 4980 samples
Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.05791, saving model to resnet50_SGD_0.5.hdf5
Epoch 2/20

Epoch 00002: val_loss improved from 0.05791 to 0.04162, saving model to resnet50_SGD_0.5.hdf5
Epoch 3/20

Epoch 00003: val_loss improved from 0.04162 to 0.03612, saving model to resnet50_SGD_0.5.hdf5
Epoch 4/20

Epoch 00004: val_loss improved from 0.03612 to 0.03344, saving model to resnet50_SGD_0.5.hdf5
Epoch 5/20

Epoch 00005: val_loss improved from 0.03344 to 0.03210, saving model to resnet50_SGD_0.5.hdf5
Epoch 6/20

Epoch 00006: val_loss improved from 0.03210 to 0.03114, saving model to resnet50_SGD_0.5.hdf5
Epoch 7/20

Epoch 00007: val_loss improved from 0.03114 to 0.03055, saving model to resnet50_SGD_0.5.hdf5
Epoch 8/20

Epoch 00008: val_loss improved from 0.03055 to 0.03031, saving model to resnet50_SGD_0.5.hdf5
Epoch 9/20

Epoch 00009: val_loss improved from 0.03031 to 0.02986, saving model to resnet50_SGD_0.5.hdf5
E

In [22]:
# InceptionV3 features: dropout 0.5, Adam with learning rate 0.001 (the VGG/ResNet runs use SGD).
# Artifacts are written with the 'inceptionV3_Adam_0.5' prefix (.hdf5, _history.pkl, _predict.csv).
model_train('inceptionV3', 0.5, Adam(lr=0.001), 'inceptionV3_Adam_0.5')

Train on 19980 samples, validate on 4980 samples
Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.20916, saving model to inceptionV3_Adam_0.5.hdf5
Epoch 2/20

Epoch 00002: val_loss did not improve from 0.20916
Epoch 3/20

Epoch 00003: val_loss improved from 0.20916 to 0.17581, saving model to inceptionV3_Adam_0.5.hdf5
Epoch 4/20

Epoch 00004: val_loss did not improve from 0.17581
Epoch 5/20

Epoch 00005: val_loss did not improve from 0.17581
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.17581
Epoch 7/20

Epoch 00007: val_loss did not improve from 0.17581
Epoch 8/20

Epoch 00008: val_loss did not improve from 0.17581
Epoch 9/20

Epoch 00009: val_loss did not improve from 0.17581
Epoch 10/20

Epoch 00010: val_loss did not improve from 0.17581
Epoch 11/20

Epoch 00011: val_loss improved from 0.17581 to 0.15072, saving model to inceptionV3_Adam_0.5.hdf5
Epoch 12/20

Epoch 00012: val_loss improved from 0.15072 to 0.12337, saving model to inceptionV3_Adam_0.5.hdf5
Epoch 13

In [23]:
# Xception features: dropout 0.3 (the other runs use 0.5), Adam with learning rate 0.001.
# Artifacts are written with the 'xception_Adam_0.3' prefix (.hdf5, _history.pkl, _predict.csv).
# NOTE(review): the recorded output for this cell mentions 'xception_Adam_0.5.hdf5', which
# does not match this file_header -- the saved output looks stale; re-run to refresh it.
model_train('xception', 0.3, Adam(lr=0.001), 'xception_Adam_0.3')

Train on 19980 samples, validate on 4980 samples
Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.28948, saving model to xception_Adam_0.5.hdf5
Epoch 2/20

Epoch 00002: val_loss improved from 0.28948 to 0.14107, saving model to xception_Adam_0.5.hdf5
Epoch 3/20

Epoch 00003: val_loss did not improve from 0.14107
Epoch 4/20

Epoch 00004: val_loss improved from 0.14107 to 0.11593, saving model to xception_Adam_0.5.hdf5
Epoch 5/20

Epoch 00005: val_loss did not improve from 0.11593
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.11593
Epoch 7/20

Epoch 00007: val_loss did not improve from 0.11593
Epoch 8/20

Epoch 00008: val_loss did not improve from 0.11593
Epoch 9/20

Epoch 00009: val_loss improved from 0.11593 to 0.11005, saving model to xception_Adam_0.5.hdf5
Epoch 10/20

Epoch 00010: val_loss did not improve from 0.11005
Epoch 11/20

Epoch 00011: val_loss did not improve from 0.11005
Epoch 12/20

Epoch 00012: val_loss did not improve from 0.11005
Epoch 13/20

Epoch 0