## 自己构建的网络

In [1]:
from sklearn.datasets import load_files       
from keras.utils import np_utils
from keras.layers import Lambda
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input as resnet50_pre
from keras.applications.resnet50 import decode_predictions as resnet50_decode
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input as inceptionV3_pre
from keras.applications.inception_v3 import decode_predictions as inceptionV3_decode
from keras.applications.xception import Xception
from keras.applications.xception import preprocess_input as xception_pre
from keras.applications.xception import decode_predictions as xception_decode
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input as vgg16_pre
from keras.applications.vgg16 import decode_predictions as vgg16_decode
from keras.applications.vgg19 import VGG19
from keras.applications.vgg19 import preprocess_input as vgg19_pre
from keras.applications.vgg19 import decode_predictions as vgg19_decode
from keras.preprocessing import image   
from keras.optimizers import SGD, Adam
from keras.utils.np_utils import to_categorical
from tqdm import tqdm
from PIL import ImageFile  
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense, Input, Activation
from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint  
import numpy as np
import pandas as pd
from glob import glob
import cv2                
import matplotlib.pyplot as plt    
import matplotlib.image as mpimg
%matplotlib inline 
import random
import os
import shutil
import h5py
import common
import pickle
from functools import reduce

Using TensorFlow backend.


In [2]:
## ImageNet class IDs of the dog categories
imagenet_dogs = """
n02085620 n02085782 n02085936 n02086079 n02086240 n02086646 n02086910 n02087046 n02087394
n02088094 n02088238 n02088364 n02088466 n02088632 n02089078 n02089867 n02089973 n02090379
n02090622 n02090721 n02091032 n02091134 n02091244 n02091467 n02091635 n02091831 n02092002
n02092339 n02093256 n02093428 n02093647 n02093754 n02093859 n02093991 n02094114 n02094258
n02094433 n02095314 n02095570 n02095889 n02096051 n02096177 n02096294 n02096437 n02096585
n02097047 n02097130 n02097209 n02097298 n02097474 n02097658 n02098105 n02098286 n02098413
n02099267 n02099429 n02099601 n02099712 n02099849 n02100236 n02100583 n02100735 n02100877
n02101006 n02101388 n02101556 n02102040 n02102177 n02102318 n02102480 n02102973 n02104029
n02104365 n02105056 n02105162 n02105251 n02105412 n02105505 n02105641 n02105855 n02106030
n02106166 n02106382 n02106550 n02106662 n02107142 n02107312 n02107574 n02107683 n02107908
n02108000 n02108089 n02108422 n02108551 n02108915 n02109047 n02109525 n02109961 n02110063
n02110185 n02110341 n02110627 n02110806 n02110958 n02111129 n02111277 n02111500 n02111889
n02112018 n02112137 n02112350 n02112706 n02113023 n02113186 n02113624 n02113712 n02113799
n02113978
""".split()

## ImageNet class IDs of the cat categories
imagenet_cats = [
    'n02123045',
    'n02123159',
    'n02123394',
    'n02123597',
    'n02124075',
    'n02125311',
    'n02127052',
]

In [3]:
## Names of all entries in the training directory; get_exception_images
## iterates over this module-level list.
names = os.listdir(common.train_path)

In [4]:
def get_exception_images(model, target_size, preprocess_func, decode_func):
    """Return the training images whose predictions contradict their label.

    Every file in the module-level ``names`` list is labelled by its filename
    prefix (``cat`` or ``dog``). The image is run through ``model`` and is
    reported as an exception when none of its 30 highest-ranked predictions
    is in ``imagenet_cats`` (for cats) or ``imagenet_dogs`` (for dogs).
    Files with any other prefix are skipped without being loaded.

    Args:
        model: Object whose ``predict`` method accepts the preprocessed
            single-image batch.
        target_size: Size forwarded to ``image.load_img``.
        preprocess_func: Callable applied to the batch before prediction.
        decode_func: Callable invoked as ``decode_func(preds, top=30)``. The
            first element of its result must be a sequence of 3-tuples whose
            first item is the class ID compared against the ImageNet lists.

    Returns:
        list: Filenames (relative to ``common.train_path``) of the images
        flagged as exceptions, in the order they appear in ``names``.
    """
    # Build the lookup sets once instead of scanning the lists per image.
    cat_ids = frozenset(imagenet_cats)
    dog_ids = frozenset(imagenet_dogs)

    exception_names = []
    for name in names:
        # The expected class family comes from the filename prefix; anything
        # else cannot be judged, so skip it before paying for a prediction.
        if name.startswith('cat'):
            expected_ids = cat_ids
        elif name.startswith('dog'):
            expected_ids = dog_ids
        else:
            continue

        # os.path.join works whether or not train_path ends with a separator.
        img = image.load_img(os.path.join(common.train_path, name),
                             target_size=target_size)
        batch = np.expand_dims(image.img_to_array(img), axis=0)
        preds = model.predict(preprocess_func(batch))

        # An image is an exception when not a single one of its top-30
        # predicted class IDs belongs to the family its label promises.
        top_predictions = decode_func(preds, top=30)[0]
        if not any(class_id in expected_ids for class_id, _, _ in top_predictions):
            exception_names.append(name)
    return exception_names

In [5]:
## Images that VGG16 (ImageNet weights) cannot match to their label
model = VGG16(weights='imagenet')
vgg16_exception_images = get_exception_images(
    model=model,
    target_size=(224, 224),
    preprocess_func=vgg16_pre,
    decode_func=vgg16_decode,
)

In [7]:
## Images that VGG19 (ImageNet weights) cannot match to their label
model = VGG19(weights='imagenet')
vgg19_exception_images = get_exception_images(
    model=model,
    target_size=(224, 224),
    preprocess_func=vgg19_pre,
    decode_func=vgg19_decode,
)

In [8]:
## Images that InceptionV3 (ImageNet weights) cannot match to their label
model = InceptionV3(weights='imagenet', include_top=True)
inceptionV3_exception_images = get_exception_images(
    model=model,
    target_size=(299, 299),
    preprocess_func=inceptionV3_pre,
    decode_func=inceptionV3_decode,
)

In [9]:
## Images that Xception (ImageNet weights) cannot match to their label
model = Xception(weights='imagenet')
xception_exception_images = get_exception_images(
    model=model,
    target_size=(299, 299),
    preprocess_func=xception_pre,
    decode_func=xception_decode,
)

In [10]:
## Images that ResNet50 (ImageNet weights) cannot match to their label
model = ResNet50(weights='imagenet')
resnet50_exception_images = get_exception_images(
    model=model,
    target_size=(224, 224),
    preprocess_func=resnet50_pre,
    decode_func=resnet50_decode,
)

In [11]:
## Intersection of the five per-model results: keep only the images that
## every model flagged.
exception_images = reduce(
    np.intersect1d,
    [
        vgg16_exception_images,
        vgg19_exception_images,
        inceptionV3_exception_images,
        xception_exception_images,
        resnet50_exception_images,
    ],
)

In [12]:
## Images that none of the five models could recognise as a cat or a dog
print(exception_images)

['cat.10365.jpg' 'cat.10636.jpg' 'cat.10712.jpg' 'cat.11184.jpg'
 'cat.11565.jpg' 'cat.12227.jpg' 'cat.12272.jpg' 'cat.12424.jpg'
 'cat.12493.jpg' 'cat.2337.jpg' 'cat.252.jpg' 'cat.2520.jpg'
 'cat.2893.jpg' 'cat.2939.jpg' 'cat.3216.jpg' 'cat.3731.jpg'
 'cat.3868.jpg' 'cat.4338.jpg' 'cat.4688.jpg' 'cat.4833.jpg'
 'cat.4852.jpg' 'cat.503.jpg' 'cat.5351.jpg' 'cat.5418.jpg' 'cat.6429.jpg'
 'cat.6442.jpg' 'cat.6699.jpg' 'cat.7377.jpg' 'cat.7564.jpg'
 'cat.7920.jpg' 'cat.7968.jpg' 'cat.8383.jpg' 'cat.8456.jpg'
 'cat.8470.jpg' 'cat.9090.jpg' 'cat.9171.jpg' 'cat.9444.jpg'
 'dog.10161.jpg' 'dog.10190.jpg' 'dog.10237.jpg' 'dog.10801.jpg'
 'dog.12376.jpg' 'dog.1773.jpg' 'dog.1895.jpg' 'dog.2422.jpg'
 'dog.4367.jpg' 'dog.5604.jpg' 'dog.6475.jpg' 'dog.8736.jpg'
 'dog.9517.jpg']


In [14]:
## Erroneous images reported on the Kaggle discussion board
error_images = [
    'cat.3216.jpg',
    'cat.7377.jpg',
    'cat.8456.jpg',
    'cat.7564.jpg',
    'cat.9171.jpg',
    'cat.4688.jpg',
    'cat.4085.jpg',
    'cat.5351.jpg',
    'cat.5418.jpg',
    'cat.11184.jpg',
    'cat.10029.jpg',
    'dog.1043.jpg',
    'dog.1773.jpg',
    'dog.4367.jpg',
    'dog.8736.jpg',
    'dog.8898.jpg',
    'dog.10237.jpg',
]

In [21]:
## Final set of images treated as anomalous: the images flagged by all
## models merged with the ones reported on Kaggle.
exception_images = np.union1d(exception_images, error_images)
print(exception_images)

['cat.10029.jpg' 'cat.10365.jpg' 'cat.10636.jpg' 'cat.10712.jpg'
 'cat.11184.jpg' 'cat.11565.jpg' 'cat.12227.jpg' 'cat.12272.jpg'
 'cat.12424.jpg' 'cat.12493.jpg' 'cat.2337.jpg' 'cat.252.jpg'
 'cat.2520.jpg' 'cat.2893.jpg' 'cat.2939.jpg' 'cat.3216.jpg'
 'cat.3731.jpg' 'cat.3868.jpg' 'cat.4085.jpg' 'cat.4338.jpg'
 'cat.4688.jpg' 'cat.4833.jpg' 'cat.4852.jpg' 'cat.503.jpg' 'cat.5351.jpg'
 'cat.5418.jpg' 'cat.6429.jpg' 'cat.6442.jpg' 'cat.6699.jpg'
 'cat.7377.jpg' 'cat.7564.jpg' 'cat.7920.jpg' 'cat.7968.jpg'
 'cat.8383.jpg' 'cat.8456.jpg' 'cat.8470.jpg' 'cat.9090.jpg'
 'cat.9171.jpg' 'cat.9444.jpg' 'dog.10161.jpg' 'dog.10190.jpg'
 'dog.10237.jpg' 'dog.1043.jpg' 'dog.10801.jpg' 'dog.12376.jpg'
 'dog.1773.jpg' 'dog.1895.jpg' 'dog.2422.jpg' 'dog.4367.jpg'
 'dog.5604.jpg' 'dog.6475.jpg' 'dog.8736.jpg' 'dog.8898.jpg'
 'dog.9517.jpg']


In [19]:
## Permanently delete every anomalous image from the training directory.
for name in exception_images:
    # os.path.join works whether or not train_path ends with a separator.
    path = os.path.join(common.train_path, name)
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already gone (e.g. this cell was run before); nothing left to do.
        pass