## 采用预训练模型剔除异常数据

In [1]:
import csv
import matplotlib.pyplot as plt

%matplotlib inline  

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use the new name when it exists
# and fall back to the legacy one on older installations.
_PLOT_STYLE = 'seaborn-v0_8-white'
if _PLOT_STYLE not in plt.style.available:
    _PLOT_STYLE = 'seaborn-white'
plt.style.use(_PLOT_STYLE)

# (width, height) passed as target_size when images are loaded for the model.
img_size = (299, 299)

def get_imageNet_class(file_path, encoding=None):
    """Collect the identifiers of every dog or cat row in a CSV file.

    Each row is expected to hold an identifier in its first column and a
    category label in its second column; rows labelled '狗' (dog) or
    '猫' (cat) are kept.

    Args:
        file_path: Path of the CSV file to read.
        encoding: Text encoding handed to ``open``. ``None`` (the default)
            keeps the platform default; pass e.g. ``'utf-8'`` when the
            file's encoding differs from the locale's.

    Returns:
        list[str]: First-column values of the matching rows, in file order.
    """
    wanted_labels = ('狗', '猫')
    # newline='' is what the csv module asks for so that quoted fields
    # containing line breaks are parsed correctly.
    with open(file_path, 'r', newline='', encoding=encoding) as f:
        return [
            row[0]
            for row in csv.reader(f)
            # Blank lines yield [] and malformed rows may have one column;
            # skip them instead of raising IndexError on row[1].
            if len(row) > 1 and row[1] in wanted_labels
        ]

# Identifiers of every dog/cat row in ImageNetClasses.csv; the path is
# resolved against the current working directory.
imageNet_class = get_imageNet_class('ImageNetClasses.csv')

In [2]:
import os

# Remember the starting directory the first time this cell runs, so that
# re-running the cell returns to the same image/ folder instead of
# descending into image/image.
if '_NOTEBOOK_DIR' not in globals():
    _NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_DIR, 'image'))

In [8]:
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.applications.inception_v3 import preprocess_input
from keras.applications.inception_v3 import decode_predictions
import numpy as np
import random
from math import ceil

def get_outlier_predictions(train_path, model, img_size, top=10):
    """Run ``model`` on every image file in a directory and decode the output.

    Args:
        train_path: Directory holding the images; a trailing slash is optional.
        model: Object whose ``predict`` accepts the preprocessed single-image
            batch and whose output ``decode_predictions`` can decode.
        img_size: Target size handed to ``image.load_img``.
        top: Number of highest-scoring classes to keep per image.

    Returns:
        dict: Maps each file name to the list of its ``top`` decoded
        predictions. Every processed file is included; no filtering against
        a class list is applied here.
    """
    outlier_predictions = {}
    for name in os.listdir(train_path):
        # os.path.join works whether or not train_path ends with a separator.
        img_path = os.path.join(train_path, name)
        # Sub-directories and other non-file entries cannot be loaded as
        # images, so skip them rather than crash in load_img.
        if not os.path.isfile(img_path):
            continue

        img = image.load_img(img_path, target_size=img_size)
        # The model expects a batch, so add a leading axis of length one.
        batch = np.expand_dims(image.img_to_array(img), axis=0)
        batch = preprocess_input(batch)

        preds = model.predict(batch)
        # decode_predictions returns one list per batch item; keep the first.
        outlier_predictions[name] = decode_predictions(preds, top=top)[0]

    return outlier_predictions

In [4]:
# Build InceptionV3 with its ImageNet weights; the saved output below shows
# the weights being downloaded when the local copy is incomplete or outdated.
model = InceptionV3(weights='imagenet')

A local file was found, but it seems to be incomplete or outdated.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_tf_dim_ordering_tf_kernels.h5


In [9]:
# NOTE: despite its name, outlier_list is a dict mapping each file name in
# outlier_test/ to its top-10 decoded predictions.
outlier_list = get_outlier_predictions('outlier_test/', model, img_size, 10)

print(outlier_list)

{'dog.10.jpg': [('n02089867', 'Walker_hound', 0.31597313), ('n02088238', 'basset', 0.12977606), ('n02088632', 'bluetick', 0.12685494), ('n02107574', 'Greater_Swiss_Mountain_dog', 0.10331032), ('n02088364', 'beagle', 0.026519738), ('n02108000', 'EntleBucher', 0.026343489), ('n02109525', 'Saint_Bernard', 0.021485515), ('n02089973', 'English_foxhound', 0.020383401), ('n02107908', 'Appenzeller', 0.013605027), ('n02109047', 'Great_Dane', 0.0058515379)], 'cat.1.jpg': [('n02123045', 'tabby', 0.68273389), ('n02123159', 'tiger_cat', 0.15722224), ('n02124075', 'Egyptian_cat', 0.13042139), ('n02127052', 'lynx', 0.0055947555), ('n02123394', 'Persian_cat', 0.0010668333), ('n03958227', 'plastic_bag', 0.00086234766), ('n02971356', 'carton', 0.00055909768), ('n02123597', 'Siamese_cat', 0.00050864101), ('n02129604', 'tiger', 0.00048516353), ('n03223299', 'doormat', 0.00036668757)]}


In [None]:
def plt_outlier_img(outlier_list, img_dir='train/'):
    """Show the named images in a grid, four per row, titled by file name.

    Args:
        outlier_list: Iterable of image file names. A dict keyed by file name
            (such as the value returned by ``get_outlier_predictions``) is
            accepted as well; only its keys are used.
        img_dir: Directory the file names are relative to.

    Returns:
        None. The figure is drawn through pyplot.
    """
    # list() yields the keys of a dict and the items of a list/tuple, so
    # both kinds of input can be walked by position.
    names = list(outlier_list)
    if not names:
        # Nothing to draw; avoid creating an empty figure.
        return

    n_cols = 4
    n_rows = ceil(len(names) / n_cols)

    plt.figure(figsize=(12, 20))
    for position, name in enumerate(names, start=1):
        plt.subplot(n_rows, n_cols, position)
        plt.title(name)
        plt.imshow(image.load_img(os.path.join(img_dir, name)))
    # Adjust the layout once, after every subplot has been added.
    plt.tight_layout()

In [6]:
# NOTE(review): plt_outlier_img prints nothing, so the saved "25000" output
# below presumably comes from an earlier version of the code; re-run this
# cell to refresh it.
plt_outlier_img(outlier_list)

25000

In [10]:
# Hard-coded sample with the same shape as the printed predictions above:
# file name -> list of (id, label, score) tuples, highest score first.
test_dict = {
    'dog.10.jpg': [
        ('n02089867', 'Walker_hound', 0.31597313),
        ('n02088238', 'basset', 0.12977606),
        ('n02088632', 'bluetick', 0.12685494),
        ('n02107574', 'Greater_Swiss_Mountain_dog', 0.10331032),
        ('n02088364', 'beagle', 0.026519738),
        ('n02108000', 'EntleBucher', 0.026343489),
        ('n02109525', 'Saint_Bernard', 0.021485515),
        ('n02089973', 'English_foxhound', 0.020383401),
        ('n02107908', 'Appenzeller', 0.013605027),
        ('n02109047', 'Great_Dane', 0.0058515379),
    ],
    'cat.1.jpg': [
        ('n02123045', 'tabby', 0.68273389),
        ('n02123159', 'tiger_cat', 0.15722224),
        ('n02124075', 'Egyptian_cat', 0.13042139),
        ('n02127052', 'lynx', 0.0055947555),
        ('n02123394', 'Persian_cat', 0.0010668333),
        ('n03958227', 'plastic_bag', 0.00086234766),
        ('n02971356', 'carton', 0.00055909768),
        ('n02123597', 'Siamese_cat', 0.00050864101),
        ('n02129604', 'tiger', 0.00048516353),
        ('n03223299', 'doormat', 0.00036668757),
    ],
}

In [None]:
tes