In [1]:
# Verify that the 120 dog breeds
# are in the default imagenet categories

# Quote from the ResNet paper: https://arxiv.org/pdf/1512.03385.pdf
'''
4. Experiments
4.1. ImageNet Classification
We evaluate our method on the ImageNet 2012 classifi-
cation dataset [36] that consists of 1000 classes. The models
are trained on the 1.28 million training images, and evaluated
on the 50k validation images. We also obtain a final
result on the 100k test images, reported by the test server.
We evaluate both top-1 and top-5 error rates.
'''

# keras docs on resnet50
# https://keras.io/applications/#resnet50
'''
ResNet50
keras.applications.resnet50.ResNet50(..., classes=1000)
ResNet50 model, with weights pre-trained on ImageNet.

...

classes: optional number of classes to classify images into, only to be specified if include_top is True, and if no weights argument is specified.

'''

Using TensorFlow backend.


In [12]:
# Kaggle dog-breed-identification breed names, one per line:
# https://www.kaggle.com/c/dog-breed-identification/data
# Read inside a `with` block so the file handle is closed as soon as the
# lines are loaded instead of staying open for the life of the kernel.
with open('./dog_breeds.txt') as breeds_file:
    dog_breeds = breeds_file.readlines()

In [13]:
# inspect the raw lines: each entry still has underscores and a trailing '\n'
dog_breeds

['affenpinscher\n',
 'afghan_hound\n',
 'african_hunting_dog\n',
 'airedale\n',
 'american_staffordshire_terrier\n',
 'appenzeller\n',
 'australian_terrier\n',
 'basenji\n',
 'basset\n',
 'beagle\n',
 'bedlington_terrier\n',
 'bernese_mountain_dog\n',
 'black-and-tan_coonhound\n',
 'blenheim_spaniel\n',
 'bloodhound\n',
 'bluetick\n',
 'border_collie\n',
 'border_terrier\n',
 'borzoi\n',
 'boston_bull\n',
 'bouvier_des_flandres\n',
 'boxer\n',
 'brabancon_griffon\n',
 'briard\n',
 'brittany_spaniel\n',
 'bull_mastiff\n',
 'cairn\n',
 'cardigan\n',
 'chesapeake_bay_retriever\n',
 'chihuahua\n',
 'chow\n',
 'clumber\n',
 'cocker_spaniel\n',
 'collie\n',
 'curly-coated_retriever\n',
 'dandie_dinmont\n',
 'dhole\n',
 'dingo\n',
 'doberman\n',
 'english_foxhound\n',
 'english_setter\n',
 'english_springer\n',
 'entlebucher\n',
 'eskimo_dog\n',
 'flat-coated_retriever\n',
 'french_bulldog\n',
 'german_shepherd\n',
 'german_short-haired_pointer\n',
 'giant_schnauzer\n',
 'golden_retriever\n',

In [14]:
# data munge: drop the line terminator and turn underscores into spaces.
# rstrip('\r\n') removes only an actual trailing newline, so a final line
# without one keeps its last letter and re-running this cell is harmless
# (slicing with [:-1] would chop one more character on every run).
dog_breeds = [dog.rstrip('\r\n').replace('_', ' ') for dog in dog_breeds]

In [15]:
# inspect the cleaned names: no trailing newline, underscores shown as spaces
dog_breeds

['affenpinscher',
 'afghan hound',
 'african hunting dog',
 'airedale',
 'american staffordshire terrier',
 'appenzeller',
 'australian terrier',
 'basenji',
 'basset',
 'beagle',
 'bedlington terrier',
 'bernese mountain dog',
 'black-and-tan coonhound',
 'blenheim spaniel',
 'bloodhound',
 'bluetick',
 'border collie',
 'border terrier',
 'borzoi',
 'boston bull',
 'bouvier des flandres',
 'boxer',
 'brabancon griffon',
 'briard',
 'brittany spaniel',
 'bull mastiff',
 'cairn',
 'cardigan',
 'chesapeake bay retriever',
 'chihuahua',
 'chow',
 'clumber',
 'cocker spaniel',
 'collie',
 'curly-coated retriever',
 'dandie dinmont',
 'dhole',
 'dingo',
 'doberman',
 'english foxhound',
 'english setter',
 'english springer',
 'entlebucher',
 'eskimo dog',
 'flat-coated retriever',
 'french bulldog',
 'german shepherd',
 'german short-haired pointer',
 'giant schnauzer',
 'golden retriever',
 'gordon setter',
 'great dane',
 'great pyrenees',
 'greater swiss mountain dog',
 'groenendael',


In [16]:
# the keras resnet50 application docs indicate the 1000 classes are the ones
# from the 2012 imagenet challenge
# http://image-net.org/challenges/LSVRC/2012/browse-synsets
# Read inside a `with` block so the file handle is closed right after loading.
with open('./imagenet_2012_classes.txt') as classes_file:
    image_net_classes = classes_file.readlines()

In [20]:
# data munge: lower-case every ImageNet class line so it can be compared
# with the lower-case breed names
image_net_classes = list(map(str.lower, image_net_classes))

In [21]:
# inspect the lower-cased class lines; an entry can list several
# comma-separated names and still ends with ' \n'
image_net_classes

['kit fox, vulpes macrotis \n',
 'english setter \n',
 'australian terrier \n',
 'grey whale, gray whale, devilfish, eschrichtius gibbosus, eschrichtius robustus \n',
 'lesser panda, red panda, panda, bear cat, cat bear, ailurus fulgens \n',
 'egyptian cat \n',
 'ibex, capra ibex \n',
 'persian cat \n',
 'cougar, puma, catamount, mountain lion, painter, panther, felis concolor \n',
 'gazelle \n',
 'porcupine, hedgehog \n',
 'sea lion \n',
 'badger \n',
 'great dane \n',
 'scottish deerhound, deerhound \n',
 'killer whale, killer, orca, grampus, sea wolf, orcinus orca \n',
 'mink \n',
 'african elephant, loxodonta africana \n',
 'red wolf, maned wolf, canis rufus, canis niger \n',
 'jaguar, panther, panthera onca, felis onca \n',
 'hyena, hyaena \n',
 'titi, titi monkey \n',
 'three-toed sloth, ai, bradypus tridactylus \n',
 'sorrel \n',
 'black-footed ferret, ferret, mustela nigripes \n',
 'dalmatian, coach dog, carriage dog \n',
 'staffordshire bullterrier, staffordshire bull terrier 

In [22]:
# helper: is a dog breed mentioned anywhere in a list of category strings?
def dog_in_list(dog, lst):
    """Return True if `dog` occurs as a substring of any entry in `lst`.

    dog: breed name to look for.
    lst: iterable of category strings to search.

    The match is a plain substring test, so e.g. 'collie' is also found
    inside 'border collie'. Returns False when no entry contains `dog`
    (including when `lst` is empty).
    """
    return any(category.find(dog) != -1 for category in lst)

In [23]:
# sanity check: 'australian terrier' appears in the ImageNet class list
# displayed above, so the helper is expected to return True here
dog_in_list('australian terrier', image_net_classes)

True

In [24]:
# print every kaggle challenge breed that has no match in the imagenet 2012
# categories (no output means every breed was found)
for dog in dog_breeds:
    if dog_in_list(dog, image_net_classes):
        continue  # breed is covered by some imagenet category
    print(dog)

In [26]:
# sanity check: append a made-up breed ('foobar') to confirm that the
# "missing breed" loop really reports names absent from the imagenet list
for dog in dog_breeds + ['foobar']:
    if dog_in_list(dog, image_net_classes):
        continue  # found in some imagenet category, nothing to report
    print(dog)

foobar
