In [1]:
import glob
import os
import re

from PIL import Image
import torch
from torchvision import transforms

Refer for code: https://pytorch.org/hub/pytorch_vision_alexnet/

In [2]:
with open('./imagenet_classes.txt', 'r') as f:
    categories = [s.strip() for s in f.readlines()]

print(categories)

['tench', 'goldfish', 'great white shark', 'tiger shark', 'hammerhead', 'electric ray', 'stingray', 'cock', 'hen', 'ostrich', 'brambling', 'goldfinch', 'house finch', 'junco', 'indigo bunting', 'robin', 'bulbul', 'jay', 'magpie', 'chickadee', 'water ouzel', 'kite', 'bald eagle', 'vulture', 'great grey owl', 'European fire salamander', 'common newt', 'eft', 'spotted salamander', 'axolotl', 'bullfrog', 'tree frog', 'tailed frog', 'loggerhead', 'leatherback turtle', 'mud turtle', 'terrapin', 'box turtle', 'banded gecko', 'common iguana', 'American chameleon', 'whiptail', 'agama', 'frilled lizard', 'alligator lizard', 'Gila monster', 'green lizard', 'African chameleon', 'Komodo dragon', 'African crocodile', 'American alligator', 'triceratops', 'thunder snake', 'ringneck snake', 'hognose snake', 'green snake', 'king snake', 'garter snake', 'water snake', 'vine snake', 'night snake', 'boa constrictor', 'rock python', 'Indian cobra', 'green mamba', 'sea snake', 'horned viper', 'diamondback', 

In [3]:
# Path to all image files (jpg/jpeg)
data_path = '../data/*.j*'


# Creating a mapping in the form of filename: path
basename_path_mapping = {}

for file in glob.glob(data_path):
    basename_path_mapping[os.path.basename(file).split('.')[0]] = file
    
print(basename_path_mapping)

{'crab8': '../data/crab8.jpg', 'couch1': '../data/couch1.jpeg', 'rose3': '../data/rose3.jpg', 'mushroom5': '../data/mushroom5.jpeg', 'rose5': '../data/rose5.jpeg', 'rose6': '../data/rose6.jpg', 'rose4': '../data/rose4.jpeg', 'mushroom4': '../data/mushroom4.jpeg', 'rose7': '../data/rose7.jpg', 'mushroom8': '../data/mushroom8.jpeg', 'dinosaur8': '../data/dinosaur8.jpg', 'mushroom3': '../data/mushroom3.jpeg', 'aquariumfish8': '../data/aquariumfish8.jpg', 'couch7': '../data/couch7.jpeg', 'couch6': '../data/couch6.jpeg', 'rose2': '../data/rose2.jpeg', 'mushroom2': '../data/mushroom2.jpeg', 'aquariumfish1': '../data/aquariumfish1.jpg', 'mushroom1': '../data/mushroom1.jpeg', 'couch5': '../data/couch5.jpeg', 'aquariumfish2': '../data/aquariumfish2.jpg', 'aquariumfish3': '../data/aquariumfish3.jpg', 'aquariumfish7': '../data/aquariumfish7.jpg', 'aquariumfish6': '../data/aquariumfish6.jpg', 'couch4': '../data/couch4.jpeg', 'aquariumfish4': '../data/aquariumfish4.jpg', 'aquariumfish5': '../data/a

In [4]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)
model.eval()

Using cache found in /Users/alindjain11/.cache/torch/hub/pytorch_vision_v0.10.0


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [5]:
# Traversing each image and getting top 15 probabilities using ViT
for label, path in basename_path_mapping.items():
    input_image = Image.open(path)
    preprocess = transforms.Compose([
        transforms.Resize((384, 384)),
        transforms.ToTensor(),
        transforms.Normalize(0.5, 0.5),
    ])
    input_tensor = preprocess(input_image)
    input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

    # move the input and model to GPU for speed if available
    if torch.cuda.is_available():
        input_batch = input_batch.to('cuda')
        model.to('cuda')

    with torch.no_grad():
        output = model(input_batch)
    # Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
    # print(output[0])
    # The output has unnormalized scores. To get probabilities, you can run a softmax on it.
    probabilities = torch.nn.functional.softmax(output[0], dim=0)
    # print(probabilities)
    print('-----------------')
    print(label)
    print('-----------------')
    top15_prob, top15_catid = torch.topk(probabilities, 15)
    for i in range(top15_prob.size(0)):
        print(categories[top15_catid[i]], top15_prob[i].item())
    print()
    print()

-----------------
crab8
-----------------
fiddler crab 0.4847469925880432
leafhopper 0.17952479422092438
hermit crab 0.09018038958311081
cricket 0.021281231194734573
conch 0.020605843514204025
scorpion 0.016891181468963623
wolf spider 0.01387973316013813
isopod 0.011847618967294693
lacewing 0.009972975589334965
barn spider 0.009387830272316933
cicada 0.0059675551019608974
mantis 0.005773933604359627
grasshopper 0.005317923612892628
tick 0.005073176696896553
horned viper 0.0049798679538071156


-----------------
couch1
-----------------
studio couch 0.5277934670448303
folding chair 0.10098237544298172
rocking chair 0.0582786463201046
cradle 0.0535900741815567
pedestal 0.040155377238988876
mousetrap 0.01635017618536949
fire screen 0.015161354094743729
chest 0.014442292973399162
plate rack 0.013153658248484135
wardrobe 0.010898173786699772
dining table 0.009402037598192692
file 0.009053438901901245
table lamp 0.008646270260214806
panpipe 0.007474792189896107
desk 0.006625564303249121


--

-----------------
mushroom1
-----------------
agaric 0.2545754909515381
prayer rug 0.026227999478578568
mushroom 0.02426217310130596
croquet ball 0.01888134889304638
soccer ball 0.018002796918153763
plastic bag 0.017392953857779503
earthstar 0.017370129004120827
bolete 0.01628468930721283
pot 0.01024589128792286
shopping basket 0.009775780141353607
golf ball 0.009055284783244133
chainlink fence 0.008665094152092934
hen-of-the-woods 0.00822183396667242
umbrella 0.0075928447768092155
birdhouse 0.007515794597566128


-----------------
couch5
-----------------
whistle 0.0476926751434803
pill bottle 0.04623160883784294
screw 0.0449608713388443
spotlight 0.03497586399316788
spatula 0.029732516035437584
nipple 0.02887340821325779
thimble 0.021624688059091568
pick 0.020351765677332878
Band Aid 0.01863970048725605
lotion 0.01845749281346798
rubber eraser 0.018350988626480103
hair slide 0.01541473250836134
switch 0.015381185337901115
mortar 0.014841715805232525
pencil sharpener 0.012160448357462

-----------------
mushroom7
-----------------
golf ball 0.13733738660812378
agaric 0.06407371908426285
Indian cobra 0.0638052299618721
croquet ball 0.04863130301237106
acorn 0.038484375923871994
mushroom 0.028142161667346954
prairie chicken 0.02569478005170822
baseball 0.02476804330945015
electric ray 0.0214252769947052
partridge 0.02070583589375019
stinkhorn 0.018520772457122803
brain coral 0.017239253968000412
sombrero 0.014159150421619415
honeycomb 0.013884241692721844
hen-of-the-woods 0.013712313957512379


-----------------
dinosaur7
-----------------
triceratops 0.19268618524074554
frilled lizard 0.09283135086297989
hippopotamus 0.07208016514778137
mongoose 0.0421772375702858
Komodo dragon 0.035403624176979065
Chesapeake Bay retriever 0.028893103823065758
Indian elephant 0.02703404426574707
baboon 0.021488789469003677
African elephant 0.018985560163855553
common iguana 0.01868784800171852
tusker 0.017965808510780334
marmoset 0.012654021382331848
macaque 0.01106947474181652
partri

In [6]:
# Create a mapping of class items to super class
classes = {
    'dinosaur': [51],
    'couch': [831],
    'mushroom': [947, 992, 993, 994, 995, 996, 997],
    'crab': [118, 119, 120, 121, 125],
    'aquariumfish': [1, 5, 107, 123, 124, 327, 395, 396, 397],
    'rose': []
}

# Displaying the names of the classes in imagenet
for key, value in classes.items():
    print('{}: {}'.format(key, [categories[idx] for idx in value]))

dinosaur: ['triceratops']
couch: ['studio couch']
mushroom: ['mushroom', 'agaric', 'gyromitra', 'stinkhorn', 'earthstar', 'hen-of-the-woods', 'bolete']
crab: ['Dungeness crab', 'rock crab', 'fiddler crab', 'king crab', 'hermit crab']
aquariumfish: ['goldfish', 'electric ray', 'jellyfish', 'spiny lobster', 'crayfish', 'starfish', 'gar', 'lionfish', 'puffer']
rose: []


In [7]:
# Traversing each image and getting top 15 probabilities using ViT
for label, path in basename_path_mapping.items():
    input_image = Image.open(path)
    preprocess = transforms.Compose([
        transforms.Resize((384, 384)),
        transforms.ToTensor(),
        transforms.Normalize(0.5, 0.5),
    ])
    input_tensor = preprocess(input_image)
    input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

    # move the input and model to GPU for speed if available
    if torch.cuda.is_available():
        input_batch = input_batch.to('cuda')
        model.to('cuda')

    with torch.no_grad():
        output = model(input_batch)
    # Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
    # print(output[0])
    # The output has unnormalized scores. To get probabilities, you can run a softmax on it.
    probabilities = torch.nn.functional.softmax(output[0], dim=0)
    
    # Getting the class probability for the image
    image_class = re.sub(r'\d', '', label)
    print('{}: {}'.format(label, sum([probabilities[idx] for idx in classes[image_class]])))

crab8: 0.5801572799682617
couch1: 0.5277934670448303
rose3: 0
mushroom5: 0.4125806391239166
rose5: 0
rose6: 0
rose4: 0
mushroom4: 0.2643871307373047
rose7: 0
mushroom8: 0.01379454042762518
dinosaur8: 0.08868378400802612
mushroom3: 0.02413623034954071
aquariumfish8: 0.05989142507314682
couch7: 0.007264884654432535
couch6: 0.0020429962314665318
rose2: 0
mushroom2: 0.05683346837759018
aquariumfish1: 0.1842738687992096
mushroom1: 0.32866334915161133
couch5: 0.0033420086838304996
aquariumfish2: 0.0020575884263962507
aquariumfish3: 0.01684151031076908
aquariumfish7: 0.04826376959681511
aquariumfish6: 0.6911879181861877
couch4: 0.0760180875658989
aquariumfish4: 0.316579133272171
aquariumfish5: 0.05061708390712738
couch8: 0.05742481350898743
dinosaur5: 0.0028989610727876425
crab1: 0.5912103652954102
couch3: 0.013202755711972713
dinosaur4: 0.0021999634336680174
dinosaur6: 0.38452383875846863
crab2: 0.7931938171386719
crab3: 0.03033953718841076
mushroom7: 0.14764738082885742
dinosaur7: 0.1926861