# Inception v3 (a.k.a. GoogLeNet v3)

Load the pretrained Inception v3 model from PyTorch Hub

In [1]:
import torch
# Pull the Inception v3 definition and its pretrained weights from the
# pinned torchvision release on PyTorch Hub.
model = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'inception_v3',
    pretrained=True,
)
# Put the network in evaluation mode; being the last expression of the cell,
# this also displays the model's layer structure.
model.eval()

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:00<00:00, 281MB/s] 


Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

Download an example image of a dog



In [2]:
# `urlretrieve` lives in the `urllib.request` submodule, which a plain
# `import urllib` does not load, so import it explicitly.
import urllib.request

# Source URL of the sample image and the local file name it is saved under.
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")

# Download the image into the working directory. Any network or HTTP error
# is allowed to propagate so a failed download is visible right here rather
# than surfacing later as a missing or corrupt file.
urllib.request.urlretrieve(url, filename)

Transform the image as needed for the model

*All pre-trained models expect input images normalized in the same way, i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), where H and W are expected to be at least 299. The images have to be loaded into a range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225].*

In [3]:
from PIL import Image
from torchvision import transforms
# Open the downloaded image inside a context manager so the underlying file
# handle is closed once the pixel data has been read. `convert("RGB")` forces
# the data to be loaded and guarantees exactly three channels, which the
# three-value mean/std normalization below requires (grayscale or RGBA files
# would otherwise fail).
with Image.open(filename) as image_file:
    input_image = image_file.convert("RGB")

# Preprocessing pipeline: resize the shorter side to 299, take the central
# 299x299 crop, convert to a float tensor in [0, 1], then normalize each
# channel with the mean/std values stated in the notebook text.
preprocess = transforms.Compose([
    transforms.Resize(299),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
# Add a leading batch dimension: the model expects a mini-batch of images.
input_batch = input_tensor.unsqueeze(0)

Configure and run the model, print the probabilities

In [4]:
# When a CUDA device is present, run on the GPU for speed; otherwise both the
# model and the input stay where they are.
if torch.cuda.is_available():
    model.to('cuda')
    input_batch = input_batch.to('cuda')

# Inference only, so gradient tracking is disabled for the forward pass.
with torch.no_grad():
    output = model(input_batch)

# First (and only) item of the batch: a tensor of 1000 unnormalized
# confidence scores, one per ImageNet class.
print(output[0])

# Softmax over the class dimension turns the raw scores into probabilities.
probabilities = torch.softmax(output[0], dim=0)
print(probabilities)

tensor([ 1.5651e-01,  1.1059e-01, -4.4388e-01, -6.1264e-02, -2.0064e-01,
         1.6754e-01,  5.8188e-01,  6.2146e-02, -7.6539e-02, -1.1179e+00,
        -2.1889e-01, -5.2645e-01, -1.1834e-01,  6.6330e-02,  8.4940e-01,
        -5.3192e-02, -4.7105e-01, -1.8546e-03,  2.7567e-01,  1.6968e-01,
         4.4267e-01, -3.8555e-01,  1.3717e-01, -5.2117e-01,  5.6665e-04,
        -2.3551e-01, -2.3237e-01, -7.9603e-03,  1.7020e-02, -1.2653e-01,
         2.0522e-01, -7.2862e-03,  1.3082e+00, -2.3156e-01,  9.9151e-01,
        -7.7928e-01,  4.8959e-01, -4.2294e-01, -2.6668e-01, -1.4976e-01,
         3.5121e-01, -1.7511e-01,  1.0286e-01,  4.2324e-01, -1.4387e-01,
         1.3311e-02, -4.6578e-01,  3.6011e-01,  1.5231e-01,  2.0301e-01,
        -1.3625e-01,  1.4966e-01,  2.8367e-01, -3.7219e-01,  9.7788e-01,
         7.9550e-01, -7.4014e-01, -7.8319e-02, -9.5705e-02,  1.2125e-01,
        -1.2760e-01,  3.6893e-01, -4.5279e-01, -7.4862e-01, -8.0980e-02,
         2.1753e-01,  2.7076e-01,  1.0095e+00,  4.0

Transform probabilities into actual labels

In [5]:
# Download ImageNet labels
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

# Read the categories
with open("imagenet_classes.txt", "r") as f:
    categories = [s.strip() for s in f.readlines()]
# Show top categories per image
top5_prob, top5_catid = torch.topk(probabilities, 5)
for i in range(top5_prob.size(0)):
    print(categories[top5_catid[i]], top5_prob[i].item())

--2024-07-22 09:10:34--  https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10472 (10K) [text/plain]
Saving to: ‘imagenet_classes.txt’


2024-07-22 09:10:35 (103 MB/s) - ‘imagenet_classes.txt’ saved [10472/10472]

Samoyed 0.8236806392669678
Arctic fox 0.01424469519406557
white wolf 0.012371598742902279
Pomeranian 0.007867556065320969
keeshond 0.006377127952873707
