## This shows how to use the pre-trained VGG16 model with Batch Norm in PyTorch

#### 1. Import the model

In [1]:
from torchvision import models
import torch
# Tip: dir(models) lists the names exposed by torchvision.models
# (the available image-recognition model constructors).

# Build VGG16 with batch normalization; pretrained=True asks for the pre-trained weights.
# NOTE(review): newer torchvision releases reportedly deprecate `pretrained=` in favour of
# a `weights=` argument — confirm against the installed torchvision version before changing.
VGG16_BN = models.vgg16_bn(pretrained=True)

#### 2. Load the image

In [2]:
# Import Pillow
from PIL import Image

# Force three RGB channels. The preprocessing below normalizes with 3-value
# mean/std lists and the network's first layer is Conv2d(3, 64, ...), so a
# grayscale, CMYK or RGBA file would otherwise produce a tensor with the wrong
# number of channels.
img = Image.open("dog.jpg").convert("RGB")

#### 3. Normalize and load the image into a PyTorch tensor
    
    The tensor should follow the format C x H x W (channel, height, width); a leading batch dimension is then added before it is passed to the model.

In [3]:
from torchvision import transforms

# Preprocessing pipeline, applied in the order listed: resize, center-crop to
# 224, convert to a tensor, then normalize each of the three channels.
# NOTE(review): the mean/std values are presumably the ImageNet channel
# statistics the pre-trained weights expect — confirm.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

image = transform(img)

# Insert a new leading axis so the single image becomes a batch of one.
batch_image = image.unsqueeze(0)

# Last expression of the cell: display the per-image tensor shape.
image.shape

torch.Size([3, 224, 224])

#### 4. Set the network to evaluate mode

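    Evaluation mode makes the BatchNorm layers use their stored running statistics (and disables dropout) instead of per-batch statistics; without it the pre-trained model does not give the correct output at inference time.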

In [4]:
# Switch the network to evaluation mode. As the cell's last expression, the
# returned module is displayed, which prints the layer structure.
VGG16_BN.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

#### 5. Make a Prediction
    
    Pass the batch_image into the model to make a prediction

In [5]:
import time

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if the system
# time is adjusted while the forward pass is running.
start = time.perf_counter()

# Inference only: no_grad() turns off gradient tracking for the forward pass.
with torch.no_grad():
    out = VGG16_BN(batch_image)

end = time.perf_counter()

print('forward took ', (end-start),'seconds')

# One row of class scores per image in the batch.
print(out.shape)

forward took  1.871323585510254 seconds
torch.Size([1, 1000])


#### 6. Load the Labels of the ImageNet dataset

In [6]:
# Read the class-name file line by line, stripping surrounding whitespace
# (including the trailing newline) from every entry.
with open('imagenet_classes.txt') as f:
    labels = list(map(str.strip, f))

#### 7. Look at the top Prediction

In [7]:
# Position of the largest score in each row of `out`; the max values themselves
# are not needed.
_, index = out.max(dim=1)

# Softmax across the class axis, take the first (only) batch row, and scale
# the probabilities to percentages.
percentage = 100 * torch.nn.functional.softmax(out, dim=1)[0]

# Label and confidence of the best-scoring class.
print(labels[index[0]], percentage[index[0]].item())

258, Samoyed 66.24825286865234


#### 8. Look at the top 5 predictions

In [8]:
# Order all class indices by score, highest first, then pair the five best
# labels with their percentages (computed in the previous step).
_, indices = out.sort(dim=1, descending=True)
[(labels[class_id], percentage[class_id].item()) for class_id in indices[0][:5]]

[('258, Samoyed', 66.24825286865234),
 ('259, Pomeranian', 17.07351303100586),
 ('270, white_wolf', 2.6645517349243164),
 ('261, keeshond', 2.022033452987671),
 ('231, collie', 1.6543662548065186)]