# 完全采用预先训练的模型（VGG16 与 ResNet50）

## 载入套件

In [49]:
import torch
from torchvision import models
from torch import nn
import numpy as np
from torchsummary import summary

## 检查 GPU

In [50]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
# Echo the selected device type as the cell output
"cuda" if use_cuda else "cpu"

'cuda'

## 使用较简单的 VGG16 预先训练模型

In [51]:
# Load the VGG16 architecture together with its pretrained weights.
# NOTE(review): torchvision 0.13+ deprecates `pretrained=` in favour of `weights=`;
# confirm the installed torchvision version before migrating.
model = models.vgg16(pretrained=True)

## 显示神经层名称

In [52]:
# Print the position and name of every top-level child module of the model
child_names = [name for name, _ in model.named_children()]
for position, name in enumerate(child_names):
    print("Children Counter: ", position, " Layer Name: ", name)
# Keep the final count available under the original variable name
children_counter = len(child_names)

Children Counter:  0  Layer Name:  features
Children Counter:  1  Layer Name:  avgpool
Children Counter:  2  Layer Name:  classifier


## 显示神经层明细

In [53]:
# NOTE: `modules` is referenced without being called, so the cell displays the
# bound method's repr, which embeds the model's full layer listing.
model.modules

<bound method Module.modules of VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2,

In [54]:
# Wrap the model's top-level children in one Sequential container and display it
torch.nn.Sequential(*model.children())

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [55]:
# Names of the top-level submodules, read from the module's internal `_modules` mapping
model._modules.keys()

odict_keys(['features', 'avgpool', 'classifier'])

In [56]:
# Display the layers held by the `features` submodule
model.features

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [57]:
# Display the first layer of the `features` submodule
model.features[0]

Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

In [58]:
# Shape of the weight tensor of the last classifier layer
model.classifier[-1].weight.shape

torch.Size([1000, 4096])

In [59]:
# Number of outputs produced by the last classifier layer
model.classifier[-1].out_features

1000

In [60]:
# Move the model to the selected device, then print a per-layer summary
# (output shapes and parameter counts) for a 3x224x224 input.
model = model.to(device)
summary(model, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

## 预测

In [61]:
from PIL import Image
from torchvision import transforms

filename = './images_test/cat.jpg'
# Force 3-channel RGB so grayscale / RGBA / palette images still match the
# 3-channel Normalize below; the context manager closes the file handle.
with Image.open(filename) as image_file:
    input_image = image_file.convert('RGB')

# Preprocessing: resize to 224x224, convert to a tensor, normalize per channel
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
input_tensor = transform(input_image)
input_batch = input_tensor.unsqueeze(0).to(device)  # add a leading batch dimension

# Predict: evaluation mode, no gradient tracking
model.eval()
with torch.no_grad():
    output = model(input_batch)

# Convert the scores of the single image into probabilities
probabilities = torch.nn.functional.softmax(output[0], dim=0)
print(probabilities)

tensor([1.2919e-07, 1.5584e-07, 1.4383e-08, 3.3418e-08, 1.0265e-08, 4.9484e-07,
        3.7471e-09, 1.5671e-06, 4.1735e-05, 1.9252e-07, 1.1889e-06, 8.8801e-07,
        1.7900e-07, 2.2456e-07, 1.6816e-07, 5.1253e-07, 5.8631e-07, 3.2773e-06,
        1.4087e-07, 6.9014e-08, 2.2485e-08, 1.1658e-06, 1.4162e-08, 1.9906e-07,
        5.3443e-08, 1.3154e-07, 5.0066e-08, 2.2081e-08, 4.4614e-08, 8.9073e-08,
        2.1973e-07, 8.9349e-08, 4.9333e-07, 2.0631e-08, 2.4685e-08, 1.3508e-07,
        2.5747e-07, 1.1297e-07, 7.7027e-07, 6.1187e-07, 1.0071e-07, 9.4148e-08,
        1.4972e-07, 7.9715e-07, 7.1481e-08, 2.0998e-07, 3.1283e-07, 3.9553e-07,
        4.4294e-08, 2.4552e-08, 2.4496e-08, 8.8340e-07, 9.2596e-08, 4.1781e-08,
        4.0205e-07, 8.5802e-08, 1.0914e-07, 5.9416e-08, 1.0600e-07, 2.6743e-07,
        9.5958e-07, 8.0703e-08, 3.8257e-07, 2.2054e-06, 1.3449e-07, 7.1887e-08,
        4.2432e-06, 1.6962e-07, 6.2754e-07, 5.3072e-08, 9.3610e-08, 5.3698e-08,
        5.1644e-08, 1.3515e-07, 5.8192e-

In [62]:
# Show the index of the most probable class together with its probability
top_prob, top_index = torch.max(probabilities, dim=0)
print(f'{top_index.item()}: {top_prob.item()}')

285: 0.6240770816802979


In [63]:
# Show the name of the most probable class
with open("imagenet.categories", "r") as f:
    # Keep only the first comma-separated field of every line
    categories = [line.strip().split(',')[0] for line in f]
best_index = torch.argmax(probabilities).item()
categories[best_index]

'Egyptian cat'

In [64]:
filename = './images_test/tiger2.jpg'
# Force 3-channel RGB so grayscale / RGBA / palette images still match the
# 3-channel Normalize below; the context manager closes the file handle.
with Image.open(filename) as image_file:
    input_image = image_file.convert('RGB')

# Preprocessing: resize to 224x224, convert to a tensor, normalize per channel
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
input_tensor = transform(input_image)
input_batch = input_tensor.unsqueeze(0).to(device)  # add a leading batch dimension

# Predict: evaluation mode, no gradient tracking
model.eval()
with torch.no_grad():
    output = model(input_batch)

# Convert to probabilities and report the best class (index, name, probability)
probabilities = torch.nn.functional.softmax(output[0], dim=0)
max_prob, max_index = torch.max(probabilities, dim=0)
max_item = max_index.item()
print(f'{max_item} {categories[max_item]}: {max_prob.item()}')

292 tiger: 0.9213505983352661


## 使用 resnet50 模型

In [65]:
# Load the class names, one per line, from imagenet_classes.txt
with open("imagenet_classes.txt", "r") as f:
    # Strip surrounding whitespace (including the newline) from every line
    categories = [line.strip() for line in f]

In [66]:
# Load the pretrained resnet50 model and move it to the selected device
model = models.resnet50(pretrained=True).to(device)

# Predict
filename = './images_test/cat.jpg'
# Force 3-channel RGB so grayscale / RGBA / palette images still match the
# 3-channel Normalize below; the context manager closes the file handle.
with Image.open(filename) as image_file:
    input_image = image_file.convert('RGB')

# Preprocessing: resize directly to 224x224, convert to a tensor, normalize per channel
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
input_tensor = transform(input_image)
input_batch = input_tensor.unsqueeze(0).to(device)  # add a leading batch dimension

# Evaluation mode, no gradient tracking
model.eval()
with torch.no_grad():
    output = model(input_batch)

# Convert to probabilities and report the best class (index, name, probability)
probabilities = torch.nn.functional.softmax(output[0], dim=0)
max_prob, max_index = torch.max(probabilities, dim=0)
max_item = max_index.item()
print(f'{max_item} {categories[max_item]}: {max_prob.item()}')

285 Egyptian cat: 0.26286613941192627


## 官网范例的前处理：先 Resize(256)，再 CenterCrop(224)

In [67]:
# Load the pretrained resnet50 model and move it to the selected device
model = models.resnet50(pretrained=True).to(device)

# Predict
filename = './images_test/cat.jpg'
# Force 3-channel RGB so grayscale / RGBA / palette images still match the
# 3-channel Normalize below; the context manager closes the file handle.
with Image.open(filename) as image_file:
    input_image = image_file.convert('RGB')

# Preprocessing: resize to 256, center-crop to 224, convert to a tensor,
# normalize per channel
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
input_tensor = transform(input_image)
input_batch = input_tensor.unsqueeze(0).to(device)  # add a leading batch dimension

# Evaluation mode, no gradient tracking
model.eval()
with torch.no_grad():
    output = model(input_batch)

# Convert to probabilities and report the best class (index, name, probability)
probabilities = torch.nn.functional.softmax(output[0], dim=0)
max_prob, max_index = torch.max(probabilities, dim=0)
max_item = max_index.item()
print(f'{max_item} {categories[max_item]}: {max_prob.item()}')

281 tabby: 0.2819097936153412


In [68]:
# Show the five most probable classes with their probabilities
top5_prob, top5_catid = torch.topk(probabilities, 5)
for class_index, class_prob in zip(top5_catid.tolist(), top5_prob.tolist()):
    print(f'{categories[class_index]:12s}:{class_prob}')

tabby       :0.2819097936153412
tiger cat   :0.19214917719364166
Egyptian cat:0.18028706312179565
lynx        :0.17349961400032043
hamper      :0.01312144286930561


In [69]:
# Sanity check: add up all class probabilities (the total should be close to 1)
sum(probabilities.cpu().numpy())

0.9999999149172965

In [70]:
# Indices of the five largest probabilities, highest first, computed with NumPy
# (`np` is already imported in the notebook's import cell)
ascending_order = np.argsort(probabilities.cpu().numpy())
ascending_order[::-1][:5]

array([281, 282, 285, 287, 588], dtype=int64)

In [71]:
# Map the indices of the five largest probabilities (highest first) to class names
np.array(categories)[probabilities.cpu().numpy().argsort()[-5:][::-1]]

array(['tabby', 'tiger cat', 'Egyptian cat', 'lynx', 'hamper'],
      dtype='<U30')

In [72]:
filename = './images_test/tiger2.jpg'
# Force 3-channel RGB so grayscale / RGBA / palette images still match the
# 3-channel Normalize below; the context manager closes the file handle.
with Image.open(filename) as image_file:
    input_image = image_file.convert('RGB')

# Preprocessing: resize directly to 224x224, convert to a tensor, normalize per channel
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
input_tensor = transform(input_image)
input_batch = input_tensor.unsqueeze(0).to(device)  # add a leading batch dimension

# Predict with the model currently bound to `model`: evaluation mode, no gradients
model.eval()
with torch.no_grad():
    output = model(input_batch)

# Convert to probabilities and report the best class (index, name, probability)
probabilities = torch.nn.functional.softmax(output[0], dim=0)
max_prob, max_index = torch.max(probabilities, dim=0)
max_item = max_index.item()
print(f'{max_item} {categories[max_item]}: {max_prob.item()}')

292 tiger: 0.9283719062805176
