## 使用开源的ImageNet上预先训练好的神经网络来进行图像识别

In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import datasets, transforms, models

import helper

## 卷积神经网络

大多数预先训练的模型要求输入为 224x224 像素的图像。同样地，我们需要匹配训练模型时进行的标准化。每个颜色通道都分别进行了标准化，均值为 `[0.485, 0.456, 0.406]`，标准差为 `[0.229, 0.224, 0.225]`。

In [2]:
data_dir = '../../../Cat_Dog_data'

train_transform = transforms.Compose([transforms.RandomRotation(30),  ## 随机旋转30°
                                      transforms.RandomResizedCrop(224),  ## 224x224
                                      transforms.RandomHorizontalFlip(),  ## 随机裁剪
                                      transforms.ToTensor(), ## 转换为张量
                                      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

## 测试集不需要训练集的部分转换
test_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406], 
                                                           [0.229, 0.224, 0.225])])

train_data = datasets.ImageFolder(data_dir + '/train', transform=train_transform)
test_data = datasets.ImageFolder(data_dir + '/test', transform=test_transform)

trainloader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=32)

我们可以载入一个模型，比如 [DenseNet](http://pytorch.org/docs/0.3.0/torchvision/models.html#id5)。现在让我们打印出这个模型的结构，以便了解细节。

In [3]:
model = models.densenet121(pretrained=True)
model

  nn.init.kaiming_normal(m.weight.data)


DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplac

In [2]:
model1 = models.vgg13(pretrained=True)
model1

Downloading: "https://download.pytorch.org/models/vgg13-c768596a.pth" to C:\Users\Zhao.LJ/.torch\models\vgg13-c768596a.pth
99.7%

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (15): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): ReLU(inplace)
    (17): Conv2d

从上面的输出可以看出，模型分两部分，特征层features和分类器classifier，特征层是一堆卷积层，作为特征检测器输入到分类器中；分类器是根据ImageNet上的数据训练的，所以${\color{red}{不能}}$用于解决我们指定的问题，我们需要替换这个分类器

In [4]:
## 不需要梯度，不更新特征参数
## 因为使用已经训练好的特征参数
for param in model.parameters():
    param.requires_grad = False

## 分类器
from collections import OrderedDict


classifier = nn.Sequential(OrderedDict([('fc1', nn.Linear(1024, 500)),  # 模型的特征检测器输入到分类器为1024个结点
                                       ('relu', nn.ReLU()),
                                       ('fc2', nn.Linear(500, 2)),  # 输出有2个，猫或狗
                                       ('output', nn.LogSoftmax(dim=1))]))
model.classifier = classifier

## 训练分类器

比较在GPU和CPU上训练的差别

In [5]:
import time

In [6]:
for cuda in [False, True]:

    criterion = nn.NLLLoss()
    # Only train the classifier parameters, feature parameters are frozen
    optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

    if cuda:
        # 移动模型参数到GPU，在GPU上运行
        model.cuda()
    else:
        model.cpu()

    for ii, (inputs, labels) in enumerate(trainloader):
        inputs, labels = Variable(inputs), Variable(labels)

        if cuda:
            # Move input and label tensors to the GPU
            inputs, labels = inputs.cuda(), labels.cuda()

        start = time.time()

        outputs = model.forward(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if ii==3:
            break
        
    print(f"CUDA = {cuda}; Time per batch: {(time.time() - start)/3:.3f} seconds")

CUDA = False; Time per batch: 14.450 seconds
CUDA = True; Time per batch: 0.030 seconds


## 整体训练

In [7]:
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

In [8]:
epochs = 3
print_every = 40
steps = 0

model.cuda()

for e in range(epochs):
    running_loss = 0
    for ii, (inputs, labels) in enumerate(trainloader):
        steps += 1
        inputs, labels = inputs.cuda(), labels.cuda()
        
        optimizer.zero_grad()
        
        outputs = model.forward(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        if steps % print_every == 0:
            print('Epoch: {}/{}'.format(e + 1, epochs),
                 "Loss: {:.4f}".format(running_loss / print_every))
            running_loss = 0

Epoch: 1/3 Loss: 0.2009
Epoch: 1/3 Loss: 0.2141
Epoch: 1/3 Loss: 0.1600
Epoch: 1/3 Loss: 0.1550
Epoch: 1/3 Loss: 0.1490
Epoch: 1/3 Loss: 0.2046
Epoch: 1/3 Loss: 0.1646
Epoch: 1/3 Loss: 0.1635
Epoch: 2/3 Loss: 0.0302
Epoch: 2/3 Loss: 0.1417
Epoch: 2/3 Loss: 0.1421
Epoch: 2/3 Loss: 0.1329
Epoch: 2/3 Loss: 0.1547
Epoch: 2/3 Loss: 0.1341
Epoch: 2/3 Loss: 0.1420
Epoch: 2/3 Loss: 0.1521
Epoch: 2/3 Loss: 0.1376
Epoch: 3/3 Loss: 0.0576
Epoch: 3/3 Loss: 0.1581
Epoch: 3/3 Loss: 0.1287
Epoch: 3/3 Loss: 0.1451
Epoch: 3/3 Loss: 0.1320
Epoch: 3/3 Loss: 0.1400
Epoch: 3/3 Loss: 0.1299
Epoch: 3/3 Loss: 0.1361
Epoch: 3/3 Loss: 0.1602


## 测试

In [10]:
correct = 0
total = 0
model.cpu()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 50 %


In [None]:
def do_deep_learning(model, trainloader, epochs, print_every, criterion, optimizer, device='cpu'):
    epochs = epochs
    print_every = print_every
    steps = 0

    # change to cuda
    model.to('cuda')

    for e in range(epochs):
        running_loss = 0
        for ii, (inputs, labels) in enumerate(trainloader):
            steps += 1

            inputs, labels = inputs.to('cuda'), labels.to('cuda')

            optimizer.zero_grad()

            # Forward and backward passes
            outputs = model.forward(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:
                print("Epoch: {}/{}... ".format(e+1, epochs),
                      "Loss: {:.4f}".format(running_loss/print_every))

                running_loss = 0
    
def check_accuracy_on_test(testloader):    
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
    
    
do_deep_learning(model, trainloader, 3, 40, criterion, optimizer, 'gpu')
check_accuracy_on_test(testloader)