In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from d2l import torch as d2l
from torch.utils.data import DataLoader

from load_data import data_transform, FacialKeypointsDataset  # custom facial-keypoints dataset and its transform
from model import get_net  # network factory; it may also be defined in this notebook

**可视化测试结果**

In [3]:
def net_sample_output(test_loader, net, device=None, criterion=None):
    """Run ``net`` on the first batch of ``test_loader`` and return the results.

    The network is switched to evaluation mode and run without gradient
    tracking for the duration of the call; its previous train/eval mode is
    restored afterwards. The per-sample loss of the batch is printed.

    Args:
        test_loader: iterable of dict batches holding an ``'image'`` and a
            ``'keypoints'`` tensor.
        net: the model to evaluate.
        device: device the batch is moved to. Defaults to the device of the
            network's first parameter (CPU if the network has no parameters).
        criterion: callable ``criterion(prediction, target)`` returning the
            batch loss. Defaults to the notebook-level ``loss`` object.

    Returns:
        Tuple ``(images, output_pts, key_pts)``: the input batch on ``device``,
        the predictions reshaped to ``(batch, num_points, 2)`` and the ground
        truth flattened to ``(batch, num_points * 2)`` as float32.

    Raises:
        ValueError: if ``test_loader`` yields no batch.
    """
    if device is None:
        first_param = next(net.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
    if criterion is None:
        criterion = loss  # notebook-level criterion defined in the hyperparameter cell

    was_training = net.training
    net.eval()  # BatchNorm/Dropout must use inference behaviour
    try:
        with torch.no_grad():  # no autograd graph is needed for evaluation
            for sample in test_loader:
                images = sample['image'].to(device)
                key_pts = sample['keypoints'].to(device)

                # flatten the targets and match the dtype used during training
                key_pts = key_pts.view(key_pts.size(0), -1).to(torch.float32)

                output_pts = net(images)

                # same (prediction, target) order as in train_net
                l = criterion(output_pts, key_pts)
                print(l / key_pts.size(0))

                # regroup the flat prediction into (x, y) pairs
                output_pts = output_pts.view(output_pts.size(0), -1, 2)

                # only the first batch is evaluated
                return images, output_pts, key_pts
    finally:
        net.train(was_training)

    raise ValueError('test_loader produced no batches')


def show_all_keypoints(image, predicted_key_pts, gt_pts=None):
    """Draw a grayscale image with its keypoints overlaid.

    Predicted keypoints are drawn in magenta; ground-truth keypoints, when
    supplied, are drawn in green. Both arrays are indexed as ``[:, 0]`` for x
    and ``[:, 1]`` for y.
    """
    dot_style = dict(s=20, marker='.')

    plt.imshow(image, cmap='gray')

    pred_x, pred_y = predicted_key_pts[:, 0], predicted_key_pts[:, 1]
    plt.scatter(pred_x, pred_y, c='m', **dot_style)

    # nothing more to draw without ground truth
    if gt_pts is None:
        return
    plt.scatter(gt_pts[:, 0], gt_pts[:, 1], c='g', **dot_style)


# visualize the output
# by default this shows a batch of 10 images
def visualize_output(test_images, test_outputs, gt_pts=None, batch_size=10,
                     kpt_scale=50.0, kpt_offset=100):
    """Plot a row of images with predicted (and optionally true) keypoints.

    Args:
        test_images: batch of image tensors, each laid out channel-first.
        test_outputs: batch of predicted keypoint tensors, one per image.
        gt_pts: optional batch of ground-truth keypoints (array or tensor),
            in the same normalised units as ``test_outputs``.
        batch_size: maximum number of images to show; fewer are shown when
            the batch is smaller.
        kpt_scale: factor that undoes the keypoint normalisation.
        kpt_offset: offset that undoes the keypoint normalisation.
    """
    # never index past the end of a short batch
    available = [len(test_images), len(test_outputs)]
    if gt_pts is not None:
        available.append(len(gt_pts))
    num_shown = min(batch_size, *available)

    plt.figure(figsize=(16, 8))
    for i in range(num_shown):
        plt.subplot(1, num_shown, i + 1)

        # tensor (C, H, W) -> numpy image (H, W, C)
        image = test_images[i].detach().cpu().numpy()
        image = np.transpose(image, (1, 2, 0))

        # undo normalization of the predicted keypoints
        predicted_key_pts = test_outputs[i].detach().cpu().numpy()
        predicted_key_pts = predicted_key_pts * kpt_scale + kpt_offset

        # plot ground truth points for comparison, if they exist
        ground_truth_pts = None
        if gt_pts is not None:
            ground_truth_pts = gt_pts[i]
            if torch.is_tensor(ground_truth_pts):
                # matplotlib cannot consume (GPU) tensors directly
                ground_truth_pts = ground_truth_pts.detach().cpu().numpy()
            # accept flattened ground truth as well as (num_points, 2)
            ground_truth_pts = ground_truth_pts.reshape(-1, 2)
            ground_truth_pts = ground_truth_pts * kpt_scale + kpt_offset

        show_all_keypoints(np.squeeze(image), predicted_key_pts, ground_truth_pts)

        plt.axis('off')

    plt.show()

**训练函数**

In [4]:
def train_net(num_epochs, net, train_iter, lr, wd, devices, lr_period, lr_decay,
              output_lr=6e-3):
    """Fine-tune ``net`` on ``train_iter`` with Adam and a step LR schedule.

    The batch loss is computed with the notebook-level ``loss`` criterion.
    Progress is printed and plotted roughly five times per epoch, and a
    summary (final average loss and throughput) is printed at the end.

    Args:
        num_epochs: number of passes over ``train_iter``.
        net: model exposing ``features`` and ``output_new`` sub-modules.
        train_iter: sized iterable of dict batches with ``'image'`` and
            ``'keypoints'`` tensors.
        lr: base learning rate (used for ``net.features``).
        wd: weight decay passed to Adam.
        devices: list of devices; training runs on ``devices[0]``.
        lr_period: number of epochs between learning-rate decays.
        lr_decay: multiplicative decay factor applied every ``lr_period`` epochs.
        output_lr: learning rate for ``net.output_new``.

    Raises:
        ValueError: if ``train_iter`` contains no batches.
    """
    num_batches = len(train_iter)
    if num_batches == 0:
        raise ValueError('train_iter yields no batches; nothing to train on')

    # prepare the net for training
    net.train()

    # the freshly initialised head gets its own (larger) learning rate
    optimizer = torch.optim.Adam(
        [{'params': net.features.parameters()},
         {'params': net.output_new.parameters(), 'lr': output_lr}],
        lr=lr, weight_decay=wd)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, lr_period, lr_decay)

    timer = d2l.Timer()
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                            legend=['train loss'])

    # report about five times per epoch; max() keeps the modulus non-zero
    # when an epoch has fewer than five batches
    report_every = max(1, num_batches // 5)

    measures, metric = None, None
    for epoch in range(num_epochs):
        # metric[0]: accumulated loss, metric[1]: number of examples seen
        metric = d2l.Accumulator(2)

        for batch_i, data in enumerate(train_iter):
            images = data['image']
            key_pts = data['keypoints']

            timer.start()
            images, key_pts = images.to(devices[0]), key_pts.to(devices[0])

            # flatten pts
            key_pts = key_pts.view(key_pts.size(0), -1).to(torch.float32)

            # clear gradients first so that leftovers from an interrupted
            # run cannot leak into this update
            optimizer.zero_grad()

            # forward pass, loss, backward pass, weight update
            output_pts = net(images)
            l = loss(output_pts, key_pts)
            l.backward()
            optimizer.step()

            metric.add(l.item(), key_pts.shape[0])

            timer.stop()

            # print and plot the running average loss
            if (batch_i + 1) % report_every == 0 or batch_i == num_batches - 1:
                avg_loss = metric[0] / metric[1]
                print('Epoch: {}, Batch: {}, Avg. Loss: {}'.format(epoch + 1, batch_i+1, avg_loss))
                animator.add(epoch + (batch_i + 1) / num_batches, (avg_loss, None))

        measures = f'train loss {metric[0] / metric[1]:.3f}'
        scheduler.step()

    # no epoch was run (num_epochs <= 0): there is nothing to summarise
    if measures is None:
        return

    print(measures + f'\n{metric[1] * num_epochs / timer.sum():.1f}'
                     f' examples/sec on {str(devices)}')

**定义网络模型**

In [5]:
def try_all_gpus():  #@save
    """Return all available CUDA devices, or ``[torch.device('cpu')]`` if there are none."""
    gpu_count = torch.cuda.device_count()
    if gpu_count == 0:
        return [torch.device('cpu')]
    return [torch.device(f'cuda:{idx}') for idx in range(gpu_count)]

# get_net can be imported from model.py (see the imports cell) or defined here; this definition shadows the imported one
def get_net(devices):
    """Build the keypoint regressor: pretrained ResNet-101 plus a new head.

    The backbone's first convolution is replaced by a 1-input-channel
    version so that single-channel images can be fed in. Its filters are
    initialised from the pretrained RGB filters (summed over the colour
    channels) rather than at random, so the pretrained stem is kept. The new
    head maps the backbone's 1000 outputs to 136 values.

    Args:
        devices: list of devices; the model is moved to ``devices[0]``.

    Returns:
        An ``nn.Sequential`` with sub-modules ``features`` (backbone) and
        ``output_new`` (regression head), applied in that order. All
        parameters are left trainable.
    """
    # NOTE(review): `pretrained=True` is deprecated in newer torchvision in
    # favour of `weights=...`; kept for compatibility with older versions.
    backbone = torchvision.models.resnet101(pretrained=True)

    # single-channel stem with the same geometry as the original conv1
    gray_conv = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    with torch.no_grad():
        # summing the RGB filters equals feeding the gray image on all three channels
        gray_conv.weight.copy_(backbone.conv1.weight.sum(dim=1, keepdim=True))
    backbone.conv1 = gray_conv

    finetune_net = nn.Sequential()
    finetune_net.features = backbone

    # new output layers
    finetune_net.output_new = nn.Sequential(nn.Linear(1000, 256),
                                            nn.ReLU(),
                                            nn.Linear(256, 136))

    # move the parameters to the CPU or the first available GPU
    finetune_net = finetune_net.to(devices[0])

    return finetune_net

**加载数据**

In [6]:
# Training split: annotations come from the CSV, images from data/training/.
transformed_dataset = FacialKeypointsDataset(
    csv_file='data/training_frames_keypoints.csv',
    root_dir='data/training/',
    transform=data_transform,
)

# Mini-batch size shared by the training and the test loader.
batch_size = 16

train_loader = DataLoader(transformed_dataset, batch_size=batch_size,
                          shuffle=True, num_workers=0)

# Test split, built with the same transform as the training split.
test_dataset = FacialKeypointsDataset(
    csv_file='data/test_frames_keypoints.csv',
    root_dir='data/test/',
    transform=data_transform,
)

test_loader = DataLoader(test_dataset, batch_size=batch_size,
                         shuffle=True, num_workers=0)

**定义超参数以及损失函数**

In [7]:
# Devices to run on: every visible GPU, otherwise the CPU.
devices = try_all_gpus()

# Optimiser settings: epochs, base learning rate and weight decay.
num_epochs = 10
lr = 9e-5
wd = 0

# Learning-rate schedule: multiply the rate by lr_decay every lr_period epochs.
lr_period = 2
lr_decay = 0.95

# Model, placed on the first device.
net = get_net(devices)

# Loss used for training and evaluation; summed (not averaged) over the batch.
loss = nn.SmoothL1Loss(reduction='sum')

**开始训练**

In [1]:
# Train with the hyperparameters defined in the configuration cell.
train_net(
    num_epochs=num_epochs,
    net=net,
    train_iter=train_loader,
    lr=lr,
    wd=wd,
    devices=devices,
    lr_period=lr_period,
    lr_decay=lr_decay,
)


KeyboardInterrupt



**保存网络参数**

In [None]:
# save model's paraments
model_dir = 'saved_models/'
model_name = 'keypoints_model_2.pt'

# after training, save your model parameters in the dir 'saved_models'
torch.save(net.state_dict(), model_dir + model_name)

**加载网络参数**

In [None]:
# load net paramenters
state_dict = torch.load('./saved_models/keypoints_model_2.pt')
net.load_state_dict(state_dict)

**在测试集上进行测试与可视化**

In [None]:
# Predict on one test batch, then plot the predictions against the ground truth.
test_images, test_outputs, gt_pts = net_sample_output(test_loader, net)

# the ground truth comes back flattened per sample; regroup it into (x, y) pairs
gt_pts = gt_pts.cpu().numpy().reshape(gt_pts.shape[0], -1, 2)

visualize_output(test_images, test_outputs, gt_pts)