In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [5]:
import argparse
import os
import numpy as np
import pandas as pd

In [6]:
from dataloaders import make_data_loader, make_data_loader2
from modeling.deeplab import *

#### 构建args

In [11]:
def _str2bool(value):
    """Parse a command-line boolean.

    ``type=bool`` is an argparse pitfall: ``bool('False')`` is ``True`` because
    every non-empty string is truthy. This converter accepts the usual
    spellings and rejects anything else with a proper argparse error.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


# Build the option namespace the training scripts expect. Inside the notebook
# the parser is fed an empty argv, so every option takes its default.
parser = argparse.ArgumentParser(description="PyTorch DeeplabV3Plus Training")
parser.add_argument('--backbone', type=str, default='mobilenet',
                    choices=['resnet', 'xception', 'drn', 'mobilenet'],
                    help='backbone name (default: mobilenet)')
parser.add_argument('--out-stride', type=int, default=16,
                    help='network output stride (default: 16)')
parser.add_argument('--dataset', type=str, default='apollo',
                    choices=['apollo'],
                    help='dataset name (default: apollo)')

parser.add_argument('--sync-bn', type=_str2bool, default=None,
                    help='whether to use sync bn (default: auto)')
parser.add_argument('--freeze-bn', type=_str2bool, default=False,
                    help='whether to freeze bn parameters (default: False)')
parser.add_argument('--loss-type', type=str, default='diceplusce',
                    choices=['ce', 'focal', 'dice', 'diceplusce'],
                    help='loss func type (default: diceplusce)')
# training hyper params
parser.add_argument('--batch-size', type=int, default=None,
                    metavar='N', help='input batch size for \
                            training (default: auto)')
parser.add_argument('--test-batch-size', type=int, default=None,
                    metavar='N', help='input batch size for \
                                testing (default: auto)')

# cuda, seed and logging
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--gpu-ids', type=str, default='0',
                    help='use which gpu to train, must be a \
                    comma-separated list of integers only (default=0)')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
# checking point
parser.add_argument('--resume', type=str, default=None,
                    help='put the path to resuming file if needed')
parser.add_argument('--checkname', type=str, default=None,
                    help='set the checkpoint name')
# finetuning pre-trained models
parser.add_argument('--ft', action='store_true', default=False,
                    help='finetuning on a different dataset')

# Empty argv: a notebook kernel has no meaningful command line.
args = parser.parse_args([])
args.cuda = not args.no_cuda and torch.cuda.is_available()
if args.cuda:
    try:
        args.gpu_ids = [int(s) for s in args.gpu_ids.split(',')]
    except ValueError:
        raise ValueError('Argument --gpu_ids must be a comma-separated list of integers only')

# gpu_ids is only parsed into a list when CUDA is in use; otherwise it is still
# the raw string, whose len() counts characters rather than devices.
num_devices = len(args.gpu_ids) if args.cuda else 1

# Auto mode: synchronized batch norm only pays off with more than one GPU.
if args.sync_bn is None:
    args.sync_bn = bool(args.cuda and num_devices > 1)

if args.batch_size is None:
    args.batch_size = 4 * num_devices  # change the per-device batch size here

if args.test_batch_size is None:
    args.test_batch_size = args.batch_size


#### 首先构建输入prediction和target，然后做测试

In [122]:
# Build the data generators; the loader also reports the number of classes.
train_gen, val_gen, test_gen, nclass = make_data_loader2(args)

# The CSV listings are read only to learn how many samples each split holds.
train_dir, val_dir = './data_list/train_lite.csv', './data_list/val_lite.csv'
train_list = pd.read_csv(train_dir)
val_list = pd.read_csv(val_dir)
train_length, val_length = len(train_list), len(val_list)

In [123]:
# Instantiate DeepLab with the class count returned by the data loader and the
# backbone / output-stride / batch-norm options taken from args.
model = DeepLab(num_classes=nclass,
                backbone=args.backbone,
                output_stride=args.out_stride,
                sync_bn=args.sync_bn,
                freeze_bn=args.freeze_bn)

In [124]:
model = model.cuda()

In [125]:
num_img_tr = train_length / args.batch_size
num_img_tr

595.75

In [131]:
# Pull a single batch from the training generator and move both tensors to the GPU.
for iteration in range(1):
    samples = next(train_gen)
    image = samples['image'].cuda()
    target = samples['label'].cuda()

在无初始化条件下调用模型

In [31]:
output = model(image)

In [35]:
output.shape[1:]

torch.Size([8, 384, 1024])

In [36]:
target.shape

torch.Size([4, 384, 1024])

加载预训练模型

In [127]:
model_path = './run/apollo/deeplab-mobilenet/model_best.pth (0.79v100).tar'

In [128]:
# Deserialize onto the CPU so loading does not depend on the GPU the checkpoint
# was saved from; load_state_dict() then copies the weights onto whatever
# device the model's parameters already live on.
# NOTE(review): torch.load unpickles the file - only load checkpoints that come
# from a trusted source.
checkpoint = torch.load(model_path, map_location='cpu')

In [132]:
model.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [133]:
output_trained = model(image)

In [138]:
target.shape

torch.Size([4, 384, 1024])

In [137]:
np.array(output_trained.shape)

array([   4,    8,  384, 1024])

#### 测试各种loss

##### CrossEntropyLoss

This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class.

In [119]:
# Minimal CrossEntropyLoss example: 3 samples, 5 classes.
# NOTE(review): this rebinds the notebook-level names `input` (also a Python
# builtin), `target` and `output`, which other cells use for the real label
# batch and the model output - running the notebook top to bottom changes what
# those cells see.
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)  # raw scores, shape (3, 5)
target = torch.empty(3, dtype=torch.long).random_(5)  # one random class index in [0, 5) per sample
output = loss(input, target)
output

tensor(2.4874, grad_fn=<NllLossBackward>)

In [120]:
target.shape

torch.Size([3])

In [121]:
output.backward()
output

tensor(2.4874, grad_fn=<NllLossBackward>)

##### LogSoftmax

logsoftmax不改变尺寸，注意不同dim的含义，参照https://blog.csdn.net/sunyueqinghit/article/details/101113251
以及https://www.cnblogs.com/jeshy/p/10933882.html

In [105]:
# LogSoftmax along dim=0, i.e. normalized across the first axis of the
# (2, 3, 4) random input created here.
m = nn.LogSoftmax(dim=0)
input = torch.randn(2, 3, 4)
input

tensor([[[ 0.4283,  1.5400, -0.8837, -0.5143],
         [-0.0508, -0.4953,  0.1950, -0.2600],
         [ 0.2361, -1.4963,  1.8959,  1.6305]],

        [[-0.8789, -0.0157, -1.1951, -0.2315],
         [ 0.3002, -1.2076, -0.1301,  0.2501],
         [-0.9789, -1.0403, -0.5214,  0.6075]]])

In [106]:
output = m(input)
output

tensor([[[-0.2395, -0.1915, -0.5495, -0.8445],
         [-0.8839, -0.3991, -0.5438, -0.9804],
         [-0.2598, -0.9469, -0.0854, -0.3071]],

        [[-1.5467, -1.7472, -0.8609, -0.5617],
         [-0.5330, -1.1114, -0.8688, -0.4703],
         [-1.4749, -0.4909, -2.5027, -1.3301]]])

In [107]:
# Same input, now normalized along dim=1 (the axis of size 3) for comparison.
m = nn.LogSoftmax(dim=1)
output = m(input)
output

tensor([[[-0.8939, -0.1644, -2.9984, -2.3823],
         [-1.3729, -2.1997, -1.9197, -2.1280],
         [-1.0860, -3.2007, -0.2188, -0.2375]],

        [[-1.6402, -0.5084, -1.7686, -1.5959],
         [-0.4611, -1.7003, -0.7035, -1.1143],
         [-1.7402, -1.5329, -1.0949, -0.7569]]])

In [108]:
# No `dim` given: PyTorch falls back to an implicitly chosen dimension, which
# is a deprecated code path. For this 3-D input the result is identical to the
# dim=0 output printed earlier in the notebook; pass `dim` explicitly in real code.
m = nn.LogSoftmax()
output = m(input)
output

  


tensor([[[-0.2395, -0.1915, -0.5495, -0.8445],
         [-0.8839, -0.3991, -0.5438, -0.9804],
         [-0.2598, -0.9469, -0.0854, -0.3071]],

        [[-1.5467, -1.7472, -0.8609, -0.5617],
         [-0.5330, -1.1114, -0.8688, -0.4703],
         [-1.4749, -0.4909, -2.5027, -1.3301]]])

##### NLLLoss

In [None]:
# NLLLoss consumes log-probabilities, so it is paired with LogSoftmax(dim=1).
# NOTE(review): this cell rebinds the notebook-level names `m`, `loss`,
# `input`, `target` and `output`; other cells use `target` for the real label
# batch, so running this cell before them changes what they operate on.
m = nn.LogSoftmax(dim=1)
loss = nn.NLLLoss()
# input is of size N x C = 3 x 5
input = torch.randn(3, 5, requires_grad=True)
# each element in target has to have 0 <= value < C
target = torch.tensor([1, 0, 4])
output = loss(m(input), target)
output.backward()

# 2D loss example (used, for example, with image inputs)
N, C = 5, 4
loss = nn.NLLLoss()
# input is of size N x C x height x width
data = torch.randn(N, 16, 10, 10)
# a 3x3 convolution without padding shrinks the 10x10 map to 8x8, matching the target below
conv = nn.Conv2d(16, C, (3, 3))
m = nn.LogSoftmax(dim=1)
# each element in target has to have 0 <= value < C
target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
output = loss(m(conv(data)), target)
output.backward()

#### Dice Loss(自己写的)

Dice loss要求输入pred和target形状相同；而CrossEntropy的输入中，pred和target的形状分别为[4, 8, 384, 1024]和[4, 384, 1024]。
所以需要先对target做one_hot编码，将其形状变为[4, 8, 384, 1024]，再计算dice_loss。

One compelling reason for using cross-entropy over dice-coefficient or the similar IoU metric is that the gradients are nicer.

The gradient of cross-entropy with respect to the logits is something like $p - t$, where $p$ is the softmax output and $t$ is the target. Meanwhile, if we try to write the dice coefficient in a differentiable form, $\frac{2pt}{p^2 + t^2}$ or $\frac{2pt}{p + t}$, then the resulting gradients with respect to $p$ are much uglier: $\frac{2t(t^2 - p^2)}{(p^2 + t^2)^2}$ and $\frac{2t^2}{(p + t)^2}$ respectively. It's easy to imagine a case where both $p$ and $t$ are small, and the gradient blows up to some huge value. In general, it seems likely that training will become more unstable.

##### one_hot编码转换

一般用scatter生成one_hot向量，测试如下

In [224]:
index = torch.tensor([[[0,0],[1,0]], [[0,2],[1,0]], [[0,1],[0,0]], [[3,0],[0,0]]])
index.shape

torch.Size([4, 2, 2])

In [225]:
index = index.unsqueeze(dim=1)
index

tensor([[[[0, 0],
          [1, 0]]],


        [[[0, 2],
          [1, 0]]],


        [[[0, 1],
          [0, 0]]],


        [[[3, 0],
          [0, 0]]]])

In [226]:
# Start from zeros and write a 1 at every position that `index` names along dim 1.
onehot = torch.zeros(4, 4, 2, 2).scatter_(1, index, 1)
print(onehot)

tensor([[[[1., 1.],
          [0., 1.]],

         [[0., 0.],
          [1., 0.]],

         [[0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.]]],


        [[[1., 0.],
          [0., 1.]],

         [[0., 0.],
          [1., 0.]],

         [[0., 1.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.]]],


        [[[1., 0.],
          [1., 1.]],

         [[0., 1.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.]]],


        [[[0., 1.],
          [1., 1.]],

         [[0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.]],

         [[1., 0.],
          [0., 0.]]]])


In [227]:
print(onehot.sum(), index.sum())

tensor(16.) tensor(8)


In [217]:
target.shape

torch.Size([4, 384, 1024])

In [222]:
# One-hot encode the label batch: add a channel axis, then scatter ones along it.
index = target.unsqueeze(dim=1)
onehot = torch.zeros(4, 8, 384, 1024).scatter_(1, index.cpu().long(), 1)
onehot.sum()

tensor(1572864.)

In [223]:
4*384*1024

1572864

In [172]:
def make_one_hot(input, num_classes, device=None):
    """Convert a class-index tensor to a one-hot encoded tensor.

    Args:
        input: Tensor of shape [N, 1, *] holding class indices in
            [0, num_classes). Any integer-valued dtype is accepted; it is
            cast to int64 because ``scatter_`` requires an int64 index.
        num_classes: Number of classes, i.e. the size of dim 1 of the result.
        device: Device on which the result is built. ``None`` (the default)
            keeps the original behaviour of returning a CPU tensor; pass
            ``input.device`` to avoid a GPU -> CPU -> GPU round trip.

    Returns:
        A tensor of shape [N, num_classes, *] in the default float dtype, with
        a 1 at each position named by ``input`` along dim 1 and 0 elsewhere.

    Raises:
        ValueError: If ``input`` has fewer than two dimensions.
    """
    if input.dim() < 2:
        raise ValueError(
            'make_one_hot expects a tensor of shape [N, 1, *], got shape %s'
            % (tuple(input.shape),))

    target_device = torch.device('cpu') if device is None else torch.device(device)
    # scatter_ needs the index on the same device as the destination, as int64.
    index = input.to(device=target_device, dtype=torch.long)

    shape = list(index.shape)
    shape[1] = num_classes
    result = torch.zeros(shape, device=target_device)
    return result.scatter_(1, index, 1)

In [173]:
target.unsqueeze(dim=1).shape

torch.Size([4, 1, 384, 1024])

In [174]:
target_onehot = make_one_hot(target.long().unsqueeze(dim=1), 8)
target_onehot.shape

torch.Size([4, 8, 384, 1024])

In [175]:
target.sum()

tensor(81749., device='cuda:0')

In [228]:
target_onehot.sum()

tensor(1572864., device='cuda:0')

In [187]:
target_onehot = target_onehot.cuda()

In [185]:
def BinaryDiceLoss(logit, target, smooth=1, p=2, reduction='mean'):
    """Dice loss for a single class, computed per sample.

    Each sample is flattened and scored as
    ``1 - (2 * sum(logit * target) + smooth) / (sum(logit**p + target**p) + smooth)``.

    Args:
        logit: Predictions of shape [N, *]. Despite the name, DiceLoss in this
            notebook passes softmax probabilities here, not raw logits.
        target: Tensor with the same shape as ``logit``.
        smooth: Constant added to numerator and denominator; it keeps the
            ratio defined when both tensors are all zero.
        p: Exponent applied to both tensors in the denominator.
        reduction: ``'mean'`` (default) or ``'sum'`` over the batch, or
            ``'none'`` to get the per-sample losses of shape [N].

    Returns:
        A scalar tensor for ``'mean'`` / ``'sum'``, a [N] tensor for ``'none'``.

    Raises:
        ValueError: If ``reduction`` is not one of the supported values.
    """
    if reduction not in ('mean', 'sum', 'none'):
        raise ValueError(
            "reduction must be 'mean', 'sum' or 'none', got %r" % (reduction,))

    # Flatten everything except the batch axis; reshape also handles the
    # non-contiguous channel slices that DiceLoss passes in.
    logit = logit.reshape(logit.shape[0], -1)
    target = target.reshape(target.shape[0], -1)

    num = 2 * torch.sum(logit * target, dim=1) + smooth
    den = torch.sum(logit.pow(p) + target.pow(p), dim=1) + smooth
    loss = 1 - num / den

    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss


def DiceLoss(logit, target):
    """Multi-class Dice loss averaged over channels.

    Applies softmax over dim 1 of ``logit``, scores every channel against the
    matching channel of ``target`` with BinaryDiceLoss, and divides the summed
    losses by the number of channels in ``target``.

    Args:
        logit: Raw network scores with classes on dim 1.
        target: One-hot encoded labels with the same layout as ``logit``.

    Returns:
        The per-channel Dice losses summed and divided by ``target.shape[1]``.
    """
    probs = F.softmax(logit, dim=1)
    per_class = [
        BinaryDiceLoss(probs[:, channel], target[:, channel])
        for channel in range(probs.shape[1])
    ]
    return sum(per_class) / target.shape[1]

In [180]:
output_trained.sum()

tensor(53.6216, device='cuda:0', grad_fn=<SumBackward0>)

In [188]:
DiceLoss(output_trained, target_onehot)

tensor(0.4318, device='cuda:0', grad_fn=<DivBackward0>)