## Bonus:這裡示範如何將SSD改成類似RetinaNet的結構

In [0]:
from layers.box_utils import *
from layers import box_utils
from layers import Detect
from layers import functions
from layers import modules
from math import sqrt as sqrt
from itertools import product as product
from torch.autograd import Variable
from torch.autograd import Function
from layers.box_utils import decode, nms
import torch.nn as nn

import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import torch.utils.data as data
import numpy as np
import argparse
import torchvision
import pickle
import torch.nn.functional as F


## 建構一個有FPN+ASPP的BackBone

In [0]:
class ASPP(nn.Module):
    """Atrous spatial pyramid pooling (ASPP) along the width axis.

    One dilated ``(1, 3)`` convolution is registered per entry of ``rates``
    (child modules ``c0``, ``c1``, ...). ``forward`` returns the sum of the
    outputs of all branches.

    Args:
        in_ch: number of input channels of every branch.
        out_ch: number of output channels of every branch.
        rates: iterable of dilation rates. Each rate is also used as the
            horizontal padding of its branch.
    """

    def __init__(self, in_ch, out_ch, rates):
        super(ASPP, self).__init__()
        # First pass: create and register one dilated convolution per rate.
        for index, dilation in enumerate(rates):
            branch = nn.Conv2d(
                in_ch,
                out_ch,
                kernel_size=(1, 3),
                stride=1,
                padding=(0, dilation),
                dilation=(1, dilation),
                bias=True,
            )
            self.add_module("c%d" % index, branch)

        # Second pass: overwrite the default initialisation of every branch.
        for branch in self.children():
            nn.init.normal_(branch.weight, mean=0, std=0.01)
            nn.init.constant_(branch.bias, 0)

    def forward(self, x):
        """Apply every branch to ``x`` and return the element-wise sum."""
        total = 0
        for branch in self.children():
            total = total + branch(x)
        return total
    
class Block(nn.Module):
    """Inverted-residual unit: 1x1 expand -> depthwise (1, 5) -> 1x1 project.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        expansion: multiplier applied to ``in_planes`` to get the width of
            the depthwise stage.
        stride: stride of the depthwise convolution. The residual connection
            is only used when ``stride == 1``.
    """

    def __init__(self, in_planes, out_planes, expansion, stride):
        super(Block, self).__init__()
        self.stride = stride
        hidden = expansion * in_planes

        # 1x1 expansion.
        self.conv1 = nn.Conv2d(in_planes, hidden, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(hidden)
        # Depthwise convolution (groups == channels) with a (1, 5) kernel.
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=(1, 5),
                               stride=stride, padding=(0, 2),
                               groups=hidden, bias=False)
        self.bn2 = nn.BatchNorm2d(hidden)
        # 1x1 linear projection (no activation afterwards).
        self.conv3 = nn.Conv2d(hidden, out_planes, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Identity shortcut unless the channel count changes at stride 1,
        # in which case a 1x1 conv + BN projects the input.
        needs_projection = stride == 1 and in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Run the block; add the shortcut only when ``self.stride == 1``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        projected = self.bn3(self.conv3(hidden))
        if self.stride == 1:
            return projected + self.shortcut(x)
        return projected


class SeparableConv2d(nn.Module):
    """Depthwise-separable convolution: depthwise conv followed by a 1x1 conv.

    Args:
        in_channels: number of input channels (also the number of groups of
            the depthwise convolution).
        out_channels: number of channels produced by the pointwise conv.
        kernel_size, stride, padding, dilation: forwarded to the depthwise
            convolution.
        bias: whether both convolutions carry a bias term.
    """

    def __init__(self,in_channels,out_channels,kernel_size=1,stride=1,padding=0,dilation=1,bias=False):
        super(SeparableConv2d, self).__init__()

        # Depthwise stage: one filter per input channel.
        self.conv1 = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=in_channels,
            bias=bias,
        )
        # Pointwise stage: 1x1 convolution that mixes the channels.
        self.pointwise = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            dilation=1,
            groups=1,
            bias=bias,
        )

    def forward(self, x):
        """Apply the depthwise convolution, then the pointwise convolution."""
        return self.pointwise(self.conv1(x))
    
    
class SSD_FPN(nn.Module):
    """SSD-style detector: MobileNetV2-like backbone + ASPP + FPN neck + multibox heads.

    Args:
        phase: ``'test'`` adds a softmax and a ``Detect`` layer and makes
            ``forward`` return the ``Detect`` output; any other value
            (e.g. ``'train'``) makes ``forward`` return raw predictions.
        Company: prior-box configuration dictionary handed to ``PriorBox``.
        num_classes: number of scores predicted per default box.
        RGB: ``True`` for 3-channel input, ``False`` for 1-channel input.
    """

    def __init__(self, phase, Company, num_classes=2, RGB=False):
        super(SSD_FPN, self).__init__()

        self.phase = phase
        self.RGB = RGB
        self.num_classes = num_classes
        self.cfg_prior = Company
        # Backbone stages as (expansion, out_planes, num_blocks, stride).
        self.cfg = [(1, 32, 1, 1),
                    (4, 32, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
                    (4, 64, 3, 2),
                    (4, 64, 4, 1),
                    (4, 128, 3, 2),
                    (4, 128, 3, 1),
                    (4, 256, 1, 1)]
        self.priorbox = PriorBox(self.cfg_prior)
        # The default boxes are constants, so build them without autograd
        # tracking (replaces the removed ``Variable(..., volatile=True)``).
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        self.depth = 768

        # Stem. NOTE: change conv1 stride 2 -> 1 for CIFAR10
        in_channels = 3 if self.RGB else 1
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=(1, 5), stride=1, padding=(0, 2), bias=False)
        self.bn1 = nn.BatchNorm2d(32)

        self.conv1_1 = nn.Conv2d(32, 32, kernel_size=(3, 5), stride=1, padding=(1, 2), bias=False)
        self.bn1_1 = nn.BatchNorm2d(32)

        self.conv1_2 = nn.Conv2d(32, 64, kernel_size=(3, 5), stride=1, padding=(1, 2), bias=False)
        self.bn1_2 = nn.BatchNorm2d(64)

        self.conv1_3 = nn.Conv2d(64, 64, kernel_size=(1, 3), dilation=(1, 2), padding=(0, 2), stride=1, bias=False)
        self.bn1_3 = nn.BatchNorm2d(64)

        self.conv1_4 = nn.Conv2d(64, 64, kernel_size=1, dilation=1, stride=1, padding=0, bias=False)
        self.bn1_4 = nn.BatchNorm2d(64)

        # Inverted-residual stages described by ``self.cfg``.
        self.layers = self._make_layers(in_planes=64)

        # Two parallel 256-channel branches on the backbone output; they are
        # concatenated in ``forward`` (hence the 512 input channels of conv2).
        self.ASPP = ASPP(256, 256, [2, 4, 6, 8])
        self.smooth_layer = nn.Conv2d(256, 256, kernel_size=(1, 5), stride=1, padding=(0, 2))

        self.conv2 = nn.Conv2d(512, self.depth, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(self.depth)

        self.seprable = SeparableConv2d(self.depth, self.depth, 3, 1, 1, 1, False)
        self.sep_bn_1 = nn.BatchNorm2d(self.depth)

        self.conv3 = nn.Conv2d(self.depth, self.depth, kernel_size=(1, 5), stride=1, padding=(0, 2))
        self.bn3 = nn.BatchNorm2d(self.depth)

        self.seprable_2 = SeparableConv2d(self.depth, self.depth, 3, 2, 1, 1, False)
        self.sep_bn_2 = nn.BatchNorm2d(self.depth)

        self.conv6 = nn.Conv2d(self.depth, self.depth, kernel_size=(1, 3), dilation=(1, 2), padding=(0, 2))
        self.conv7 = nn.Conv2d(self.depth, self.depth, kernel_size=(1, 5), stride=2, padding=(0, 2))
        self.bn7 = nn.BatchNorm2d(self.depth)

        # Top layer of the FPN (1x1 conv on the coarsest pyramid input).
        self.toplayer = nn.Conv2d(self.depth, self.depth, kernel_size=1, stride=1, padding=0)

        # Smooth layers applied after the top-down merges.
        self.smooth1 = nn.Conv2d(self.depth, self.depth, kernel_size=(1, 5), stride=1, padding=(0, 2))
        self.smooth2 = nn.Conv2d(self.depth, self.depth, kernel_size=(1, 5), stride=1, padding=(0, 2))

        # Lateral layers (1x1 convs on the finer backbone maps).
        self.latlayer1 = nn.Conv2d(self.depth, self.depth, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d(self.depth, self.depth, kernel_size=1, stride=1, padding=0)

        # Localization and confidence heads, one pair per source feature map.
        # The multipliers (6, 6, 6, 6, 4) are the number of default boxes per
        # location and must agree with the prior-box configuration: two square
        # boxes plus one box per configured aspect ratio, e.g. aspect ratios
        # [6, 12, 16, 21] give 6 default boxes. Aspect ratios can be chosen by
        # running k-means on the ground-truth boxes.
        self.loc = nn.ModuleList([
            nn.Conv2d(self.depth, 6 * 4, kernel_size=3, padding=1),
            nn.Conv2d(self.depth, 6 * 4, kernel_size=3, padding=1),
            nn.Conv2d(self.depth, 6 * 4, kernel_size=3, padding=1),
            nn.Conv2d(self.depth, 6 * 4, kernel_size=3, padding=1),
            nn.Conv2d(self.depth, 4 * 4, kernel_size=3, padding=1),
        ])

        self.conf = nn.ModuleList([
            nn.Conv2d(self.depth, 6 * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(self.depth, 6 * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(self.depth, 6 * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(self.depth, 6 * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(self.depth, 4 * num_classes, kernel_size=3, padding=1),
        ])

        if phase == 'test':
            # ``forward`` feeds a 2-D (boxes, classes) tensor, so normalise
            # over the last (class) dimension explicitly.
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def _upsample_add(self, x, y):
        """Bilinearly resize ``x`` to the spatial size of ``y`` and add them."""
        _, _, H, W = y.size()
        # ``F.upsample`` is deprecated; ``align_corners=False`` is the value it
        # used implicitly, so the numerical result is unchanged.
        return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y

    def _make_layers(self, in_planes):
        """Build the backbone stages listed in ``self.cfg`` as one ``nn.Sequential``."""
        layers = []
        for expansion, out_planes, num_blocks, stride in self.cfg:
            # Only the first block of a stage uses the stage stride.
            strides = [stride] + [1] * (num_blocks - 1)
            for block_stride in strides:
                layers.append(Block(in_planes, out_planes, expansion, block_stride))
                in_planes = out_planes
        return nn.Sequential(*layers)

    def load_weights(self, base_file):
        """Load a state dict from ``base_file`` (``.pth`` or ``.pkl``) onto the CPU.

        Files with any other extension are rejected with a printed message
        and nothing is loaded.
        """
        _, ext = os.path.splitext(base_file)
        # The previous test ``ext == '.pkl' or '.pth'`` was always true, which
        # made the "unsupported extension" branch unreachable.
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            self.load_state_dict(torch.load(base_file,
                                 map_location=lambda storage, loc: storage))
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')

    def forward(self, x):
        """Run the detector on an image batch.

        Returns:
            In ``'test'`` phase, whatever ``self.detect`` returns. Otherwise a
            tuple ``(loc, conf, priors)`` where ``loc`` is viewed as
            ``(batch, -1, 4)``, ``conf`` as ``(batch, -1, num_classes)`` and
            ``priors`` is the tensor of default boxes built in ``__init__``.
        """
        # Stem (one 2x2 max-pool halves the resolution).
        out = self.bn1(F.leaky_relu(self.conv1(x), inplace=True))
        out = self.bn1_1(F.leaky_relu(self.conv1_1(out), inplace=True))
        out = F.max_pool2d(self.bn1_2(F.leaky_relu(self.conv1_2(out), inplace=True)), kernel_size=2, stride=2)
        out = self.bn1_3(F.leaky_relu(self.conv1_3(out), inplace=True))
        out = self.bn1_4(F.leaky_relu(self.conv1_4(out), inplace=True))

        # Backbone, then ASPP and a plain conv branch concatenated on channels.
        out = self.layers(out)
        out_0 = self.ASPP(out)

        out = self.smooth_layer(out)
        out = torch.cat((out, out_0), dim=1)

        out = self.bn2(F.leaky_relu(self.conv2(out), inplace=True))

        # Pyramid inputs.
        c3 = self.sep_bn_1(F.leaky_relu(self.seprable(out)))  # level 1

        c4 = F.max_pool2d(self.bn3(F.leaky_relu(self.conv3(c3))), kernel_size=2, stride=2)  # level 2

        c5 = self.sep_bn_2(F.leaky_relu(self.seprable_2(c4)))  # level 3

        p6 = self.conv6(c5)  # level 4

        p7 = self.conv7(self.bn7(F.leaky_relu(p6, inplace=True)))  # level 5

        # Top-down pathway with lateral connections.
        p5 = self.toplayer(c5)
        p4 = self._upsample_add(p5, self.latlayer1(c4))
        p3 = self._upsample_add(p4, self.latlayer2(c3))
        # Smooth the merged maps.
        p4 = self.smooth1(p4)
        p3 = self.smooth2(p3)

        sources = [p3, p4, p5, p6, p7]
        loc = list()
        conf = list()

        # Apply the multibox heads to the source layers; channels are moved
        # last so that the flattening below keeps each box's values together.
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        if self.phase == "test":
            output = self.detect(
                loc.view(loc.size(0), -1, 4),                   # loc preds
                self.softmax(conf.view(-1, self.num_classes)),  # conf preds
                self.priors                                     # default boxes
            )
        else:
            output = (
                loc.view(loc.size(0), -1, 4),
                conf.view(conf.size(0), -1, self.num_classes),
                self.priors
            )

        return output

In [0]:
# Prior-box / training configuration consumed by PriorBox, SSD_FPN and
# MultiBoxLoss in this notebook.
cfg = dict(
    num_classes=21,
    lr_steps=(280000, 360000, 400000),
    max_iter=400000,
    # Side length of each (square) source feature map.
    feature_maps=[64, 32, 16, 16, 8],
    # Input image side length in pixels.
    min_dim=512,
    # Pixels of the input image covered by one cell of each feature map.
    steps=[8, 16, 32, 32, 64],
    # Box sizes in pixels, one entry per feature map.
    min_sizes=[51.2, 102.4, 220.16, 337.92, 455.68],
    max_sizes=[102.4, 220.16, 337.92, 455.68, 573.44],
    # Extra (non-square) aspect ratios, one list per feature map.
    aspect_ratios=[
        [6, 12, 16, 21],
        [6, 12, 16, 21],
        [6, 12, 16, 21],
        [6, 12, 16, 21],
        [6, 12],
    ],
    variance=[1, 1],
    clip=True,
    name='Company',
)

### 'aspect_ratios' : 使用五張Feature Map，每一張上都有預設的anchor boxes，Boxes的aspect ratio可以自己設定
### 'feature_maps' : 使用feature map大小為[64x64, 32x32, 16x16 ,16x16 , 8x8]
### 'min_sizes'、'max_sizes'可藉由下方算式算出，由作者自行設計
### 'steps' : Feature map回放回原本512*512的比例，如64要回放為512大概就是8倍
### 'variance' : Training 的一個trick，加速收斂，詳見：https://github.com/rykov8/ssd_keras/issues/53

---

## 'min_sizes'、'max_sizes' 計算

In [0]:
import math

# Derivation of 'min_sizes' / 'max_sizes'.
# Source: https://blog.csdn.net/gbyy42299/article/details/81235891
min_dim = 512  # side length of the input image in pixels

# Layers that emit prior boxes. Only the length of this list is used below.
# The names and the sizes in the next lines come from the reference SSD300:
# conv4_3 ==> 38 x 38
# fc7 ==> 19 x 19
# conv6_2 ==> 10 x 10
# conv7_2 ==> 5 x 5
# conv8_2 ==> 3 x 3
# conv9_2 ==> 1 x 1
mbox_source_layers = ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2']

# Scale range in percent of min_dim (S_min = 0.2 and S_max = 0.9 in the paper).
min_ratio = 20
max_ratio = 90
# Spacing between consecutive scales; with five source layers this is 23.
step = int(math.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))

# The first layer gets a hand-picked 10% / 20% scale; the remaining layers walk
# from min_ratio to max_ratio in increments of `step`.
min_sizes = [min_dim * 10 / 100.]
max_sizes = [min_dim * 20 / 100.]
for ratio in range(min_ratio, max_ratio + 1, step):
    min_sizes.append(min_dim * ratio / 100.)
    max_sizes.append(min_dim * (ratio + step) / 100.)

# steps: distance in input pixels between neighbouring prior-box centres of a
# feature map, i.e. the factor that maps a feature-map position back to the
# image (e.g. a 38 x 38 map on a 300 x 300 image gives a step of about 8).
# The two lists below are the reference SSD300 values and are not used by the
# computation above.
steps = [8, 16, 32, 64, 100, 300]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]

print('min_sizes: ', min_sizes)
print('max_sizes: ', max_sizes)


min_sizes:  [51.2, 102.4, 220.16, 337.92, 455.68]
max_sizes:  [102.4, 220.16, 337.92, 455.68, 573.44]


---

## Default anchor boxes設計原理，看懂收穫很多
##### 可以理解 SSD原文中 8732個anchors是怎麼來的
##### 理解原本8732如何變成34048

In [0]:
class PriorBox(object):
    """Generate default (prior) boxes in centre form for every source feature map.

    Each row of the result is ``[cx, cy, w, h]`` with all values divided by
    the image size. Per feature-map cell the boxes are, in order: a small
    square, a larger square, and one box per configured aspect ratio.

    Args:
        cfg: dictionary providing ``min_dim``, ``feature_maps``, ``steps``,
            ``min_sizes``, ``max_sizes``, ``aspect_ratios``, ``variance``,
            ``clip`` and ``name``.

    Raises:
        ValueError: if any variance is not strictly positive.
    """

    def __init__(self, cfg):
        super(PriorBox, self).__init__()
        self.image_size = cfg['min_dim']
        # Length of the 'aspect_ratios' list (one entry per feature map).
        self.num_priors = len(cfg['aspect_ratios'])
        self.variance = cfg['variance'] or [0.1]
        self.feature_maps = cfg['feature_maps']
        self.min_sizes = cfg['min_sizes']
        self.max_sizes = cfg['max_sizes']
        self.steps = cfg['steps']
        self.aspect_ratios = cfg['aspect_ratios']
        self.clip = cfg['clip']
        self.version = cfg['name']
        if any(v <= 0 for v in self.variance):
            raise ValueError('Variances must be greater than 0')

    def forward(self):
        """Return a ``(num_boxes, 4)`` tensor of ``[cx, cy, w, h]`` rows.

        Values are clamped to ``[0, 1]`` when ``self.clip`` is true.
        """
        boxes = []
        for k, fmap in enumerate(self.feature_maps):
            # Number of cells the image is divided into along one axis at
            # this level (image size / step).
            cells_per_side = self.image_size / self.steps[k]

            # Small square: min_size relative to the image size.
            small = self.min_sizes[k] / self.image_size
            # Large square: geometric mean of the relative min and max sizes.
            large = sqrt(small * (self.max_sizes[k] / self.image_size))
            # One wide box per aspect ratio. The transposed (tall) variant is
            # deliberately left out; enabling it would add one more box per
            # aspect ratio, e.g. 2 + 4 = 6 boxes would become 2 + 4 * 2 = 10.
            wide = [(small * sqrt(ar), small / sqrt(ar))
                    for ar in self.aspect_ratios[k]]

            # Visit every cell (row, col) of the fmap x fmap grid.
            for row, col in product(range(fmap), repeat=2):
                # Cell centre, relative to the image.
                cx = (col + 0.5) / cells_per_side
                cy = (row + 0.5) / cells_per_side

                boxes.extend((cx, cy, small, small))
                boxes.extend((cx, cy, large, large))
                for width, height in wide:
                    boxes.extend((cx, cy, width, height))

        # Back to torch land.
        output = torch.Tensor(boxes).view(-1, 4)
        if self.clip:
            output.clamp_(max=1, min=0)
        return output

In [0]:
# Build a prior-box generator from the `cfg` dictionary defined in an earlier cell.
PriorBox_Demo=PriorBox(cfg)

In [0]:
# Print the shape of the generated prior-box tensor (one row of 4 values per box).
print(PriorBox_Demo.forward().shape)

torch.Size([34048, 4])


---

## Loss 如何設計-這裡加入新觀念Focal Loss

#### OHEM v.s Focal Loss 

![title](OHEM&Focal.png)

In [0]:
from layers.box_utils import match, log_sum_exp


class FocalLoss(nn.Module):
    """Multi-class focal loss: ``-(1 - p_t) ** gamma * log(p_t)``.

    Args:
        gamma: focusing exponent applied to ``(1 - p_t)``.
        alpha: optional class weights. A number ``a`` becomes the two-entry
            weight vector ``[a, 1 - a]``; a list is used as given; ``None``
            disables weighting.
        size_average: return the mean over all samples when true, otherwise
            the sum.
    """

    def __init__(self, gamma=2, alpha=None, size_average=False):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.size_average = size_average
        if isinstance(alpha, (float, int)):
            alpha = torch.Tensor([alpha, 1 - alpha])
        elif isinstance(alpha, list):
            alpha = torch.Tensor(alpha)
        self.alpha = alpha

    def forward(self, input_, target):
        """Compute the loss.

        Args:
            input_: raw class scores. Inputs with more than two dimensions
                are treated as ``(N, C, ...)`` and flattened to ``(-1, C)``.
            target: integer class indices, one per flattened row.
        """
        if input_.dim() > 2:
            batch, channels = input_.size(0), input_.size(1)
            # N,C,H,W => N,C,H*W => N,H*W,C => N*H*W,C
            input_ = (input_.view(batch, channels, -1)
                      .transpose(1, 2)
                      .contiguous()
                      .view(-1, channels))
        target = target.view(-1, 1)

        # Log-probability of the target class of every row.
        log_probs = F.log_softmax(input_, dim=-1)
        logpt = log_probs.gather(1, target).view(-1)
        # NOTE(review): p_t is detached, so the modulating factor acts as a
        # constant weight during backward - confirm this is intended.
        pt = logpt.detach().exp()

        if self.alpha is not None:
            # Keep the class weights on the same tensor type as the input.
            if self.alpha.type() != input_.data.type():
                self.alpha = self.alpha.type_as(input_.data)
            class_weight = self.alpha.gather(0, target.data.view(-1))
            logpt = logpt * class_weight

        loss = -1 * (1 - pt) ** self.gamma * logpt
        return loss.mean() if self.size_average else loss.sum()



class MultiBoxLoss(nn.Module):
    """SSD multibox loss with either hard-negative mining, focal loss or plain
    cross-entropy for the confidence term.

    Args:
        num_classes: number of classes predicted per default box.
        overlap_thresh: overlap threshold passed to ``match``.
        prior_for_matching: stored as ``use_prior_for_matching`` (not read in
            this class).
        bkg_label: stored as ``background_label`` (not read in this class).
        neg_mining: when true, the confidence loss uses hard-negative mining.
        neg_pos: negatives kept per positive during hard-negative mining.
        neg_overlap: stored (not read in this class).
        encode_target: stored (not read in this class).
        use_gpu: move the matched targets to CUDA before computing the loss.
        Focal_loss: when ``neg_mining`` is false, use ``FocalLoss`` instead
            of cross-entropy.
        variance: box-encoding variances passed to ``match``. Defaults to the
            module-level ``cfg['variance']`` (the previous, implicit source).
    """

    def __init__(self, num_classes, overlap_thresh, prior_for_matching,
                 bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
                 use_gpu=True, Focal_loss=True, variance=None):
        super(MultiBoxLoss, self).__init__()
        self.use_gpu = use_gpu
        self.num_classes = num_classes
        self.threshold = overlap_thresh
        self.background_label = bkg_label
        self.encode_target = encode_target
        self.use_prior_for_matching = prior_for_matching
        self.do_neg_mining = neg_mining
        self.negpos_ratio = neg_pos
        self.neg_overlap = neg_overlap
        # Keep the historical behaviour (global ``cfg``) unless overridden.
        self.variance = cfg['variance'] if variance is None else variance
        self.Focal_loss = Focal_loss
        self.F = FocalLoss()

    def forward(self, predictions, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).

        Returns:
            ``(loss_l, loss_c)``: localization and confidence loss as
            float64 tensors, each divided by the number of positive priors
            (at least 1, so a batch without positives yields finite values).
        """
        loc_data, conf_data, priors = predictions
        num = loc_data.size(0)
        priors = priors[:loc_data.size(1), :]
        num_priors = priors.size(0)

        # Match priors (default boxes) and ground truth boxes.
        # ``match`` receives loc_t / conf_t and idx and its return value is
        # ignored - presumably it fills row ``idx`` of both in place.
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()

        # Priors matched to a non-background class.
        pos = conf_t > 0
        num_pos = pos.long().sum(dim=1, keepdim=True)

        # Localization Loss (Smooth L1) over the positive priors only.
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        batch_conf = conf_data.view(-1, self.num_classes)
        if self.do_neg_mining:
            # Per-prior cross-entropy, used only to rank the negatives.
            loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

            # Hard Negative Mining: drop positives, keep the highest-loss
            # negatives (negpos_ratio per positive).
            loss_c = loss_c.view(num, -1)
            loss_c[pos] = 0
            _, loss_idx = loss_c.sort(1, descending=True)
            _, idx_rank = loss_idx.sort(1)
            num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
            neg = idx_rank < num_neg.expand_as(idx_rank)

            # Confidence Loss Including Positive and Negative Examples
            pos_idx = pos.unsqueeze(2).expand_as(conf_data)
            neg_idx = neg.unsqueeze(2).expand_as(conf_data)
            conf_p = conf_data[pos_idx | neg_idx].view(-1, self.num_classes)
            targets_weighted = conf_t[pos | neg]
            # NOTE(review): this is a mean that is divided by N again below;
            # confirm the double normalisation is intended.
            loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='mean')
        else:
            if self.Focal_loss:
                loss_c = self.F(batch_conf, conf_t.view(-1))
            else:
                # NOTE(review): mean reduction, then divided by N again below.
                loss_c = F.cross_entropy(batch_conf, conf_t.view(-1), reduction='mean')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + alpha * Lloc(x,l,g)) / N
        # Computed in float64. N is clamped to 1 so that a batch without any
        # positive prior does not divide by zero.
        N = torch.clamp(num_pos.sum().double(), min=1.0)
        loss_l = loss_l.double() / N
        loss_c = loss_c.double() / N
        return loss_l, loss_c

## 產生我們Loss function，注意這裡的class要包含背景

In [0]:
# Build the loss function: 21 classes, no hard-negative mining, focal loss on.
Use_cuda = False
Use_Focal_Loss = True
criterion = MultiBoxLoss(
    num_classes=21,
    overlap_thresh=0.5,
    prior_for_matching=True,
    bkg_label=0,
    neg_mining=False,
    neg_pos=3,
    neg_overlap=0.5,
    encode_target=False,
    use_gpu=Use_cuda,
    Focal_loss=Use_Focal_Loss,
)

----

## 基本設定

In [0]:
# Training-phase model on 3-channel input with 21 classes; `net` is the handle
# used by the rest of the notebook.
RetinaNet = SSD_FPN(phase='train', Company=cfg, num_classes=21, RGB=True)
net = RetinaNet



In [0]:
# Whether to run on the GPU.
Use_cuda = False

# The default tensor type depends on whether the CPU or the GPU is used.
# (This cell previously read an undefined `args.cuda`, which raised NameError
# on any machine with CUDA available; the notebook's own flag is used instead.)
if torch.cuda.is_available():
    if Use_cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        print("WARNING: It looks like you have a CUDA device, but aren't " +
              "using CUDA.\nSet Use_cuda=True for optimal training speed.")
        torch.set_default_tensor_type('torch.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')

if Use_cuda:
    # DataParallel can be enabled on the GPU, but should be turned off when
    # the input size varies. (`ssd_net` was undefined here; `net` is the model.)
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True
    # The model has to be moved to CUDA when the GPU is used.
    net = net.cuda()

batch_size_ = 1
optimizer = optim.Adam(net.parameters(), lr=0.00001 / batch_size_)

---

## 訓練

## 這裡我們先示範輸入的 image,Label格式，真正在訓練時，準備成一樣格式即可

In [0]:
# Input image format, assuming a batch size of 4: (batch, channels, H, W).
# torch.rand already returns a tensor, so it is not wrapped in torch.tensor
# again (that only made a redundant copy and raised a UserWarning).
image_in = torch.rand(4, 3, 512, 512, dtype=torch.float32)

# Label format: one list per image with one row of 5 numbers per object (four
# box values followed by the class label). The number of rows is not fixed; it
# is simply the number of labelled objects in that image.
label_0 = [[0.1804, 0.6076, 0.7701, 0.8485, 0.0000],
           [0.2250, 0.0000, 0.9238, 0.5641, 3.0000],
           [0.2250, 0.0000, 0.9238, 0.5641, 19.0000],
           [0.2950, 0.0000, 0.8238, 0.3641, 6.0000]]
label_1 = [[0.1804, 0.6076, 0.7701, 0.8485, 13.0000],
           [0.2250, 0.0000, 0.9238, 0.5641, 11.0000],
           [0.2250, 0.0000, 0.9238, 0.5641, 7.0000],
           [0.2950, 0.0000, 0.8238, 0.3641, 5.0000]]
label_2 = [[0.1804, 0.6076, 0.7701, 0.8485, 0.0000],
           [0.2250, 0.0000, 0.9238, 0.5641, 3.0000],
           [0.2250, 0.0000, 0.9238, 0.5641, 14.0000],
           [0.2950, 0.0000, 0.8238, 0.3641, 6.0000]]
label_3 = [[0.1804, 0.6076, 0.7701, 0.8485, 0.0000],
           [0.2250, 0.0000, 0.9238, 0.5641, 3.0000],
           [0.2250, 0.0000, 0.9238, 0.5641, 19.0000],
           [0.2950, 0.0000, 0.8238, 0.3641, 6.0000]]

  


In [0]:
# Training length: number of epochs and number of iterations per epoch.
epochs, iteration = 300, 1000

In [0]:
# The demo batch never changes, so build the inputs and targets once instead
# of on every iteration. Plain tensors are enough: `Variable` is a no-op
# wrapper and `volatile=True` is ignored (it only emits a warning).
labels = [torch.tensor(lbl) for lbl in (label_0, label_1, label_2, label_3)]
if Use_cuda:
    image_ = image_in.cuda()
    y = [lbl.cuda() for lbl in labels]
else:
    image_ = image_in
    y = labels

print_every = 1  # report the running mean losses every `print_every` iterations

for epoch in range(epochs):
    n = 0
    loss_sum = []
    loc_loss = []
    conf_loss = []
    for number__ in range(iteration):
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass.
        out = net(image_)
        # Regression loss and classification loss.
        loss_l, loss_c = criterion(out, y)
        # The relative weight of the two terms can be tuned.
        loss = 10 * loss_l + loss_c
        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        loc_loss.append(loss_l.item())
        conf_loss.append(loss_c.item())
        loss_sum.append(loss.item())

        n += 1
        if n % print_every == 0:
            print('BBOX Regression Loss: ', np.mean(loc_loss))
            print('Classification Loss: ', np.mean(conf_loss))

    # Save the weights after every epoch. `ssd_net` was never defined in this
    # notebook (NameError after the first full epoch); save the underlying
    # model instead, unwrapping DataParallel so the keys carry no "module."
    # prefix.
    os.makedirs('weights', exist_ok=True)
    model_to_save = net.module if isinstance(net, torch.nn.DataParallel) else net
    # NOTE(review): the file name starts with a full-width 'Ｗ' (U+FF37), kept
    # byte-for-byte here - confirm whether an ASCII 'W' was intended.
    torch.save(model_to_save.state_dict(), 'weights/Ｗeights.pth')

  
  from ipykernel import kernelapp as app
  "See the documentation of nn.Upsample for details.".format(mode))


BBOX Regression Loss:  0.1282352744986158
Classification Loss:  860.8162270642201
