# parse_model_config(path)分析yolov3.cfg网络搭建

In [2]:
import torch
import random
import numpy as np
import torch.nn as nn
def parse_model_config(path):
    """Parse a Darknet/YOLOv3 ``.cfg`` file into a list of module definitions.

    Each ``[section]`` header starts a new dict whose ``'type'`` key holds the
    section name; every following ``key=value`` line is stored in that dict
    with both key and value kept as stripped strings.  Blank lines and lines
    whose first non-blank character is ``#`` are ignored.

    Args:
        path: Path of the configuration file to read.

    Returns:
        A list of dicts, one per section, in file order.  ``convolutional``
        sections get ``'batch_normalize'`` pre-set to the integer ``0`` so the
        key always exists; an explicit value in the file replaces it (as a
        string).

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a ``key=value`` line appears before any section header.
        ValueError: If a non-header line contains no ``=``.
    """
    module_defs = []
    # The context manager guarantees the handle is closed, even on a parse error.
    with open(path, 'r') as cfg_file:
        for raw_line in cfg_file:
            # Strip first so indented comments and whitespace-only lines are
            # recognised and skipped instead of reaching the key/value split.
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue

            if line.startswith('['):  # a new block begins
                block = {'type': line[1:-1].rstrip()}
                if block['type'] == 'convolutional':
                    block['batch_normalize'] = 0
                module_defs.append(block)
            else:
                # Split on the first '=' only, so values may contain '='.
                key, value = line.split('=', 1)
                module_defs[-1][key.rstrip()] = value.strip()

    return module_defs
# Location of the cfg file on the original author's machine.
path = r'/py/jupyter-notebook/yolov3-master/cfg/yolov3.cfg'
# The parser returns a list of dicts; element 0 is the first block of the file.
first_block = parse_model_config(path)[0]
print(first_block['height'])
# Bare expression: shown as the cell result when run in a notebook.
parse_model_config(path)


416


[{'angle': '0',
  'batch': '16',
  'burn_in': '1000',
  'channels': '3',
  'decay': '0.0005',
  'exposure': '1.5',
  'height': '416',
  'hue': '.1',
  'learning_rate': '0.001',
  'max_batches': '500200',
  'momentum': '0.9',
  'policy': 'steps',
  'saturation': '1.5',
  'scales': '.1,.1',
  'steps': '400000,450000',
  'subdivisions': '1',
  'type': 'net',
  'width': '416'},
 {'activation': 'leaky',
  'batch_normalize': '1',
  'filters': '32',
  'pad': '1',
  'size': '3',
  'stride': '1',
  'type': 'convolutional'},
 {'activation': 'leaky',
  'batch_normalize': '1',
  'filters': '64',
  'pad': '1',
  'size': '3',
  'stride': '2',
  'type': 'convolutional'},
 {'activation': 'leaky',
  'batch_normalize': '1',
  'filters': '32',
  'pad': '1',
  'size': '1',
  'stride': '1',
  'type': 'convolutional'},
 {'activation': 'leaky',
  'batch_normalize': '1',
  'filters': '64',
  'pad': '1',
  'size': '3',
  'stride': '1',
  'type': 'convolutional'},
 {'activation': 'linear', 'from': '-3', 'type':

# route层的分析

# shortcut层的分析

# anchors extraction（yolo layer）

In [4]:
# Worked example with module_def['mask'] = '0,1,2'.
mask_spec = '0,1,2'
anchor_idxs = list(map(int, mask_spec.split(',')))
print('anchor_idxs:{}\n'.format(anchor_idxs))

# Worked example with module_def['anchors'] =
# '10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326'
# (float() tolerates the padding spaces around each number).
anchor_spec = '10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326'
anchors = list(map(float, anchor_spec.split(',')))
print('anchors:{}\n'.format(anchors))

# Pair consecutive values as (width, height), giving the 9 prior boxes.
anchors = list(zip(anchors[0::2], anchors[1::2]))
print('anchors:{}\n'.format(anchors))

# Keep only the anchors selected by the mask.  Author's note: each of the three
# yolo layers predicts at one scale and uses the 3 anchors matching that scale
# (mask 0,1,2 picks the three smallest anchors, used for the 52*52 feature map).
anchors = [anchors[idx] for idx in anchor_idxs]
print('anchors:{}\n'.format(anchors))


anchor_idxs:[0, 1, 2]

anchors:[10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0]

anchors:[(10.0, 13.0), (16.0, 30.0), (33.0, 23.0), (30.0, 61.0), (62.0, 45.0), (59.0, 119.0), (116.0, 90.0), (156.0, 198.0), (373.0, 326.0)]

anchors:[(10.0, 13.0), (16.0, 30.0), (33.0, 23.0)]



# YOLOLayer()

In [None]:
import torch
'''
首先给特征图编号，三个尺寸相似，以13*13特征图8为例分析
anchors:[(10.0, 13.0), (16.0, 30.0), (33.0, 23.0)]
stride:32(grid cell size)
'''
class YOLOLayer(nn.Module):
    """YOLOv3 detection layer for a single output scale.

    When ``forward`` receives ``targets`` it computes the training loss and
    statistics; otherwise it decodes the raw predictions into boxes.

    The shape examples in the comments follow the original author's
    walk-through (batch size 1, 13x13 feature map, stride 32, 3 anchors,
    80 classes) and are illustrative only.
    """

    def __init__(self, anchors, nC, img_dim, anchor_idxs, cfg):
        """Build the grid offsets and the stride-scaled anchors.

        Args:
            anchors: Iterable of ``(width, height)`` anchor pairs in pixels.
            nC: Number of classes.
            img_dim: Network input size in pixels (from the cfg hyperparameters,
                not from the parser).
            anchor_idxs: Anchor indices (the cfg ``mask``) for this layer; only
                the first one is used, to select the stride.
            cfg: Path of the cfg file; a path ending in ``yolov3-tiny.cfg``
                doubles the stride.
        """
        super(YOLOLayer, self).__init__()

        anchors = [(a_w, a_h) for a_w, a_h in anchors]  # (pixels)
        nA = len(anchors)

        self.anchors = anchors
        self.nA = nA  # number of anchors (3)
        self.nC = nC  # number of classes (80)
        self.bbox_attrs = 5 + nC
        self.img_dim = img_dim  # from hyperparams in cfg file, NOT from parser

        # The first mask index decides the down-sampling stride of this layer.
        if anchor_idxs[0] == (nA * 2):  # 6
            stride = 32
        elif anchor_idxs[0] == nA:  # 3
            stride = 16
        else:
            stride = 8

        if cfg.endswith('yolov3-tiny.cfg'):
            stride *= 2

        # Build anchor grids
        nG = int(self.img_dim / stride)  # number of grid points, e.g. nG=13 is the feature-map size
        # grid_x holds the column index of every cell, grid_y the row index;
        # both have shape [1, 1, nG, nG] (e.g. torch.Size([1, 1, 13, 13])).
        self.grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).float()
        self.grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).float()
        # Anchors are divided by the stride, i.e. expressed in grid cells rather
        # than in pixels of the input image.
        self.scaled_anchors = torch.FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in anchors])
        self.anchor_w = self.scaled_anchors[:, 0:1].view((1, nA, 1, 1))  # nA anchors
        self.anchor_h = self.scaled_anchors[:, 1:2].view((1, nA, 1, 1))
        # Example: anchor widths 10, 16, 33 with a stride of 32 give an anchor_w
        # of shape torch.Size([1, 3, 1, 1]) holding 0.3125, 0.5000, 1.0312
        # (0.3125 * 32 = 10, the first anchor width).

        # class_weights() is defined elsewhere.  Author's note: normalised
        # frequency of each of the 80 COCO classes in the ground truth - TODO confirm.
        self.weights = class_weights()

        self.loss_means = torch.ones(6)  # 6-element vector initialised to ones
        self.tx, self.ty, self.tw, self.th = [], [], [], []
        # NOTE(review): true division, so this is a float (2.0, 1.0 or 0.0 for
        # the standard masks with nA=3).
        self.yolo_layer = anchor_idxs[0] / nA  # 2, 1, 0

    def forward(self, p, targets=None, batch_report=False, var=None):
        """Compute the training loss or decode predictions.

        Args:
            p: Raw predictions of shape ``[bs, nA * (5 + nC), nG, nG]``
                (e.g. torch.Size([1, 255, 13, 13])).
            targets: Ground truth passed to ``build_targets``; ``None`` selects
                inference.
            batch_report: When true, predicted boxes are built and forwarded to
                ``build_targets`` and extra false positives are counted.
            var: Unused in this method.

        Returns:
            Training: the tuple ``(loss, loss.item(), lx, ly, lw, lh, lconf,
            lcls, nT, TP, FP, FPe, FN, TC)`` where the ``l*`` entries are Python
            floats.  Inference: a tensor of shape ``[bs, nA * nG * nG, 5 + nC]``
            whose first four columns (x, y, w, h) are multiplied by the stride.
        """
        FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor
        bs = p.shape[0]  # batch size
        nG = p.shape[2]  # number of grid points

        if p.is_cuda and not self.grid_x.is_cuda:  # move the cached tensors to the GPU once
            self.grid_x, self.grid_y = self.grid_x.cuda(), self.grid_y.cuda()
            self.anchor_w, self.anchor_h = self.anchor_w.cuda(), self.anchor_h.cuda()
            self.weights, self.loss_means = self.weights.cuda(), self.loss_means.cuda()

        # Split the channel axis into (anchor, attribute) and move the attributes
        # last: [bs, 255, 13, 13] -> [bs, 3, 13, 13, 85], i.e.
        # (bs, anchors, grid, grid, xywh + conf + classes).
        p = p.view(bs, self.nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()  # prediction

        # Training
        if targets is not None:
            MSELoss = nn.MSELoss()  # mean squared error
            BCEWithLogitsLoss = nn.BCEWithLogitsLoss()  # sigmoid + binary cross entropy
            CrossEntropyLoss = nn.CrossEntropyLoss()  # multi-class cross entropy

            # Get outputs.  x and y go through a sigmoid, so they are offsets
            # inside a grid cell; the cell index is added further below.
            x = torch.sigmoid(p[..., 0])
            y = torch.sigmoid(p[..., 1])
            p_conf = p[..., 4]
            p_cls = p[..., 5:]
            # Example shapes: x, y, p_conf are torch.Size([1, 3, 13, 13]) (one value
            # per anchor per cell); p_cls is torch.Size([1, 3, 13, 13, 80]).

            # Width and height (yolo method): exp(prediction) * anchor size.
            # `.data` is used, so width/height are detached from autograd.
            w = p[..., 2]
            h = p[..., 3]
            width = torch.exp(w.data) * self.anchor_w
            height = torch.exp(h.data) * self.anchor_h

            # Width and height (power method) - alternative kept for reference:
            # w = torch.sigmoid(p[..., 2])  # Width
            # h = torch.sigmoid(p[..., 3])  # Height
            # width = ((w.data * 2) ** 2) * self.anchor_w
            # height = ((h.data * 2) ** 2) * self.anchor_h

            p_boxes = None
            if batch_report:  # author's note: used for computing mAP
                # Predicted boxes: add offset and scale with anchors (in grid space, i.e. 0-13).
                # grid_x/grid_y ([1, 1, nG, nG]) broadcast over the anchor axis of
                # x/y ([bs, nA, nG, nG]), adding each cell's column/row index.
                gx = x.data + self.grid_x[:, :, :nG, :nG]
                gy = y.data + self.grid_y[:, :, :nG, :nG]
                p_boxes = torch.stack((gx - width / 2,
                                       gy - height / 2,
                                       gx + width / 2,
                                       gy + height / 2), 4)  # x1y1x2y2

            tx, ty, tw, th, mask, tcls, TP, FP, FN, TC = \
                build_targets(p_boxes, p_conf, p_cls, targets, self.scaled_anchors, self.nA, self.nC, nG, batch_report)

            tcls = tcls[mask]
            if x.is_cuda:
                tx, ty, tw, th, mask, tcls = tx.cuda(), ty.cuda(), tw.cuda(), th.cuda(), mask.cuda(), tcls.cuda()

            # Compute losses
            nT = sum([len(x) for x in targets])  # total number of ground-truth targets
            nM = mask.sum().float()  # number of anchors (assigned to targets)
            nB = len(targets)  # batch size
            k = nM / nB
            if nM > 0:
                lx = k * MSELoss(x[mask], tx[mask])
                ly = k * MSELoss(y[mask], ty[mask])
                lw = k * MSELoss(w[mask], tw[mask])
                lh = k * MSELoss(h[mask], th[mask])

                # Alternative kept for reference:
                # lconf = k * BCEWithLogitsLoss(p_conf[mask], mask[mask].float())

                lcls = (k / 4) * CrossEntropyLoss(p_cls[mask], torch.argmax(tcls, 1))
                # lcls = (k * 10) * BCEWithLogitsLoss(p_cls[mask], tcls.float())
            else:
                lx, ly, lw, lh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]), FT([0]), FT([0])

            # lconf += k * BCEWithLogitsLoss(p_conf[~mask], mask[~mask].float())
            lconf = (k * 64) * BCEWithLogitsLoss(p_conf, mask.float())

            # Sum loss components
            balance_losses_flag = False
            if balance_losses_flag:
                k = 1 / self.loss_means.clone()
                loss = (lx * k[0] + ly * k[1] + lw * k[2] + lh * k[3] + lconf * k[4] + lcls * k[5]) / k.mean()

                self.loss_means = self.loss_means * 0.99 + \
                                  FT([lx.data, ly.data, lw.data, lh.data, lconf.data, lcls.data]) * 0.01
            else:
                loss = lx + ly + lw + lh + lconf + lcls

            # Sum False Positives from unassigned anchors
            FPe = torch.zeros(self.nC)
            if batch_report:
                i = torch.sigmoid(p_conf[~mask]) > 0.5
                if i.sum() > 0:
                    FP_classes = torch.argmax(p_cls[~mask][i], 1)
                    FPe = torch.bincount(FP_classes, minlength=self.nC).float().cpu()  # extra FPs

            return loss, loss.item(), lx.item(), ly.item(), lw.item(), lh.item(), lconf.item(), lcls.item(), \
                   nT, TP, FP, FPe, FN, TC

        else:
            stride = self.img_dim / nG
            p[..., 0] = torch.sigmoid(p[..., 0]) + self.grid_x  # x
            p[..., 1] = torch.sigmoid(p[..., 1]) + self.grid_y  # y
            p[..., 2] = torch.exp(p[..., 2]) * self.anchor_w  # width
            p[..., 3] = torch.exp(p[..., 3]) * self.anchor_h  # height
            p[..., 4] = torch.sigmoid(p[..., 4])  # p_conf
            p[..., :4] *= stride  # grid units -> multiples of the stride

            # reshape from [1, 3, 13, 13, 85] to [1, 507, 85]
            return p.view(bs, self.nA * nG * nG, 5 + self.nC)


# Darknet的前向传播解读（重要）

In [8]:
  #每层加个打印就行了
def forward(self, x, targets=None, batch_report=False, var=0):
    """Run the network layer by layer, printing the output shape of each layer.

    Args:
        self: The Darknet model; must provide ``module_defs`` and
            ``module_list`` (iterated in lockstep) and, for training,
            ``loss_names``.
        x: Input tensor fed to the first layer.
        targets: Ground truth; when not ``None`` the yolo layers are run in
            training mode and their extra return values are accumulated in
            ``self.losses``.
        batch_report: Forwarded to the yolo layers during training.
        var: Forwarded to the yolo layers during training.

    Returns:
        Training: the sum of the first value returned by every yolo layer.
        Inference: the yolo outputs concatenated along dimension 1, or ``None``
        when the network contains no yolo layer.
    """
    # Local import so this cell does not rely on an import made elsewhere.
    from collections import defaultdict

    self.losses = defaultdict(float)
    is_training = targets is not None
    layer_outputs = []  # output of every layer, indexed by layer number
    output = []         # outputs of the yolo layers only

    for module_def, module in zip(self.module_defs, self.module_list):
        if module_def['type'] in ['convolutional', 'upsample', 'maxpool']:
            x = module(x)
            print('conv/up: ',x.shape)
        elif module_def['type'] == 'route':
            # Concatenate the outputs of the listed layers along dimension 1.
            source_layers = [int(idx) for idx in module_def['layers'].split(',')]
            x = torch.cat([layer_outputs[idx] for idx in source_layers], 1)
            print('route  : ',x.shape)
        elif module_def['type'] == 'shortcut':
            # Add the previous layer's output to that of the layer given by 'from'.
            source_layer = int(module_def['from'])
            x = layer_outputs[-1] + layer_outputs[source_layer]
            print('shortct: ',x.shape)
        elif module_def['type'] == 'yolo':
            # Train phase: get loss
            if is_training:
                x, *losses = module[0](x, targets, batch_report, var)
                for name, loss in zip(self.loss_names, losses):
                    self.losses[name] += loss
            # Test phase: Get detections
            else:
                x = module(x)
                print('yolodec: ',x.shape)
            output.append(x)
        layer_outputs.append(x)

    # Previously the collected yolo outputs were discarded and None was returned.
    if is_training:
        return sum(output)
    return torch.cat(output, 1) if output else None
    
'''           
每层的输出：
torch.Size([1, 3, 416, 416])    #输入层原图（3*416*416），扩展了一个batch维度 
#下面有107层输出
conv/up:  torch.Size([1, 32, 416, 416])   #第一层卷积核32个，padding保持尺寸，输出的通道32
conv/up:  torch.Size([1, 64, 208, 208])   #开始1/2降采样，stride=2,padding，64卷积核，此处输出通道64
conv/up:  torch.Size([1, 32, 208, 208])   #1*1卷积核融合通道信息，32卷积核，channel=32
conv/up:  torch.Size([1, 64, 208, 208])   #进行3*3+padding的same卷积通道延伸为64便于后面的残差连接
shortct:  torch.Size([1, 64, 208, 208])   #shortcut连接，将其上第一行和第三行相加
conv/up:  torch.Size([1, 128, 104, 104])  #略，都一样
conv/up:  torch.Size([1, 64, 104, 104])
conv/up:  torch.Size([1, 128, 104, 104])
shortct:  torch.Size([1, 128, 104, 104])
conv/up:  torch.Size([1, 64, 104, 104])
conv/up:  torch.Size([1, 128, 104, 104])
shortct:  torch.Size([1, 128, 104, 104])
conv/up:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 128, 52, 52])
conv/up:  torch.Size([1, 256, 52, 52])
shortct:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 128, 52, 52])
conv/up:  torch.Size([1, 256, 52, 52])
shortct:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 128, 52, 52])
conv/up:  torch.Size([1, 256, 52, 52])
shortct:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 128, 52, 52])
conv/up:  torch.Size([1, 256, 52, 52])
shortct:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 128, 52, 52])
conv/up:  torch.Size([1, 256, 52, 52])
shortct:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 128, 52, 52])
conv/up:  torch.Size([1, 256, 52, 52])
shortct:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 128, 52, 52])
conv/up:  torch.Size([1, 256, 52, 52])
shortct:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 128, 52, 52])
conv/up:  torch.Size([1, 256, 52, 52])
shortct:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 512, 26, 26])
shortct:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 512, 26, 26])
shortct:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 512, 26, 26])
shortct:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 512, 26, 26])
shortct:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 512, 26, 26])
shortct:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 512, 26, 26])
shortct:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 512, 26, 26])
shortct:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 512, 26, 26])
shortct:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 1024, 13, 13])
conv/up:  torch.Size([1, 512, 13, 13])
conv/up:  torch.Size([1, 1024, 13, 13])
shortct:  torch.Size([1, 1024, 13, 13])
conv/up:  torch.Size([1, 512, 13, 13])
conv/up:  torch.Size([1, 1024, 13, 13])
shortct:  torch.Size([1, 1024, 13, 13])
conv/up:  torch.Size([1, 512, 13, 13])
conv/up:  torch.Size([1, 1024, 13, 13])
shortct:  torch.Size([1, 1024, 13, 13])
conv/up:  torch.Size([1, 512, 13, 13])
conv/up:  torch.Size([1, 1024, 13, 13])
shortct:  torch.Size([1, 1024, 13, 13])
conv/up:  torch.Size([1, 512, 13, 13])
conv/up:  torch.Size([1, 1024, 13, 13])
conv/up:  torch.Size([1, 512, 13, 13])
conv/up:  torch.Size([1, 1024, 13, 13])
conv/up:  torch.Size([1, 512, 13, 13])
conv/up:  torch.Size([1, 1024, 13, 13])
conv/up:  torch.Size([1, 255, 13, 13])   #最大尺度的三个box信息得出（coco:3*（80+4+1）=255），可输出到检测YOLO层
yolodec:  torch.Size([1, 507, 85])      #预测了507=13*13*3个box
route  :  torch.Size([1, 512, 13, 13])  #理解了！单个route层存在的意义，为了便于使用序贯模型，见最上面route层的分析
conv/up:  torch.Size([1, 256, 13, 13])
conv/up:  torch.Size([1, 256, 26, 26])  #上采样层从13*13扩展 到26*26
route  :  torch.Size([1, 768, 26, 26])  #26*26尺度的concat融合
conv/up:  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 512, 26, 26])
conv/up:  torch.Size([1, 255, 26, 26])  #中等尺度的输出，后接YOLO层检测
yolodec:  torch.Size([1, 2028, 85])     #预测了2028=26*26*3个box
route  :  torch.Size([1, 256, 26, 26])
conv/up:  torch.Size([1, 128, 26, 26])  
conv/up:  torch.Size([1, 128, 52, 52])  #对26*26上采样得到52*52特征图
route  :  torch.Size([1, 384, 52, 52])  #concat融合之前的（52*52*256）得到52*52尺度的特征图
conv/up:  torch.Size([1, 128, 52, 52])
conv/up:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 128, 52, 52])
conv/up:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 128, 52, 52])
conv/up:  torch.Size([1, 256, 52, 52])
conv/up:  torch.Size([1, 255, 52, 52])  #最小尺度特征图
yolodec:  torch.Size([1, 8112, 85])     #8112=52*52*3个box
'''



