In [89]:
import torch
import torch.nn as nn

In [6]:
def parse_config(cfgfile):
    f = open(cfgfile,'r')
    lines = f.read().split('\n')
    lines = [line for line in lines if len(line)>0]
    lines = [line for line in lines if line[0] != '#']
    lines = [line.rstrip().lstrip() for line in lines]
    blocks = []
    block = {}
    for line in lines:
        if line[0] == '[':
            if len(block) != 0:
                blocks.append(block)
                block = {}
            block['type'] = line[1:-1]
        else:
            left = line.split('=')[0].rstrip() 
            right = line.split('=')[1].lstrip()
            block[left] = right
    blocks.append(block)
                
    return blocks    

In [7]:
cfgfile = 'yolov3.cfg'
blocks = parse_config(cfgfile)

In [12]:
for i in range(20):
    print(len(lines[i]))

5
9
8
15
10
8
14
9
10
10
12
12
7
16
14
6
0
19
12
20


In [17]:
print(len(lines))

658


In [27]:
for i in range(20):
    print(len(lines[i]))

5
9
8
15
10
8
14
9
10
10
12
12
7
16
14
6
19
12
20
12


In [30]:
print(len(lines))

648


In [32]:
for i in range(20):
    print(lines[i])

[net]
batch=16
subdivisions=1
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1


In [8]:
print(len(blocks))

108


<img src="./long.png">

<img src="./index.png">

In [9]:
print(blocks[0])

{'type': 'net', 'batch': '16', 'subdivisions': '1', 'width': '416', 'height': '416', 'channels': '3', 'momentum': '0.9', 'decay': '0.0005', 'angle': '0', 'saturation': '1.5', 'exposure': '1.5', 'hue': '.1', 'learning_rate': '0.001', 'burn_in': '1000', 'max_batches': '500200', 'policy': 'steps', 'steps': '400000,450000', 'scales': '.1,.1'}


In [10]:
print(blocks[1])

{'type': 'convolutional', 'batch_normalize': '1', 'filters': '32', 'size': '3', 'stride': '1', 'pad': '1', 'activation': 'leaky'}


In [11]:
print(blocks[2])

{'type': 'convolutional', 'batch_normalize': '1', 'filters': '64', 'size': '3', 'stride': '2', 'pad': '1', 'activation': 'leaky'}


In [13]:
print(blocks[3])

{'type': 'convolutional', 'batch_normalize': '1', 'filters': '32', 'size': '1', 'stride': '1', 'pad': '1', 'activation': 'leaky'}


[net]
#Testing
#batch=1
#subdivisions=1
#Training
batch=16
subdivisions=1
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

In [14]:
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable
import numpy as np

{'type': 'convolutional', 'batch_normalize': '1', 'filters': '32', 'size': '3', 'stride': '1', 'pad': '1', 'activation': 'leaky'}
conv层有75个
batchnorm层有72个
需要加以判断
可以发现yolo层之前的卷积层不含皮归一化层


另外：108层的构成是
        net_info:      1
        convlutional： 75
        yolo：         3
        shortcut:      23
        route:         4
        upsamble:      2

<img src="./yolov3.jpg"> 

In [35]:
class EmptyLayer(nn.Module):
    #空层用于占位
    def __init__(self):
        super(EmptyLayer, self).__init__()

In [65]:
class DetectionLayer(nn.Module):
    #将检测抽象化为一个层
    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        self.anchors = anchors

[yolo]
mask = 6,7,8
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

In [87]:
def create_net(blocks):
    
    net_info = blocks[0]
    prev_filters = 3
    output_filters = []
    net_list = nn.ModuleList()
    
    for index,layer in enumerate(blocks[1:]):
        onelayer = nn.Sequential()
        if layer['type'] == 'convolutional':
            try:
                batchnorm = int(layer['batch_normalize'])
                bias = True
            except:
                batchnorm = 0
            bias = False
            out_channels = int(layer['filters'])
            kernel_size = int(layer['size'])
            stride = int(layer['stride'])
            pad = int(layer['pad'])
            activation = layer['activation']
            conv = nn.Conv2d(in_channels = prev_filters,
                             out_channels = out_channels,
                             kernel_size = kernel_size,
                             stride = stride,
                             padding = pad,
                             bias = bias)
            onelayer.add_module("conv_{0}".format(index), conv)
            if batchnorm:
                bn = nn.BatchNorm2d(out_channels)
                onelayer.add_module("bn_{0}".format(index),bn)
            if activation == 'leaky':
                activaten = nn.LeakyReLU(0.1)
                onelayer.add_module("activate_{0}".format(index),activaten)
        
        elif layer['type'] == 'shortcut':
            shortcut = EmptyLayer()
            onelayer.add_module("shortcut_{}".format(index), shortcut)

            
        elif layer['type'] == 'route':
            route = EmptyLayer()
            two = layer['layers'].split(',')
            try:
                index1 = int(two[1])
                index2 = int(two[0])
                
            except:
                index1 = 0
                index2 = int(two[0])
            
            if index1 == 0:
                out_channels = output_filters[index + index2]
            else:
                if index1 < 0:
                    out_channels = output_filters[index + index1] + output_filters[index2]
                else:
                    out_channels = output_filters[index + index2] + output_filters[index1]
            onelayer.add_module("route_{0}".format(index),route)    
            
        elif layer['type'] == 'upsamble':
            stride = int(layer['stride'])
            upsam = nn.Upsample(scale_factor = stride, mode = "bilinear")
            onelayer.add_module("upsamble_{0}".format(index),upsam)
            
        elif layer['type'] == 'yolo':
            mask = layer['mask'].split(',')
            mask = [int(i) for i in mask]
            archors = layer['anchors'].split('  ')
            archor = [archors[i] for i in mask]
            archor = [(int(i.split(',')[0]),int(i.split(',')[1])) for i in archor]
            yolo = DetectionLayer(archor)
            onelayer.add_module("yolo_{0}".format(index),yolo)
            
        net_list.append(onelayer)
        prev_filters = out_channels
        output_filters.append(out_channels)
        
    return net_list
                    
                    
                
            

In [86]:
out = create_net(blocks)

[(116, 90), (156, 198), (373, 326)]
[(30, 61), (62, 45), (59, 119)]
[(10, 13), (16, 30), (33, 23)]


In [63]:
for index,i in enumerate(out):
    print(index,i)

0 32
1 64
2 32
3 64
4 64
5 128
6 64
7 128
8 128
9 64
10 128
11 128
12 256
13 128
14 256
15 256
16 128
17 256
18 256
19 128
20 256
21 256
22 128
23 256
24 256
25 128
26 256
27 256
28 128
29 256
30 256
31 128
32 256
33 256
34 128
35 256
36 256
37 512
38 256
39 512
40 512
41 256
42 512
43 512
44 256
45 512
46 512
47 256
48 512
49 512
50 256
51 512
52 512
53 256
54 512
55 512
56 256
57 512
58 512
59 256
60 512
61 512
62 1024
63 512
64 1024
65 1024
66 512
67 1024
68 1024
69 512
70 1024
71 1024
72 512
73 1024
74 1024
75 512
76 1024
77 512
78 1024
79 512
80 1024
81 255
82 255
83 512
84 256
85 256
86 768
87 256
88 512
89 256
90 512
91 256
92 512
93 255
94 255
95 256
96 128
97 128
98 384
99 128
100 256
101 128
102 256
103 128
104 256
105 255
106 255


In [88]:
out

ModuleList(
  (0): Sequential(
    (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activate_0): LeakyReLU(negative_slope=0.1)
  )
  (1): Sequential(
    (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activate_1): LeakyReLU(negative_slope=0.1)
  )
  (2): Sequential(
    (conv_2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1), bias=False)
    (bn_2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activate_2): LeakyReLU(negative_slope=0.1)
  )
  (3): Sequential(
    (conv_3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn_3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activate_3): Lea

nn.MSELoss() 
nn.BCELoss()

In [None]:
class YOLOlayer(nn.Module):
    
    def __init__(self, anchors):
        super(YOLOlayer, self).__init__()
        self.anchors = anchors
        self.anchor_num = len(anchors)
        self.img_dim = 416
        self.classes = 80
        self.box_attrs = self.classes + 5
        self.ignore_thred = 0.5
        self.lambda = 1
        
        self.mse_loss = nn.MSELoss()
        self.bce_loss = nn.BCELoss()
        
    def forward(self,x,target = None):
        #共有三层yolo层 13*13 26*26 52*52 
        #假定此处为16*255*13*13
        batch_size = x.size(0)
        fmap_size = x.size(2)
        stride = self.img_dim / fmap_size
        #类型占位
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        #[16,255,13,13]-->[16,3,85,13,13]-->[16,3,13,13,85]
        predict = x.view(batch_size,self.anchor_num,self.box_attrs,fmap_size.fmap_size).permute(0,1,3,4,2).contiguous()
        
        #yolo层对输入进行映射变化
        x = torch.sigmoid(predic[..., 0])        #[16,3,13,13]  
        y = torch.sigmoid(predic[..., 1])       
        w = predic[..., 2]                         
        h = predic[..., 3]                         
        conf = torch.sigmoid(predic[..., 4])       
        class_conf = torch.sigmoid(predict[..., 5:]) 
        
        #13*13的网格
        
        grid_x = torch.linspace(0,fmap_size-1,fmap_size).repeat(fmap_size,1).repeat(self.anchor_num*batch_size,1,1).view(x.shape).type(FloatTensor)
        grid_y = torch.linspace(0,fmap_size-1,fmap_size).repeat(fmap_size,1).t().repeat(self.anchor_num*batch_size,1,1).view(y.shape).type(FloatTensor)
        anchors_scaled = [(anchor_x/stride,anchor_y/stride) for anchor_x,anchor_y in self.anchors]
        
        #取锚框坐标
        anchor_x = FloatTensor(anchors_scaled).index_select(1,LongTensor([0]))#[3,1]
        anchor_y = FloatTensor(anchors_scaled).index_select(1,LongTensor([1]))#[3,1]
        #锚框转换到了 [16,3,13,13]
        anchor_x = anchor_x.repeat(batch_size, 1).repeat(1, 1, fmap_size*fmap_size).view(w.shape)
        anchor_y = anchor_y.repeat(batch_size, 1).repeat(1, 1, fmap_size*fmap_size).view(w.shape)
        #存放预测结果的地方，尺寸为[16,3,13,13,4]
        predict_boxes = FloatTensor(predic[..., :4].shape)
        predict_boxes[...,0] = 
        predict_boxes[...,1] = 
        predict_boxes[...,2] =
        predict_boxes[...,3] =
        
        
        
        
        
        


        
        

In [111]:
anch = [(0,1),(0,2),(0,3)]

In [127]:
anch_y = FloatTensor(anch).index_select(1,LongTensor([1]))
anch_y.reapeat()

tensor([[ 1.],
        [ 2.],
        [ 3.]])

In [115]:
FloatTensor = torch.FloatTensor
LongTensor = torch.LongTensor

In [109]:
from __future__ import division

In [110]:
3/5

0.6

<img src="./box.png">

In [None]:
class Darknet(nn.Module):