In [1]:
import torch.nn.functional as F

from utils.google_utils import *
from utils.parse_config import *
from utils.utils import *

ONNX_EXPORT = False

In [88]:
from models import *

In [8]:
# Notebook-wide settings shared by the cells below
path = 'cfg/fashion.cfg'  # model configuration file that gets parsed
img_size = (416, 416)  # forwarded as the img_size argument when modules are built
arc = 'default'  # architecture tag checked by the YOLO layer and bias initialisation

# parse_model_cfg(path) in utils.parse_config

In [2]:
import numpy as np

# Read the yolo-v3 layer configuration file and split it into raw text lines.
# The context manager closes the file handle as soon as the text has been read;
# `file` stays bound (to the closed handle) for any cell that still refers to it.
with open(path, 'r') as file:
    lines = file.read().split('\n')

In [3]:
lines

['[net]',
 '# Testing',
 '#batch=1',
 '#subdivisions=1',
 '# Training',
 'batch=64',
 'subdivisions=4',
 'width=416',
 'height=416',
 'channels=3',
 'momentum=0.9',
 'decay=5e-5',
 'angle=0',
 'saturation = 1.5',
 'exposure = 1.5',
 'hue=.1',
 '',
 'learning_rate=1e-4',
 'burn_in=1000',
 'max_batches = 500200',
 'policy=steps',
 'steps=400000,450000',
 'scales=.1,.1',
 '',
 '[convolutional]',
 'batch_normalize=1',
 'filters=32',
 'size=3',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '',
 '# Downsample',
 '',
 '[convolutional]',
 'batch_normalize=1',
 'filters=64',
 'size=3',
 'stride=2',
 'pad=1',
 'activation=leaky',
 '',
 '[convolutional]',
 'batch_normalize=1',
 'filters=32',
 'size=1',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '',
 '[convolutional]',
 'batch_normalize=1',
 'filters=64',
 'size=3',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '',
 '[shortcut]',
 'from=-3',
 'activation=linear',
 '',
 '# Downsample',
 '',
 '[convolutional]',
 'batch_normalize=1',
 'filters=128',


In [4]:
# Keep only the lines that are non-empty and not commented out with '#'
lines = [entry for entry in lines if entry and not entry.startswith('#')]
lines

['[net]',
 'batch=64',
 'subdivisions=4',
 'width=416',
 'height=416',
 'channels=3',
 'momentum=0.9',
 'decay=5e-5',
 'angle=0',
 'saturation = 1.5',
 'exposure = 1.5',
 'hue=.1',
 'learning_rate=1e-4',
 'burn_in=1000',
 'max_batches = 500200',
 'policy=steps',
 'steps=400000,450000',
 'scales=.1,.1',
 '[convolutional]',
 'batch_normalize=1',
 'filters=32',
 'size=3',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=64',
 'size=3',
 'stride=2',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=32',
 'size=1',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=64',
 'size=3',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '[shortcut]',
 'from=-3',
 'activation=linear',
 '[convolutional]',
 'batch_normalize=1',
 'filters=128',
 'size=3',
 'stride=2',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=64',
 'size=1',
 'stride=1',
 'pad

In [5]:
# Remove leading and trailing whitespace from every remaining line
lines = [entry.strip() for entry in lines]
lines

['[net]',
 'batch=64',
 'subdivisions=4',
 'width=416',
 'height=416',
 'channels=3',
 'momentum=0.9',
 'decay=5e-5',
 'angle=0',
 'saturation = 1.5',
 'exposure = 1.5',
 'hue=.1',
 'learning_rate=1e-4',
 'burn_in=1000',
 'max_batches = 500200',
 'policy=steps',
 'steps=400000,450000',
 'scales=.1,.1',
 '[convolutional]',
 'batch_normalize=1',
 'filters=32',
 'size=3',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=64',
 'size=3',
 'stride=2',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=32',
 'size=1',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=64',
 'size=3',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '[shortcut]',
 'from=-3',
 'activation=linear',
 '[convolutional]',
 'batch_normalize=1',
 'filters=128',
 'size=3',
 'stride=2',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=64',
 'size=1',
 'stride=1',
 'pad

In [22]:
# Turn the cleaned cfg lines into a list of dicts, one per '[section]' block.
# Every value is kept as a string except 'anchors', which becomes an (N, 2) float array.
mdefs = []  # module definitions
for line in lines:
    if line.startswith('['):  # a '[name]' header marks the start of a new block
        mdefs.append({})
        mdefs[-1]['type'] = line[1:-1].rstrip()
        if mdefs[-1]['type'] == 'convolutional':
            mdefs[-1]['batch_normalize'] = 0  # pre-populate with zero (may be overwritten later)
    else:
        # Split on the first '=' only, so a value that itself contains '=' stays intact
        # instead of raising "too many values to unpack".
        key, val = line.split("=", 1)
        key = key.rstrip()

        if 'anchors' in key:
            # comma-separated numbers -> rows of (width, height) pairs
            mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2))  # np anchors
        else:
            mdefs[-1][key] = val.strip()

In [23]:
mdefs

[{'type': 'net',
  'batch': '64',
  'subdivisions': '4',
  'width': '416',
  'height': '416',
  'channels': '3',
  'momentum': '0.9',
  'decay': '5e-5',
  'angle': '0',
  'saturation': '1.5',
  'exposure': '1.5',
  'hue': '.1',
  'learning_rate': '1e-4',
  'burn_in': '1000',
  'max_batches': '500200',
  'policy': 'steps',
  'steps': '400000,450000',
  'scales': '.1,.1'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '32',
  'size': '3',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '64',
  'size': '3',
  'stride': '2',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '32',
  'size': '1',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '64',
  'size': '3',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'shortcut', 'from': '-3', 'activation': 

## return mdefs ...

## YOLOLayer

In [77]:
class YOLOLayer(nn.Module):
    """YOLO detection head: reshapes a raw prediction map and, outside training, decodes it.

    Args:
        anchors: sequence of (width, height) anchor pairs handled by this head.
        nc: number of classes.
        img_size: accepted for interface compatibility; not used by the constructor.
        yolo_index: accepted for interface compatibility; not used by the constructor.
        arc: architecture tag that selects how objectness/class scores are activated.
    """

    def __init__(self, anchors, nc, img_size, yolo_index, arc):
        super(YOLOLayer, self).__init__()

        self.arc = arc
        self.nc = nc  # number of classes
        self.na = len(anchors)  # number of anchors
        self.anchors = torch.Tensor(anchors)
        # Grid dimensions start at zero so that a size mismatch on the first
        # forward() call triggers create_grids.
        self.nx = 0
        self.ny = 0

    def forward(self, p, img_size, var=None):
        """Reshape the prediction map and, in inference mode, decode it.

        Args:
            p: prediction tensor viewed here as (bs, na * (nc + 5), ny, nx).
            img_size: forwarded to create_grids when the grid size changes.
            var: unused.

        Returns:
            In training mode, p rearranged to (bs, na, ny, nx, nc + 5).
            Otherwise a tuple (io, p) where io is the decoded output flattened to
            (bs, -1, nc + 5) and p is the rearranged raw prediction.
        """
        bs = p.shape[0]
        ny, nx = p.shape[-2], p.shape[-1]
        grid_changed = (self.nx, self.ny) != (nx, ny)
        if grid_changed:
            # Refreshes the grid attributes (grid_xy, anchor_wh, stride, nx, ny) on self
            create_grids(self, img_size, (nx, ny), p.device, p.dtype)

        # (bs, na * (nc + 5), ny, nx) -> (bs, na, ny, nx, nc + 5)
        p = p.view(bs, self.na, self.nc + 5, self.ny, self.nx)
        # permute() yields a non-contiguous view; contiguous() materialises the new layout
        p = p.permute(0, 1, 3, 4, 2).contiguous()

        if self.training:
            return p

        # Inference: decode a copy so the raw prediction can be returned alongside it
        io = p.clone()
        io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy  # xy
        io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh  # wh (yolo method)
        io[..., :4] *= self.stride  # scale xywh by the stride

        arc = self.arc
        if 'default' in arc:  # separate obj and cls
            io[..., 4:].sigmoid_()
        elif 'BCE' in arc:  # unified BCE
            io[..., 5:].sigmoid_()
            io[..., 4] = 1
        elif 'CE' in arc:  # unified CE (background + classes)
            io[..., 4:] = F.softmax(io[..., 4:], dim=4)
            io[..., 4] = 1

        if self.nc == 1:
            # single-class model https://github.com/ultralytics/yolov3/issues/235
            io[..., 5] = 1

        # flatten (bs, na, ny, nx, nc + 5) to (bs, na * ny * nx, nc + 5)
        return io.view(bs, -1, 5 + self.nc), p

## create_grids(self, img_size=416, ng=(13, 13), device='cpu', type=torch.float32)

In [76]:
def create_grids(self, img_size=416, ng=(13, 13), device='cpu', type=torch.float32):
    """Compute and store grid offsets and scaled anchors on `self`.

    Args:
        self: object exposing `anchors` (tensor of (width, height) pairs) and `na`
            (number of anchors); the computed attributes are written onto it.
        img_size: image size, either a single number (as in the default) or an
            iterable of sizes, in which case the largest entry is used.
        ng: (nx, ny) number of grid points along x and y.
        device: device the created tensors are moved to.
        type: dtype of the created grid and anchor tensors.

    Sets on `self`: img_size, stride, grid_xy, anchor_vec, anchor_wh, ng, nx, ny.
    """
    nx, ny = ng  # x and y grid size

    # The default img_size is a plain int, which max() cannot iterate over;
    # fall back to the scalar itself in that case.
    try:
        self.img_size = max(img_size)
    except TypeError:
        self.img_size = img_size
    self.stride = self.img_size / max(ng)

    # build xy offsets: grid_xy[0, 0, y, x] == (x, y)
    yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
    self.grid_xy = torch.stack((xv, yv), 2).to(device).type(type).view((1, 1, ny, nx, 2))

    # build wh gains: anchors expressed as a ratio of the stride
    self.anchor_vec = self.anchors.to(device) / self.stride
    self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2).to(device).type(type)
    self.ng = torch.Tensor(ng).to(device)
    self.nx = nx
    self.ny = ny

### Example

In [31]:
# Example values: a 13 x 13 grid laid over a 416-pixel image
img_size = 416
ng = (13, 13)
nx, ny = ng  # grid points along x and y
stride = img_size / max(ng)  # image pixels covered by one grid cell

In [33]:
yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) # (13, 13) grid coordinates: yv holds row indices, xv holds column indices

In [38]:
yv, xv

(tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
         [ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1],
         [ 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2],
         [ 3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3],
         [ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4],
         [ 5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5],
         [ 6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6],
         [ 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7],
         [ 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8],
         [ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9],
         [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
         [11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11],
         [12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12]]),
 tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
         [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
         [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,

In [39]:
grid_xy = torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)) # last axis (size 2) holds the x and y grid offsets

In [71]:
# Nine anchor boxes given as (width, height) pairs
anchors = np.array([
    [10, 13], [16, 30], [33, 23],
    [30, 61], [62, 45], [59, 119],
    [116, 90], [156, 198], [373, 326],
])

# Keep the first three pairs and convert them to a tensor
anchors = torch.Tensor(anchors[[0, 1, 2]])

In [72]:
anchors

tensor([[10., 13.],
        [16., 30.],
        [33., 23.]])

In [73]:
na = len(anchors)  # number of anchors

# Anchors expressed as a ratio of the stride, reshaped to (1, na, 1, 1, 2)
anchor_vec = anchors / stride
anchor_wh = anchor_vec.view(1, na, 1, 1, 2)
ng = torch.Tensor(ng)  # grid size as a tensor
# nx and ny keep the values assigned earlier; re-assigning them to themselves had no effect

## create_modules(module_defs, img_size, arc) in models.py

In [24]:
# Stand-in for parse_model_cfg(cfg). A shallow copy is taken so that removing the
# leading block from module_defs does not also mutate mdefs, which keeps this cell
# and the next one safe to re-run.
module_defs = list(mdefs)

In [26]:
# The first parsed block carries the network hyperparameters; take it off the layer list
hyperparams = module_defs.pop(0)

yolo_index = -1  # incremented each time a yolo block is encountered
routs = []  # list of layers which route to deeper layers
module_list = nn.ModuleList()
output_filters = [int(hyperparams['channels'])]  # seeded with the input channel count

In [27]:
hyperparams

{'type': 'net',
 'batch': '64',
 'subdivisions': '4',
 'width': '416',
 'height': '416',
 'channels': '3',
 'momentum': '0.9',
 'decay': '5e-5',
 'angle': '0',
 'saturation': '1.5',
 'exposure': '1.5',
 'hue': '.1',
 'learning_rate': '1e-4',
 'burn_in': '1000',
 'max_batches': '500200',
 'policy': 'steps',
 'steps': '400000,450000',
 'scales': '.1,.1'}

In [78]:
# Build one module per cfg block. `output_filters` records the output channel count of
# every layer (index 0 is the network input), `routs` collects the absolute indices of
# layers whose outputs are referenced by later route/shortcut layers.
for i, mdef in enumerate(module_defs):
    modules = nn.Sequential()  # container that runs its sub-modules in order

    if mdef['type'] == 'convolutional':
        bn = int(mdef['batch_normalize'])
        filters = int(mdef['filters'])
        kernel_size = int(mdef['size'])
        pad = (kernel_size - 1) // 2 if int(mdef['pad']) else 0
        modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1],
                                               out_channels=filters,
                                               kernel_size=kernel_size,
                                               stride=int(mdef['stride']),
                                               padding=pad,
                                               bias=not bn))  # no conv bias when BatchNorm follows
        if bn:
            modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.1))
        if mdef['activation'] == 'leaky':  # TODO: activation study https://github.com/ultralytics/yolov3/issues/441
            modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True))
            # modules.add_module('activation', nn.PReLU(num_parameters=1, init=0.10))
            # modules.add_module('activation', Swish())

    elif mdef['type'] == 'upsample':
        # `filters` is left at the previous layer's value
        modules = nn.Upsample(scale_factor=int(mdef['stride']), mode='nearest')

    elif mdef['type'] == 'route':  # nn.Sequential() placeholder for 'route' layer
        layers = [int(x) for x in mdef['layers'].split(',')]
        # Positive indices are shifted by one because output_filters[0] is the input channel count
        filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])
        # Non-positive indices are relative to the current layer; store absolute indices
        routs.extend([l if l > 0 else l + i for l in layers])
        # if mdef[i+1]['type'] == 'reorg3d':
        #     modules = nn.Upsample(scale_factor=1/float(mdef[i+1]['stride']), mode='nearest')  # reorg3d

    elif mdef['type'] == 'shortcut':  # nn.Sequential() placeholder for 'shortcut' layer
        filters = output_filters[int(mdef['from'])]
        layer = int(mdef['from'])
        routs.extend([i + layer if layer < 0 else layer])

    elif mdef['type'] == 'yolo':
        yolo_index += 1
        mask = [int(x) for x in mdef['mask'].split(',')]  # anchor mask
        modules = YOLOLayer(anchors=mdef['anchors'][mask],  # anchor list
                            nc=int(mdef['classes']),  # number of classes
                            img_size=img_size,  # (416, 416)
                            yolo_index=yolo_index,  # 0, 1 or 2
                            arc=arc)  # yolo architecture

        # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3)
        # This is best-effort: any failure is reported and the loop carries on.
        try:
            if arc == 'defaultpw' or arc == 'Fdefaultpw':  # default with positive weights
                b = [-4, -3.6]  # obj, cls
            elif arc == 'default':  # default no pw (40 cls, 80 obj)
                b = [-5.5, -4.0]
            elif arc == 'uBCE':  # unified BCE (80 classes)
                b = [0, -8.5]
            elif arc == 'uCE':  # unified CE (1 background + 80 classes)
                b = [10, -0.1]
            elif arc == 'Fdefault':  # Focal default no pw (28 cls, 21 obj, no pw)
                b = [-2.1, -1.8]
            elif arc == 'uFBCE' or arc == 'uFBCEpw':  # unified FocalBCE (5120 obj, 80 classes)
                b = [0, -6.5]
            elif arc == 'uFCE':  # unified FocalCE (64 cls, 1 background + 80 classes)
                b = [7.7, -1.1]

            bias = module_list[-1][0].bias.view(len(mask), -1)  # 255 to 3x85
            bias[:, 4] += b[0] - bias[:, 4].mean()  # obj
            bias[:, 5:] += b[1] - bias[:, 5:].mean()  # cls
            # bias = torch.load('weights/yolov3-spp.bias.pt')[yolo_index]  # list of tensors [3x85, 3x85, 3x85]
            module_list[-1][0].bias = torch.nn.Parameter(bias.view(-1))
            # utils.print_model_biases(model)
        except Exception as err:
            # Catch Exception rather than everything, so Ctrl-C / SystemExit still propagate,
            # and report the cause instead of hiding it.
            print('WARNING: smart bias initialization failure.', err)

    else:
        print('Warning: Unrecognized Layer Type: ' + mdef['type'])

    # Register module list and number of output filters.
    # For branches that do not assign `filters`, the value from the previous layer is recorded.
    module_list.append(modules)
    output_filters.append(filters)


In [79]:
module_list

ModuleList(
  (0): Sequential(
    (Conv2d): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (BatchNorm2d): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (1): Sequential(
    (Conv2d): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (BatchNorm2d): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (2): Sequential(
    (Conv2d): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (BatchNorm2d): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (3): Sequential(
    (Conv2d): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (BatchNorm2d): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine

## return module_list, routs

## get_yolo_layers(model)

In [82]:
def get_yolo_layers(model):
    """Return the indices of the entries in model.module_defs whose 'type' is 'yolo'."""
    yolo_indices = []
    for idx, mdef in enumerate(model.module_defs):
        if mdef['type'] == 'yolo':
            yolo_indices.append(idx)
    return yolo_indices  # e.g. [82, 94, 106] for yolov3

In [83]:
class Darknet(nn.Module):
    """YOLOv3 object detection model assembled from a parsed cfg file.

    Args:
        cfg: path of the model configuration file handed to parse_model_cfg.
        img_size: image size forwarded to create_modules.
        arc: architecture tag forwarded to create_modules.
    """

    def __init__(self, cfg, img_size=(416, 416), arc='default'):
        super(Darknet, self).__init__()

        self.module_defs = parse_model_cfg(cfg)
        self.module_list, self.routs = create_modules(self.module_defs, img_size, arc)
        self.yolo_layers = get_yolo_layers(self)

        # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
        self.version = np.array([0, 2, 5], dtype=np.int32)  # (int32) version info: major, minor, revision
        self.seen = np.array([0], dtype=np.int64)  # (int64) number of images seen during training

    def forward(self, x, var=None):
        """Run the input through every layer and collect the yolo layer outputs.

        Args:
            x: input tensor; its last two dimensions are passed to the yolo layers as img_size.
            var: unused.

        Returns:
            In training mode, the list of yolo layer outputs.
            Otherwise a tuple (torch.cat(io, 1), p), where io and p are the first and second
            elements of each yolo layer's output, gathered across the yolo layers.
        """
        img_size = x.shape[-2:]
        layer_outputs = []  # per-layer outputs; only layers listed in self.routs are kept
        output = []  # outputs of the yolo layers
        for i, (mdef, module) in enumerate(zip(self.module_defs, self.module_list)):
            mtype = mdef['type']
            if mtype in ['convolutional', 'upsample', 'maxpool']:
                x = module(x)
            elif mtype == 'route':
                layers = [int(s) for s in mdef['layers'].split(',')]
                if len(layers) == 1:
                    x = layer_outputs[layers[0]]
                else:
                    try:
                        x = torch.cat([layer_outputs[j] for j in layers], 1)
                    except Exception:  # apply stride 2 for darknet reorg layer
                        # Concatenation failed: halve the second source and retry.
                        # Catching Exception (not everything) lets Ctrl-C / SystemExit propagate.
                        layer_outputs[layers[1]] = F.interpolate(layer_outputs[layers[1]], scale_factor=[0.5, 0.5])
                        x = torch.cat([layer_outputs[j] for j in layers], 1)
                    # print(''), [print(layer_outputs[i].shape) for i in layers], print(x.shape)
            elif mtype == 'shortcut':
                # 'from' indexes layer_outputs as it stands before this layer is appended
                x = x + layer_outputs[int(mdef['from'])]
            elif mtype == 'yolo':
                x = module(x, img_size)
                output.append(x)
            # Keep the output only if a later layer refers to it; otherwise store a placeholder
            layer_outputs.append(x if i in self.routs else [])

        if self.training:
            return output

        else:
            io, p = list(zip(*output))  # inference output, training output
            return torch.cat(io, 1), p

    # for inference
    def fuse(self):
        """Fuse each Conv2d + BatchNorm2d pair and replace self.module_list with the result.

        For every nn.Sequential child that contains a BatchNorm2d, the module just before it
        and the BatchNorm2d are passed to torch_utils.fuse_conv_and_bn, and the Sequential is
        rebuilt from the fused module plus whatever followed the BatchNorm2d.
        """
        fused_list = nn.ModuleList()
        for a in list(self.children())[0]:
            if isinstance(a, nn.Sequential):
                for i, b in enumerate(a):
                    if isinstance(b, nn.modules.batchnorm.BatchNorm2d):
                        # fuse this bn layer with the previous conv2d layer
                        conv = a[i - 1]
                        fused = torch_utils.fuse_conv_and_bn(conv, b)
                        a = nn.Sequential(fused, *list(a.children())[i + 1:])
                        break
            fused_list.append(a)
        self.module_list = fused_list
        # model_info(self)  # yolov3-spp reduced from 225 to 152 layers

In [89]:
# Instantiate the network from the cfg file at `path` with the architecture tag `arc`
model = Darknet(path, arc=arc)

In [90]:
model

Darknet(
  (module_list): ModuleList(
    (0): Sequential(
      (Conv2d): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (BatchNorm2d): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): LeakyReLU(negative_slope=0.1, inplace=True)
    )
    (1): Sequential(
      (Conv2d): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (BatchNorm2d): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): LeakyReLU(negative_slope=0.1, inplace=True)
    )
    (2): Sequential(
      (Conv2d): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (BatchNorm2d): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): LeakyReLU(negative_slope=0.1, inplace=True)
    )
    (3): Sequential(
      (Conv2d): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      

In [94]:
model.named_parameters()

<generator object Module.named_parameters at 0x7fc1b018b4c0>