## 실습 Setting
* YOLOv3 model을 config에서 parsing 및 생성하기 위한 세팅


In [None]:
# Clone the tutorial repository and switch into it so that the relative
# imports and paths used by later cells resolve from this directory.
# NOTE(review): assumes util.py, darknet.py and cfg/yolov3.cfg ship with this repo - confirm.
!git clone https://github.com/ayooshkathuria/YOLO_v3_tutorial_from_scratch.git
%cd YOLO_v3_tutorial_from_scratch

## Prerequisites
* Part 1 내용 숙지
* PyTorch에 대한 기본 지식 : nn.Module, nn.Sequential, torch.nn.parameter classes

In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable
import numpy as np
from util import * 
from darknet import EmptyLayer, DetectionLayer, Darknet

## Configuration File

cfg/yolov3.cfg 에 존재하는 block(or operator) 종류

: block [arguments, ...]
* convolutional [batch_normalize, filters, size, stride, pad, activation]
* shortcut [from, activation]
* upsample [stride]
* route [layers (from 1 or 2)] <- 2 이면 concat layer로 이해하면 됨
* yolo [mask, anchors, classes, num, jitter, ignore_thresh, truth_thresh, random]
* net [batch, subdivisions, width, height, channels, momentum, decay, angle, saturation, exposure, hue]

## Parsing the configuration file

### parse_cfg function

In [2]:
def parse_cfg(cfgfile):
    """
    Parse a Darknet-style configuration file into a list of blocks.

    A line starting with "[" opens a new block whose "type" is the text
    between the brackets. Every other content line must look like
    ``key=value`` and is stored, as strings, in the block currently open.

    Args:
        cfgfile: Path of the configuration file to read.

    Returns:
        A list of dicts in file order. Each dict holds a "type" entry plus one
        string entry per ``key=value`` line of that section. The block being
        built is always appended at the end, so a file without any content
        lines yields ``[{}]``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a content line is neither a header nor ``key=value``.
    """
    # 1. Read and clean the raw text.
    # The context manager closes the handle even if reading fails.
    with open(cfgfile, 'r') as file:
        lines = file.read().split('\n')                    # store the lines in a list

    # Strip BEFORE filtering: otherwise a whitespace-only line survives the
    # "empty" filter and an indented comment survives the "#" filter.
    lines = [x.strip() for x in lines]                     # get rid of fringe whitespace
    lines = [x for x in lines if x]                        # get rid of the empty lines
    lines = [x for x in lines if not x.startswith('#')]    # get rid of comments

    # 2. Group the cleaned lines into blocks.
    block = {}
    blocks = []

    for line in lines:
        if line.startswith("["):         # This marks the start of a new block
            if block:                    # A non-empty block holds the previous section
                blocks.append(block)     # add it to the blocks list
                block = {}               # re-init the block
            block["type"] = line[1:-1].rstrip()
        else:
            # Split on the first "=" only so values may themselves contain "=".
            key, value = line.split("=", 1)
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)

    return blocks

1. 불필요한 문자열 제거 및 정제

In [3]:
cfgfile = 'cfg/yolov3.cfg'

# Read the whole file inside a context manager so the handle is closed
# as soon as the text has been read.
with open(cfgfile, 'r') as file:
    lines = file.read().split('\n')          # store the lines in a list

# Strip first so whitespace-only lines and indented comments are caught
# by the two filters below.
lines = [x.strip() for x in lines]           # get rid of fringe whitespace
lines = [x for x in lines if len(x) > 0]     # get rid of the empty lines
lines = [x for x in lines if x[0] != '#']    # get rid of comments
lines

['[net]',
 'batch=1',
 'subdivisions=1',
 'width= 416',
 'height = 416',
 'channels=3',
 'momentum=0.9',
 'decay=0.0005',
 'angle=0',
 'saturation = 1.5',
 'exposure = 1.5',
 'hue=.1',
 'learning_rate=0.001',
 'burn_in=1000',
 'max_batches = 500200',
 'policy=steps',
 'steps=400000,450000',
 'scales=.1,.1',
 '[convolutional]',
 'batch_normalize=1',
 'filters=32',
 'size=3',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=64',
 'size=3',
 'stride=2',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=32',
 'size=1',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=64',
 'size=3',
 'stride=1',
 'pad=1',
 'activation=leaky',
 '[shortcut]',
 'from=-3',
 'activation=linear',
 '[convolutional]',
 'batch_normalize=1',
 'filters=128',
 'size=3',
 'stride=2',
 'pad=1',
 'activation=leaky',
 '[convolutional]',
 'batch_normalize=1',
 'filters=64',
 'size=1',
 'stride=1',


2. 각 block parsing하여 list에 정렬

In [4]:
# Walk through the cleaned lines and collect one dict per "[section]".
blocks = []
block = {}

for line in lines:
    if line[0] != "[":
        # Ordinary "key=value" line: store it in the block currently being built.
        key, value = line.split("=")
        block[key.rstrip()] = value.lstrip()
        continue

    # Section header: flush the block collected so far (if any) ...
    if block:
        blocks.append(block)
        block = {}
    # ... and start the next one, named after the text inside the brackets.
    block["type"] = line[1:-1].rstrip()

# The last block has no following header to flush it, so add it here.
blocks.append(block)

In [5]:
# Display the parsed blocks (one dict per section of the cfg file).
blocks

[{'type': 'net',
  'batch': '1',
  'subdivisions': '1',
  'width': '416',
  'height': '416',
  'channels': '3',
  'momentum': '0.9',
  'decay': '0.0005',
  'angle': '0',
  'saturation': '1.5',
  'exposure': '1.5',
  'hue': '.1',
  'learning_rate': '0.001',
  'burn_in': '1000',
  'max_batches': '500200',
  'policy': 'steps',
  'steps': '400000,450000',
  'scales': '.1,.1'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '32',
  'size': '3',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '64',
  'size': '3',
  'stride': '2',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '32',
  'size': '1',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '64',
  'size': '3',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'shortcut', 'from': '-3', 'activation'

## Creating the building blocks


### create_modules function

In [6]:
def create_modules(blocks):
    """
    Build the PyTorch modules described by a list of parsed cfg blocks.

    Args:
        blocks: List of block dicts (string values), e.g. the result of
            ``parse_cfg``. ``blocks[0]`` is taken as the network-level block;
            every following block becomes one ``nn.Sequential`` entry.

    Returns:
        A tuple ``(net_info, module_list)`` where ``net_info`` is ``blocks[0]``
        and ``module_list`` is an ``nn.ModuleList`` holding one
        ``nn.Sequential`` per block in ``blocks[1:]``, in order.

    Raises:
        KeyError: If a block lacks a key required for its type.
        ValueError: If a numeric field cannot be converted with ``int``.

    Notes:
        * For "route" blocks, ``block["layers"]`` is replaced in place by the
          list of comma-separated tokens (as before). The split is skipped
          when the value is already a list, so calling this function twice on
          the same ``blocks`` works.
        * Blocks of an unknown type yield an empty ``nn.Sequential``.
    """
    # 1. Network-level information (input / pre-processing settings).
    net_info = blocks[0]
    module_list = nn.ModuleList()
    # Input channel count comes from the net block; 3 is kept as the fallback.
    prev_filters = int(net_info.get("channels", 3))
    # Blocks that do not change the channel count reuse the previous value;
    # initialising here keeps `filters` bound even if the first block is one.
    filters = prev_filters
    output_filters = []   # output channel count of every block built so far

    # 2. Turn each block into torch modules according to its type.
    for index, block in enumerate(blocks[1:]):
        module = nn.Sequential()
        block_type = block["type"]

        # convolutional block: conv (+ batch norm) (+ leaky ReLU)
        if block_type == "convolutional":
            activation = block["activation"]
            # A missing key means "no batch norm"; malformed values still raise.
            batch_normalize = int(block.get("batch_normalize", 0))
            # The conv layer carries its own bias exactly when no batch norm follows.
            bias = not batch_normalize

            filters = int(block["filters"])
            padding = int(block["pad"])
            kernel_size = int(block["size"])
            stride = int(block["stride"])
            # pad != 0 requests "same"-style padding, otherwise none.
            pad = (kernel_size - 1) // 2 if padding else 0

            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=bias)
            module.add_module("conv_{0}".format(index), conv)

            if batch_normalize:
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{0}".format(index), bn)

            # Only "leaky" adds a module; other activations leave the output as is.
            if activation == "leaky":
                activn = nn.LeakyReLU(0.1, inplace=True)
                module.add_module("leaky_{0}".format(index), activn)

        # upsample block
        elif block_type == "upsample":
            stride = int(block["stride"])
            # Use the configured stride as scale factor instead of a fixed 2.
            upsampling = nn.Upsample(scale_factor=stride, mode="nearest")
            module.add_module("upsample_{}".format(index), upsampling)

        # route block: refers to one layer, or concatenates two
        elif block_type == "route":
            layers = block["layers"]
            if isinstance(layers, str):
                layers = layers.split(',')
                # Written back as a list, matching the previous behaviour.
                # NOTE(review): other code may read this list - confirm before removing.
                block["layers"] = layers

            start = int(layers[0])
            # A second entry is optional; 0 means "no second layer".
            end = int(layers[1]) if len(layers) > 1 else 0

            # Convert absolute (positive) layer numbers to offsets relative to this block.
            if start > 0:
                start = start - index
            if end > 0:
                end = end - index

            route = EmptyLayer()
            module.add_module("route_{0}".format(index), route)

            # Output channels: sum of both sources when two layers are given.
            if end < 0:
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]

        # shortcut block: placeholder layer, channel count unchanged
        elif block_type == "shortcut":
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(index), shortcut)

        # yolo block: detection layer with the anchors selected by "mask"
        elif block_type == "yolo":
            mask = [int(x) for x in block["mask"].split(",")]   # indices of the anchors to use
            anchors = [int(a) for a in block["anchors"].split(",")]
            # Pair the flat list up as (first, second) tuples, then keep the masked ones.
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in mask]
            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(index), detection)

        # Register the block and remember its output channel count.
        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)

    return (net_info, module_list)

1. network 관련 정보 추출

In [7]:
# State shared with the module-building loop in the following cells.
output_filters = []              # output channel count of each block built so far
prev_filters = 3                 # channel count fed into the first layer
module_list = nn.ModuleList()    # collects one module per block
net_info = blocks[0]             # first block: input / pre-processing settings

In [8]:
# Display the first block, which is the one stored as net_info.
blocks[0]

{'type': 'net',
 'batch': '1',
 'subdivisions': '1',
 'width': '416',
 'height': '416',
 'channels': '3',
 'momentum': '0.9',
 'decay': '0.0005',
 'angle': '0',
 'saturation': '1.5',
 'exposure': '1.5',
 'hue': '.1',
 'learning_rate': '0.001',
 'burn_in': '1000',
 'max_batches': '500200',
 'policy': 'steps',
 'steps': '400000,450000',
 'scales': '.1,.1'}

2. block type에 따라 arguments를 받아와서 torch 모듈로 생성

* convolutional
* upsample
* route
* shortcut
* yolo

In [9]:
# Display every block after the first one; these are the blocks the loop below iterates over.
blocks[1:]

[{'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '32',
  'size': '3',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '64',
  'size': '3',
  'stride': '2',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '32',
  'size': '1',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '64',
  'size': '3',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'shortcut', 'from': '-3', 'activation': 'linear'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '128',
  'size': '3',
  'stride': '2',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '64',
  'size': '1',
  'stride': '1',
  'pad': '1',
  'activation': 'leaky'},
 {'type': 'convolutional',
  'batch_normalize': '1',
  'filters': '128',
 

In [10]:
# Step-by-step version of the module-building loop.
# It appends to module_list / output_filters and updates prev_filters, so
# re-run the state-initialisation cell before running this cell again.

# Blocks that do not change the channel count reuse the previous value;
# binding it here keeps `filters` defined even if the first block is one.
filters = prev_filters

for index, block in enumerate(blocks[1:]):
    module = nn.Sequential()

    # convolutional block: conv (+ batch norm) (+ leaky ReLU)
    if block["type"] == "convolutional":
        activation = block["activation"]
        # A missing key means "no batch norm"; malformed values still raise.
        batch_normalize = int(block.get("batch_normalize", 0))
        # The conv layer carries its own bias exactly when no batch norm follows.
        bias = not batch_normalize

        # filter count, padding flag, kernel size and stride
        filters = int(block["filters"])
        padding = int(block["pad"])
        kernel_size = int(block["size"])
        stride = int(block["stride"])
        # pad != 0 requests "same"-style padding, otherwise none
        pad = (kernel_size - 1) // 2 if padding else 0

        # convolution layer
        conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=bias)
        module.add_module("conv_{0}".format(index), conv)
        # batch normalization layer
        if batch_normalize:
            bn = nn.BatchNorm2d(filters)
            module.add_module("batch_norm_{0}".format(index), bn)
        # activation: only "leaky" adds a module
        if activation == "leaky":
            activn = nn.LeakyReLU(0.1, inplace=True)
            module.add_module("leaky_{0}".format(index), activn)

    # upsample block
    elif block["type"] == "upsample":
        stride = int(block["stride"])
        # Use the configured stride as scale factor instead of a fixed 2.
        upsampling = nn.Upsample(scale_factor=stride, mode="nearest")
        module.add_module("upsample_{}".format(index), upsampling)

    # route block: refers to one layer, or concatenates two
    elif block["type"] == "route":
        layers = block["layers"]
        # Split only once so this cell can be re-run on the same blocks.
        if isinstance(layers, str):
            layers = layers.split(',')
            block["layers"] = layers
        # start of a route
        start = int(layers[0])
        # end, if there exists one (0 means "no second layer")
        end = int(layers[1]) if len(layers) > 1 else 0
        # Convert absolute (positive) layer numbers to relative offsets
        if start > 0:
            start = start - index
        if end > 0:
            end = end - index
        route = EmptyLayer()
        module.add_module("route_{0}".format(index), route)

        # output channels: sum of both sources when two layers are given
        if end < 0:
            filters = output_filters[index + start] + output_filters[index + end]
        else:
            filters = output_filters[index + start]

    # shortcut block: placeholder layer, channel count unchanged
    elif block["type"] == "shortcut":
        shortcut = EmptyLayer()
        module.add_module("shortcut_{}".format(index), shortcut)

    # yolo block: detection layer with the anchors selected by "mask"
    elif block["type"] == "yolo":
        mask = [int(x) for x in block["mask"].split(",")]   # indices of the anchors to use
        anchors = [int(a) for a in block["anchors"].split(",")]
        # Pair the flat list up as tuples, then keep only the masked ones
        anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
        anchors = [anchors[i] for i in mask]   # e.g. [(10, 13), (16, 30), (33, 23)]
        detection = DetectionLayer(anchors)
        module.add_module("Detection_{}".format(index), detection)

    # register the block and remember its output channel count
    module_list.append(module)
    prev_filters = filters
    output_filters.append(filters)

In [11]:

# Display the pair that create_modules returns: the net block and the module list built above.
net_info, module_list


({'type': 'net',
  'batch': '1',
  'subdivisions': '1',
  'width': '416',
  'height': '416',
  'channels': '3',
  'momentum': '0.9',
  'decay': '0.0005',
  'angle': '0',
  'saturation': '1.5',
  'exposure': '1.5',
  'hue': '.1',
  'learning_rate': '0.001',
  'burn_in': '1000',
  'max_batches': '500200',
  'policy': 'steps',
  'steps': '400000,450000',
  'scales': '.1,.1'},
 ModuleList(
   (0): Sequential(
     (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
     (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (leaky_0): LeakyReLU(negative_slope=0.1, inplace=True)
   )
   (1): Sequential(
     (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
     (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (leaky_1): LeakyReLU(negative_slope=0.1, inplace=True)
   )
   (2): Sequential(
     (conv_2): Conv2d(64, 32, kern

### Testing the code

In [12]:
# End-to-end check: parse the cfg file again (rebinding `blocks` to freshly
# parsed dicts) and print the (net_info, module_list) tuple built from it.
blocks = parse_cfg("cfg/yolov3.cfg")
print(create_modules(blocks))

({'type': 'net', 'batch': '1', 'subdivisions': '1', 'width': '416', 'height': '416', 'channels': '3', 'momentum': '0.9', 'decay': '0.0005', 'angle': '0', 'saturation': '1.5', 'exposure': '1.5', 'hue': '.1', 'learning_rate': '0.001', 'burn_in': '1000', 'max_batches': '500200', 'policy': 'steps', 'steps': '400000,450000', 'scales': '.1,.1'}, ModuleList(
  (0): Sequential(
    (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (leaky_0): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (1): Sequential(
    (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (leaky_1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (2): Sequential(
    (conv_2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    