In [56]:
import os
import matplotlib.pyplot as plt
import mxnet as mx
from mxnet.gluon import nn
from gluoncv.utils import viz
from mxnet import nd

In [57]:
class Mish(nn.HybridBlock):
    '''
    Mish activation function.

    .. math::
        mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x}))

    Softplus is evaluated through ``F.Activation`` with ``act_type='softrelu'``.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``nn.HybridBlock.__init__``.

    Shape:
        - Input: Arbitrary.
        - Output: Same shape as the input (the operation is element-wise).
    '''
    def __init__(self, **kwargs):
        super(Mish, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        # tanh(softplus(x)) acts as an element-wise gate on the input.
        gate = F.tanh(F.Activation(data=x, act_type='softrelu'))
        return x * gate

In [58]:
def ConvBlock(channels, kernel_size, strides, padding, use_bias=False, groups=1, activation='mish'):
    '''
    Build a [Conv2d ─ BN ─ ACTIVATION] stack as a HybridSequential.

    Parameters
    ----------
    channels : number
        Output channel count; coerced with ``int()`` before use.
    kernel_size, strides, padding, groups
        Passed straight through to ``nn.Conv2D``.
    use_bias : bool
        Whether the convolution has a bias term. BatchNorm is appended
        only when this is False.
    activation : str
        'mish' appends ``Mish()``, 'leaky' appends ``nn.LeakyReLU(0.1)``;
        any other value appends no activation layer.

    Returns
    -------
    nn.HybridSequential
        The assembled block.
    '''
    out_channels = int(channels)

    stack = nn.HybridSequential()
    conv = nn.Conv2D(
        out_channels,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        groups=groups,
        use_bias=use_bias,
    )
    stack.add(conv)

    # Normalisation is tied to the bias flag: bias-free convs get a BatchNorm.
    if not use_bias:
        stack.add(nn.BatchNorm(in_channels=out_channels))

    if activation == 'mish':
        stack.add(Mish())
    elif activation == 'leaky':
        stack.add(nn.LeakyReLU(0.1))

    return stack


class ResidualBlock(nn.HybridBlock):
    '''
    ResidualBlock
    [   ┌───────────────────────┐    ]
    [In ┴ Conv(1x1) ─ Conv(3x3) ┴ out]

    A 1x1 ConvBlock followed by a grouped 3x3 ConvBlock, with the block
    input added back onto the result.

    Parameters
    ----------
    channels : number
        Output channel count of both convolutions. The skip connection adds
        the input to the conv output, so the input is expected to have the
        same number of channels.
    groups : int, default 16
        Group count of the 3x3 convolution. The default reproduces the
        previously hard-coded value.
    '''
    def __init__(self, channels, groups=16):
        super(ResidualBlock, self).__init__()
        self.conv1 = ConvBlock(channels, kernel_size=1, strides=1, groups=1, padding=0)
        self.conv2 = ConvBlock(channels, kernel_size=3, strides=1, groups=groups, padding=1)

    def hybrid_forward(self, F, x):
        residual = self.conv2(self.conv1(x))
        # Identity shortcut: both convs use stride 1 with padding that keeps H and W.
        return residual + x

In [59]:
class CSP(nn.HybridBlock):
    '''
    Cross-stage-partial style stage built from ConvBlock and ResidualBlock.

    Forward pass:
        1. ``conv0``: 3x3, stride-2 ConvBlock producing ``channels`` maps.
        2. Main branch: ``conv1`` (1x1, half width) -> ``resblocks`` -> ``conv2`` (1x1).
        3. Shortcut branch: ``conv3`` (1x1, half width) applied to the output of step 1.
        4. Both branches are concatenated along dim=1 and fused by ``conv4``
           (1x1) back to ``channels`` maps.

    Parameters
    ----------
    channels : number
        Output channel count of the stage. The two branches each use half
        of it (rounded down).
    block_size : int, default 1
        Number of ResidualBlocks stacked in the main branch.

    Note
    ----
    ResidualBlock uses a 16-group 3x3 convolution by default, so the half
    width presumably has to be a multiple of 16 - verify against the
    Conv2D ``groups`` requirements before using other widths.
    '''
    def __init__(self, channels, block_size=1):
        super(CSP, self).__init__()
        # Channel counts are integers; compute the half width once instead of
        # passing float results of `channels/2` around.
        half_channels = int(channels) // 2

        self.conv0 = ConvBlock(channels, kernel_size=3, strides=2, padding=1)
        self.conv1 = ConvBlock(half_channels, kernel_size=1, strides=1, padding=0)
        self.resblocks = self.make_residual_blocks(half_channels, block_size)

        self.conv2 = ConvBlock(half_channels, kernel_size=1, strides=1, padding=0)
        self.conv3 = ConvBlock(half_channels, kernel_size=1, strides=1, padding=0)
        self.conv4 = ConvBlock(channels, kernel_size=1, strides=1, padding=0)

    def hybrid_forward(self, F, x):
        x = self.conv0(x)
        short_cut = x
        x = self.conv1(x)
        x = self.resblocks(x)
        x = self.conv2(x)
        short_cut = self.conv3(short_cut)
        # Merge the processed branch with the shortcut along the channel axis.
        x = F.concat(x, short_cut, dim=1)
        x = self.conv4(x)

        return x

    def make_residual_blocks(self, channels, block_size):
        '''Return a HybridSequential holding `block_size` ResidualBlock(channels) layers.'''
        layer = nn.HybridSequential()
        for _ in range(block_size):
            layer.add(ResidualBlock(channels))
        return layer

In [60]:
# Smoke test: push one random batch through a single CSP stage.
net = CSP(128, 2)
net.initialize()
x = nd.random.normal(shape=(1, 128, 304, 304))
y = net(x)
print('y: ', y.shape, type(y))
# The stride-2 conv halves H and W (304 -> 152); the last 1x1 conv outputs 128 channels.
assert y.shape == (1, 128, 152, 152), y.shape

y:  (1, 128, 152, 152) <class 'mxnet.ndarray.ndarray.NDArray'>


In [73]:
class DarkNet(nn.HybridBlock):
    '''
    Classification network: a 3x3 stem convolution, six CSP stages,
    global average pooling and a dense classifier.

    Parameters
    ----------
    num_classes : int, default 1000
        Number of units of the final Dense layer.
    input_size : int, default 416
        Stored on the instance as ``input_size``; not otherwise used by the
        code in this class.

    Attributes
    ----------
    layer_num : int
        Initialised to 0; not otherwise used by the code in this class.
    input_layer : nn.Conv2D
        32-channel, 3x3, stride-1, bias-free stem convolution.
    layer0 .. layer5 : CSP
        Stages with (channels, block_size) of (64, 2), (128, 4), (256, 8),
        (512, 8), (1024, 8) and (2048, 4).
    '''
    def __init__(self, num_classes=1000, input_size=416):
        super(DarkNet, self).__init__()
        self.layer_num = 0
        self.num_classes = num_classes
        self.input_size = input_size

        self.input_layer = nn.Conv2D(channels=32, kernel_size=3, strides=1, padding=1, use_bias=False)

        # (channels, block_size) per stage; registered as layer0 .. layer5 in order.
        stage_specs = ((64, 2), (128, 4), (256, 8), (512, 8), (1024, 8), (2048, 4))
        for index, (width, depth) in enumerate(stage_specs):
            setattr(self, 'layer%d' % index, CSP(width, depth))

        self.global_avg_pool = nn.GlobalAvgPool2D()
        self.fc = nn.Dense(self.num_classes)

    def hybrid_forward(self, F, x):
        features = self.input_layer(x)

        stages = (self.layer0, self.layer1, self.layer2,
                  self.layer3, self.layer4, self.layer5)
        for stage in stages:
            features = stage(features)

        pooled = self.global_avg_pool(features)
        return self.fc(pooled)

In [74]:
# Smoke test: run the full network on one random 416x416 RGB image.
net = DarkNet(num_classes=5, input_size=416)
net.initialize()
x = nd.random.normal(shape=(1, 3, 416, 416))
y = net(x)
print('y: ', y.shape, type(y))
# One row of class scores per sample: (batch, num_classes).
assert y.shape == (1, 5), y.shape

y:  (1, 5) <class 'mxnet.ndarray.ndarray.NDArray'>


In [75]:
# Print a per-layer table (layer type, output shape, parameter count) for `net`
# evaluated on the sample input `x` defined in the previous cell.
net.summary(x)

--------------------------------------------------------------------------------
        Layer (type)                                Output Shape         Param #
               Input                            (1, 3, 416, 416)               0
            Conv2D-1                           (1, 32, 416, 416)             864
            Conv2D-2                           (1, 64, 208, 208)           18432
         BatchNorm-3                           (1, 64, 208, 208)             256
              Mish-4                           (1, 64, 208, 208)               0
            Conv2D-5                           (1, 32, 208, 208)            2048
         BatchNorm-6                           (1, 32, 208, 208)             128
              Mish-7                           (1, 32, 208, 208)               0
            Conv2D-8                           (1, 32, 208, 208)            1024
         BatchNorm-9                           (1, 32, 208, 208)             128
             Mish-10        