In [10]:
!pwd

/Users/SheepLi/Desktop/yolo-v4-tf.keras


In [1]:
import numpy as np
import tensorflow as tf
from pprint import pprint

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
from tensorflow.keras import layers, regularizers, initializers, activations, models
import tensorflow.keras.backend as K

In [3]:
def mish(x):
    return x*activations.tanh(K.softplus(x))

In [4]:
def conv(x, filters, kernel_size, downsampling=False, activation='leaky', batch_norm=True):
    if downsampling:
        x = layers.ZeroPadding2D(padding=((1, 0), (1, 0)))(x) # top & left padding
        padding = 'valid'
        strides = 2
    else:
        padding = 'same'
        strides = 1
    x = layers.Conv2D(filters, 
                      kernel_size, 
                      strides=strides, 
                      padding=padding, 
                      use_bias=not batch_norm, 
                      kernel_regularizer=regularizers.l2(0.0005),
                      kernel_initializer=initializers.RandomNormal(stddev=0.01), 
                      bias_initializer=initializers.Zeros())(x)
    if batch_norm:
        x = layers.BatchNormalization()(x)
    if activation == 'mish':
        x = mish(x)
    elif activation == 'leaky':
        x = layers.LeakyReLU(alpha=0.1)(x)
    return x

'''
filters1: 1x1, 
filters2: 3x3
'''
def residual_block(x, filters1, filters2, activation='leaky'):
    y = conv(x, filters1, kernel_size=1, activation=activation)
    y = conv(y, filters2, kernel_size=3, activation=activation)
    return layers.Add()([x, y])

'''
Cross Stage Partial Network (CSPNet)
transition_bottleneck_dims: 1x1 bottleneck
output_dims: 3x3
'''
def csp_block(x, residual_out, repeat, residual_bottleneck=False):
    route = x
    route = conv(route, residual_out, 1, activation="mish")
    x = conv(x, residual_out, 1, activation="mish")
    for i in range(repeat):
        x = residual_block(x, 
                           residual_out // 2 if residual_bottleneck else residual_out, 
                           residual_out, 
                           activation="mish")
    x = conv(x, residual_out, 1, activation="mish")

    x = layers.Concatenate()([x, route])
    return x

def darknet53(x):
    x = conv(x, 32, 3)
    x = conv(x, 64, 3, downsampling=True)

    for i in range(1):
        x = residual_block(x, 32, 64)
    x = conv(x, 128, 3, downsampling=True)

    for i in range(2):
        x = residual_block(x, 64, 128)
    x = conv(x, 256, 3, downsampling=True)

    for i in range(8):
        x = residual_block(x, 128, 256)
    route_1 = x
    x = conv(x, 512, 3, downsampling=True)

    for i in range(8):
        x = residual_block(x, 256, 512)
    route_2 = x
    x = conv(x, 1024, 3, downsampling=True)

    for i in range(4):
        x = residual_block(x, 512, 1024)

    return route_1, route_2, x

def cspdarknet53(input):
    x = conv(input, 32, 3)
    x = conv(x, 64, 3, downsampling=True)
    
    x = csp_block(x, residual_out=64, repeat=1, residual_bottleneck=True)
    x = conv(x, 64, 1, activation='mish')
    x = conv(x, 128, 3, activation='mish', downsampling=True)
    
    x = csp_block(x, residual_out=64, repeat=2)
    x = conv(x, 128, 1, activation='mish')
    x = conv(x, 256, 3, activation='mish', downsampling=True)
    
    x = csp_block(x, residual_out=128, repeat=8)
    x = conv(x, 256, 1, activation='mish')
    route1 = x
    x = conv(x, 512, 3, activation='mish', downsampling=True)

    x = csp_block(x, residual_out=256, repeat=8)
    x = conv(x, 512, 1, activation='mish')
    route2 = x
    x = conv(x, 1024, 3, activation='mish', downsampling=True)
    
    x = csp_block(x, residual_out=512, repeat=4)

    x = conv(x, 1024, 1, activation="mish")
    x = conv(x, 512, 1)
    x = conv(x, 1024, 3)
    x = conv(x, 512, 1)
    
    x = layers.Concatenate()([
        layers.MaxPooling2D(pool_size=13, strides=1, padding='same')(x),
        layers.MaxPooling2D(pool_size=9, strides=1, padding='same')(x),
        layers.MaxPooling2D(pool_size=5, strides=1, padding='same')(x),
        x
    ])
    x = conv(x, 512, 1)
    x = conv(x, 1024, 3)
    x = conv(x, 512, 1)
    return models.Model(input, [route1, route2, x])
#     return route1, route2, x


def yolov4(x, num_classes):
    cspdarknet = cspdarknet53(x)
    route1, route2, conv_output = cspdarknet.output

    route = conv_output
    x = conv(conv_output, 256, 1)
    x = layers.UpSampling2D()(x)
    route2 = conv(route2, 256, 1)
    x = layers.Concatenate()([route2, x])

    x = conv(x, 256, 1)
    x = conv(x, 512, 3)
    x = conv(x, 256, 1)
    x = conv(x, 512, 3)
    x = conv(x, 256, 1)

    route2 = x
    x = conv(x, 128, 1)
    x = layers.UpSampling2D()(x)
    route1 = conv(route1, 128, 1)
    x = layers.Concatenate()([route1, x])
    
    x = conv(x, 128, 1)
    x = conv(x, 256, 3)
    x = conv(x, 128, 1)
    x = conv(x, 256, 3)
    x = conv(x, 128, 1)

    route1 = x
    x = conv(x, 256, 3)
    conv_sbbox = conv(x, 3*(num_classes+5), 1, activation=None, batch_norm=False)

    x = conv(route1, 256, 3, downsampling=True)
    x = layers.Concatenate()([x, route2])
    
    x = conv(x, 256, 1)
    x = conv(x, 512, 3)
    x = conv(x, 256, 1)
    x = conv(x, 512, 3)
    x = conv(x, 256, 1)

    route2 = x
    x = conv(x, 512, 3)
    conv_mbbox = conv(x, 3*(num_classes+5), 1, activation=None, batch_norm=False)

    x = conv(route2, 512, 3, downsampling=True)
    x = layers.Concatenate()([x, route])
    
    x = conv(x, 512, 1)
    x = conv(x, 1024, 3)
    x = conv(x, 512, 1)
    x = conv(x, 1024, 3)
    x = conv(x, 512, 1)

    x = conv(x, 1024, 3)
    conv_lbbox = conv(x, 3*(num_classes+5), 1, activation=None, batch_norm=False)

    return [conv_sbbox, conv_mbbox, conv_lbbox]

def decode(feature_maps, output_size, num_classes, strides, anchors, i, xyscale):
    conv_output = feature_maps
    XYSCALE = xyscale
    STRIDES = strides
    ANCHORS = anchors
    print(feature_maps.shape)
    output_size = strides[i]
    feature_maps = tf.reshape(feature_maps,
                                 (tf.shape(feature_maps)[0], 
                                  output_size,
                                  output_size,
                                  3,
                                  5+num_classes)) # (batch_size, grid_size, grid_size, 3, 5+classes)
#     conv_output = tf.reshape(conv_output,
#                              (tf.shape(conv_output)[0], output_size, output_size, 3, 5 + num_classes))
    conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob = tf.split(feature_maps, 
                                                                          (2, 2, 1, num_classes),
                                                                         axis=-1)
    # (?, 16, 16, 3, 2) (?, 16, 16, 3, 2) (?, 16, 16, 3, 1) (?, 16, 16, 3, 15)
    print('conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob')
    print(conv_raw_dxdy.shape, conv_raw_dwdh.shape, conv_raw_conf.shape, conv_raw_prob.shape)
#     conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob = tf.split(conv_output, (2, 2, 1, num_classes),
#                                                                           axis=-1)
    # Create xy grid
    xy_grid = tf.meshgrid(tf.range(output_size), tf.range(output_size)) # (output_size, output_size) * 2
    xy_grid = tf.stack(xy_grid, axis=-1) # (grid x, grid y, 2)
    xy_grid = xy_grid[tf.newaxis, :, :, tf.newaxis, :] # (1, gx, gy, 1, 2)
    xy_grid = tf.tile(xy_grid, 
                      [tf.shape(feature_maps)[0], 1, 1, 3, 1]) # (batch_size, grid x, grid y, 3, 2)
    xy_grid = tf.cast(xy_grid, tf.float32)

    # Convert raw output to prediction
    pred_xy = ((tf.sigmoid(conv_raw_dxdy) * xyscale[i]) - 0.5 * (xyscale[i] - 1) + xy_grid) * strides[i] # (bs, grid_size, grid_size, 3, 2)
    pred_wh = (tf.exp(conv_raw_dwdh) * anchors[i]) # (bs, grid_size, grid_size, 3, 2)
    pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1) # (bs, grid_size, grid_size, 3, 4)

    pred_conf = tf.sigmoid(conv_raw_conf)
    pred_prob = tf.sigmoid(conv_raw_prob)

    pred_prob = pred_conf * pred_prob # (bs, 16, 16, 3, 4)
    return pred_xywh, pred_prob
    

In [5]:
input = layers.Input((128, 128, 3))
yolov4_output = yolov4(input, 15)
# model1 = models.Model(input, output)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [6]:
xyscale = [1.2, 1.1, 1.05] # cfg.YOLO.XYSCALE
anchors = [12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401] # utils.get_anchors(cfg.YOLO.ANCHORS)
anchors = np.array(anchors).reshape((3, 3, 2)) # (stage, bbox per grid, xy)
strides = [8, 16, 32]

output_tensors = []
for i, feature_map in enumerate(yolov4_output[:]):
    print('yolo fm shape: ', feature_map)
    bbox_tensor = decode(feature_maps=feature_map, 
                           output_size=16, 
                           num_classes=15, 
                           strides=strides, 
                           anchors=anchors, 
                           i=i, 
                           xyscale=xyscale)
    output_tensors.append(feature_map)
    output_tensors.append(bbox_tensor)
    
model_yolo = models.Model(input, output_tensors)

yolo fm shape:  Tensor("conv2d_93/BiasAdd:0", shape=(?, 16, 16, 60), dtype=float32)
(?, 16, 16, 60)
conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob
(?, 8, 8, 3, 2) (?, 8, 8, 3, 2) (?, 8, 8, 3, 1) (?, 8, 8, 3, 15)
yolo fm shape:  Tensor("conv2d_101/BiasAdd:0", shape=(?, 8, 8, 60), dtype=float32)
(?, 8, 8, 60)
conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob
(?, 16, 16, 3, 2) (?, 16, 16, 3, 2) (?, 16, 16, 3, 1) (?, 16, 16, 3, 15)
yolo fm shape:  Tensor("conv2d_109/BiasAdd:0", shape=(?, 4, 4, 60), dtype=float32)
(?, 4, 4, 60)
conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob
(?, 32, 32, 3, 2) (?, 32, 32, 3, 2) (?, 32, 32, 3, 1) (?, 32, 32, 3, 15)


In [7]:

len(model_yolo.layers), model_yolo.layers[93], model_yolo.layers[101], model_yolo.layers[109]
model_yolo.output

[<tf.Tensor 'conv2d_93/BiasAdd:0' shape=(?, 16, 16, 60) dtype=float32>,
 (<tf.Tensor 'concat:0' shape=(?, 8, 8, 3, 4) dtype=float32>,
  <tf.Tensor 'mul_73:0' shape=(?, 8, 8, 3, 15) dtype=float32>),
 <tf.Tensor 'conv2d_101/BiasAdd:0' shape=(?, 8, 8, 60) dtype=float32>,
 (<tf.Tensor 'concat_1:0' shape=(?, 16, 16, 3, 4) dtype=float32>,
  <tf.Tensor 'mul_77:0' shape=(?, 16, 16, 3, 15) dtype=float32>),
 <tf.Tensor 'conv2d_109/BiasAdd:0' shape=(?, 4, 4, 60) dtype=float32>,
 (<tf.Tensor 'concat_2:0' shape=(?, 32, 32, 3, 4) dtype=float32>,
  <tf.Tensor 'mul_81:0' shape=(?, 32, 32, 3, 15) dtype=float32>)]

In [8]:
# num = 0
# for i, l in enumerate(model_yolo.layers[-200:]):
#     params = l.count_params()
#     if params > 0 and 'conv2d' in l.name:
#         print(num, params, ' ---- ',l.name,)
#         num += 1
total = 0
for i in range(100, 110):
    l = model_yolo.get_layer(f'conv2d_{i}') if i > 0 else model_yolo.get_layer('conv2d')
    print(i, l.count_params(), l.name)
    total += l.count_params()
total

100 1179648 conv2d_100
101 30780 conv2d_101
102 1179648 conv2d_102
103 524288 conv2d_103
104 4718592 conv2d_104
105 524288 conv2d_105
106 4718592 conv2d_106
107 524288 conv2d_107
108 4718592 conv2d_108
109 61500 conv2d_109


18180216

In [9]:
model_yolo.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 128, 128, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 128, 128, 32) 128         conv2d[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu (LeakyReLU)         (None, 128, 128, 32) 0           batch_normalization[0][0]        
____________________________________________________________________________________________

In [None]:
def load_weights(model, weights_file, model_name='yolov4', is_tiny=False):
    if is_tiny:
        if model_name == 'yolov3':
            layer_size = 13
            output_pos = [9, 12]
        else:
            layer_size = 21
            output_pos = [17, 20]
    else:
        if model_name == 'yolov3':
            layer_size = 75
            output_pos = [58, 66, 74]
        else:
            layer_size = 110
            output_pos = [93, 101, 109]
    wf = open(weights_file, 'rb')
    major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)

    j = 0
    for i in range(layer_size):
        conv_layer_name = 'conv2d_%d' %i if i > 0 else 'conv2d'
        bn_layer_name = 'batch_normalization_%d' %j if j > 0 else 'batch_normalization'

        conv_layer = model.get_layer(conv_layer_name)
        filters = conv_layer.filters
        k_size = conv_layer.kernel_size[0]
        in_dim = conv_layer.input_shape[-1]

        if i not in output_pos:
            # darknet weights: [beta, gamma, mean, variance]
            bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
            # tf weights: [gamma, beta, mean, variance]
            bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
            bn_layer = model.get_layer(bn_layer_name)
            j += 1
        else:
            conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)

        # darknet shape (out_dim, in_dim, height, width)
        conv_shape = (filters, in_dim, k_size, k_size)
        conv_weights = np.fromfile(wf, dtype=np.float32, count=np.product(conv_shape))
        # tf shape (height, width, in_dim, out_dim)
        conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])

        if i not in output_pos:
            conv_layer.set_weights([conv_weights])
            bn_layer.set_weights(bn_weights)
        else:
            conv_layer.set_weights([conv_weights, conv_bias])

    assert len(wf.read()) == 0, 'failed to read all data'
    wf.close()

In [51]:
model = models.Model(input, output_tensors)

In [52]:
model.summary()

Model: "model_10"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
conv2d_518 (Conv2D)             (None, 128, 128, 32) 864         input_7[0][0]                    
__________________________________________________________________________________________________
batch_normalization_506 (BatchN (None, 128, 128, 32) 128         conv2d_518[0][0]                 
__________________________________________________________________________________________________
leaky_re_lu_156 (LeakyReLU)     (None, 128, 128, 32) 0           batch_normalization_506[0][0]    
___________________________________________________________________________________________

In [15]:
input = layers.Input((128, 128, 3))
output = yolov4(input, 15)
model1 = models.Model(input, output)

In [16]:
model1.outputs

[<tf.Tensor 'conv2d_281/BiasAdd:0' shape=(?, 16, 16, 60) dtype=float32>,
 <tf.Tensor 'conv2d_289/BiasAdd:0' shape=(?, 8, 8, 60) dtype=float32>,
 <tf.Tensor 'conv2d_297/BiasAdd:0' shape=(?, 4, 4, 60) dtype=float32>]

In [17]:
model1.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
conv2d_188 (Conv2D)             (None, 128, 128, 32) 864         input_4[0][0]                    
__________________________________________________________________________________________________
batch_normalization_185 (BatchN (None, 128, 128, 32) 128         conv2d_188[0][0]                 
__________________________________________________________________________________________________
leaky_re_lu_45 (LeakyReLU)      (None, 128, 128, 32) 0           batch_normalization_185[0][0]    
____________________________________________________________________________________________

In [100]:
input = layers.Input((128, 128, 3))
output = cspdarknet53(input)
model2 = models.Model(input, output)

In [101]:
model2.output

(<tf.Tensor 'mul_697:0' shape=(?, 16, 16, 256) dtype=float32>,
 <tf.Tensor 'mul_718:0' shape=(?, 8, 8, 512) dtype=float32>,
 <tf.Tensor 'leaky_re_lu_260/LeakyRelu:0' shape=(?, 4, 4, 512) dtype=float32>)

In [102]:
model2.summary()

Model: "model_11"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_21 (InputLayer)           [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
conv2d_942 (Conv2D)             (None, 128, 128, 32) 864         input_21[0][0]                   
__________________________________________________________________________________________________
batch_normalization_941 (BatchN (None, 128, 128, 32) 128         conv2d_942[0][0]                 
__________________________________________________________________________________________________
leaky_re_lu_253 (LeakyReLU)     (None, 128, 128, 32) 0           batch_normalization_941[0][0]    
___________________________________________________________________________________________

In [94]:
# tf.keras.utils.plot_model(model2, show_shapes=True, show_layer_names=True)

In [27]:
# x = tf.reshape(tf.constant([[0, 1, 2], [3, 4, 5], [7, 8, 9]]), ((1, 3, 3, 1)))
# x = layers.ZeroPadding2D(padding=((1, 0), (1, 0)))(x)
# pprint(tf.keras.backend.eval(x).reshape(4, 4))

array([[0, 0, 0, 0],
       [0, 0, 1, 2],
       [0, 3, 4, 5],
       [0, 7, 8, 9]], dtype=int32)
