In [1]:
from train import *

2.0.0-alpha0


In [44]:

def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    """Endlessly yield training batches built from ``annotation_lines``.

    Each yielded item is a flat tuple ``(image_data, *y_true)`` where
    ``image_data`` stacks ``batch_size`` augmented images and ``y_true`` is
    whatever ``preprocess_true_boxes`` returns for the stacked boxes.

    ``annotation_lines`` is shuffled in place every time the read position
    wraps back to the first line (including before the very first sample).
    """
    total = len(annotation_lines)
    cursor = 0
    while True:
        images, boxes = [], []
        for _ in range(batch_size):
            # Reshuffle at the start of every pass over the annotations.
            if cursor == 0:
                np.random.shuffle(annotation_lines)
            sample_image, sample_boxes = get_random_data(annotation_lines[cursor], input_shape, random=True)
            images.append(sample_image)
            boxes.append(sample_boxes)
            cursor = (cursor + 1) % total
        image_data = np.array(images)
        box_data = np.array(boxes)
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        # Flat tuple rather than the Keras-style ``([inputs...], dummy_target)`` pair.
        yield (image_data, *y_true)


def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):
    """Return a ``data_generator`` for the given arguments, or ``None``.

    ``None`` is returned when there are no annotation lines or when
    ``batch_size`` is zero or negative; otherwise every argument is forwarded
    unchanged to ``data_generator``.
    """
    if len(annotation_lines) == 0:
        return None
    if batch_size <= 0:
        return None
    return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)


In [60]:


def create_model(input_shape, anchors, num_classes, update_callback, load_pretrained=True, freeze_body=2,
                 weights_path='model_data/yolo_weights.h5', input_tensor=None):
    """Build the training model whose single output is the ``yolo_loss`` layer.

    Args:
        input_shape: ``(height, width)`` used to size the label placeholders.
        anchors: anchor collection; ``len(anchors) // 3`` anchors are used per scale.
        num_classes: number of classes; each label cell holds ``num_classes + 5`` values.
        update_callback: forwarded to ``yolo_loss`` through the Lambda arguments.
        load_pretrained: when true, load ``weights_path`` by layer name.
        freeze_body: ``1`` freezes the first 185 layers, ``2`` freezes all but
            the last three layers; any other value freezes nothing.
        weights_path: weights file read when ``load_pretrained`` is true.
        input_tensor: optional image input; a ``(None, None, 3)`` Input is
            created when this is ``None``.

    Returns:
        A ``Model`` taking ``[image, y_true_0, y_true_1, y_true_2]`` and
        producing the loss tensor.
    """
    K.clear_session()  # get a new session

    if input_tensor is None:
        image_input = Input(shape=(None, None, 3))
    else:
        image_input = input_tensor

    height, width = input_shape
    anchor_count = len(anchors)
    anchors_per_scale = anchor_count // 3

    # One label placeholder per output scale, coarsest (stride 32) first.
    y_true = []
    for stride in (32, 16, 8):
        label_shape = (height // stride, width // stride, anchors_per_scale, num_classes + 5)
        y_true.append(Input(shape=label_shape))

    model_body = yolo_body(image_input, anchors_per_scale, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(anchor_count, num_classes))

    if load_pretrained:
        model_body.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in (1, 2):
            layer_total = len(model_body.layers)
            # Index 0 -> freeze the first 185 layers; index 1 -> all but the 3 output layers.
            frozen_count = (185, layer_total - 3)[freeze_body - 1]
            for layer_index in range(frozen_count):
                model_body.layers[layer_index].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(frozen_count, layer_total))

    loss_arguments = {'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5,
                      'update_callback': update_callback}
    loss_layer = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', arguments=loss_arguments)
    model_loss = loss_layer([*model_body.output, *y_true])

    return Model([model_body.input, *y_true], model_loss)


def model_body(input_shape, anchors, num_classes, update_callback, load_pretrained=True, freeze_body=2,
               weights_path='model_data/yolo_weights.h5', input_tensor=None):
    """Build the bare YOLOv3 body (no loss layer), optionally pretrained and frozen.

    Unlike ``create_model``, this returns the network returned by ``yolo_body``
    itself so it can be called directly inside a custom training loop.

    Args:
        input_shape: accepted for signature parity with ``create_model``; not
            needed to build the body.
        anchors: anchor collection; ``len(anchors) // 3`` anchors are used per scale.
        num_classes: number of classes passed to ``yolo_body``.
        update_callback: accepted for signature parity with ``create_model``; unused here.
        load_pretrained: when true, load ``weights_path`` by layer name.
        freeze_body: ``1`` freezes the first 185 layers, ``2`` freezes all but
            the last three layers; any other value freezes nothing.
        weights_path: weights file read when ``load_pretrained`` is true.
        input_tensor: optional image input; a ``(None, None, 3)`` Input is
            created when this is ``None``. (Previously the body read an
            undefined ``input_tensor`` name and raised ``NameError``.)

    Returns:
        The model produced by ``yolo_body``.
    """
    K.clear_session()  # get a new session
    image_input = input_tensor if input_tensor is not None else Input(shape=(None, None, 3))
    num_anchors = len(anchors)

    # Named ``body`` so the local does not shadow this function's own name.
    body = yolo_body(image_input, num_anchors // 3, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        body.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze darknet53 body or freeze all but 3 output layers.
            num = (185, len(body.layers) - 3)[freeze_body - 1]
            for i in range(num):
                body.layers[i].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num, len(body.layers)))

    return body

def loss_wrapper(outputs, pred, anchors, num_classes):
    """Call ``yolo_loss`` on the concatenation of ``outputs`` and ``pred``.

    The two sequences are flattened into one list (``outputs`` first) and
    passed as the single positional argument, with ``ignore_thresh`` fixed at
    0.5. Note that the training loop in this notebook passes the label tensors
    ``[y1, y2, y3]`` as ``pred``.
    """
    loss_inputs = list(outputs) + list(pred)
    return yolo_loss(loss_inputs, anchors=anchors, num_classes=num_classes, ignore_thresh=0.5)


In [45]:
from train import get_classes, get_anchors
# --- Experiment configuration: file locations and split ratio ---
annotation_path = 'data/input.csv'
log_dir = 'logs/000/'
classes_path = 'model_data/stdogs_classes.txt'
anchors_path = 'model_data/yolo_anchors.txt'
val_split = 0.5  # fraction of the annotation lines reserved for validation

# Class names and anchors are read from the files configured above.
class_names = get_classes(classes_path)
num_classes = len(class_names)
anchors = get_anchors(anchors_path)

input_shape = (416, 416)  # multiple of 32, hw

# NOTE(review): UpdateCallBack is not imported by name in this notebook; presumably
# it arrives through `from train import *` - confirm.
update_callback = UpdateCallBack()

is_tiny_version = len(anchors) == 6  # True when the anchors file lists exactly 6 anchors



# model, outputs, y_true = create_model(input_shape, anchors, num_classes, update_callback,
#                      freeze_body=2,
#                      weights_path='model_data/darknet53.weights.h5')  # make sure you know what you freeze

# Read every annotation line, then shuffle with a fixed seed so the train/validation
# split below is the same on every run.
with open(annotation_path) as f:
    lines = f.readlines()
np.random.seed(10101)
np.random.shuffle(lines)
# Re-seed without a fixed value so later random calls are not tied to the split seed.
np.random.seed(None)
# The first num_train shuffled lines are used for training (see the dataset cell).
num_val = int(len(lines) * val_split)
num_train = len(lines) - num_val



In [46]:
import tensorflow as tf
batch_size = 2
generator = data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes)
# for x in generator:
#     print(x[1],*[a.shape for a in x[0]])

# Derive the element shapes from the configuration instead of hard-coding
# 416 / 13 / 26 / 52 / 3 / 7, so the dataset stays consistent when the input
# size, anchor file or class list changes. The label layout mirrors the
# placeholders built in create_model:
# (h // stride, w // stride, num_anchors // 3, num_classes + 5) for strides 32, 16, 8.
_image_h, _image_w = (int(dim) for dim in input_shape)
_anchors_per_scale = len(anchors) // 3
_label_depth = num_classes + 5
_label_shapes = tuple(
    tf.TensorShape([None, _image_h // stride, _image_w // stride, _anchors_per_scale, _label_depth])
    for stride in (32, 16, 8)
)

# The generator yields a flat 4-tuple: (images, y_true_0, y_true_1, y_true_2).
# `lambda: generator` hands back the same (infinite) generator object on every
# iteration of the dataset, so successive iterations continue where the
# previous one stopped.
data = tf.data.Dataset.from_generator(
    lambda: generator,
    output_types=(tf.float32,) * 4,
    output_shapes=(tf.TensorShape([None, _image_h, _image_w, 3]),) + _label_shapes,
)

# Smoke test: pull two batches to make sure the pipeline produces elements.
for x in data.take(2):
    assert x is not None

# # lambda: generator

In [47]:
# Build the YOLOv3 body used by the custom training loop. With freeze_body=2
# all layers except the last three are frozen (see model_body); the weights
# are loaded by layer name from the darknet53 file given here.
body = model_body(input_shape, anchors, num_classes, update_callback,
                     freeze_body=2,
                     weights_path='model_data/darknet53.weights.h5')

Load weights model_data/darknet53.weights.h5.
Freeze the first 264 layers of total 267 layers.


In [49]:
from yolo3.model import *
# Scratch cell: step-by-step evaluation of the loss terms for a single output
# scale (l = 0) on the most recent batch.
# NOTE(review): depends on `outputs`, `y1`, `y2`, `y3` left in the kernel by the
# training-loop cell, so it cannot run on a fresh kernel before that cell.
ignore_thresh=0.5
yolo_outputs = outputs
num_layers = len(anchors) // 3  # default setting
# yolo_outputs = args[:num_layers]
y_true = [y1,y2,y3]
# NOTE(review): the 2-layer mask [[3, 4, 5], [1, 2, 3]] reuses anchor 3 and never
# selects anchor 0 - [0, 1, 2] may have been intended; confirm.
anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
# NOTE: this rebinds the notebook-level `input_shape` (a (416, 416) tuple in the
# config cell) to a tensor: 32 x the spatial dims of the first output.
input_shape = K.cast(K.shape(outputs[0])[1:3] * 32, K.dtype(y_true[0]))
grid_shapes = [K.cast(K.shape(outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
loss = 0
# m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
# mf = K.cast(m, K.dtype(yolo_outputs[0]))
# Batch size taken from the Python constant rather than from the tensor shape.
m = batch_size
mf = 1.0 * batch_size
# Only the first scale is evaluated below.
l = 0

# Label layout used here: [..., 0:4] box, [..., 4:5] objectness, [..., 5:] class columns.
object_mask = y_true[l][..., 4:5]
true_class_probs = y_true[l][..., 5:]

grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                             anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
pred_box = K.concatenate([pred_xy, pred_wh])

# Darknet raw box to calculate loss.
raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])

raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
# Scale factor is 2 - w*h of the true box, so boxes with a smaller w*h get a larger weight.
box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

# Find ignore mask, iterate over each of batch.
ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
object_mask_bool = K.cast(object_mask, 'bool')
# NOTE(review): the next line is a bare expression in the middle of the cell; it
# is neither assigned nor displayed and looks like leftover debugging.
y_true[l][..., 4:5]
def loop_body(b, ignore_mask):
    # For batch item b: IoU of every predicted box against the true boxes of that
    # item, then mark predictions whose best IoU is below ignore_thresh with 1.
    true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
    iou = box_iou(pred_box[b], true_box)
    best_iou = K.max(iou, axis=-1)
    ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))

    return b + 1, ignore_mask

from tensorflow.python.ops import control_flow_ops
# Runs loop_body for b = 0 .. m - 1, i.e. once per batch item.
_, ignore_mask = control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
ignore_mask = ignore_mask.stack()
ignore_mask = K.expand_dims(ignore_mask, -1)
# K.binary_crossentropy is helpful to avoid exp overflow.
xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
                                                               from_logits=True)
wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
# Objectness loss: object cells always count; non-object cells count only where
# ignore_mask is 1.
confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
                  (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5],
                                                            from_logits=True) * ignore_mask

# NOTE(review): the class logits are sliced as 5:7, i.e. a fixed width of 2 -
# presumably num_classes for this dataset; confirm before changing the class list.
class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:7], from_logits=True)

# Append one extra column holding 1 - sum(class columns) so each label row sums to 1.
extend_true_class_probs = tf.concat(
    [true_class_probs, 1 - tf.reduce_sum(true_class_probs, axis=4, keepdims=True)], axis=4)

# class_center = tf.Variable("class_center")
# ignore_mask divided by its 1-norm (ord=1 over the whole tensor), subtracted from 1.
multi_mask = 1 - ignore_mask / tf.norm(ignore_mask, 1, keepdims=True)
# Softmax cross-entropy between the extended labels and the logits from column 7 onward.
multi_class_loss = K.squeeze(multi_mask, 4) * tf.nn.softmax_cross_entropy_with_logits(
    labels=extend_true_class_probs
    , logits=raw_pred[..., 7:]
)

# Reduce every term to a scalar and normalise by the batch size.
xy_loss = K.sum(xy_loss) / mf
wh_loss = K.sum(wh_loss) / mf
confidence_loss = K.sum(confidence_loss) / mf
class_loss = K.sum(class_loss) / mf
multi_class_loss = K.sum(multi_class_loss) / mf
# TODO
loss += (xy_loss + wh_loss + confidence_loss
         + class_loss
         + multi_class_loss
         )

# Display the ignore mask of batch item 0, anchor 0 as a grid.
# NOTE(review): `pd` is only imported by name in a later cell (`import pandas as pd`);
# unless `from train import *` provides it, this line fails on a fresh kernel.
pd.DataFrame(ignore_mask[0,:,:,0,0].numpy())

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
9,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [51]:
# Scratch cell: inspect the class columns of the first-scale labels.
# NOTE(review): `y1` comes from the training-loop cell further down, so this cell
# depends on that cell having been executed first.
true_class_probs = extend_true_class_probs = y1[..., 5:]
object_mask = y1[..., 4:5]
num_pos = tf.reduce_sum(true_class_probs)
# NOTE(review): bare expression; its value is discarded because it is not the
# last expression of the cell.
tf.reduce_sum(object_mask)
import pandas as pd
# Per-cell sum of the class columns for batch item 0, anchor index 2, shown as a grid.
pd.DataFrame(tf.reduce_sum(true_class_probs,4)[0,:,:,2].numpy())
# pos = tf.reduce_sum(true_class_probs, axis=4, )
# all = tf.cast(tf.reduce_prod(tf.shape(true_class_probs)), K.dtype(pos))
# weight = 1 - (1 - num_pos / all) * pos
# num_pos,pos,all,weight,object_mask

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
# Display `value`.
# NOTE(review): `value` is not assigned anywhere in the visible notebook; it appears
# to be leftover kernel state and will raise NameError on a fresh kernel - confirm.
value

<tf.Tensor: id=10589, shape=(2, 13, 13, 1024), dtype=float32, numpy=
array([[[[-1.04879430e-02, -1.07159540e-02,  1.75983775e-02, ...,
          -4.49262699e-03,  1.38306785e-02,  3.35401557e-02],
         [-7.14836130e-03, -9.53560323e-03, -4.06341255e-03, ...,
          -3.33916512e-03,  1.35371760e-02,  1.18266024e-01],
         [-7.12771853e-03, -5.43480227e-03, -5.20021934e-03, ...,
          -7.42414361e-03, -2.77819927e-05,  9.05145854e-02],
         ...,
         [-2.87397392e-03, -1.34260207e-02, -7.36235594e-03, ...,
          -8.08860455e-03, -1.67885015e-03,  6.29409105e-02],
         [ 1.15454709e-02, -9.37745441e-03, -5.56799723e-03, ...,
          -9.79582034e-03, -2.55709211e-03,  9.42437798e-02],
         [ 2.43433192e-02, -5.85773261e-03, -2.27801572e-03, ...,
          -9.87595599e-03, -5.69750369e-03, -2.84646849e-05]],

        [[-1.87905785e-02, -3.16805136e-03, -8.95383675e-03, ...,
           3.18535254e-03, -2.54888879e-03,  6.22500964e-02],
         [-1.548277

In [53]:
# Test input for `extends`: a Variable holding the class columns (index 5 onward) of y1.
# NOTE(review): `y1` is defined by the training-loop cell further down.
z=tf.Variable(y1[...,5:])

def extends(true_class_probs):
    """Return the arg-max index over the class columns plus one remainder column.

    A column equal to ``1 - sum(class columns)`` is appended along axis 4, and
    the index of the largest entry along that axis is returned.
    """
    assigned_mass = tf.reduce_sum(true_class_probs, axis=4, keepdims=True)
    remainder_column = 1 - assigned_mass
    extended = tf.concat([true_class_probs, remainder_column], axis=4)
    return tf.argmax(extended, axis=4)

# tf.reduce_sum(extends(z)[:][0] == 2 )
# z[...,0].assign(tf.ones_like(z[...,1])) 
# tf.reduce_sum(extends(z)[:][0] == 0 )

In [54]:
def moveing_avg(variable, value, update_weight=0.05):
    """Blend ``value`` into ``variable`` and return the result of ``variable.assign``.

    The assigned value is ``variable * (1 - update_weight) + update_weight * value``.
    """
    retained_part = variable * (1 - update_weight)
    incoming_part = update_weight * value
    return variable.assign(retained_part + incoming_part)


def update(center, value, keys):
    """Fold a per-index weighted mean of ``value`` into ``center``.

    ``extends(keys)`` yields an integer index per position; that index is cast
    to the dtype of ``value`` and multiplied with ``value`` (given an extra
    axis before its last dimension). The product is averaged over axes 0, 1
    and 2 and passed to ``moveing_avg`` together with ``center``.
    """
    index_per_cell = tf.expand_dims(extends(keys), 4)
    index_weights = tf.cast(index_per_cell, value[0].dtype)
    dims = tf.shape(value)
    # Insert a singleton axis just before the last dimension of ``value``.
    value_with_axis = tf.reshape(value, [*dims[:-1], 1, dims[-1]])
    weighted_mean = tf.reduce_mean(index_weights * value_with_axis, axis=[0, 1, 2])
    return moveing_avg(center, weighted_mean)


def update_centers():
    """Run ``update`` once per (center, feature output, class-label slice) triple.

    Reads notebook globals: ``outputs[3:6]`` supplies the values, columns 5:7
    of ``y1``/``y2``/``y3`` supply the keys, and the centers come from
    ``model.model_global.centers``. Returns ``None``.
    """
    feature_outputs = outputs[3:6]
    class_columns = [labels[..., 5:7] for labels in (y1, y2, y3)]
    centers = model.model_global.centers
    for center, features, columns in zip(centers, feature_outputs, class_columns):
        update(center, features, columns)

In [59]:
# Create a summary writer under a run directory named after the current timestamp
# and install it as the default writer.
# NOTE(review): `datetime` is not imported by name in this notebook; presumably it
# arrives through `from train import *` - confirm.
logdir = "logs/variables/" + datetime.now().strftime("%Y%m%d-%H%M%S")
from tensorflow.summary import create_file_writer
file_writer = create_file_writer(logdir + "/metrics")
file_writer.set_as_default()


In [62]:
# Custom eager training loop for `body`.
# `outputs`, `image`, `y1`, `y2`, `y3` and `loss` are deliberately left as notebook
# globals because the scratch/inspection cells read them after this cell has run.
outputs = None
num_epoch = 10
# The generator behind `data` loops forever, so iterating `data` directly never
# finishes the first epoch. Bound every epoch to one pass over the training lines.
steps_per_epoch = max(1, num_train // batch_size)
optimizer = tf.keras.optimizers.Adam()
for epoch in range(num_epoch):
    for x in data.take(steps_per_epoch):
        image, y1, y2, y3 = x
        # Only the forward pass and the loss need to be recorded on the tape.
        # NOTE(review): body(image) is called without a `training` flag - confirm
        # whether training=True is wanted for the unfrozen layers.
        with tf.GradientTape() as tape:
            outputs = body(image)
            loss = loss_wrapper(outputs, [y1, y2, y3], anchors, num_classes)

        # Differentiate and apply the update outside the recording context, using
        # the same variable list for both calls.
        grads = tape.gradient(loss, body.trainable_variables)
        optimizer.apply_gradients(zip(grads, body.trainable_variables))

        print(loss)
#         update_centers()
        
        
        

('[xy_loss', 0) (' wh_loss', 0) (' confidence_loss', 7372.75781) (' class_loss', 0) (' multi_class_loss]', 8966.92383)
tf.Tensor(21614.002, shape=(), dtype=float32)
('[xy_loss', 0) (' wh_loss', 0) (' confidence_loss', 7362.88867) (' class_loss', 0) (' multi_class_loss]', 8955.56543)
tf.Tensor(21642.232, shape=(), dtype=float32)
('[xy_loss', 0) (' wh_loss', 0) (' confidence_loss', 7372.20605) (' class_loss', 0) (' multi_class_loss]', 8964.34766)


KeyboardInterrupt: 