## YOLOv2 detection training (Darknet-19)

In [17]:
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
import renom as rm
import tqdm
from PIL import Image, ImageDraw, ImageFont
from renom.cuda import set_cuda_active, release_mem_pool

from darknet19 import Darknet19Detection, Darknet19Classification
from utils import Yolov2Distributor, load_bbox, create_anchor

# Restrict CUDA to device 0, then ask ReNom to run on the GPU.
# NOTE(review): the environment variable is set after `renom.cuda` has been
# imported -- confirm it is still honoured at that point.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
set_cuda_active(True)

[0 1 2 3]


Reshape([[10., 10.],
         [10., 10.]], dtype=float32)

In [None]:
img_list, annotation_list = load_bbox("")

# Build the anchor file once, from every annotated box in the dataset.
# NOTE(review): `base_size` is not defined in any visible cell -- it must be
# set before this cell runs for the first time (i.e. when anchor.txt is absent).
if not os.path.exists("anchor.txt"):
    box_list = []
    size_list = []
    for anot in annotation_list:
        for a in anot:
            box_list.append(a['bndbox'])
            size_list.append(a['size'])
    with open("anchor.txt", "w") as writer:
        # Only columns 2 and 3 of each anchor are persisted.
        for a in create_anchor(box_list, size_list, base_size=base_size):
            writer.write("{},{}\n".format(a[2], a[3]))

# Always read the anchors back from the file so that `anchor` has the same
# two-column layout (indexed as a[0], a[1] by later cells) on the first run
# as on every subsequent run. Blank lines are ignored.
with open("anchor.txt", "r") as reader:
    anchor = [[float(v) for v in line.strip().split(",")]
              for line in reader if line.strip()]

In [None]:
def draw_box(draw, bbox, class_name=None, transform=True):
    """Draw a thick bounding box, optionally with a text label.

    Args:
        draw: Drawing context exposing ``rectangle`` and ``text`` (e.g. a
            ``PIL.ImageDraw.ImageDraw``). Its ``font`` attribute is set when
            a label is drawn.
        bbox: Four numbers. Interpreted as ``(center_x, center_y, width,
            height)`` when ``transform`` is true, otherwise as the corners
            ``(x1, y1, x2, y2)``.
        class_name: Label text. When ``None`` the box is drawn in green and
            no label is added; otherwise the box is red and the label is
            drawn on a filled red background at the box's top-left corner.
        transform: Whether ``bbox`` is in center/size form (see ``bbox``).

    Returns:
        None. The drawing context is modified in place.
    """
    font_path = '/usr/share/fonts/truetype/ubuntu-font-family/Ubuntu-R.ttf'
    if transform:
        cx, cy, cw, ch = bbox
        x1 = cx - cw/2.
        y1 = cy - ch/2.
        x2 = cx + cw/2.
        y2 = cy + ch/2.
    else:
        x1, y1, x2, y2 = bbox

    labelled = class_name is not None
    color = (255, 0, 0) if labelled else (0, 255, 0)
    # Five copies of the rectangle, each offset by one pixel along the
    # diagonal, give the outline a visibly thicker stroke.
    for j in range(-2, 3):
        draw.rectangle([x1 + j,
                        y1 + j,
                        x2 + j,
                        y2 + j], outline=color)
    if not labelled:
        return

    try:
        font = ImageFont.truetype(font_path, 25)
    except (IOError, OSError):
        # The hard-coded font is missing on this machine; still draw a label.
        font = ImageFont.load_default()
    draw.font = font

    # `getsize` was removed in Pillow 10; `getbbox` is its replacement. The
    # right/bottom edges measured from the drawing origin are used as the
    # background extent so the filled rectangle covers the rendered text.
    if hasattr(font, "getbbox"):
        _, _, text_w, text_h = font.getbbox(class_name)
    else:
        text_w, text_h = font.getsize(class_name)

    text_x, text_y = x1, y1
    draw.rectangle([text_x, text_y, text_x+text_w, text_y+text_h], outline=(255, 0, 0), fill=(255, 0, 0))
    draw.text((text_x, text_y), class_name, (0, 0, 0))


In [None]:
dist = Yolov2Distributor(img_list, annotation_list)

# Sanity check: take a single sample from the distributor, draw every
# labelled box on it and display the result.
for x, y in dist.detection_batch(1, img_size=(224, 224)):
    pixels = x[0].transpose(1, 2, 0).astype(np.uint8)
    img = Image.fromarray(pixels)
    draw = ImageDraw.Draw(img)
    # Positions whose channel-0 value is positive are the ones that get a box.
    keep = np.where(y[0, 0, :, :] > 0)
    for row, col in zip(keep[0], keep[1]):
        lb = y[0, :, row, col]
        # Channels 1..4 are passed to draw_box as a center/size box.
        draw_box(draw, (lb[1], lb[2], lb[3], lb[4]))
    plt.imshow(img)
    plt.show()
    break  # one sample is enough for the preview

In [None]:
# Optional: initialise the detector's backbone from a trained classifier.
# classifier = Darknet19Classification()
# classifier.load("result/model_006.h5")
model = Darknet19Detection()
model.save("yolov2_2.h5")
# model._base = classifier._base

# NOTE(review): arguments are presumably (learning rate, momentum) -- confirm
# against the ReNom Sgd signature.
opt = rm.Sgd(0.0001, 0.9)
# Learning-rate table with 7 + 61 + 40 = 108 entries.
lr_list = [1e-6]*7 + [1e-3]*61 + [1e-4]*40
batch = 16
# Number of batches per epoch, rounded UP. The previous
# `int(np.ceil(len(dist)//batch))` floored before the ceil, which dropped the
# final partial batch from the count and yielded 0 for small datasets.
batch_loop = (len(dist) + batch - 1) // batch

In [None]:
# np.array([320, 320], dtype=np.int)
# np.array([352, 352], dtype=np.int)
# np.array([384, 384], dtype=np.int)
# np.array([416, 416], dtype=np.int)
# np.array([448, 448], dtype=np.int)
# np.array([480, 480], dtype=np.int)
# np.array([512, 512], dtype=np.int)
# np.array([544, 544], dtype=np.int)
# np.array([576, 576], dtype=np.int)

In [None]:
# Train at several square input resolutions in turn, 101 epochs each.
# NOTE(review): `base_size` is not defined in any visible cell -- confirm it
# is set before this cell runs.
for size in [320, 352, 384, 416, 448]:
    release_mem_pool()
    # Everything below depends only on the resolution, so compute it once
    # instead of once per epoch / per batch.
    img_size = (size, size)
    scale_w = size/base_size[0]
    scale_h = size/base_size[1]
    scaled_anchor = [[a[0]*scale_w, a[1]*scale_h] for a in anchor]

    for e in range(0, 101):
        display_loss = 0
        bar = tqdm.tqdm(range(batch_loop))
        model.set_models(inference=False)
        for i, (x, y) in enumerate(dist.detection_batch(batch, img_size=img_size)):
            if e == 0:
                # Linear warm-up from 1e-8 towards 1e-6 across the first epoch.
                opt._lr = 1e-8 + (1e-6 - 1e-8)/batch_loop * i
            else:
                opt._lr = lr_list[e]
            with model.train():
                # Pixel values are rescaled from [0, 255] to [-1, 1].
                z = model(x/255.*2 - 1)
                target, mask = model.build_target(z.as_ndarray(), y,
                                                  scaled_anchor, img_size)
                dif = (z - target)*mask
                # Normalise by the number of positive entries in channel 0 of
                # the labels; clamp to 1 so a batch without positives does
                # not divide by zero and poison the weights with inf/NaN.
                n_positive = max(np.sum(y[:, 0] > 0), 1)
                loss = rm.sum(dif*dif)*0.5/n_positive
                reg_loss = loss + model.weight_decay()

            reg_loss.grad().update(opt)
            batch_loss = loss.as_ndarray()[0]
            display_loss += batch_loss
            bar.set_description("epoch:{:03d} loss:{:5.3f}".format(e, batch_loss))
            bar.update(1)
        display_loss /= (i+1)
        bar.set_description("epoch:{:03d} avg loss:{:5.3f}".format(e, display_loss))
        bar.update(0)
        bar.close()

        # Visualise predictions only every 5th epoch, and never on epoch 0.
        if e == 0 or e % 5 != 0:
            continue
        model.set_models(inference=True)
        # Re-run the last training batch in inference mode.
        z = model(x/255.*2 - 1)
        b_list, s_list = model.transform_to_original_scale(z.as_ndarray(), scaled_anchor, img_size)
        for i, x_img in enumerate(x):
            img = Image.fromarray(x_img.transpose(1, 2, 0).astype(np.uint8))
            draw = ImageDraw.Draw(img)
            for box, score in zip(b_list[i], s_list[i]):
                if score[0] > 0.3:
                    draw_box(draw, box)
            plt.imshow(img)
            plt.show()
            # Stop after the first four images of the batch.
            if i > 2:
                break


In [None]:
# Decode the latest network output `z` and display the boxes found on the
# first images of batch `x`, using a looser score threshold (0.1).
scaled_anchor = [[a[0]*scale_w, a[1]*scale_h] for a in anchor]
b_list, s_list = model.transform_to_original_scale(z.as_ndarray(), scaled_anchor, img_size)
for i, x_img in enumerate(x):
    pixels = x_img.transpose(1, 2, 0).astype(np.uint8)
    img = Image.fromarray(pixels)
    draw = ImageDraw.Draw(img)
    for box, score in zip(b_list[i], s_list[i]):
        # Skip anything that does not clear the threshold.
        if not score[0] > 0.1:
            continue
        print(score[1])
        draw_box(draw, box)
    plt.imshow(img)
    plt.show()
    # At most nine images are shown.
    if i >= 8:
        break

In [None]:
# Bare expression: lets the notebook display the raw contents of `s_list`.
s_list

In [None]:
# Scratch check of NumPy broadcasting: a (1, 2) row added to a (2, 2) array
# is applied to every row, so column 1 of the sum is all ones.
a = np.zeros(shape=(2, 2))
b = np.arange(2)[np.newaxis, :]
broadcast_sum = a + b
print(broadcast_sum[:, 1])

In [None]:
# Persist the current model to 'yolo2.h5' in the working directory.
model.save('yolo2.h5')