In [1]:
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
%matplotlib inline
import pdb

In [2]:
from easydict import EasyDict as edict
from matplotlib import pyplot as plt
from torchvision import transforms as trans
from PIL import Image
from pathlib import Path
from models.Yolo_model import Yolo_model, build_targets
import numpy as np
# np.seterr(all='raise')
import torch
from torch import nn, optim
# import torch.nn.functional as F
from utils.vis_utils import *
from utils.box_utils import *
from utils.dataset_tools import *
from utils.utils import *
from models.Yolo_head import Yolo_loss
from tensorboardX import SummaryWriter
from tqdm import tqdm_notebook as tqdm
from imgaug import augmenters as iaa
from Yolo_trainer_Notebook import Yolo
from torch.utils.data import DataLoader

In [3]:
# Global configuration object shared by the model, datasets and trainer below.
conf = edict()

# Nine anchor pairs, smallest first. The model head later exposes them in
# groups of three per pyramid level (see the anchors_group display further
# down); presumably (width, height) in input-image pixels -- TODO confirm.
conf.coco_anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                     [59, 119], [116, 90], [156, 198], [373, 326]]
# NOTE(review): every path below is an absolute, machine-specific location;
# the notebook will not run elsewhere without editing them.
conf.train_path = Path('/home/f/nvme/coco2017/train2017/')
conf.train_anno_path = Path(
    '/home/f/nvme/coco2017/annotations/instances_train2017.json')
conf.val_path = Path('/home/f/nvme/coco2017/val2017/')
conf.val_anno_path = Path(
    '/home/f/nvme/coco2017/annotations/instances_val2017.json')
conf.log_path = Path('/home/f/learning/yolo/log')
conf.model_path = Path('/home/f/learning/yolo/model')
conf.save_path = Path('/home/f/learning/yolo/save')
# Relative path (plain string, unlike the Path objects above).
conf.ids_path = 'data/ids.npy'

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
conf.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# get_id_maps comes from one of the wildcard `utils` imports; judging by the
# printed output it loads a COCO annotation file -- TODO confirm which one.
maps,correct_id_2_class = get_id_maps(conf)

loading annotations into memory...
Done (t=9.43s)
creating index...
index created!


In [4]:
# --- Geometry -------------------------------------------------------------
conf.num_anchors = 3
conf.input_size = 416
# Strides of the three output levels: 416 / 32, 16, 8 gives the 13, 26 and 52
# grid sizes seen in the prediction shapes further down.
conf.scales = [32,16,8]

# --- Data loading ---------------------------------------------------------
# batch_size was previously assigned twice in this cell with the same value;
# it is now set in exactly one place so the two copies cannot drift apart.
conf.batch_size = 16
conf.num_workers = 8

# --- Optimisation ---------------------------------------------------------
conf.running_norm = 0.
# conf.gdclip = 3000.
conf.gdclip = None

# --- Loss weights and thresholds ------------------------------------------
conf.coord_scale = 2.
conf.noobject_scale = 0.5
conf.object_scale = 5
conf.class_scale = 5.
conf.ignore_thresh = 0.5
conf.evaluate_iou_threshold = 0.5
conf.predict_confidence_threshold = 0.5

# --- Model ----------------------------------------------------------------
model = Yolo_model(conf)
model.to(conf.device)
# Reuse the normalisation statistics stored on the backbone.
conf.mean = model.res50_pyramid.model.mean
conf.std = model.res50_pyramid.model.std

# NOTE(review): `size_average` is deprecated in newer PyTorch releases in
# favour of `reduction='sum'`; left untouched so the cell keeps working on the
# PyTorch version this notebook was run with.
conf.mse_loss = nn.MSELoss(size_average=False)
# NOTE(review): this stores the loss *class*, not an instance -- presumably the
# consumer instantiates it itself; confirm against Yolo_loss.
conf.bce_loss = nn.BCEWithLogitsLoss

# --- Logging cadence (small values for debugging) ---------------------------
conf.board_loss_every = 5
conf.evaluate_every = 5
conf.board_pred_image_every = 5
# Loader-relative schedule kept for reference; train_loader does not exist yet
# at this point in the notebook.
# conf.board_loss_every = len(train_loader) // 100
# conf.evaluate_every = len(train_loader) // 10
# conf.board_pred_image_every = len(train_loader) // 2
# conf.save_every = len(train_loader) // 2
# conf.board_grad_norm = len(train_loader) // 10

In [5]:
# Both splits use the same dataset class and id maps; only the image folder
# and annotation file differ.
train_ds = Coco_dataset(conf, conf.train_path, conf.train_anno_path, maps)
val_ds = Coco_dataset(conf, conf.val_path, conf.val_anno_path, maps)

# Loader settings common to both splits; only `shuffle` differs.
loader_kwargs = dict(
    batch_size=conf.batch_size,
    collate_fn=coco_collate_fn,
    pin_memory=True,
    num_workers=conf.num_workers,
)
train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)

loading annotations into memory...
Done (t=8.99s)
creating index...
index created!
loading annotations into memory...
Done (t=0.29s)
creating index...
index created!


In [6]:
# Wrap model and loaders in the project trainer class.
# NOTE(review): the meaning of the two trailing None arguments is not visible
# here -- check the Yolo constructor in Yolo_trainer_Notebook.
yolo = Yolo(conf,model,train_loader,val_loader,None,None)

In [7]:
# Pull a single batch for inspection. train_loader shuffles and no seed is set
# in this notebook, so the batch (and every hard-coded sample/cell index used
# further down) differs from run to run.
imgs,bboxes_group,labels_group = next(iter(train_loader))

In [8]:
# Transfer the image batch, then every per-image label and box tensor, to the
# configured device. The two lists are updated in place, element by element.
imgs = imgs.to(conf.device)
for tensor_list in (labels_group, bboxes_group):
    for pos, tensor in enumerate(tensor_list):
        tensor_list[pos] = tensor.to(conf.device)

In [9]:
# Single forward pass on the inspection batch.
# NOTE(review): executed without torch.no_grad(), so autograd state is kept
# for these predictions -- fine for debugging, wasteful otherwise.
preds = yolo.model(imgs)

In [10]:
# Shapes of the raw head outputs for the three pyramid levels.
tuple(preds.loss_feats[level].shape for level in range(3))

(torch.Size([16, 3, 13, 13, 85]),
 torch.Size([16, 3, 26, 26, 85]),
 torch.Size([16, 3, 52, 52, 85]))

In [11]:
# Shapes of the decoded box predictions for the three pyramid levels.
tuple(preds.pred_bboxes_group[level].shape for level in range(3))

(torch.Size([16, 3, 13, 13, 4]),
 torch.Size([16, 3, 26, 26, 4]),
 torch.Size([16, 3, 52, 52, 4]))

In [12]:
# Anchor tensors held by the head, one per pyramid level.
tuple(yolo.model.head.anchors_group[level] for level in range(3))

(tensor([[ 116.,   90.],
         [ 156.,  198.],
         [ 373.,  326.]], device='cuda:0'), tensor([[  30.,   61.],
         [  62.,   45.],
         [  59.,  119.]], device='cuda:0'), tensor([[ 10.,  13.],
         [ 16.,  30.],
         [ 33.,  23.]], device='cuda:0'))

In [13]:
# Toggle for the warm-up branch of the scratch target-building cell below.
warm_up = False

In [14]:
# Pyramid level inspected by the scratch cells below (indexes the per-level lists).
l = 0

In [15]:
# Index of the sample within the batch inspected by the scratch cells below.
b = 6

In [23]:
# Short aliases for the decoded predictions and the head's anchors, used by
# the scratch cells that follow.
pred_bboxes_group = preds.pred_bboxes_group
anchors_group = yolo.model.head.anchors_group

In [None]:
# Scratch reconstruction of the target/mask buffers for manual inspection of
# sample `b` at pyramid level `l`.

# Anchors per level is dimension 1 of every prediction tensor. The previous
# len(pred_bboxes_group) counted pyramid *levels* and only gave the right
# value because both numbers happen to be 3.
nA = pred_bboxes_group[0].shape[1]
nB = len(bboxes_group)
# Grid size of each level, read once instead of once per buffer.
grid_sizes = [level_preds.shape[2] for level_preds in pred_bboxes_group]

# All anchors stacked into one (1, total_anchors, 2) tensor.
anchors_concat = torch.cat(anchors_group).unsqueeze(0)
anchor_maxes = anchors_concat / 2.
anchor_mins = -anchor_maxes

# One buffer per level; the masks share the prediction grid layout.
targets = [torch.zeros([nB, nA, nF, nF, 5], device=conf.device)
           for nF in grid_sizes]
gt_mask = [torch.zeros([nB, nA, nF, nF], device=conf.device)
           for nF in grid_sizes]
coord_mask = [torch.zeros([nB, nA, nF, nF], device=conf.device)
              for nF in grid_sizes]
# Every cell starts with the no-object weight.
conf_weight = [conf.noobject_scale * torch.ones([nB, nA, nF, nF], device=conf.device)
               for nF in grid_sizes]

if warm_up:
    targets[l][..., :2] = 0.5
    targets[l][..., 2:4] = 0.
    # Fill in place: assigning `coord_mask[l] = 1` would replace the tensor in
    # the list with a Python int and break later per-sample indexing.
    coord_mask[l].fill_(1.)

# Ground-truth boxes of sample `b`, split into their first two and last two
# columns (centre and size, going by the xcycwh helpers used elsewhere).
bboxes_xy = bboxes_group[b][:, :2]
bboxes_wh = bboxes_group[b][:, 2:]
# Size-only IoU of every ground-truth box against every anchor, then the
# index of the best-matching anchor per box.
iou = cal_iou_wh(bboxes_wh, anchors_concat)
best_anchor = torch.argmax(iou, dim=-1)

In [None]:
# Zero the confidence weight of every prediction at level `l`, sample `b`,
# whose best IoU with any ground-truth box exceeds the ignore threshold.
sample_preds = pred_bboxes_group[l][b]
cur_pred_boxes = sample_preds.view(-1, 4)  # one row per predicted box
gt_boxes = bboxes_group[b]
ious = cal_ious(
    xcycwh_2_xywh(cur_pred_boxes),
    xcycwh_2_xywh(gt_boxes),
)
max_ious = torch.max(ious, 1)[0]  # best overlap per prediction
# Back to the grid layout so the mask lines up with conf_weight[l][b].
idx = (max_ious > conf.ignore_thresh).view(sample_preds.shape[:-1])
conf_weight[l][b][idx] = 0.

In [90]:
# Run the real implementation on the same batch. This rebinds the targets /
# gt_mask / conf_weight / coord_mask names created by the scratch cell above.
# Note that warm_up=True is passed explicitly here, independent of the
# notebook-level `warm_up` variable.
targets, gt_mask, conf_weight, coord_mask = build_targets(
    conf,
    preds.pred_bboxes_group,
    bboxes_group,
    labels_group,
    yolo.model.head.anchors_group,
    warm_up=True,
    debug=False)

In [91]:
# Compare the coordinate mask with the ground-truth mask at each level.
# compare_tensors is defined elsewhere; the output suggests it returns a
# scalar difference measure -- TODO confirm its exact definition.
[compare_tensors(coord_mask[i], gt_mask[i]) for i in range(3)]

[tensor(8089., device='cuda:0'),
 tensor(32417., device='cuda:0'),
 tensor(29., device='cuda:0')]

In [100]:
# Indices in level 0 (hard-coded, not `l`) of sample `b` where component 2 of
# the target vector is non-zero.
np.where(targets[0][b][:,:,:,2].cpu().numpy() != 0.)

(array([1, 2]), array([9, 7]), array([9, 7]))

In [75]:
# Level-0 anchors divided by 32 (the first entry of conf.scales), i.e.
# presumably expressed in level-0 grid cells.
anchors_group[0]/32

tensor([[  3.6250,   2.8125],
        [  4.8750,   6.1875],
        [ 11.6562,  10.1875]], device='cuda:0')

In [68]:
# Ground-truth boxes of sample `b` divided by the same factor of 32, for
# comparison with the grid indices reported by the mask cells.
bboxes_group[b]/32

tensor([[  9.6875,   9.0469,   6.5625,   7.8438],
        [  7.1719,   7.2656,  11.5938,  11.4062]], device='cuda:0')

In [71]:
# Indices at level `l`, sample `b`, that are marked as ground truth.
np.where(gt_mask[l][b].cpu().numpy() == 1)

(array([1, 2]), array([9, 7]), array([9, 7]))

In [62]:
# Indices whose confidence weight equals 5.
# NOTE(review): 5 matches the current conf.object_scale; comparing against
# conf.object_scale would keep this check valid if the config changes.
np.where(conf_weight[l][b].cpu().numpy() == 5.)

(array([1, 2]), array([9, 7]), array([9, 7]))

In [73]:
# Spot check of the weight at index (1, 9, 9), one of the positions listed by
# the previous cell.
conf_weight[l][b][1, 9, 9]

tensor(5., device='cuda:0')

In [36]:
# Predicted box stored at the hard-coded index (2, 6, 4) of level `l`, sample `b`.
pred_bboxes_group[l][b][2, 6, 4]

tensor([ 145.8432,  214.1226,  351.7752,  354.8152], device='cuda:0')

In [30]:
# Ground-truth boxes of sample `b`, unscaled.
bboxes_group[b]

tensor([[ 310.0000,  289.5000,  210.0000,  251.0000],
        [ 229.5000,  232.5000,  371.0000,  365.0000]], device='cuda:0')

In [55]:
# Re-select pyramid level 0 (same value as the earlier `l = 0` cell).
l = 0

In [59]:
# IoU of the prediction at index (2, 6, 8) against every ground-truth box of
# sample `b`, for each of the three levels, going through the xywh conversion.
[
    [
        cal_ious(
            xcycwh_2_xywh(pred_bboxes_group[level][b][2, 6, 8].unsqueeze(0)),
            xcycwh_2_xywh(gt_box.unsqueeze(0)),
        ).item()
        for gt_box in bboxes_group[b]
    ]
    for level in (0, 1, 2)
]

[[0.35337454080581665, 0.7120802998542786],
 [-0.0, 0.06484775245189667],
 [0.0, 0.005149302538484335]]

In [60]:
# Same comparison as the xywh-based check, but using the helper that takes
# centre-format boxes directly; the two result tables should agree.
[
    [
        cal_ious_xcycwh(
            pred_bboxes_group[level][b][2, 6, 8].unsqueeze(0),
            gt_box.unsqueeze(0),
        ).item()
        for gt_box in bboxes_group[b]
    ]
    for level in (0, 1, 2)
]

[[0.3533744513988495, 0.7120801210403442],
 [-0.0, 0.06484775245189667],
 [0.0, 0.005149302538484335]]

In [None]:
# Fourth ground-truth box of sample `b` with a leading batch dimension.
# NOTE(review): the saved output for bboxes_group[b] shows only two boxes, so
# index 3 would raise IndexError on that batch -- confirm the intended index.
bboxes_group[b][3].unsqueeze(0)

In [None]:
# Hand-made single box for exercising the box-format helpers.
# `np.float` was only an alias of the builtin `float` and has been removed
# from current NumPy releases; the builtin yields the same float64 array on
# every NumPy version.
box = np.array([[3,0,4,9]]).astype(float)

In [None]:
# Tensor view of the hand-made box (dtype follows the NumPy array).
torch.tensor(box)

In [None]:
# Convert the hand-made box with the project helper; by its name it maps
# corner-based (x, y, w, h) to centre-based (xc, yc, w, h) -- TODO confirm.
xywh_2_xcycwh(torch.tensor(box))