In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so edits to imported source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import torch
import numpy as np
from datetime import datetime

from faster_rcnn import network
from faster_rcnn.faster_rcnn import FasterRCNN, RPN
from faster_rcnn.utils.timer import Timer

import faster_rcnn.roi_data_layer.roidb as rdl_roidb
from faster_rcnn.roi_data_layer.layer import RoIDataLayer
from faster_rcnn.datasets.factory import get_imdb
from faster_rcnn.fast_rcnn.config import cfg, cfg_from_file

try:
    from termcolor import cprint
except ImportError:
    cprint = None

try:
    from pycrayon import CrayonClient
except ImportError:
    CrayonClient = None


def log_print(text, color=None, on_color=None, attrs=None):
    """Write `text` to the console, colourised when `cprint` is available.

    Args:
        text: the message to print.
        color, on_color, attrs: forwarded unchanged to `cprint`; ignored when
            `cprint` is None and the plain `print` fallback is used.
    """
    if cprint is None:
        # No colour backend: fall back to the builtin print.
        print(text)
        return
    cprint(text, color=color, on_color=on_color, attrs=attrs)

In [3]:
# hyper-parameters
# ------------
# Dataset name handed to get_imdb() when the data is loaded.
#imdb_name = 'voc_2007_trainval'
imdb_name = 'kittipose_train'
# Experiment file passed to cfg_from_file().
cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
# NOTE(review): absolute, machine-specific path - adjust before running on
# another machine.
pretrained_model = '/home/pculbert/Documents/faster_rcnn_pytorch/VGG_imagenet.npy'
# Directory receiving the loss logs and model checkpoints.
output_dir = 'trained_models/saved_pose_model_test'

# NOTE(review): the training cell currently iterates over a hard-coded
# range(200); start_step / end_step only appear there in a commented-out line.
start_step = 0
end_step = 100000
# When `step` is one of these values the training loop multiplies lr by lr_decay.
lr_decay_steps = {60000, 80000}
lr_decay = 1./10

rand_seed = 1024
_DEBUG = True
use_tensorboard = False
remove_all_log = False   # remove all historical experiments in TensorBoard
exp_name = None # the previous experiment name in TensorBoard

# ------------

if rand_seed is not None:
    # Seed every RNG in play, not just NumPy's: torch keeps its own generators
    # for the CPU and for each CUDA device.
    np.random.seed(rand_seed)
    torch.manual_seed(rand_seed)
    # Documented as safe to call when CUDA is unavailable (silently ignored).
    torch.cuda.manual_seed_all(rand_seed)

In [4]:
# load config
# Hand the experiment file to cfg_from_file (presumably merging it into `cfg`
# - confirm in fast_rcnn.config), then read the training settings used by the
# optimizer and by the logging logic.
cfg_from_file(cfg_file)
train_cfg = cfg.TRAIN
lr, momentum, weight_decay = (
    train_cfg.LEARNING_RATE, train_cfg.MOMENTUM, train_cfg.WEIGHT_DECAY)
disp_interval, log_interval = train_cfg.DISPLAY, train_cfg.LOG_IMAGE_ITERS

In [5]:
# load data
# Fetch the image database identified by `imdb_name`.
imdb = get_imdb(imdb_name)
# Must run before imdb.roidb is read below.
# NOTE(review): presumably augments the roidb entries in place - confirm in
# roi_data_layer.roidb.
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb
# roidb is a database, containing bounding box info for all training images
# The data layer produces the per-step `blobs` consumed by the training loop
# through data_layer.forward().
data_layer = RoIDataLayer(roidb, imdb.num_classes)

<bound method kittipose.default_roidb of <faster_rcnn.datasets.kittipose.kittipose object at 0x7fa0bcec2390>>
Remove empty annotations:  005066 004339 004040 003383 001752 001091 000547 Done. 
kittipose_train gt roidb loaded from /home/pculbert/Documents/faster_rcnn_pytorch/data/cache/kittipose_train_gt_roidb.pkl


In [9]:
# load net
# Build the detector for the dataset's classes, run the normal weight
# initialiser over it, then load the pretrained .npy weights.
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)
network.load_pretrained_npy(net, pretrained_model)

# Resuming from a saved checkpoint instead (kept for reference):
#   model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5'
#   model_file = 'models/saved_model3/faster_rcnn_60000.h5'
#   network.load_net(model_file, net)
#   exp_name = 'vgg16_02-19_13-24'
#   start_step = 60001
#   lr /= 10.
#   network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01)

# Move the model to the GPU and switch it to training mode.
net.cuda()
net.train()

# Only parameters from index 8 onwards are handed to the optimizer.
# NOTE(review): presumably this keeps the earliest layers frozen - confirm
# against the layer order of FasterRCNN.parameters().
params = list(net.parameters())
trainable_params = params[8:]
# optimizer = torch.optim.Adam(params[-8:], lr=lr)
optimizer = torch.optim.SGD(trainable_params, lr=lr, momentum=momentum, weight_decay=weight_decay)

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# tensorboard
# Logging is disabled when pycrayon could not be imported, even if requested.
use_tensorboard = use_tensorboard and CrayonClient is not None
if use_tensorboard:
    cc = CrayonClient(hostname='127.0.0.1')
    if remove_all_log:
        cc.remove_all_experiments()
    if exp_name is not None:
        # Continue logging into a previously created experiment.
        exp = cc.open_experiment(exp_name)
    else:
        # Fresh experiment, named after the current date and time.
        exp_name = datetime.now().strftime('vgg16_%m-%d_%H-%M')
        exp = cc.create_experiment(exp_name)

#for param in net.parameters():
#    print(type(param.data), param.size())

In [10]:
# training
#
# Each step takes one batch from the data layer, runs the forward pass, sums
# the detector and RPN losses, back-propagates with gradient clipping and
# applies one optimizer update. The four component losses are appended to text
# files in `output_dir`; progress is printed, optionally sent to TensorBoard,
# and the model is checkpointed every 10000 steps.


def _loss_value(loss_var):
    """Return the value held by a loss Variable as a NumPy array on the CPU."""
    return loss_var.data.cpu().numpy()


def _percent(hits, total):
    """Return `hits / total` as a percentage, or 0. when `total` is zero."""
    return 100. * hits / total if total else 0.


train_loss = 0.
# Running debug counters read from the network after each forward pass.
# NOTE(review): names suggest true-positive / true-negative counts and
# foreground / background sample counts - confirm in FasterRCNN.
tp, tf, fg, bg = 0., 0., 0., 0.
step_cnt = 0.
re_cnt = False

# The log files are opened inside the try block so the finally clause closes
# whichever of them were opened, even when training raises or is interrupted.
loss_files = []
try:
    for loss_name in ('rpn_ce', 'rpn_box', 'rcnn_ce', 'rcnn_box'):
        loss_files.append(open(os.path.join(output_dir, 'loss_%s.txt' % loss_name), 'w'))
    file_rpn_ce, file_rpn_box, file_rcnn_ce, file_rcnn_box = loss_files

    t = Timer()
    t.tic()
    # Debug run: a fixed 200 steps instead of the configured range.
    #for step in range(start_step, end_step+1):
    for step in range(200):

        # get one batch
        blobs = data_layer.forward()
        im_data = blobs['data'] # one image, shape = (1, H, W, 3)
        im_info = blobs['im_info'] # stores H, W, scale
        gt_boxes = blobs['gt_boxes'] # groundtruth boxes
        gt_poses = blobs['gt_poses']
        gt_ishard = blobs['gt_ishard']
        dontcare_areas = blobs['dontcare_areas']
        dontcare_poses = blobs['dontcare_poses']
        disp_data = blobs['data_disp'] # disparity map

        # forward
        net(im_data, im_info, disp_data, gt_boxes, gt_poses, gt_ishard, dontcare_areas, dontcare_poses)
        loss = net.loss + net.rpn.loss

        if _DEBUG:
            tp += float(net.tp)
            tf += float(net.tf)
            fg += net.fg_cnt
            bg += net.bg_cnt

        train_loss += loss.data[0]
        step_cnt += 1

        # backward
        optimizer.zero_grad()
        loss.backward()
        network.clip_gradient(net, 10.)
        optimizer.step()

        # log losses to file, one value per line
        file_rpn_ce.write(str(_loss_value(net.rpn.cross_entropy)) + '\n')
        file_rpn_box.write(str(_loss_value(net.rpn.loss_box)) + '\n')
        file_rcnn_ce.write(str(_loss_value(net.cross_entropy)) + '\n')
        file_rcnn_box.write(str(_loss_value(net.loss_box)) + '\n')

        # Debug run: report every step instead of every `disp_interval` steps.
        #if step % disp_interval == 0:
        if step % 1 == 0:
            duration = t.toc(average=False)
            fps = step_cnt / duration

            log_text = 'step %d, image: %s, loss: %.4f, fps: %.2f (%.2fs per batch)' % (
                step, blobs['im_name'], train_loss / step_cnt, fps, 1./fps)
            log_print(log_text, color='green', attrs=['bold'])

            if _DEBUG:
                #log_print('\tTP: %.2f%%, TF: %.2f%%, fg/bg=(%d/%d)' % (_percent(tp, fg), _percent(tf, bg), fg/float(step_cnt), bg/float(step_cnt)))
                log_print('\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box: %.4f, rcnn_pose: %.4f' % (
                    _loss_value(net.rpn.cross_entropy), _loss_value(net.rpn.loss_box),
                    _loss_value(net.cross_entropy), _loss_value(net.loss_box), _loss_value(net.loss_pose))
                )
            # Ask for the running statistics to be reset at the end of this step.
            re_cnt = True

        if use_tensorboard and step % log_interval == 0:
            exp.add_scalar_value('train_loss', train_loss / step_cnt, step=step)
            exp.add_scalar_value('learning_rate', lr, step=step)
            if _DEBUG:
                # Rates are reported as percentages; _percent also covers the
                # case where no fg / bg samples have been counted yet.
                exp.add_scalar_value('true_positive', _percent(tp, fg), step=step)
                exp.add_scalar_value('true_negative', _percent(tf, bg), step=step)
                losses = {'rpn_cls': float(_loss_value(net.rpn.cross_entropy)),
                          'rpn_box': float(_loss_value(net.rpn.loss_box)),
                          'rcnn_cls': float(_loss_value(net.cross_entropy)),
                          'rcnn_box': float(_loss_value(net.loss_box)),
                          'rcnn_pose': float(_loss_value(net.loss_pose))}
                exp.add_scalar_dict(losses, step=step)

        if (step % 10000 == 0) and step > 0:
            save_name = os.path.join(output_dir, 'faster_rcnn_{}.h5'.format(step))
            network.save_net(save_name, net)
            print('save model: {}'.format(save_name))
        if step in lr_decay_steps:
            lr *= lr_decay
            # NOTE(review): building a new optimizer starts from empty
            # optimizer state, so the accumulated momentum is dropped here;
            # updating optimizer.param_groups would keep it.
            optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay)

        if re_cnt:
            tp, tf, fg, bg = 0., 0., 0, 0
            train_loss = 0
            step_cnt = 0
            t.tic()
            re_cnt = False
finally:
    for loss_file in loss_files:
        loss_file.close()

step 0, image: 003871.jpg, loss: 86.8284, fps: 3.83 (0.26s per batch)
	rpn_cls: 0.6194, rpn_box: 1.7699, rcnn_cls: 2.5218, rcnn_box: 6.5504, rcnn_pose: 0.4839
step 1, image: 004451.jpg, loss: 208.0184, fps: 3.77 (0.27s per batch)
	rpn_cls: 0.6366, rpn_box: 2.7274, rcnn_cls: 4.3271, rcnn_box: 17.4559, rcnn_pose: 1.2214
step 2, image: 001085.jpg, loss: 99.3372, fps: 3.84 (0.26s per batch)
	rpn_cls: 0.6924, rpn_box: 1.7934, rcnn_cls: 2.0870, rcnn_box: 7.8117, rcnn_pose: 0.5066
step 3, image: 000319.jpg, loss: 102.7524, fps: 3.75 (0.27s per batch)
	rpn_cls: 0.6838, rpn_box: 2.1854, rcnn_cls: 2.2576, rcnn_box: 7.7305, rcnn_pose: 0.6520
step 4, image: 000343.jpg, loss: 66.2983, fps: 3.84 (0.26s per batch)
	rpn_cls: 0.6651, rpn_box: 1.4305, rcnn_cls: 2.6542, rcnn_box: 4.8311, rcnn_pose: 0.3631
step 5, image: 000370.jpg, loss: 63.5163, fps: 3.75 (0.27s per batch)
	rpn_cls: 0.6723, rpn_box: 1.2220, rcnn_cls: 2.0997, rcnn_box: 4.8196, rcnn_pose: 0.3279
step 6, image: 001054.jpg, loss: 88.2856, f

step 52, image: 003351.jpg, loss: 16.7684, fps: 3.76 (0.27s per batch)
	rpn_cls: 0.6579, rpn_box: 1.3209, rcnn_cls: 0.8977, rcnn_box: 0.0166, rcnn_pose: 1.8374
step 53, image: 004034.jpg, loss: 11.7006, fps: 3.73 (0.27s per batch)
	rpn_cls: 0.6593, rpn_box: 0.4113, rcnn_cls: 1.2817, rcnn_box: 0.2788, rcnn_pose: 2.8582
step 54, image: 004478.jpg, loss: 13.4919, fps: 3.78 (0.26s per batch)
	rpn_cls: 0.6742, rpn_box: 1.0203, rcnn_cls: 1.0453, rcnn_box: 0.1057, rcnn_pose: 0.5123
step 55, image: 001060.jpg, loss: 17.9815, fps: 3.27 (0.31s per batch)
	rpn_cls: 0.6683, rpn_box: 1.4697, rcnn_cls: 1.0363, rcnn_box: 0.1134, rcnn_pose: 0.4463
step 56, image: 000743.jpg, loss: 20.6616, fps: 3.58 (0.28s per batch)
	rpn_cls: 0.6740, rpn_box: 1.7408, rcnn_cls: 0.9735, rcnn_box: 0.0698, rcnn_pose: 0.9082
step 57, image: 004667.jpg, loss: 6.4977, fps: 3.69 (0.27s per batch)
	rpn_cls: 0.6622, rpn_box: 0.3349, rcnn_cls: 0.8928, rcnn_box: 0.0711, rcnn_pose: 0.8824
step 58, image: 005833.jpg, loss: 10.0832

step 104, image: 000512.jpg, loss: 7.8717, fps: 3.81 (0.26s per batch)
	rpn_cls: 0.5660, rpn_box: 0.0647, rcnn_cls: 0.8386, rcnn_box: 0.1118, rcnn_pose: 4.7024
step 105, image: 000891.jpg, loss: 7.9640, fps: 3.71 (0.27s per batch)
	rpn_cls: 0.5801, rpn_box: 0.1276, rcnn_cls: 0.8452, rcnn_box: 0.2048, rcnn_pose: 3.2142
step 106, image: 003828.jpg, loss: 4.3374, fps: 3.79 (0.26s per batch)
	rpn_cls: 0.6025, rpn_box: 0.1361, rcnn_cls: 0.9250, rcnn_box: 0.1011, rcnn_pose: 0.4379
step 107, image: 003280.jpg, loss: 6.4161, fps: 3.71 (0.27s per batch)
	rpn_cls: 0.6475, rpn_box: 0.3926, rcnn_cls: 1.0224, rcnn_box: 0.0670, rcnn_pose: 0.1502
step 108, image: 005123.jpg, loss: 10.8121, fps: 3.71 (0.27s per batch)
	rpn_cls: 0.6331, rpn_box: 0.8287, rcnn_cls: 0.9671, rcnn_box: 0.0726, rcnn_pose: 0.1995
step 109, image: 003688.jpg, loss: 11.9476, fps: 3.71 (0.27s per batch)
	rpn_cls: 0.6479, rpn_box: 0.8539, rcnn_cls: 1.2885, rcnn_box: 0.0289, rcnn_pose: 1.1833
step 110, image: 002511.jpg, loss: 4.4

step 156, image: 005287.jpg, loss: 5.8286, fps: 3.63 (0.28s per batch)
	rpn_cls: 0.6121, rpn_box: 0.3384, rcnn_cls: 0.9336, rcnn_box: 0.0808, rcnn_pose: 0.0916
step 157, image: 004927.jpg, loss: 12.2721, fps: 3.61 (0.28s per batch)
	rpn_cls: 0.6006, rpn_box: 0.9030, rcnn_cls: 0.8323, rcnn_box: 0.0112, rcnn_pose: 1.6976
step 158, image: 005619.jpg, loss: 12.0576, fps: 3.62 (0.28s per batch)
	rpn_cls: 0.6147, rpn_box: 0.9281, rcnn_cls: 0.9780, rcnn_box: 0.0205, rcnn_pose: 0.9787
step 159, image: 005734.jpg, loss: 5.8849, fps: 3.61 (0.28s per batch)
	rpn_cls: 0.5750, rpn_box: 0.3474, rcnn_cls: 0.8310, rcnn_box: 0.0184, rcnn_pose: 0.8210
step 160, image: 002426.jpg, loss: 8.4472, fps: 3.64 (0.27s per batch)
	rpn_cls: 0.4686, rpn_box: 0.1665, rcnn_cls: 0.7298, rcnn_box: 0.1006, rcnn_pose: 4.5780
step 161, image: 000965.jpg, loss: 14.3934, fps: 3.53 (0.28s per batch)
	rpn_cls: 0.5825, rpn_box: 0.9247, rcnn_cls: 1.6147, rcnn_box: 0.2799, rcnn_pose: 0.1509
step 162, image: 001940.jpg, loss: 10

In [8]:
import gc


def live_tensor_shapes():
    """List the tensors currently tracked by the garbage collector.

    An object counts when it is a tensor itself or exposes a `data` attribute
    that is one.

    Returns:
        A list of `(type(obj), obj.size())` pairs, in the order the objects
        are returned by `gc.get_objects()`.
    """
    found = []
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
                found.append((type(obj), obj.size()))
        except Exception:
            # Probing arbitrary live objects can raise (on Python 3 hasattr
            # only swallows AttributeError, e.g. a dead weakref proxy raises
            # ReferenceError); skip the object instead of aborting the scan.
            continue
    return found


for tensor_type, tensor_size in live_tensor_shapes():
    print(tensor_type, tensor_size)

(<class 'torch.autograd.variable.Variable'>, torch.Size([1]))
(<class 'torch.autograd.variable.Variable'>, torch.Size([1]))
(<class 'torch.cuda.FloatTensor'>, torch.Size([300, 5]))
(<class 'torch.cuda.FloatTensor'>, torch.Size([300, 640, 7, 7]))
(<class 'torch.cuda.IntTensor'>, torch.Size([300, 640, 7, 7]))
(<class 'torch.cuda.FloatTensor'>, torch.Size([300, 1000]))
(<class 'torch.cuda.FloatTensor'>, torch.Size([300, 1000]))
(<class 'torch.cuda.FloatTensor'>, torch.Size([300, 360]))
(<class 'torch.cuda.FloatTensor'>, torch.Size([300, 60]))
(<class 'torch.cuda.FloatTensor'>, torch.Size([300, 4096]))
(<class 'torch.cuda.FloatTensor'>, torch.Size([300, 4096]))
(<class 'torch.autograd.variable.Variable'>, torch.Size([1]))
(<class 'torch.autograd.variable.Variable'>, torch.Size([1]))
(<class 'torch.autograd.variable.Variable'>, torch.Size([1]))
(<class 'torch.autograd.variable.Variable'>, torch.Size([1]))
(<class 'torch.cuda.FloatTensor'>, torch.Size([1]))
(<class 'torch.cuda.FloatTensor'>,