In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import _init_paths
import os
import torch
import torch.utils.model_zoo as model_zoo
from torch.nn.parameter import Parameter
import numpy as np
from datetime import datetime

import cPickle as pkl
import network
from wsddn import WSDDN
from utils.timer import Timer

import roi_data_layer.roidb as rdl_roidb
from roi_data_layer.layer import RoIDataLayer
from datasets.factory import get_imdb
from fast_rcnn.config import cfg, cfg_from_file

try:
    from termcolor import cprint
except ImportError:
    cprint = None

def log_print(text, color=None, on_color=None, attrs=None):
    """Emit ``text``, colorized when the optional ``cprint`` helper exists.

    Args:
        text: Message to print.
        color: Forwarded to ``cprint`` as ``color``.
        on_color: Forwarded to ``cprint`` as ``on_color``.
        attrs: Forwarded to ``cprint`` as ``attrs``.

    When ``cprint`` is ``None`` (the fallback assigned if ``termcolor``
    cannot be imported) the styling arguments are ignored and the text is
    written with the plain ``print`` function.
    """
    if cprint is None:
        # No termcolor available: plain output, styling arguments dropped.
        print(text)
        return
    cprint(text, color=color, on_color=on_color, attrs=attrs)

# hyper-parameters
# ------------
# Dataset / config / output locations (all relative to the working directory).
imdb_name = 'voc_2007_trainval'          # name handed to get_imdb()
cfg_file = 'experiments/cfgs/wsddn.yml'  # YAML handed to cfg_from_file()
# NOTE(review): not referenced by the cells visible here -- confirm it is still needed.
pretrained_model = 'data/pretrained_model/alexnet_imagenet.npy'
output_dir = 'models/saved_model'
visualize = True
vis_interval = 5000

# Training schedule.
start_step = 0
end_step = 50000
# NOTE(review): 150000 is larger than end_step, so if training runs
# start_step..end_step this decay step is never reached -- confirm intended.
lr_decay_steps = {150000}
lr_decay = 1./10

# Reproducibility / debugging / logging switches.
rand_seed = 1024
_DEBUG = False
use_tensorboard = False
use_visdom = False
log_grads = False

remove_all_log = False   # remove all historical experiments in TensorBoard
exp_name = None # the previous experiment name in TensorBoard
# ------------

if rand_seed is not None:
    np.random.seed(rand_seed)
    # Seed torch's generator too: seeding NumPy alone leaves torch-side
    # randomness (e.g. the Dropout layer in the model) different on every run.
    torch.manual_seed(rand_seed)

# Apply the experiment config file, then read the training settings this
# notebook uses out of cfg.TRAIN.
cfg_from_file(cfg_file)
lr, momentum, weight_decay = (
    cfg.TRAIN.LEARNING_RATE,
    cfg.TRAIN.MOMENTUM,
    cfg.TRAIN.WEIGHT_DECAY,
)
disp_interval, log_interval = cfg.TRAIN.DISPLAY, cfg.TRAIN.LOG_IMAGE_ITERS


# Load the imdb, prepare its roidb, and create the data layer over it.
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)


voc_2007_trainval ss roidb loaded from /home/ubuntu/code/visual_learning_and_recognition/hw2/code/data/cache/voc_2007_trainval_selective_search_roidb.pkl


In [2]:
# Instantiate the WSDDN model with the dataset's class names; `debug` is
# driven by the _DEBUG flag set in the hyper-parameter section.
net = WSDDN(classes=imdb.classes, debug=_DEBUG)


('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')


In [3]:
# Initialise the network's weights first; tensors that also exist in the
# pretrained AlexNet state dict are overwritten below.
network.weights_normal_init(net, dev=0.001)

# Download the ImageNet AlexNet state dict once and cache it as a pickle in
# the working directory.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# safe as long as this cache is a file this notebook wrote itself.
if os.path.exists('pretrained_alexnet.pkl'):
    # Binary mode: the cache is written with a binary pickle protocol.
    with open('pretrained_alexnet.pkl', 'rb') as cache_file:
        pret_net = pkl.load(cache_file)
else:
    pret_net = model_zoo.load_url('https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth')
    with open('pretrained_alexnet.pkl', 'wb') as cache_file:
        pkl.dump(pret_net, cache_file, pkl.HIGHEST_PROTOCOL)

# Copy every pretrained tensor whose name also exists in our model.
own_state = net.state_dict()
for name, param in pret_net.items():
    if name not in own_state:
        # The model has no parameter with this name: nothing to copy.
        continue
    if isinstance(param, Parameter):
        param = param.data
    try:
        own_state[name].copy_(param)
        print('Copied {}'.format(name))
    except Exception as err:
        # The name is known to exist in the model (checked above), so a
        # failure here is a copy problem (e.g. mismatched shapes); report
        # the real reason and keep going with the remaining tensors.
        print('Could not copy {}: {}'.format(name, err))

# Move model to GPU and set train mode
net.cuda()
net.train()


Copied features.0.weight
Copied features.0.bias
Copied features.3.weight
Copied features.3.bias
Copied features.6.weight
Copied features.6.bias
Copied features.8.weight
Copied features.8.bias
Copied features.10.weight
Copied features.10.bias


WSDDN(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (roi_pool): RoIPool(
  )
  (classifier): Sequential(
    (0): Linear(in_features=9216, out_features=4096, bias=True)
    (1): ReLU(inplace)
    (2): Dropout(p=0.5)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace)
  )
  (score_cls): Sequential(
    (0): Linear(in_features

In [4]:
# SGD over every network parameter except the first two returned by
# net.parameters().
# NOTE(review): those two are presumably the first conv layer's weight and
# bias, deliberately left out of training -- confirm.
params = list(net.parameters())
optimizer = torch.optim.SGD(
    params[2:],
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)

# Create the output directory on first use.
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Running training statistics, all starting from zero.
train_loss = 0
tp = tf = 0.
fg = bg = 0
step_cnt = 0
re_cnt = False

# Start the wall-clock timer.
t = Timer()
t.tic()


In [5]:
# Step counter, initialised to zero.
# NOTE(review): start_step in the hyper-parameter section is also 0;
# presumably this is meant to track it -- confirm.
step = 0

In [None]:
# Pull one batch of blobs from the data layer.
# NOTE(review): the recorded output of this cell is an ipdb prompt inside
# get_weak_minibatch (minibatch.py line 61 carries a Tracer()() breakpoint),
# so the call does not return until that breakpoint is continued or removed.
blobs = data_layer.forward()

> /home/ubuntu/code/visual_learning_and_recognition/hw2/code/faster_rcnn/roi_data_layer/minibatch.py(62)get_weak_minibatch()
     60         gt_classes = roidb[im_i]['gt_classes']
     61         from IPython.core.debugger import Tracer; Tracer()() #labels may be none
---> 62         for cls_idx in xrange(len(gt_classes)):
     63             labels_blob[im_i][cls_idx] = 1
     64 

ipdb> len(gt_classes)
4005
