In [1]:
import _init_paths
import os
import sys
import numpy as np
import argparse
import pprint
import pdb
import time
import cv2
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import torchvision.transforms as transforms
from torch.utils.data.sampler import Sampler

from roi_data_layer.roidb import combined_roidb, rank_roidb_ratio, filter_roidb
from roi_data_layer.roibatchLoader import roibatchLoader
from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
from model.utils.net_utils import weights_normal_init, save_net, load_net, \
    adjust_learning_rate, save_checkpoint, clip_gradient

from model.faster_rcnn.vgg16 import vgg16
from model.faster_rcnn.resnet import resnet

from frcnn_helper import *
from scipy.special import softmax

import pickle

import FedUtils
import KD_utils


In [2]:
from roi_data_layer.roidb import combined_roidb, rank_roidb_ratio, filter_roidb
from torch.utils.data.sampler import Sampler

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device='cpu'

In [4]:
class Arguments():
    def __init__(self):
        self.class_agnostic =False
        self.epochs = 3
        self.lr = 0.001
        self.lr_decay_step = 4
        self.dataset = 'KAIST'
        self.net = 'vgg16'
        self.batch_size = 24
        
        self.cuda = True
        #self.round = 10
        self.mGPUs=True
        self.optimizer ="sgd"
        self.k=3
        self.num_workers = 2
        #self.device = th.device("cpu")
        
args = Arguments()

In [5]:
data_cache_path = 'data/cache'
imdb_classes =  ('__background__',  # always index 0
                          'person',
                          'people','cyclist'
                         )

## load data

In [6]:
def load_client_dataset(imdb_name):
    #dataloader_list = []
    #iter_epochs_list = []
    #for imdb_name in imdb_list:
    pkl_file = os.path.join(data_cache_path, imdb_name + '_gt_roidb.pkl')

    with open(pkl_file, 'rb') as f:
        roidb = pickle.load(f)

    roidb = filter_roidb(roidb)

    ratio_list, ratio_index = rank_roidb_ratio(roidb)

    train_size = len(roidb)
    print(train_size)
    iters_per_epoch = int(train_size / args.batch_size)
    print('iters_per_epoch: ' + str(iters_per_epoch))
    #iter_epochs_list.append(iters_per_epoch)
    sampler_batch = sampler(train_size, args.batch_size)

    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, imdb_classes, training=True)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                             sampler=sampler_batch, num_workers=args.num_workers)
    #dataloader_list.append(dataloader)
    return dataloader,iters_per_epoch

In [7]:
class sampler(Sampler):
    def __init__(self, train_size, batch_size):
        self.num_data = train_size
        self.num_per_batch = int(train_size / batch_size)
        self.batch_size = batch_size
        self.range = torch.arange(0, batch_size).view(1, batch_size).long()
        self.leftover_flag = False
        if train_size % batch_size:
            self.leftover = torch.arange(self.num_per_batch * batch_size, train_size).long()
            self.leftover_flag = True

    def __iter__(self):
        rand_num = torch.randperm(self.num_per_batch).view(-1, 1) * self.batch_size
        self.rand_num = rand_num.expand(self.num_per_batch, self.batch_size) + self.range

        self.rand_num_view = self.rand_num.view(-1)

        if self.leftover_flag:
            self.rand_num_view = torch.cat((self.rand_num_view, self.leftover), 0)

        return iter(self.rand_num_view)

    def __len__(self):
        return self.num_data

In [8]:
imdb_name = 'KAIST_train'
dataloader,iters_per_epoch  = load_client_dataset(imdb_name)

before filtering, there are 43244 images...
after filtering, there are 43244 images...
43244
iters_per_epoch: 1801


In [9]:
data_iter = iter(dataloader)
data = next(data_iter)

In [10]:
len(data)

4

### 1. image

In [11]:
data[0].size()


torch.Size([24, 3, 600, 750])

### 2. img_info

In [12]:
data[1].size()


torch.Size([24, 3])

### 3. bounding box (batch, 20, xyxyc)

In [13]:
data[2].size()


torch.Size([24, 20, 5])

In [14]:
data[2][5][1]

tensor([147.6562, 254.2969, 189.8438, 353.9062,   1.0000])

In [15]:
data[2][5][2]

tensor([0., 0., 0., 0., 0.])

### 4. num boxes 每張圖片bounding box的數量

In [16]:
data[3].size()


torch.Size([24])

In [17]:
data[3][1]

tensor(7)

In [18]:
im_data = torch.FloatTensor(1)
im_info = torch.FloatTensor(1)
num_boxes = torch.LongTensor(1)
gt_boxes = torch.FloatTensor(1)

# ship to cuda

im_data = im_data.to(device)
im_info = im_info.to(device)
num_boxes = num_boxes.to(device)
gt_boxes = gt_boxes.to(device)

# make variable
im_data = Variable(im_data)
im_info = Variable(im_info)
num_boxes = Variable(num_boxes)
gt_boxes = Variable(gt_boxes)

with torch.no_grad():
    im_data.resize_(data[0].size()).copy_(data[0])
    im_info.resize_(data[1].size()).copy_(data[1])
    gt_boxes.resize_(data[2].size()).copy_(data[2])
    num_boxes.resize_(data[3].size()).copy_(data[3])



In [19]:
im_info[0][1]
im_info[0][2]

tensor(1.1719, device='cuda:0')

## load model

In [20]:

teacher_model_path = '/home/superorange5/Research/2019_deepMI3/deepMI3/faster-RCNN/models/vgg16/KAIST/fasterRD/faster_rcnn_KAIST_train_rd_30_1081.pth'
fasterRCNN_teacher = FedUtils.load_model_KD(imdb_classes, teacher_model_path, args, cfg)


Loading pretrained weights from data/pretrained_model/vgg16_caffe.pth


In [21]:
fasterRCNN_student = FedUtils.initial_network_KD(imdb_classes, args)

Loading pretrained weights from data/pretrained_model/vgg16_caffe.pth


In [22]:
fasterRCNN_student.eval()
fasterRCNN_student.cuda()

DataParallel(
  (module): vgg16(
    (RCNN_rpn): _RPN(
      (RPN_Conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (RPN_cls_score): Conv2d(512, 24, kernel_size=(1, 1), stride=(1, 1))
      (RPN_bbox_pred): Conv2d(512, 48, kernel_size=(1, 1), stride=(1, 1))
      (RPN_proposal): _ProposalLayer()
      (RPN_anchor_target): _AnchorTargetLayer()
    )
    (RCNN_proposal_target): _ProposalTargetLayer()
    (RCNN_roi_pool): ROIPool(output_size=(7, 7), spatial_scale=0.0625)
    (RCNN_roi_align): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0)
    (RCNN_base): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6)

In [23]:
im_data.size()
x=im_data

In [24]:
for layer in fasterRCNN_student.module.RCNN_base:
    print(layer)
    x=layer(x)
    print(x.size())

Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
torch.Size([24, 64, 600, 750])
ReLU(inplace)
torch.Size([24, 64, 600, 750])
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
torch.Size([24, 64, 600, 750])
ReLU(inplace)
torch.Size([24, 64, 600, 750])
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
torch.Size([24, 64, 300, 375])
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
torch.Size([24, 128, 300, 375])
ReLU(inplace)
torch.Size([24, 128, 300, 375])
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
torch.Size([24, 128, 300, 375])
ReLU(inplace)
torch.Size([24, 128, 300, 375])
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
torch.Size([24, 128, 150, 187])
Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
torch.Size([24, 256, 150, 187])
ReLU(inplace)
torch.Size([24, 256, 150, 187])
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

In [25]:
base_feat = x
base_feat.size()

torch.Size([24, 512, 37, 46])

In [26]:
base_feat = base_feat.to('cpu')

# RCNN RPN

In [27]:
from model.rpn.rpn import _RPN
dout_base_model=512


In [28]:
RPN_Conv = nn.Conv2d(dout_base_model, 512, 3, 1, 1, bias=True)
rpnconv = RPN_Conv(base_feat)
rpn_conv1 = F.relu( rpnconv  , inplace=True)


In [29]:
rpn_conv1.size()

torch.Size([24, 512, 37, 46])

## RPN RS (class)

In [30]:
def reshape(x, d):
    input_shape = x.size()
    x = x.view(
        input_shape[0],
        int(d),
        int(float(input_shape[1] * input_shape[2]) / float(d)),
        input_shape[3]
    )
    return x

In [31]:
anchor_scales = [4,8,16,32]
anchor_ratios = [0.5,1,2]
feat_stride = 16
nc_score_out = len(anchor_scales) * len(anchor_ratios) * 2  #24

nc_bbox_out = len(anchor_scales) * len(anchor_ratios) *4

In [32]:
# get rpn classification score


RPN_cls_score = nn.Conv2d(512, nc_score_out, 1, 1, 0)  

rpn_cls_score = RPN_cls_score(rpn_conv1)  ##for rpn_loss_cls   CE(rpn_cls_score, rpn_label)
print(rpn_cls_score.size())
rpn_cls_score_reshape = reshape(rpn_cls_score, 2)
rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
rpn_cls_prob = reshape(rpn_cls_prob_reshape, nc_score_out)

print(rpn_cls_prob.size())

torch.Size([24, 24, 37, 46])
torch.Size([24, 24, 37, 46])


In [35]:
rpn_cls_score[0]

tensor([[[ 1.2640e-02,  6.3850e-01,  3.2820e-01,  ...,  1.0602e-01,
           8.0470e-02, -2.5607e-02],
         [-3.7883e-01, -5.1347e-01, -3.2023e-01,  ..., -1.6869e-01,
          -7.6109e-02,  6.2011e-02],
         [-8.6917e-02, -3.3601e-03,  5.0398e-03,  ...,  3.1201e-01,
           8.3233e-02,  8.1401e-02],
         ...,
         [ 1.5612e-01,  2.4260e-03, -8.1096e-03,  ...,  3.0656e-01,
           2.8281e-01,  1.8273e-01],
         [ 1.0896e-01, -1.0094e-01, -1.4334e-01,  ...,  2.3013e-02,
           4.2606e-02,  1.0938e-01],
         [ 8.3189e-03, -1.5102e-01, -1.3473e-01,  ..., -1.3687e-04,
          -3.6680e-02, -5.2678e-03]],

        [[-1.5406e-01, -1.4064e-01,  4.9144e-02,  ..., -8.5790e-02,
          -1.0858e-01, -1.8033e-02],
         [-2.5004e-02, -3.4821e-03, -1.6671e-01,  ...,  4.7991e-02,
          -1.1528e-01, -1.5717e-01],
         [-6.9628e-02,  2.2042e-01,  2.2273e-03,  ...,  3.0319e-01,
           5.2642e-01, -7.7081e-02],
         ...,
         [ 4.5214e-02,  2

In [33]:
rpn_cls_score[0]

tensor([[[ 1.2640e-02,  6.3850e-01,  3.2820e-01,  ...,  1.0602e-01,
           8.0470e-02, -2.5607e-02],
         [-3.7883e-01, -5.1347e-01, -3.2023e-01,  ..., -1.6869e-01,
          -7.6109e-02,  6.2011e-02],
         [-8.6917e-02, -3.3601e-03,  5.0398e-03,  ...,  3.1201e-01,
           8.3233e-02,  8.1401e-02],
         ...,
         [ 1.5612e-01,  2.4260e-03, -8.1096e-03,  ...,  3.0656e-01,
           2.8281e-01,  1.8273e-01],
         [ 1.0896e-01, -1.0094e-01, -1.4334e-01,  ...,  2.3013e-02,
           4.2606e-02,  1.0938e-01],
         [ 8.3189e-03, -1.5102e-01, -1.3473e-01,  ..., -1.3687e-04,
          -3.6680e-02, -5.2678e-03]],

        [[-1.5406e-01, -1.4064e-01,  4.9144e-02,  ..., -8.5790e-02,
          -1.0858e-01, -1.8033e-02],
         [-2.5004e-02, -3.4821e-03, -1.6671e-01,  ...,  4.7991e-02,
          -1.1528e-01, -1.5717e-01],
         [-6.9628e-02,  2.2042e-01,  2.2273e-03,  ...,  3.0319e-01,
           5.2642e-01, -7.7081e-02],
         ...,
         [ 4.5214e-02,  2

## RPN PS (reg)

In [33]:
# get rpn offsets to the anchor boxes
RPN_bbox_pred = nn.Conv2d(512, nc_bbox_out, 1, 1, 0)
rpn_bbox_pred = RPN_bbox_pred(rpn_conv1)

rpn_bbox_pred.size()

torch.Size([24, 48, 37, 46])

## Anchor Target Layer

In [34]:
from model.rpn.anchor_target_layer import _AnchorTargetLayer

RPN_anchor_target = _AnchorTargetLayer(feat_stride, anchor_scales, anchor_ratios)

In [35]:
rpn_cls_score = rpn_cls_score.to(device)
rpn_cls_score.size()

torch.Size([24, 24, 37, 46])

In [37]:
rpn_cls_score

tensor([[[[ 8.1262e-01, -6.5381e-02, -4.3948e-01,  ..., -2.4277e-01,
           -2.2042e-01,  5.6576e-02],
          [-1.5262e+00, -2.0259e+00, -2.8260e+00,  ..., -1.9749e-01,
           -7.7089e-02, -4.2356e-02],
          [-5.6469e-01, -1.3351e+00, -1.7033e+00,  ..., -1.5634e-01,
           -1.5186e-01, -1.0202e-01],
          ...,
          [ 1.5173e-02, -2.1028e-01, -1.2641e-01,  ..., -2.6602e-01,
           -1.1109e-01, -2.0060e-01],
          [-1.1941e-01, -8.0217e-03, -1.9782e-01,  ..., -2.2825e-01,
           -2.1167e-01, -1.9277e-01],
          [-7.9313e-02, -8.1228e-03, -6.7514e-02,  ..., -7.3829e-02,
           -5.5856e-02, -1.6762e-01]],

         [[-1.3862e-01,  5.7697e-01,  8.1768e-01,  ...,  7.5118e-02,
            3.4746e-01,  2.9208e-01],
          [ 1.7131e+00,  1.7722e+00,  1.4626e+00,  ...,  1.3496e-01,
            2.0457e-01,  2.0534e-01],
          [ 8.7139e-01,  1.2627e+00,  1.2410e+00,  ..., -1.1183e-01,
            1.0001e-01,  1.7587e-01],
          ...,
     

In [36]:
gt_boxes.size()

torch.Size([24, 20, 5])

In [38]:
rpn_data = RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))

#1. label [24, 1, 444, 46]
#2. target [24, 48, 37, 46]
#3. inside_weights [24, 48, 37, 46]
#4. outside_weights [24, 48, 37, 46]

In [39]:
rpn_label = rpn_data[0].view(args.batch_size, -1)
rpn_label.size()

torch.Size([24, 20424])

In [48]:
rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
rpn_keep.size()


torch.Size([6144])

In [49]:
torch.topk(rpn_keep, 5)

torch.return_types.topk(
values=tensor([487901, 487865, 487849, 487810, 487803], device='cuda:0'),
indices=tensor([6143, 6142, 6141, 6140, 6139], device='cuda:0'))

In [50]:
rpn_cls_score.size()

torch.Size([24, 24, 37, 46])

In [51]:
rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep)
rpn_cls_score.size()

torch.Size([6144, 2])

In [40]:
rpn_cls_score.type()

'torch.cuda.FloatTensor'

In [54]:
rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep)
rpn_label = Variable(rpn_label.long())
rpn_label.size()


torch.Size([6144])

In [53]:
rpn_label


tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')

### why只丟cls_score就可以match? 

In [89]:
rpn_label = rpn_data[0].view(args.batch_size, -1)
print(rpn_data[0].size())
rpn_label.size()

torch.Size([24, 1, 444, 46])


torch.Size([24, 20424])

In [97]:
len(rpn_data[1:])

4

In [90]:
rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]

# compute bbox regression loss
rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
rpn_bbox_targets = Variable(rpn_bbox_targets)

ValueError: too many values to unpack (expected 3)

In [80]:
fasterRCNN_student.module.RCNN_base[0].parameters()

<generator object Module.parameters at 0x7f6e283c45c8>

In [74]:
fasterRCNN_student(im_data, im_info, gt_boxes, num_boxes)

(tensor([[[  0.0000, 386.7188, 248.4375, 414.8438, 301.1719],
          [  0.0000, 312.5864, 232.5051, 433.2371, 266.5047],
          [  0.0000, 377.0632, 257.5294, 435.4623, 340.6023],
          ...,
          [  0.0000, 380.9831, 233.0662, 491.7379, 272.7239],
          [  0.0000, 380.0832, 225.1171, 501.2969, 322.5659],
          [  0.0000, 324.0278, 239.1087, 403.0208, 309.9331]],
 
         [[  1.0000, 460.5469, 253.1250, 482.8125, 302.3438],
          [  1.0000, 544.9219, 255.4688, 641.0156, 516.7969],
          [  1.0000, 558.6157, 286.2213, 637.8317, 478.1352],
          ...,
          [  1.0000, 639.2265, 340.1802, 740.8765, 391.4321],
          [  1.0000, 535.4707,   0.0000, 749.0000, 412.8619],
          [  1.0000, 466.9518, 272.1271, 546.0280, 334.1856]],
 
         [[  2.0000,  84.3750, 236.7188, 117.1875, 331.6406],
          [  2.0000,  92.5025, 220.2211, 178.3179, 274.9345],
          [  2.0000, 100.7142, 260.3217, 185.0015, 301.9442],
          ...,
          [  2.0000

## get feature map (base)

In [75]:
fasterRCNN_student.RCNN_base

AttributeError: 'DataParallel' object has no attribute 'RCNN_base'

In [None]:
fasterRCNN_s.train()
#fasterRCNN_t.train()
loss_temp = 0
loss_sup_temp = 0
start = time.time()

if epoch % (args.lr_decay_step + 1) == 0:
    adjust_learning_rate(optimizer, args.lr_decay_gamma)
    lr *= args.lr_decay_gamma

data_iter = iter(dataloader)
for step in range(iters_per_epoch):
    data = next(data_iter)

    with torch.no_grad():
        im_data.resize_(data[0].size()).copy_(data[0])
        im_info.resize_(data[1].size()).copy_(data[1])
        gt_boxes.resize_(data[2].size()).copy_(data[2])
        num_boxes.resize_(data[3].size()).copy_(data[3])

    fasterRCNN_s.zero_grad()


    # KD params
    u=KD_params['u']
    v=KD_params['v']
    lambda_var = KD_params['lambda_var']
    gamma = KD_params['gamma']


    # student net
    rois, rcn_Ps, rcn_Rs, rpn_Lhard, rpn_LsL1, \
    rcn_Lhard, rcn_LsL1, rois_label, rpn_Ps, rpn_Rs, fmap_s, rcn_gt, rpn_gt, mask_batch = fasterRCNN_s(im_data, im_info, gt_boxes, num_boxes)