In [1]:
import os
from skimage.io import imread
import numpy as np
import torch
import pickle
def preproc_frcnn(img):
    """Turn an H x W x 3 image array into a mean-subtracted float32 tensor.

    Follows the input preprocessing of Faster R-CNN: the channel axis is
    reversed (RGB -> BGR when the input is RGB, which is what
    ``skimage.io.imread`` is expected to return) and a fixed per-channel
    mean is subtracted.

    Args:
        img: numpy array of shape (H, W, 3), any numeric dtype.

    Returns:
        torch.Tensor of dtype float32 and shape (H, W, 3) holding the
        channel-reversed, mean-subtracted pixels. ``img`` is never modified.
    """
    # Per-channel means, given in the channel order that exists after the flip.
    PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])

    # np.array (unlike np.asarray) always copies. For an input that is already
    # float32, np.asarray would hand back a negative-stride view of the
    # caller's data, so the in-place subtraction below would silently alter
    # the caller's image and the view could not be converted to a tensor.
    flipped = np.array(img[:, :, ::-1], dtype=np.float32, order='C')
    flipped -= PIXEL_MEANS

    return torch.tensor(flipped)


In [2]:
def generate_pkl_by_scene(scene, base_dir=None, out_dir='data/pickle'):
    """Preprocess every ``.jpg`` image of one scene and pickle the tensors.

    Args:
        scene: name of the scene sub-directory; also used in the output
            file name ``MI3_test_<scene>.pkl``.
        base_dir: directory containing the scene's images. Defaults to the
            dataset root used by this notebook joined with ``scene``.
        out_dir: directory that receives the pickle; created if missing.

    The pickle contains a list with one float32 tensor of shape
    (1, 3, H, W) per image, ordered by sorted file name.
    """
    if base_dir is None:
        base_dir = '/home/superorange5/MI3_dataset/MI3_dataset_bydataset/'+scene

    # os.listdir returns entries in arbitrary order; sorting makes the tensor
    # order reproducible and traceable back to file names. Matching on the
    # suffix avoids picking up names that merely contain '.jpg'
    # (e.g. 'frame.jpg.bak').
    file_names = sorted(f for f in os.listdir(base_dir) if f.endswith('.jpg'))

    imgs = []
    for fn in file_names:
        raw = imread(os.path.join(base_dir, fn))
        # (H, W, 3) -> (1, H, W, 3) -> (1, 3, H, W)
        imgs.append(preproc_frcnn(raw).unsqueeze(0).permute(0, 3, 1, 2))

    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, 'MI3_test_'+scene+'.pkl'), 'wb') as f:
        pickle.dump(imgs, f)

In [3]:
# Write one pickle of preprocessed tensors per scene.
scene_list = ['Pathway','Doorway','Room','Bus'] # 'Staircase' is currently left out of the list
for scene in scene_list:
    generate_pkl_by_scene(scene)

In [5]:
# Reload the pickled tensors of a single scene for feature extraction.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load pickles that were produced by this notebook.
scene = 'Pathway'
with open('data/pickle/MI3_test_'+scene+'.pkl', 'rb') as handle:
    test_images = pickle.load(handle)

In [9]:
# Show the loaded list of tensors. NOTE(review): the repr is very long;
# len(test_images) and test_images[0].shape would give a compact summary.
test_images

[tensor([[[[ -88.9801,  -89.9801,  -91.9801,  ...,  -75.9801,  -71.9801,
             -72.9801],
           [ -87.9801,  -90.9801,  -92.9801,  ...,  -72.9801,  -73.9801,
             -75.9801],
           [ -91.9801,  -92.9801,  -91.9801,  ...,  -69.9801,  -74.9801,
             -77.9801],
           ...,
           [ -88.9801,  -89.9801,  -90.9801,  ...,  -69.9801,  -67.9801,
             -64.9801],
           [ -87.9801,  -88.9801,  -90.9801,  ...,  -68.9801,  -69.9801,
             -66.9801],
           [ -88.9801,  -89.9801,  -91.9801,  ...,  -70.9801,  -65.9801,
             -52.9801]],
 
          [[-102.9465, -102.9465, -102.9465,  ...,  -81.9465,  -78.9465,
             -79.9465],
           [-100.9465, -101.9465, -103.9465,  ...,  -77.9465,  -79.9465,
             -81.9465],
           [-102.9465, -103.9465, -102.9465,  ...,  -74.9465,  -79.9465,
             -82.9465],
           ...,
           [ -99.9465, -100.9465,  -99.9465,  ...,  -82.9465,  -80.9465,
             -80.94

In [11]:
def rcnn_intermediate(model, x):
    """Compute flattened convolutional features from the model's backbone.

    Note: this is NOT a generic helper for arbitrary torch models; it relies
    on ``model.RCNN_base`` being an iterable container of layers.

    Args:
        model: object exposing ``RCNN_base`` (e.g. an ``nn.Sequential``).
        x: input batch tensor of shape (N, C, H, W).

    Returns:
        Tensor of shape (N, channels * 7 * 7) with the pooled features of
        each input flattened into one row.
    """
    # Run the backbone layer by layer. Iterating the container directly
    # visits the same layers as ``list(model.RCNN_base.modules())[0]`` without
    # first materialising every nested sub-module in a list.
    for layer in model.RCNN_base:
        x = layer(x)

    # One additional 2x2 max-pool, then average down to a fixed 7x7 grid so
    # the flattened length does not depend on the input resolution.
    x = torch.nn.functional.max_pool2d(x, kernel_size=2, stride=2)
    x = torch.nn.functional.adaptive_avg_pool2d(x, output_size=(7, 7))

    # Flatten to (N, features). The fully connected layers (model.RCNN_top)
    # are not applied; the pooled convolutional features are returned as-is.
    return x.view(x.shape[0], -1)


In [18]:
# Project-local helper; presumably puts the Faster R-CNN library directory on
# sys.path so the `model` package below can be imported — TODO confirm.
import _init_paths

from model.faster_rcnn.vgg16 import vgg16

# NOTE(review): torch.load unpickles the file, so only load trusted checkpoints.
# NOTE(review): model_campus is not referenced again in this cell. Unless
# another cell applies it (e.g. via load_state_dict), fasterRCNN keeps whatever
# weights create_architecture() produced with pretrained=False — confirm.
model_campus  = torch.load('models/vgg16/KAIST/1st_round_cr/faster_rcnn_KAIST_downtown_1_3_566.pth')

# Class names handed to the detector; index 0 is the background class.
KAIST_classes = np.asarray(['__background__','person','people','cyclist'])

# NOTE(review): class_agnostic receives the non-empty string 'class_agnostic',
# which is truthy; presumably a boolean was intended — confirm.
fasterRCNN = vgg16(KAIST_classes, pretrained=False, class_agnostic='class_agnostic')
fasterRCNN.create_architecture()
# Inference mode.
fasterRCNN.eval()
# Move the model to the GPU; as the cell's last expression its return value
# (the model) is what the notebook displays.
fasterRCNN.cuda()

vgg16(
  (RCNN_rpn): _RPN(
    (RPN_Conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (RPN_cls_score): Conv2d(512, 24, kernel_size=(1, 1), stride=(1, 1))
    (RPN_bbox_pred): Conv2d(512, 48, kernel_size=(1, 1), stride=(1, 1))
    (RPN_proposal): _ProposalLayer()
    (RPN_anchor_target): _AnchorTargetLayer()
  )
  (RCNN_proposal_target): _ProposalTargetLayer()
  (RCNN_roi_pool): ROIPool(output_size=(7, 7), spatial_scale=0.0625)
  (RCNN_roi_align): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0)
  (RCNN_base): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), 

In [20]:
# Extract backbone features for every test image in mini-batches.
batch_sz = 24
if not test_images:
    raise ValueError('test_images is empty; nothing to extract features from')

# Number of full batches and size of the shorter tail batch (kept as
# notebook-level names for any later cell that reads them).
n_batch, res_batch = divmod(len(test_images), batch_sz)

outputs = []
with torch.no_grad():
    # Stepping by batch_sz covers the full batches and the tail in one loop.
    # A separate tail slice ``test_images[-res_batch:]`` is wrong when the
    # image count is an exact multiple of batch_sz: ``[-0:]`` selects the
    # whole list, so every image would be run a second time in one batch.
    for start in range(0, len(test_images), batch_sz):
        in_batch = torch.cat(test_images[start:start + batch_sz]).to('cuda')
        x = rcnn_intermediate(fasterRCNN, in_batch)
        outputs.append(x)
        # No ``in_batch.to('cpu')`` here: Tensor.to returns a tensor instead
        # of moving in place, so a call whose result is discarded frees
        # nothing. Rebinding in_batch on the next pass drops the old batch.

outputs = torch.cat(outputs)
X = outputs.to('cpu').numpy()

In [21]:
# Show the feature matrix: one row per image, one column per flattened feature.
X

array([[4.7695785 , 3.1710246 , 2.7350667 , ..., 0.        , 0.        ,
        0.05313465],
       [4.9088845 , 3.2225666 , 3.1833305 , ..., 0.        , 0.        ,
        0.04309072],
       [4.968571  , 3.1741734 , 3.1645584 , ..., 0.        , 0.        ,
        0.04143884],
       ...,
       [5.144362  , 4.92007   , 2.9027584 , ..., 0.        , 0.        ,
        0.13285919],
       [4.648894  , 3.2865324 , 3.000243  , ..., 0.        , 0.        ,
        0.05819702],
       [4.5997963 , 3.6297185 , 3.0870798 , ..., 0.        , 0.        ,
        0.23383969]], dtype=float32)