In [1]:
import numpy as np
import pandas as pd

In [2]:
# Switch the kernel's working directory to the project root. Later cells use
# relative paths ('data/gt-roi/...', 'data/region_feat/...', 'func/nets/')
# that are resolved against this directory.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
% cd /mnt/fs1/mayu-ot/Experiments/loc_iparaphrasing

/mnt/fs1/mayu-ot/Experiments/loc_iparaphrasing


In [3]:
def get_roi_meta(split):
    """Return a long-format (image, roi) table for one dataset split.

    Reads ``data/gt-roi/phrase_pair_<split>.csv`` and stacks its ``roi1`` and
    ``roi2`` columns underneath each other, each paired with the ``image``
    column of the same row.

    Args:
        split: Name substituted into the CSV file name (e.g. ``'val'``).

    Returns:
        pandas.DataFrame with columns ``image`` and ``roi``. All ``roi1``
        rows come first, followed by all ``roi2`` rows. The original row
        index is kept, so every index label appears twice.
    """
    pairs = pd.read_csv('data/gt-roi/phrase_pair_%s.csv' % split)
    # One frame per ROI column, in the order roi1 then roi2.
    halves = [
        pd.DataFrame({'image': pairs['image'], 'roi': pairs[roi_col]})
        for roi_col in ('roi1', 'roi2')
    ]
    return pd.concat(halves)

In [4]:
from ipywidgets import IntProgress
from IPython.display import display

In [5]:
import pandas as pd
import os
import sys
sys.path.append('func/nets/')
from faster_rcnn import FasterRCNNExtractor
from chainercv.datasets import voc_bbox_label_names
from chainer import cuda
from chainercv import utils
import numpy as np
import chainer

def load_roi(roi):
    """Parse a bracketed, comma-separated list of integers.

    The first and last characters of the (whitespace-stripped) text are
    treated as the enclosing brackets and dropped; the remainder is split on
    commas. Whitespace around each item is ignored, so ``'[1, 2, 3, 4]'``,
    ``'[1,2,3,4]'`` and ``'[ 1 ,2,  3, 4 ]'`` all parse to the same list.

    Args:
        roi (str): Text such as ``'[12, 34, 56, 78]'``.

    Returns:
        list of int: The parsed values, in order. An empty list for ``'[]'``.

    Raises:
        ValueError: If an item is not an integer literal.
    """
    inner = roi.strip()[1:-1]
    if not inner.strip():
        return []
    # int() tolerates surrounding whitespace, so splitting on ',' alone is
    # enough and does not depend on the exact ', ' separator.
    return [int(item) for item in inner.split(',')]

def extract_roi_feat(split, img_root, device=0):
    """Extract region features for every unique (image, roi) pair of a split.

    The unique (image, roi) rows returned by ``get_roi_meta(split)`` are
    grouped per image. Each image is read from ``img_root``, passed through
    ``FasterRCNNExtractor`` (``pretrained_model='voc07'``) together with its
    boxes, and the resulting rows are stored in a ``(N, 4096)`` float32
    array.

    Two files are written below ``data/region_feat/gt-roi-frcnn/``:

    * ``<split>.npy``      - the feature array (``np.save`` adds the suffix).
    * ``meta_<split>.csv`` - the ``image`` / ``roi`` value for each feature
      row, in the same order as the rows of the array.

    Args:
        split: Split name; selects the input CSV and names the output files.
        img_root: Directory containing the images as ``<image id>.jpg``.
        device: GPU id to run on. A negative value keeps the model and the
            inputs on the CPU.
            NOTE(review): CPU execution assumes ``FasterRCNNExtractor.extract``
            accepts numpy inputs - confirm.

    Returns:
        None. Results are written to disk.
    """
    out_dir = 'data/region_feat/gt-roi-frcnn'
    # Make sure the destination exists *before* the long extraction loop, so
    # a missing folder cannot throw away the whole run at save time.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    model = FasterRCNNExtractor(n_fg_class=len(voc_bbox_label_names), pretrained_model='voc07')

    use_gpu = device >= 0
    if use_gpu:
        chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
        model.to_gpu()

    roi_meta_unique = get_roi_meta(split).drop_duplicates()
    images = roi_meta_unique.image.unique()

    # Bucket the roi strings per image in a single pass. sort=False keeps the
    # groups (and the rows inside each group) in frame order, matching what a
    # per-image filter of the frame would return.
    data = {
        im: rows[['roi']].values
        for im, rows in roi_meta_unique.groupby('image', sort=False)
    }

    N = len(roi_meta_unique)
    feat = np.zeros((N, 4096), dtype=np.float32)
    print('extract features of %i regions'%N)

    j = 0  # next free row in `feat`
    table_im = []
    table_roi = []

    iprog = IntProgress(min=0, max=N)
    display(iprog)

    for im in images:
        img = utils.read_image(os.path.join(img_root, '%i.jpg'%im), color=True)
        roi_strs = data[im]  # (n_rois, 1) array of roi strings
        table_im += [im] * len(roi_strs)
        table_roi.append(roi_strs)

        bbox = np.asarray([load_roi(roi_str) for roi_str in roi_strs.ravel()]).astype(np.float32)
        # Every box belongs to batch element 0 (one image per forward pass).
        roi_indices = np.zeros((len(bbox),), dtype=np.int32)

        # preprocess
        p_x = model.prepare(img)
        # Rescale the boxes by the ratio of the prepared image's last-axis
        # size to the original's.
        # NOTE(review): assumes prepare() scales both axes by the same
        # factor - confirm.
        scale = p_x.shape[-1] * 1. / img.shape[-1]
        bbox = bbox * scale

        # Move the inputs to the GPU only when one was requested.
        if use_gpu:
            p_x = cuda.to_gpu(p_x)
            bbox = cuda.to_gpu(bbox)
            roi_indices = cuda.to_gpu(roi_indices)

        # Inference only: test-mode behaviour and no computational graph.
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            y = model.extract(p_x[None, :], bbox, roi_indices)
        y.to_cpu()
        y_arr = y.data

        feat[j:j+len(y_arr)] = y_arr
        j += len(y_arr)

        iprog.value = j

    np.save('data/region_feat/gt-roi-frcnn/%s'% split, feat)

    # np.vstack raises on an empty list, so handle a split without regions.
    roi_column = np.vstack(table_roi).ravel() if table_roi else []
    df = pd.DataFrame({'image': table_im, 'roi': roi_column})
    df.to_csv('data/region_feat/gt-roi-frcnn/meta_%s.csv' % split)

  from ._conv import register_converters as _register_converters


In [6]:
# Run the extraction for each split, in the order val -> test -> train,
# reading the images from the same folder and using GPU 3 every time.
FLICKR30K_IMAGE_ROOT = '../../Dataset/Flickr30kEntities/flickr30k-images/'
for split_name in ('val', 'test', 'train'):
    extract_roi_feat(split_name, FLICKR30K_IMAGE_ROOT, device=3)

extract features of 6047 regions


IntProgress(value=0, max=6047)

extract features of 5942 regions


IntProgress(value=0, max=5942)

extract features of 178842 regions


IntProgress(value=0, max=178842)