# Load pre-trained Faster R-CNN features and convert them to HDF5

In [1]:
import base64
import numpy as np
import csv
import sys
import zlib
import time
import glob
import mmap
import h5py
import torch

In [2]:
# Each row carries base64-encoded arrays in a single field, so lift the csv
# module's per-field size cap before reading anything.
csv.field_size_limit(sys.maxsize)

# Column order of the TSV feature files; passed explicitly to csv.DictReader,
# so the first line of each file is read as data rather than as a header.
FIELDNAMES = [
    "image_id",
    "image_w",
    "image_h",
    "num_boxes",
    "boxes",
    "features",
]

# Glob patterns selecting the TSV files of each split.
trainval_path = "/ceph/kien/features/adaptive/trainval/*"
test_path = "/ceph/kien/features/adaptive/test2015/*"

In [3]:
# Peek at the first record of one TSV file to check the dataset's structure
infile = "/ceph/kien/features/adaptive/trainval/karpathy_val_resnet101_faster_rcnn_genome.tsv"
in_data = {}

with open(infile) as tsv_in_file:
    reader = csv.DictReader(tsv_in_file, delimiter="\t", fieldnames=FIELDNAMES)
    for item in reader:
        # Scalar columns arrive as text; convert them to integers in place.
        for int_field in ("image_id", "image_h", "image_w", "num_boxes"):
            item[int_field] = int(item[int_field])

        # Array columns are base64-encoded float32 buffers; reshape them to
        # one row per box.
        for field in ("boxes", "features"):
            raw = base64.decodebytes(bytes(item[field], "utf-8"))
            item[field] = np.frombuffer(raw, dtype=np.float32).reshape((item["num_boxes"], -1))

        in_data[item["image_id"]] = item
        break  # a single record is enough to inspect the layout
print(in_data)

{533452: OrderedDict([('image_id', 533452), ('image_w', 640), ('image_h', 478), ('num_boxes', 30), ('boxes', array([[  16.92600632,  207.32254028,  638.92669678,  477.20336914],
       [ 165.13946533,    0.        ,  638.92669678,  368.23352051],
       [ 372.32119751,    0.        ,  638.92669678,  410.76623535],
       [ 249.10119629,  321.56967163,  451.45025635,  468.78414917],
       [  53.49969101,   12.04119587,  370.06698608,  477.20336914],
       [ 571.78887939,    0.        ,  629.96490479,  118.63146973],
       [ 334.27658081,    0.        ,  476.74145508,   67.24216461],
       [ 375.36029053,    0.        ,  405.55444336,   28.27275658],
       [  41.51044846,   35.07247925,  237.24453735,  250.68437195],
       [ 311.44854736,   45.9132843 ,  499.88180542,  241.3283844 ],
       [   0.        ,    0.        ,  289.45135498,  126.67928314],
       [   0.        ,  173.48060608,  284.55725098,  376.94607544],
       [ 223.39830017,  186.83370972,  441.93817139,  421.12670

In [4]:
def statistic(data_path):
    """Count the images and boxes stored in a set of TSV feature files.

    Args:
        data_path: Glob pattern selecting the TSV files to scan. Every file is
            parsed as tab-separated data with the module-level FIELDNAMES
            columns.

    Returns:
        A tuple ``(num_img, num_feat)``: the number of rows over all matched
        files, and the sum of their ``num_boxes`` columns.
    """
    image_total = 0
    box_total = 0
    for tsv_path in glob.glob(data_path):
        with open(tsv_path) as handle:
            rows = csv.DictReader(handle, delimiter="\t", fieldnames=FIELDNAMES)
            # Only the per-row box count is needed; the encoded arrays are
            # never decoded here.
            boxes_per_image = [int(row["num_boxes"]) for row in rows]
        image_total += len(boxes_per_image)
        box_total += sum(boxes_per_image)

    return image_total, box_total

In [5]:
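# Scan of the TSV files that counts images and boxes. Left commented out:
# the next cell hardcodes the counts (presumably obtained from these calls).
# trainval_num_img, trainval_num_feat = statistic(trainval_path)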
# print(trainval_num_img)
# print(trainval_num_feat)

# test_num_img, test_num_feat = statistic(test_path)
# print(test_num_img)
# print(test_num_feat)

In [6]:
# Dataset sizes (number of images, total number of boxes) per split, hardcoded
# so the TSV files do not have to be rescanned.
# NOTE(review): presumably the output of statistic() on trainval_path and
# test_path (see the commented-out calls) - re-verify if the TSV files change.
trainval_num_img, trainval_num_feat = 123287, 3924253
test_num_img, test_num_feat = 81434, 2566887

In [9]:
def _decode_array(encoded, num_rows, num_cols):
    """Decode a base64 text field into a float32 array of shape (num_rows, num_cols)."""
    raw = base64.decodebytes(bytes(encoded, "utf-8"))
    # An explicit column count makes a malformed width fail here instead of
    # being silently broadcast when assigned into the dataset.
    return np.frombuffer(raw, dtype=np.float32).reshape((num_rows, num_cols))


def load_features(num_img, num_feat, data_path, data_file, feat_dim=2048):
    """Copy box features from TSV files into datasets of an HDF5-like file.

    The boxes and features of all images are stored back to back in two flat
    datasets. Per-image datasets record where each image's rows start and end.

    Datasets created in ``data_file``:
        boxes          (num_feat, 4)         float32
        features       (num_feat, feat_dim)  float32
        img_start_idx  (num_img,)            int64, first row of the image
        img_end_idx    (num_img,)            int64, last row of the image (inclusive)
        image_h        (num_img,)            int64
        image_w        (num_img,)            int64

    Args:
        num_img: Total number of rows (images) in the matched TSV files.
        num_feat: Total number of boxes over all images.
        data_path: Glob pattern selecting the TSV files. They are parsed with
            the module-level FIELDNAMES columns.
        data_file: Writable object exposing ``create_dataset(name, shape,
            dtype=...)``, e.g. an open ``h5py.File``.
        feat_dim: Number of float32 values per box feature vector.

    Returns:
        Dict mapping ``image_id`` (int) to the image's position in the
        per-image datasets.

    Raises:
        ValueError: If the files hold more images or boxes than declared by
            ``num_img`` / ``num_feat``, or if a decoded array does not have
            the expected number of columns.

    Warns:
        UserWarning: If the files hold fewer images or boxes than declared,
            in which case the trailing dataset rows were never written.
    """
    import warnings

    img_idx = {}
    counter = 0
    feat_counter = 0

    boxes = data_file.create_dataset("boxes", (num_feat, 4), dtype=np.float32)
    features = data_file.create_dataset("features", (num_feat, feat_dim), dtype=np.float32)
    img_start_idx = data_file.create_dataset("img_start_idx", (num_img,), dtype=np.int64)
    img_end_idx = data_file.create_dataset("img_end_idx", (num_img,), dtype=np.int64)
    image_h = data_file.create_dataset("image_h", (num_img,), dtype=np.int64)
    image_w = data_file.create_dataset("image_w", (num_img,), dtype=np.int64)

    # glob returns matches in arbitrary order; sorting makes the row layout
    # reproducible from one run to the next.
    for file in sorted(glob.glob(data_path)):
        with open(file) as tsv_file:
            reader = csv.DictReader(tsv_file, delimiter="\t", fieldnames=FIELDNAMES)
            for item in reader:
                num_boxes = int(item["num_boxes"])
                feat_end = feat_counter + num_boxes
                # Fail with a clear message instead of an obscure index or
                # broadcast error when the declared sizes are too small.
                if counter >= num_img or feat_end > num_feat:
                    raise ValueError(
                        "%s holds more data than declared (num_img=%i, num_feat=%i)"
                        % (file, num_img, num_feat))

                image_h[counter] = int(item["image_h"])
                image_w[counter] = int(item["image_w"])
                boxes[feat_counter:feat_end] = _decode_array(item["boxes"], num_boxes, 4)
                features[feat_counter:feat_end] = _decode_array(item["features"], num_boxes, feat_dim)
                img_start_idx[counter] = feat_counter
                # The end index is inclusive: it points at the image's last row.
                img_end_idx[counter] = feat_end - 1
                feat_counter = feat_end
                img_idx[int(item["image_id"])] = counter
                counter += 1
                if counter % 1000 == 0:
                    print("processing %i/%i" % (counter, num_img))

    if counter != num_img or feat_counter != num_feat:
        # The datasets were sized for more data than was found, so their
        # trailing rows were never written.
        warnings.warn(
            "expected %i images / %i boxes but read %i / %i; trailing dataset rows are unfilled"
            % (num_img, num_feat, counter, feat_counter))

    return img_idx

In [10]:
# HDF5 files that receive the converted features of each split.
trainval_images_path, test_images_path = (
    "/ceph/kien/features/trainval_images.h5",
    "/ceph/kien/features/test_images.h5",
)

In [11]:
# Convert the trainval TSV features to HDF5. The context manager closes the
# file even if load_features raises part-way through.
with h5py.File(trainval_images_path, "w") as trainval_images:
    trainval_images_idx = load_features(trainval_num_img, trainval_num_feat, trainval_path, trainval_images)
# Persist the image_id -> row position mapping next to the HDF5 file.
torch.save(trainval_images_idx, "/ceph/kien/features/trainval_images.pt")

processing 1000/123287
processing 2000/123287
processing 3000/123287
processing 4000/123287
processing 5000/123287
processing 6000/123287
processing 7000/123287
processing 8000/123287
processing 9000/123287
processing 10000/123287
processing 11000/123287
processing 12000/123287
processing 13000/123287
processing 14000/123287
processing 15000/123287
processing 16000/123287
processing 17000/123287
processing 18000/123287
processing 19000/123287
processing 20000/123287
processing 21000/123287
processing 22000/123287
processing 23000/123287
processing 24000/123287
processing 25000/123287
processing 26000/123287
processing 27000/123287
processing 28000/123287
processing 29000/123287
processing 30000/123287
processing 31000/123287
processing 32000/123287
processing 33000/123287
processing 34000/123287
processing 35000/123287
processing 36000/123287
processing 37000/123287
processing 38000/123287
processing 39000/123287
processing 40000/123287
processing 41000/123287
processing 42000/123287
p

In [12]:
# Convert the test TSV features to HDF5. The context manager closes the file
# even if load_features raises part-way through.
with h5py.File(test_images_path, "w") as test_images:
    test_images_idx = load_features(test_num_img, test_num_feat, test_path, test_images)
# Persist the image_id -> row position mapping next to the HDF5 file.
torch.save(test_images_idx, "/ceph/kien/features/test_images.pt")

processing 1000/81434
processing 2000/81434
processing 3000/81434
processing 4000/81434
processing 5000/81434
processing 6000/81434
processing 7000/81434
processing 8000/81434
processing 9000/81434
processing 10000/81434
processing 11000/81434
processing 12000/81434
processing 13000/81434
processing 14000/81434
processing 15000/81434
processing 16000/81434
processing 17000/81434
processing 18000/81434
processing 19000/81434
processing 20000/81434
processing 21000/81434
processing 22000/81434
processing 23000/81434
processing 24000/81434
processing 25000/81434
processing 26000/81434
processing 27000/81434
processing 28000/81434
processing 29000/81434
processing 30000/81434
processing 31000/81434
processing 32000/81434
processing 33000/81434
processing 34000/81434
processing 35000/81434
processing 36000/81434
processing 37000/81434
processing 38000/81434
processing 39000/81434
processing 40000/81434
processing 41000/81434
processing 42000/81434
processing 43000/81434
processing 44000/814