In [1]:
import os
import os.path as op
import json
import cv2
import base64
import numpy as np
from tqdm import tqdm

from maskrcnn_benchmark.structures.tsv_file_ops import tsv_reader, tsv_writer
from maskrcnn_benchmark.structures.tsv_file_ops import generate_linelist_file
from maskrcnn_benchmark.structures.tsv_file_ops import generate_hw_file
from maskrcnn_benchmark.structures.tsv_file import TSVFile
from maskrcnn_benchmark.data.datasets.utils.image_ops import img_from_base64

In [4]:
orig_root = '/media/stopmosk/data/huawei/datasets_orig/textcaps_orig'

orig_img_train_val_dir = op.join(orig_root, 'train_images')  # Contains train & val images
orig_img_test_dir = op.join(orig_root, 'test_images')

orig_cap_filenames = {split: f'TextCaps_0.1_{split}.json' for split in ['train', 'val', 'test']}

exp_root = '/media/stopmosk/data/huawei/datasets_proc/textcaps_nn'

# cap_exp = '_caption.json'

In [5]:
caps = {'train': [], 'val': [], 'test': []}

for split in caps.keys():
    cap_filename = op.join(orig_root, orig_cap_filenames[split])
    with open(cap_filename) as fp:
        captions_json = json.load(fp)
    caps[split] = captions_json['data']

# caps['train'][0]

In [18]:
img_list = {split_name: set([item['image_id'] + '.jpg' for item in caps[split_name]]) for split_name in caps.keys()}
for split in img_list:
    print(len(img_list[split]))

21953
3166
3289


In [14]:
for split in caps.keys():
    rows = []
    rows_label = []
    rows_hw = []

    i = 2
    for img_p in tqdm(img_list[split]):
        img_key = img_p.split('.')[0]
        img_path = op.join(orig_root, f"{'test' if split=='test' else 'train'}_images", img_p)
        img = cv2.imread(img_path)
        img_encoded_str = base64.b64encode(cv2.imencode('.jpg', img)[1])
        row = [img_key, img_encoded_str]
        # print(row[1][800:900] , flush=True)
        rows.append(row)

        height = img.shape[0]
        width = img.shape[1]
        row_hw = [img_key, json.dumps([{'height': height, 'width': width}])]
        rows_hw.append(row_hw)
        i -= 1
        if i == 0:
            break
        
    exp_encoded_img_file = op.join(exp_root, f'{split}.img.tsv')
    exp_hw_file = op.join(exp_root, f'{split}.hw.tsv')
    print(exp_encoded_img_file, flush=True)
    tsv_writer(rows, exp_encoded_img_file)
    # tsv_writer(rows_label, label_file)
    tsv_writer(rows_hw, exp_hw_file)

  0%|          | 1/21953 [00:00<11:20, 32.25it/s]

/media/stopmosk/data/huawei/datasets/textcaps_nn/train.img.tsv



  0%|          | 1/3166 [00:00<01:47, 29.39it/s]

/media/stopmosk/data/huawei/datasets/textcaps_nn/val.img.tsv



  0%|          | 1/3289 [00:00<02:40, 20.44it/s]

/media/stopmosk/data/huawei/datasets/textcaps_nn/test.img.tsv





In [24]:
# caps['val'][0]

In [30]:
def generate_cap_json(split: str):
    captions = []
    cap_idx = 0
    for sample in caps[split]:
        image_id = sample['image_id']
        caption_str = sample['caption_str']
        captions.append(
            {
                'image_id': image_id,
                'id': cap_idx,
                'caption': caption_str,
            }
        )
        cap_idx +=1
    captions = sorted(captions, key=lambda k: k['image_id'])
    print(captions[:10])
    return captions

In [33]:
# cap_exp = '_caption.json'

for split in ['train', 'val']:
    cap_filename = op.join(exp_root, f'{split}_caption.json')
    with open(cap_filename, 'w') as fp:
        json.dump(generate_cap_json(split), fp)

[{'image_id': '0000599864fd15b3', 'id': 10210, 'caption': 'Turqouise bus with the numbers "29267222" right behind a group of people taking a picture.'}, {'image_id': '0000599864fd15b3', 'id': 68612, 'caption': 'Five people pose for a photo as the number 5 bus passes in the background.'}, {'image_id': '0000599864fd15b3', 'id': 68743, 'caption': 'Five people pose in front of a bus that advertises for a Hong Kong company with the phone number 2926 7222.'}, {'image_id': '0000599864fd15b3', 'id': 104407, 'caption': 'A group of people are posing for a picture in front of a bus with the number 5 on it.'}, {'image_id': '0000599864fd15b3', 'id': 109362, 'caption': 'Double decker bus number 5 is a bright teal color.'}, {'image_id': '0000e8b36676338b', 'id': 176, 'caption': 'Oddly shaped bus that is apparently for private use only.'}, {'image_id': '0000e8b36676338b', 'id': 70087, 'caption': 'black and white bus with felix private above windshield and license plate of dbu 889'}, {'image_id': '0000