In [1]:
import os
import os.path as op
import json
import cv2
import base64
import numpy as np
from tqdm import tqdm

from maskrcnn_benchmark.structures.tsv_file_ops import tsv_reader, tsv_writer
from maskrcnn_benchmark.structures.tsv_file_ops import generate_linelist_file
from maskrcnn_benchmark.structures.tsv_file_ops import generate_hw_file
from maskrcnn_benchmark.structures.tsv_file import TSVFile
from maskrcnn_benchmark.data.datasets.utils.image_ops import img_from_base64

In [6]:
# Location of the original TextCaps download (caption JSON files and image folders).
orig_root = '../../../../datasets/textcaps_orig'

# Name of the caption annotation file for each split, resolved against orig_root.
orig_cap_filenames = dict(
    (split, f'TextCaps_0.1_{split}.json') for split in ('train', 'val', 'test')
)

# Destination directory for every file this notebook writes.
exp_root = '../../../../datasets/textcaps_nn'

In [7]:
# Per-split list of caption samples, taken from the 'data' field of each
# annotation file. Keys are inserted in train/val/test order, which later
# cells rely on when they iterate over caps.keys().
caps = {}

for split in ['train', 'val', 'test']:
    cap_filename = op.join(orig_root, orig_cap_filenames[split])
    # Decode explicitly as UTF-8 so that non-ASCII captions load the same way
    # regardless of the platform's default text encoding.
    with open(cap_filename, encoding='utf-8') as fp:
        captions_json = json.load(fp)
    caps[split] = captions_json['data']

# caps['train'][0]

In [8]:
# For every split, the set of distinct image filenames ('<image_id>.jpg')
# referenced by that split's caption samples.
img_list = {
    split_name: {sample['image_id'] + '.jpg' for sample in caps[split_name]}
    for split_name in caps.keys()
}

# Report how many unique images each split contains.
for split in img_list:
    print(len(img_list[split]))

21953
3166
3289


In [13]:
# For every split, write two TSV files into exp_root:
#   <split>.img.tsv : image key, base64 of the image re-encoded as JPEG
#   <split>.hw.tsv  : image key, JSON list holding one {'height', 'width'} dict
for split in caps.keys():
    rows = []
    rows_hw = []

    # Test images are read from 'test_images'; train and val are both read
    # from 'train_val_images/train_images'.
    img_dir = op.join(orig_root, f"{'test' if split=='test' else 'train_val_images/train'}_images")

    # img_list[split] is a set, whose iteration order is arbitrary; sorting
    # makes the row order of the generated TSV files reproducible across runs.
    for img_p in tqdm(sorted(img_list[split])):
        # splitext strips only the trailing extension, so the key always equals
        # the original image_id even if that id itself contains a '.'.
        img_key = op.splitext(img_p)[0]
        img_path = op.join(img_dir, img_p)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # rather than raising; fail here with the offending path.
            raise FileNotFoundError(f'Could not read image: {img_path}')

        encoded_ok, img_buffer = cv2.imencode('.jpg', img)
        if not encoded_ok:
            raise ValueError(f'Could not encode image as JPEG: {img_path}')
        img_encoded_str = base64.b64encode(img_buffer)
        rows.append([img_key, img_encoded_str])

        # The first two entries of img.shape are used as height and width.
        height, width = img.shape[:2]
        rows_hw.append([img_key, json.dumps([{'height': height, 'width': width}])])

    exp_encoded_img_file = op.join(exp_root, f'{split}.img.tsv')
    exp_hw_file = op.join(exp_root, f'{split}.hw.tsv')
    print(exp_encoded_img_file, flush=True)
    tsv_writer(rows, exp_encoded_img_file)
    tsv_writer(rows_hw, exp_hw_file)

100%|██████████| 21953/21953 [06:04<00:00, 60.22it/s]

../../../datasets/textcaps_nn/train.img.tsv



100%|██████████| 3166/3166 [00:53<00:00, 59.17it/s]

../../../datasets/textcaps_nn/val.img.tsv



100%|██████████| 3289/3289 [00:55<00:00, 58.80it/s]

../../../datasets/textcaps_nn/test.img.tsv





In [14]:
# caps['val'][0]

In [9]:
def generate_cap_json(split: str):
    """Build the caption records for one split of the module-level ``caps``.

    Each sample in ``caps[split]`` becomes a dict with the keys ``image_id``,
    ``id`` and ``caption``. ``id`` is the sample's position in ``caps[split]``
    (counted from 0) and is assigned before sorting.

    Args:
        split: Key into ``caps`` selecting which samples to convert.

    Returns:
        The list of records ordered by ``image_id``; records with equal
        ``image_id`` keep their original relative order.
    """
    records = [
        {
            'image_id': entry['image_id'],
            'id': position,
            'caption': entry['caption_str'],
        }
        for position, entry in enumerate(caps[split])
    ]
    # list.sort is stable, so captions of the same image stay in input order.
    records.sort(key=lambda record: record['image_id'])
    return records

In [16]:
# Dump the caption records of the train and val splits to
# '<split>_caption.json' inside exp_root.
for split in ('train', 'val'):
    out_path = op.join(exp_root, f'{split}_caption.json')
    with open(out_path, 'w') as out_file:
        split_captions = generate_cap_json(split)
        json.dump(split_captions, out_file)

In [15]:
# Write a COCO-style caption file ('annotations' + 'images' + metadata) for
# each listed split into exp_root.
for split in ['val']:  # 'test' is currently disabled
    cap_filename = op.join(exp_root, f'{split}_caption_coco_format.json')
    json_annotations = generate_cap_json(split)

    # generate_cap_json yields one record per caption, so an image with several
    # captions appears several times. List every image exactly once, keeping
    # first-seen order (dict.fromkeys preserves insertion order).
    unique_image_ids = dict.fromkeys(ann['image_id'] for ann in json_annotations)
    json_images = [{'id': image_id, 'image_name': image_id} for image_id in unique_image_ids]

    json_all = {
        'annotations': json_annotations,
        'images': json_images,
        'type': 'captions',
        # Placeholder values for the remaining top-level fields.
        'info': 'dummy',
        'licenses': 'dummy',
    }

    with open(cap_filename, 'w') as fp:
        json.dump(json_all, fp)