In [None]:
%matplotlib notebook
from PIL import Image
import os
import json
from matplotlib import pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
from multiprocessing import Pool
import shutil
import copy


In [None]:
# Width, in pixels, that every image is scaled to.
NEW_WIDTH = 640

# Directory the source images are read from, and the directory the resized
# copies are written to.
img_dir = '/home/ubuntu/data/ceratium-2000/'
out_img_dir = '/home/ubuntu/data/ceratium-2000-small/'

# Label file for the source images; later cells read its 'images' and
# 'annotations' lists.
with open('/home/ubuntu/data/ceratium-2000-labels.json', 'r') as label_file:
    labels = json.loads(label_file.read())

In [None]:
# Start from an empty output directory: remove whatever is already at
# out_img_dir (if anything), then create it fresh.
stale_output_present = os.path.exists(out_img_dir)
if stale_output_present:
    shutil.rmtree(out_img_dir)
os.makedirs(out_img_dir)

In [None]:
# Build two lookups over the loaded annotations:
#   annotations_by_id:       annotation id -> annotation dict
#   annotations_for_imageid: image id      -> list of annotation ids on that image
annotations_for_imageid = dict()
annotations_by_id = dict()

for ann_ in labels['annotations']:
    annotations_by_id[ann_['id']] = ann_
    # setdefault creates the per-image list on first sight of an image id, so
    # no try/except is needed (and no per-annotation debug output is printed).
    annotations_for_imageid.setdefault(ann_['image_id'], []).append(ann_['id'])


In [None]:
def resize(img_, plot=False):
    """Scale one image to ``NEW_WIDTH`` pixels wide and rescale its annotations.

    The image is read from ``img_dir``, resized with its aspect ratio
    preserved, and written to ``out_img_dir`` under the same file name.

    Parameters
    ----------
    img_ : dict
        Image record; ``'file_name'`` and ``'id'`` are read from it.
    plot : bool, optional
        If true, show the resized image with the rescaled boxes drawn in red.

    Returns
    -------
    tuple
        ``(image_record, annotations)``. ``image_record`` is a copy of
        ``img_`` with ``'width'`` and ``'height'`` set to the new size.
        ``annotations`` is a list of copies of this image's annotations with
        ``'bbox'`` scaled (values truncated to int) and ``'area'`` set to the
        scaled box's width * height. Every other field is passed through
        unchanged. Neither ``img_`` nor the entries of ``annotations_by_id``
        are modified, so calling this repeatedly does not compound the scaling.
    """
    # load image
    img_path = os.path.join(img_dir, img_['file_name'])
    with Image.open(img_path) as source_image:
        orig_width, orig_height = source_image.size
        resize_factor = NEW_WIDTH / orig_width

        # Pin the width to NEW_WIDTH directly: truncating
        # orig_width * resize_factor can land on NEW_WIDTH - 1 because of
        # floating-point round-off. Height keeps the truncating behaviour but
        # is never allowed to collapse to zero.
        new_size = (int(NEW_WIDTH), max(1, int(orig_height * resize_factor)))

        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # filter under its current name.
        pil_image = source_image.resize(new_size, Image.LANCZOS)

    # update image properties on a copy so the caller's record stays intact
    new_img = dict(img_)
    new_img['width'] = new_size[0]
    new_img['height'] = new_size[1]

    # update all annotations (images without annotations yield an empty list)
    new_anns = []
    for ann_id in annotations_for_imageid.get(new_img['id'], []):
        new_ann = dict(annotations_by_id[ann_id])

        new_bbox = [int(value * resize_factor) for value in new_ann['bbox']]
        new_ann['bbox'] = new_bbox
        new_ann['area'] = int(new_bbox[2] * new_bbox[3])

        new_anns.append(new_ann)

    if plot:
        fig, ax = plt.subplots()
        ax.imshow(pil_image)
        for new_ann in new_anns:
            # bbox is used as [x, y, width, height]; trace the closed outline.
            x, y, w, h = new_ann['bbox'][:4]
            ax.plot([x, x + w, x + w, x, x],
                    [y, y, y + h, y + h, y],
                    'r-')

    out_img_path = os.path.join(out_img_dir, new_img['file_name'])
    pil_image.save(out_img_path)

    return (new_img, new_anns)


In [None]:

# Resize every image with 4 worker processes, collecting the updated image
# records and annotations in input order (imap preserves ordering).
new_images, new_annotations = [], []

# The context manager shuts the workers down once all results are consumed,
# so re-running this cell does not leave stray processes behind.
with Pool(4) as pool:
    for img_, anns_ in tqdm(pool.imap(resize, labels['images']),
                            total=len(labels['images'])):
        new_images.append(img_)
        new_annotations += anns_

print(len(new_images))
print(len(new_annotations))

# Swap the rescaled records into the label dict and write it next to the
# resized images' directory.
labels['images'] = new_images
labels['annotations'] = new_annotations

label_out = '/home/ubuntu/data/ceratium-2000-small-labels.json'
with open(label_out, 'w') as f:
    json.dump(labels, f)

In [None]:
# Spot-check: display the first image record of the label dict.
labels['images'][0]

In [None]:
# Spot-check: display the first annotation record of the label dict.
labels['annotations'][0]

In [None]:
# Show the Python type of the first bbox coordinate of the first annotation
# (presumably to confirm it is a native int rather than a numpy scalar — verify intent).
type(labels['annotations'][0]['bbox'][0])