In [1]:
import json
import datetime
import pickle
from glob import glob as glob
import h5py
import numpy as np
import os
from tqdm import tqdm as tqdm
import random
import shutil
from PIL import Image

In [2]:
# Dataset variant to convert; it selects both the annotation pickle and the
# image folder that the later cells read from.
mode = "combined"

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load annotation pickles produced by a trusted preprocessing step.
with open('./' + mode + '_annotations_processed.pkl', 'rb') as f:
    annotations = pickle.load(f)
print('total number of images to split to train and test -----', len(annotations))

# Accumulators filled by the train / val conversion cells.
img_dict = {"train_image": [], "val_image": []}
annotation_dict = {"train_annotations": [], "val_annotations": []}

# Shuffle with a fixed seed so the split is reproducible, then use
# 90% of the images for training and the remaining 10% for validation.
random.seed(2020)
img_name_keys = list(annotations.keys())
random.shuffle(img_name_keys)

split = int(.9 * len(img_name_keys))
# Disjoint, exhaustive slices: the previous [0:split+1] / [split:-1] slices put
# the image at index `split` in both sets and dropped the last image entirely.
train_img_name_keys = img_name_keys[:split]
val_img_name_keys = img_name_keys[split:]
assert len(train_img_name_keys) + len(val_img_name_keys) == len(img_name_keys)

# Timestamp reused for "date_captured" / "date_created" in the generated JSON.
date = str(datetime.datetime.now())
# Running annotation id; the train and val cells both read and increment it,
# so ids do not repeat between the two annotation files.
instance_id = 0

# Output layout: <output_dir>/train and <output_dir>/val hold the copied images.
output_dir = './coco_format_' + mode
for subset in ('train', 'val'):
    # makedirs also creates output_dir itself; exist_ok makes re-runs harmless.
    os.makedirs(output_dir + '/' + subset, exist_ok=True)

total number of images to split to train and test ----- 235755


# COCO ANNOTATIONS FOR TRAIN

In [None]:
#####################  COCO ANNOTATIONS FOR TRAIN #####################
# Builds the COCO-style image and annotation records for the training split,
# copies every training image into ./coco_format_<mode>/train/, and writes the
# result to ./coco_format_<mode>/svhn_coco_train.json.

# Start from empty lists so re-running this cell does not append duplicates.
img_dict["train_image"] = []
annotation_dict["train_annotations"] = []

for image_name in tqdm(train_img_name_keys):
    # The part of the file name before the first '.' must be an integer;
    # it becomes the COCO image id.
    img_index = int(image_name.split('.')[0])
    img_path = mode + '/' + image_name
    # Only the dimensions are needed; the context manager closes the file
    # handle immediately instead of leaking one per image.
    with Image.open(img_path) as img:
        width, height = img.size

    img_dict["train_image"].append({
        # Refers to the single license entry (id 1) declared under "licenses".
        "license": 1,
        "file_name": image_name,
        "coco_url": "NA",
        "height": height,
        "width": width,
        "date_captured": date,
        "flickr_url": "NA",
        "id": img_index
    })

    # Per-image annotation record holding parallel lists keyed by
    # 'label', 'left', 'top', 'width' and 'height'.
    img_bbox = annotations[image_name]
    for i in range(len(img_bbox['label'])):
        x0, y0 = img_bbox['left'][i], img_bbox['top'][i]
        w, h = img_bbox['width'][i], img_bbox['height'][i]
        annotation_dict["train_annotations"].append({
            "id": instance_id,
            # COCO bbox layout: [x of left edge, y of top edge, width, height].
            "bbox": [x0, y0, w, h],
            "image_id": img_index,
            "segmentation": [],
            "ignore": 0,
            "area": w * h,
            "iscrowd": 0,
            # NOTE(review): labels are passed through unchanged; presumably they
            # are already in 0-9 to match the category ids below — confirm.
            "category_id": img_bbox['label'][i]
        })
        instance_id += 1
    shutil.copyfile(img_path, "./coco_format_" + mode + '/train/' + image_name)

# Values are printed in the order the label names them: images, then annotations.
print('train image and annotation length', len(img_dict['train_image']),
      len(annotation_dict['train_annotations']))

# One category per digit; the category id equals the digit value.
digit_names = ["zero", "one", "two", "three", "four",
               "five", "six", "seven", "eight", "nine"]

final_coco_svhn_train = {
    "info": {
        "description": "COCO SVHN",
        "url": "NA",
        "version": "1.0",
        "year": 2020,
        "contributor": "Trinanjan Saha",
        "date_created": date
    },
    # The COCO data format defines "licenses" as a list of license objects.
    "licenses": [{
        "url": "NA",
        "id": 1,
        "name": "NA"
    }],
    "images": img_dict["train_image"],
    "annotations": annotation_dict["train_annotations"],
    "categories": [
        {"supercategory": digit_name, "id": digit, "name": digit_name}
        for digit, digit_name in enumerate(digit_names)
    ]
}

filename = "./coco_format_" + mode + '/' + "svhn_coco_train.json"
with open(filename, 'w') as fp:
    json.dump(final_coco_svhn_train, fp)

# COCO ANNOTATIONS FOR VAL

In [None]:
#####################  COCO ANNOTATIONS FOR VAL #####################
# Builds the COCO-style image and annotation records for the validation split,
# copies every validation image into ./coco_format_<mode>/val/, and writes the
# result to ./coco_format_<mode>/svhn_coco_val.json.

# Start from empty lists so re-running this cell does not append duplicates.
img_dict["val_image"] = []
annotation_dict["val_annotations"] = []

for image_name in tqdm(val_img_name_keys):
    # The part of the file name before the first '.' must be an integer;
    # it becomes the COCO image id.
    img_index = int(image_name.split('.')[0])
    img_path = mode + '/' + image_name
    # Only the dimensions are needed; the context manager closes the file
    # handle immediately instead of leaking one per image.
    with Image.open(img_path) as img:
        width, height = img.size

    img_dict["val_image"].append({
        # Refers to the single license entry (id 1) declared under "licenses".
        "license": 1,
        "file_name": image_name,
        "coco_url": "NA",
        "height": height,
        "width": width,
        "date_captured": date,
        "flickr_url": "NA",
        "id": img_index
    })

    # Per-image annotation record holding parallel lists keyed by
    # 'label', 'left', 'top', 'width' and 'height'.
    img_bbox = annotations[image_name]
    for i in range(len(img_bbox['label'])):
        x0, y0 = img_bbox['left'][i], img_bbox['top'][i]
        w, h = img_bbox['width'][i], img_bbox['height'][i]
        annotation_dict["val_annotations"].append({
            # instance_id keeps counting from wherever earlier cells left it.
            "id": instance_id,
            # COCO bbox layout: [x of left edge, y of top edge, width, height].
            "bbox": [x0, y0, w, h],
            "image_id": img_index,
            "segmentation": [],
            "ignore": 0,
            "area": w * h,
            "iscrowd": 0,
            # NOTE(review): labels are passed through unchanged; presumably they
            # are already in 0-9 to match the category ids below — confirm.
            "category_id": img_bbox['label'][i]
        })
        instance_id += 1
    shutil.copyfile(img_path, "./coco_format_" + mode + '/val/' + image_name)

# The label previously said "train" although this cell reports validation counts.
# Values are printed in the order the label names them: images, then annotations.
print('val image and annotation length', len(img_dict['val_image']),
      len(annotation_dict['val_annotations']))

# One category per digit; the category id equals the digit value.
digit_names = ["zero", "one", "two", "three", "four",
               "five", "six", "seven", "eight", "nine"]

final_coco_svhn_val = {
    "info": {
        "description": "COCO SVHN",
        "url": "NA",
        "version": "1.0",
        "year": 2020,
        "contributor": "Trinanjan Saha",
        "date_created": date
    },
    # The COCO data format defines "licenses" as a list of license objects.
    "licenses": [{
        "url": "NA",
        "id": 1,
        "name": "NA"
    }],
    "images": img_dict["val_image"],
    "annotations": annotation_dict["val_annotations"],
    "categories": [
        {"supercategory": digit_name, "id": digit, "name": digit_name}
        for digit, digit_name in enumerate(digit_names)
    ]
}

filename = "./coco_format_" + mode + '/' + "svhn_coco_val.json"
with open(filename, 'w') as fp:
    json.dump(final_coco_svhn_val, fp)

In [None]:
# Write a second copy of the validation annotations under ./coco_format/ with a
# mode-prefixed file name.
filename = "./coco_format/"+ mode + "_svhn_coco_val.json"
# None of the visible cells creates ./coco_format (only ./coco_format_<mode>),
# so make sure the directory exists before opening the file for writing.
os.makedirs(os.path.dirname(filename), exist_ok=True)
with open(filename, 'w') as fp:
    json.dump(final_coco_svhn_val, fp)