# Merge multiple COCO annotation JSON files into one file.

Given multiple COCO annotated JSON files, your goal is to merge them into one COCO annotated JSON file.

Merging puts all of the data in one place, which makes it easy to split the result into training and validation JSON files according to a chosen percentage ratio. If you already have a separate validation COCO annotation JSON file, this notebook can instead be used to merge the remaining files into a single training COCO annotation JSON file.

In [None]:
# Import necessary libraries
import tqdm
import json
import glob

In [None]:
def merge_jsons(list_of_jsons):
  """
  Merges several COCO annotation JSON files into one COCO dictionary.

  Files are processed in the order given. Images are renumbered with
  consecutive ids starting at 0 and annotations with consecutive ids
  starting at 1. Categories are unified by name: the first time a name is
  seen it receives the next free id (starting at 1), and every annotation is
  re-pointed at the merged image and category ids.

  A file whose 'images' list is empty (or whose first image entry is empty)
  is skipped entirely.

  Args:
    list_of_jsons: An iterable of paths to COCO annotation JSON files.

  Returns:
    A dictionary with the keys 'images', 'categories' and 'annotations'
    holding the merged data. Nothing is written to disk.

  Raises:
    KeyError: If a file lacks a required key, or if an annotation refers to
      an image id or category id that is not declared in the same file.
  """

  images_list = []
  categories_list = []
  annotations_list = []
  labels_dict = {}  # category name -> merged category id
  next_image_id = 0
  next_annotation_id = 1

  # Wrap the iterable itself so tqdm can read its length and show a total.
  for json_file_path in tqdm.tqdm(list_of_jsons):
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_file_path, encoding='utf-8') as json_file:
      read_json = json.load(json_file)

    images = read_json['images']
    # Guard against an empty list before looking at the first entry.
    if not images or not images[0]:
      continue

    # process images dictionary
    image_id_map = {}  # id used inside this file -> merged image id
    for image in images:
      image_id_map[image['id']] = next_image_id
      images_list.append({
          'file_name': image['file_name'],
          'id': next_image_id,
          'width': image['width'],
          'height': image['height'],
      })
      next_image_id += 1

    # process categories dictionary
    category_name_by_id = {}  # id used inside this file -> category name
    for category in read_json['categories']:
      name = category['name']
      category_name_by_id[category['id']] = name
      if name not in labels_dict:
        # Merged ids are handed out contiguously from 1, so the next free id
        # is always one more than the number of names registered so far.
        labels_dict[name] = len(labels_dict) + 1
        categories_list.append({
            # Fall back to an empty string when the key is absent instead of
            # aborting the whole merge.
            'supercategory': category.get('supercategory', ''),
            'id': labels_dict[name],
            'name': name,
        })

    # process annotations dictionary
    for annotation in read_json['annotations']:
      category_name = category_name_by_id[annotation['category_id']]
      annotations_list.append({
          'segmentation': annotation['segmentation'],
          'area': annotation['area'],
          'bbox': annotation['bbox'],
          'image_id': image_id_map[annotation['image_id']],
          'category_id': labels_dict[category_name],
          'id': next_annotation_id,
          # Keep the source crowd flag; default to 0 only when it is missing.
          'iscrowd': annotation.get('iscrowd', 0),
      })
      next_annotation_id += 1

  return {
      'images': images_list,
      'categories': categories_list,
      'annotations': annotations_list,
  }

In [None]:
# Recursively collect every JSON file below the dataset directory.
files = glob.glob(
    pathname='/mydrive/sherman/**/*.json',
    recursive=True,
)
files  # last expression of the cell: displays the paths that were found

['/mydrive/sherman/annotation_3.json',
 '/mydrive/sherman/annotation_2.json',
 '/mydrive/sherman/annotation_4.json',
 '/mydrive/sherman/annotation_1.json']

In [None]:
# Merge all collected annotation files; the result is kept in memory as a dict.
data = merge_jsons(files)

4it [00:00, 10.26it/s]
