In [1]:
# For parsing and converting annotations
!pip install globox

Collecting globox
  Downloading globox-2.4.3-py3-none-any.whl (34 kB)
Collecting numpy<2.0.0,>=1.26.0 (from globox)
  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: numpy, globox
  Attempting uninstall: numpy
    Found existing installation: numpy 1.25.2
    Uninstalling numpy-1.25.2:
      Successfully uninstalled numpy-1.25.2
Successfully installed globox-2.4.3 numpy-1.26.4


In [2]:
import json
import globox
import pandas as pd
import copy

In [3]:
# Load in annotations.
# JSON text is UTF-8, so the encoding is given explicitly instead of relying
# on the platform/locale default used by open().
with open(r'/content/drive/MyDrive/TACO/data/annotations.json', encoding='utf-8') as f:
    dataset = json.load(f)

# Category entries exactly as stored in the annotation file
categories = dataset['categories']

In [4]:
# Load in csv file (two columns, no header row) and convert into an
# {old_class: new_class} mapping
df = pd.read_csv(r'/content/drive/MyDrive/TACO/detector/taco_config/map_1.csv', header=None)
df.columns = ['old_class', 'new_class']
class_map = df.set_index('old_class')['new_class'].to_dict()
print(class_map)

{'Aerosol': 'Litter', 'Aluminium foil': 'Litter', 'Battery': 'Litter', 'Aluminium blister pack': 'Litter', 'Carded blister pack': 'Litter', 'Clear plastic bottle': 'Litter', 'Glass bottle': 'Litter', 'Other plastic bottle': 'Litter', 'Plastic bottle cap': 'Litter', 'Metal bottle cap': 'Litter', 'Broken glass': 'Litter', 'Drink can': 'Litter', 'Food Can': 'Litter', 'Corrugated carton': 'Litter', 'Drink carton': 'Litter', 'Egg carton': 'Litter', 'Meal carton': 'Litter', 'Other carton': 'Litter', 'Paper cup': 'Litter', 'Disposable plastic cup': 'Litter', 'Foam cup': 'Litter', 'Glass cup': 'Litter', 'Other plastic cup': 'Litter', 'Food waste': 'Litter', 'Plastic lid': 'Litter', 'Metal lid': 'Litter', 'Magazine paper': 'Litter', 'Tissues': 'Litter', 'Wrapping paper': 'Litter', 'Normal paper': 'Litter', 'Paper bag': 'Litter', 'Plastified paper bag': 'Litter', 'Pizza box': 'Litter', 'Garbage bag': 'Litter', 'Single-use carrier bag': 'Litter', 'Polypropylene bag': 'Litter', 'Produce bag': 'Lit

In [5]:
# Taken from TACO dataset.py
def replace_dataset_classes(dataset, class_map):
    """Replace the classes of a dataset, in place, based on a dictionary.

    The new category list is built from the distinct values of ``class_map``,
    sorted, with ``'Background'`` (if present) moved to the front. Ids are
    assigned in that order; id 0 is reserved for ``'Background'``, so when no
    ``'Background'`` class exists the ids start at 1. Every new category gets
    an empty ``'supercategory'``.

    All inputs are validated *before* anything is modified, so a failed call
    leaves ``dataset`` exactly as it was.

    Args:
        dataset: dict with a ``'categories'`` list (dicts carrying ``'id'``
            and ``'name'``) and an ``'annotations'`` list (dicts carrying
            ``'category_id'``). Both are updated in place.
        class_map: dict mapping every original category name to its new
            class name.

    Returns:
        None. ``dataset['categories']`` is replaced by the new category list
        and each annotation's ``'category_id'`` is rewritten to the new id.

    Raises:
        KeyError: if an original category name is missing from ``class_map``,
            or an annotation references a category id that is not listed in
            ``dataset['categories']``.
    """
    original_categories = dataset['categories']
    annotations = dataset['annotations']

    # Validate the mapping first: failing half-way would leave the category
    # list emptied or only partially rebuilt.
    unmapped = [cat['name'] for cat in original_categories
                if cat['name'] not in class_map]
    if unmapped:
        raise KeyError(f"categories missing from class_map: {unmapped}")

    new_names = sorted(set(class_map.values()))

    # Assign background id 0
    has_background = 'Background' in new_names
    if has_background:
        new_names.remove('Background')
        new_names.insert(0, 'Background')

    # Without a background class, shift ids by one so that 0 stays reserved.
    first_id = 0 if has_background else 1
    new_id_by_name = {name: first_id + pos for pos, name in enumerate(new_names)}

    # Map from old category id to new category id
    class_ids_map = {
        cat['id']: new_id_by_name[class_map[cat['name']]]
        for cat in original_categories
    }

    # Validate the annotations before touching any of them.
    unknown_ids = {ann['category_id'] for ann in annotations} - set(class_ids_map)
    if unknown_ids:
        raise KeyError(f"annotations reference unknown category ids: {list(unknown_ids)}")

    # Everything checked out: apply the changes.
    dataset['categories'] = [
        {
            'supercategory': '',
            'id': new_id_by_name[name],  # Background has id=0
            'name': name,
        }
        for name in new_names
    ]
    for ann in annotations:
        ann['category_id'] = class_ids_map[ann['category_id']]

In [6]:
# Replace all classes with just 'Litter'
# NOTE: this rewrites `dataset` in place. Running the cell a second time on the
# same `dataset` looks the *new* category names up in `class_map` as if they were
# old names, which will typically raise KeyError -- reload the annotations first.
replace_dataset_classes(dataset, class_map)

In [7]:
# Write the current `dataset` to a new file next to the original annotations;
# annotations.json itself is not overwritten.
with open(r'/content/drive/MyDrive/TACO/data/annotations_new.json', 'w') as f:
    json.dump(dataset, f)

In [8]:
# Print the command-line options of `globox convert` (reference only; no later
# cell uses this output).
!globox convert -h

usage: globox convert [-h]
                      [--format {labelme,imagenet,via-json,openimage,pascalvoc,yolov7,yolo-darknet,coco,yolov5,txt,cvat}]
                      [--img_folder IMG_FOLDER] [--mapping MAPPING_IN] [--bb_fmt {ltrb,ltwh,xywh}]
                      [--norm {abs,rel}] [--ext EXT_IN] [--img_ext IMG_EXT_IN] [--sep SEP_IN]
                      [--save_fmt {labelme,imagenet,via-json,openimage,pascalvoc,yolov7,yolo-darknet,coco,yolov5,txt,cvat}]
                      [--bb_fmt_out {ltrb,ltwh,xywh}] [--norm_out {abs,rel}] [--sep_out SEP_OUT]
                      [--ext_out EXT_OUT] [--coco_auto_ids]
                      [--mapping_out MAPPING_OUT | --reverse_mapping_out REVERSE_MAPPING_OUT]
                      input output

positional arguments:
  input
  output

options:
  -h, --help            show this help message and exit

Parse options:
  --format {labelme,imagenet,via-json,openimage,pascalvoc,yolov7,yolo-darknet,coco,yolov5,txt,cvat}, -f {labelme,imagenet,via-

In [9]:
# Use globox to convert from COCO format into YOLO
# Positional arguments are <input> <output>: the JSON written earlier is the
# input, and the converted files go into the same data directory.
# --format names the input format, --save_fmt the output format.
!globox convert /content/drive/MyDrive/TACO/data/annotations_new.json /content/drive/MyDrive/TACO/data --format coco --save_fmt yolov7

Parsing: 100% 4784/4784 [00:00<00:00, 284532.98it/s]
Saving: 100% 1500/1500 [00:23<00:00, 63.79it/s]
