<a href="https://colab.research.google.com/github/piebro/RoboCup-SPL-Segmentation-Dataset/blob/master/RoboCup_SPL_Segmentation_Dataset_Preparation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title get dataset from Kaggle
# Install the Kaggle command line client that performs the download at the end of this cell.
! pip install -q kaggle

# Upload your kaggle.json API key (how to create one: https://www.kaggle.com/docs/api).
# The uploaded file is expected in the current working directory, because it is
# moved from there a few lines below.
from google.colab import files
files.upload()

# Move the key into ~/.kaggle/ and make it readable/writable by the owner only.
! mkdir -p ~/.kaggle 
! mv kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

# Download the dataset archive (unzipped in the next cell).
!kaggle datasets download pietbroemmel/naodevils-segmentation-upper-camera

In [13]:
# @title unzip data
# Extract the downloaded archive quietly (-q) into ./dataset, the folder that
# `dataset_root_path` points to in the "declare paths" cell.
!unzip -q naodevils-segmentation-upper-camera.zip -d dataset 

In [15]:
# @title install dependencies
# imantics is used further down to turn binary masks back into polygons and bounding boxes.
!pip install -q imantics

  Building wheel for imantics (setup.py) ... [?25l[?25hdone


In [43]:
# @title declare paths
# Folder the Kaggle archive was extracted into.
dataset_root_path = "dataset"

# Merged annotation files (all games in one file).
anns_manual_all = "annotations/manual_all.json"
anns_autolabel_all = "annotations/autolabel_all.json"

# Folders that receive one annotation file per game.
anns_manual_pergame_dir = "annotations/manual_pergame"
anns_autolabel_pergame_dir = "annotations/autolabel_pergame"

# Train / validation files built from the manual annotations.
anns_manual_train = "annotations/manual_train.json"
anns_manual_val = "annotations/manual_val.json"

# Training files built from the auto-labelled data (alone, and merged with the manual training set).
anns_autolabel_train = "annotations/autolabel_train.json"
anns_autolabel_and_manual_train = "annotations/autolabel_and_manual_train.json"

# Category names; a category's id is its position in this list + 1.
categories = [
  "line",
  "ball",
  "robot",
  "centercircle",
  "goal",
  "penaltycross",
]

# One colour triple per class id. Index 0 has no entry in `categories`
# (ids start at 1) — presumably the background; TODO confirm with the consumer.
class_id_to_color = [
  [0, 0, 0],        # 0
  [255, 0, 0],      # 1: line
  [0, 255, 0],      # 2: ball
  [0, 0, 255],      # 3: robot
  [122, 122, 0],    # 4: centercircle
  [0, 122, 122],    # 5: goal
  [122, 0, 122],    # 6: penaltycross
]

In [44]:
# @title COCO Dataset Util Functions
import os
import glob

import json
import sys

import cv2
import numpy as np
import imantics
from pycocotools.coco import COCO

def save_combined_annotation_json(coco_json_paths, output_json_path, new_categories):
  """Merge several COCO annotation files into one file with a shared category list.

  Images are matched by their "file_name": annotations of the same file name
  coming from different input files end up on a single image entry. Only
  "category_id", "segmentation" and "bbox" are carried over from each source
  annotation; image and annotation ids are assigned anew.

  Args:
    coco_json_paths: iterable of paths to the COCO json files to merge.
    output_json_path: path the merged COCO json is written to.
    new_categories: list of category names; a category's new id is its index
      in this list + 1.

  Raises:
    ValueError: if an input file contains a category whose name is not in
      `new_categories`.
  """
  merged = {}

  for source_path in coco_json_paths:
    dataset = COCO(source_path)

    # Translate this file's category ids to the ids implied by `new_categories`.
    remap = {
      old_id: new_categories.index(dataset.loadCats(old_id)[0]["name"]) + 1
      for old_id in dataset.getCatIds()
    }

    for image_id in dataset.imgs.keys():
      converted = [
        {
          "category_id": remap[source_ann["category_id"]],
          "segmentation": source_ann["segmentation"],
          "bbox": source_ann["bbox"],
        }
        for source_ann in dataset.loadAnns(dataset.getAnnIds(image_id))
      ]

      image_name = dataset.loadImgs(image_id)[0]["file_name"]
      merged.setdefault(image_name, []).extend(converted)

  entries = [
    {"filename": image_name, "annotations": image_annotations}
    for image_name, image_annotations in merged.items()
  ]

  combined = filename_annotations_list_to_coco_json(entries, new_categories)
  with open(output_json_path, 'w') as outfile:
    json.dump(combined, outfile)


def get_mask_handle_occlusion(annotation, height, width):
  """Rasterise the annotations of one image into mutually exclusive binary masks.

  Every annotation is drawn into its own layer. The layers are ordered from
  back to front (goal, line, centercircle, penaltycross, then robots and
  balls) and every pixel covered by a layer further in front is removed from
  all layers behind it, so no pixel belongs to more than one layer.

  Args:
    annotation: list of COCO-style annotation dicts. Each needs a
      "category_id" and a "segmentation" holding a list of flat
      [x0, y0, x1, y1, ...] polygons.
    height: mask height in pixels.
    width: mask width in pixels.

  Returns:
    Tuple (mask, category_ids). `mask` is a bool array of shape
    (height, width, len(annotation)); `category_ids` is an int32 array whose
    entry i is the category of mask[:, :, i]. Layers are in draw order, which
    generally differs from the order of `annotation`.
  """
  count = len(annotation)
  mask = np.zeros([height, width, count], dtype=np.uint8)

  # ids: line:1, ball:2, robot:3, centercircle:4, goal:5, penaltycross:6
  # draw order (back to front): goal, line, centercircle, penaltycross, [robot, ball]
  goal_id = 5
  robot_and_ball_ids = (2, 3)

  seg_list_robot_and_ball = []
  seg_list_goal = []
  seg_list_rest = []
  for shape in annotation:
    category_id = shape["category_id"]
    segmentations = shape["segmentation"]

    if category_id in robot_and_ball_ids:
      seg_list_robot_and_ball.append((category_id, segmentations))
    elif category_id == goal_id:
      # The goal goes to the very back (the original compared against 6,
      # which is the penaltycross id, contradicting the documented order).
      seg_list_goal.append((category_id, segmentations))
    else:
      seg_list_rest.append((category_id, segmentations))

  seg_list = []
  seg_list.extend(seg_list_goal)
  # Remaining classes by ascending id: line (1), centercircle (4), penaltycross (6).
  seg_list.extend(sorted(seg_list_rest, key=lambda x: x[0]))
  seg_list.extend(seg_list_robot_and_ball)

  category_id_list = []
  for i, (category_id, segmentations) in enumerate(seg_list):
    category_id_list.append(category_id)

    # cv2.fillPoly expects integer (int32) point arrays of shape (n, 2).
    pts = [
      np.array(polygon).reshape(-1, 2).round().astype(np.int32)
      for polygon in segmentations
    ]

    # Draw into a contiguous copy of the layer and write it back afterwards.
    img = mask[:, :, i:i+1].copy()
    cv2.fillPoly(img, pts, 1)
    mask[:, :, i:i+1] = img

  # Handle occlusions: walk from the front layer to the back and clear every
  # pixel that is already claimed by a layer in front.
  if mask.shape[2] > 0:  # if at least one mask is there
    occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
    for i in range(count - 2, -1, -1):
      mask[:, :, i] = mask[:, :, i] * occlusion
      occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, i]))

  # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the same dtype.
  return mask.astype(bool), np.array(category_id_list).astype(np.int32)


def save_non_overlapping_annotations_json(json_path, save_json_path, width=640, height=480):
  """Rewrite a COCO file so that the annotations of an image no longer overlap.

  For every image the annotations are rasterised with
  `get_mask_handle_occlusion`, which removes occluded pixels, and each
  resulting mask is converted back into polygons and a bounding box with
  imantics. Categories and images are copied unchanged; annotation ids are
  reassigned starting at 0.

  Args:
    json_path: path of the COCO json file to read.
    save_json_path: path the result is written to. May be the same as
      `json_path`; the input is read completely before the output is opened.
    width: mask width used for images that do not declare a "width".
    height: mask height used for images that do not declare a "height".
  """
  coco = COCO(json_path)
  with open(json_path) as json_file:
    coco_json = json.load(json_file)

  coco_json_new = {
    "categories": coco_json["categories"],
    "images": coco_json["images"],
  }

  image_ids = list(coco.imgs.keys())
  image_total = len(image_ids)

  new_annotations = []
  for img_count, img_id in enumerate(image_ids):
    # Prefer the size stored with the image; fall back to the defaults.
    img_info = coco.imgs[img_id]
    img_width = img_info.get("width") or width
    img_height = img_info.get("height") or height

    annotations = coco.loadAnns(coco.getAnnIds(img_id))
    masks, class_ids = get_mask_handle_occlusion(annotations, width=img_width, height=img_height)

    for i, class_id in enumerate(class_ids):
      im_mask = imantics.Mask(masks[:, :, i])
      polygons_new = [poly.tolist() for poly in im_mask.polygons()]

      # NOTE(review): the bbox is stored exactly as imantics returns it —
      # confirm it is in the [x, y, width, height] layout consumers expect.
      new_annotations.append({
          "id": len(new_annotations),
          "image_id": img_id,
          "category_id": int(class_id),
          "segmentation": polygons_new,
          "bbox": list(im_mask.bbox().bbox()),
          'iscrowd': False,
          'isbbox': False
      })

    # One progress update per image (not per annotation).
    sys.stdout.write('\rremoved overlapping for '+str(img_count+1)+' / '+str(image_total)+' images')
    sys.stdout.flush()

  print("")

  coco_json_new["annotations"] = new_annotations
  with open(save_json_path, 'w') as outfile:
    json.dump(coco_json_new, outfile)


def save_train_val_annotations_json(json_path, train_json_path, val_json_path, val_size, shuffel_seed=42):
  """Randomly split a COCO file into a training and a validation file.

  The image ids are sorted, shuffled with a seeded generator and the first
  round(len(images) * val_size) of them form the validation set; the rest is
  the training set. Each output file contains the full category list and the
  images and annotations of its split.

  Args:
    json_path: path of the COCO json file to split.
    train_json_path: output path for the training split.
    val_json_path: output path for the validation split.
    val_size: fraction of the images that goes into the validation split.
    shuffel_seed: seed for the shuffle, so the split is reproducible.
  """
  coco = COCO(json_path)
  img_ids = sorted(coco.imgs.keys())

  # A private RandomState yields the same permutation as seeding the global
  # NumPy RNG with this seed, without reseeding it for the rest of the notebook.
  np.random.RandomState(shuffel_seed).shuffle(img_ids)

  seperate_index = round(len(img_ids)*val_size)
  img_ids_val = img_ids[:seperate_index]
  img_ids_train = img_ids[seperate_index:]

  with open(json_path) as json_file:
    categories_json = json.load(json_file)["categories"]

  for split_img_ids, save_path in ((img_ids_val, val_json_path), (img_ids_train, train_json_path)):
    save_json = {
        "images": [],
        "categories": categories_json,
        "annotations": []
    }

    for img_id in split_img_ids:
      save_json["images"].append(coco.loadImgs(img_id)[0])
      save_json["annotations"].extend(coco.loadAnns(coco.getAnnIds(img_id)))

    with open(save_path, 'w') as outfile:
      json.dump(save_json, outfile)


def filename_annotations_list_to_coco_json(filename_annotations_list, categories):
  """Build a COCO-style dict from per-file annotation lists.

  Args:
    filename_annotations_list: list of dicts with the keys "filename" (str)
      and "annotations" (list of annotation dicts).
    categories: list of category names; a category's id is its index + 1.

  Returns:
    Dict with the keys "images", "annotations" and "categories". Image ids
    are the positions in `filename_annotations_list` (starting at 0),
    annotation ids count up from 1 across all images. The annotation dicts in
    the result are shallow copies, so the input is left unmodified.
  """
  categories_json = [
    {"id": index + 1, "name": cat_name}
    for index, cat_name in enumerate(categories)
  ]

  images = []
  annotations = []
  for img_id, filename_annotations in enumerate(filename_annotations_list):
    images.append({
        "id": img_id,
        "path": filename_annotations["filename"],
        "file_name": filename_annotations["filename"],
    })

    for annotation in filename_annotations["annotations"]:
      # Copy instead of writing "id"/"image_id" into the caller's dict.
      annotations.append(dict(annotation, id=len(annotations) + 1, image_id=img_id))

  return {
      "images": images,
      "annotations": annotations,
      "categories": categories_json
  }

def split_annotations_per_games(json_path, annotations_folder):
    """Split a COCO file into one COCO file per game.

    The game name is taken from the image file name: the file name is split
    at "_" and the second and third fields are joined with "_" again. Every
    game gets "<annotations_folder>/<game name>.json" containing the full
    category list plus the images and annotations of that game.

    Nothing is written if `annotations_folder` already contains files. A
    missing folder (including missing parents) is created; an existing empty
    folder is reused.

    Args:
        json_path: path of the COCO json file to split.
        annotations_folder: folder that receives the per-game files.
    """
    if os.path.isdir(annotations_folder) and len(os.listdir(annotations_folder)) != 0:
        print("The folder is not empty")
        return
    # exist_ok: an existing but empty folder must not raise FileExistsError.
    os.makedirs(annotations_folder, exist_ok=True)

    coco = COCO(json_path)
    with open(json_path) as json_file:
        coco_json = json.load(json_file)

    new_jsons = {}

    for img_id in coco.imgs.keys():
        img_data = coco.loadImgs(img_id)[0]
        filename = img_data["file_name"]
        game_name = "_".join(filename.split("_")[1:3])
        if game_name not in new_jsons:
            new_jsons[game_name] = {
              "categories": coco_json["categories"],
              "images": [],
              "annotations": []
            }

        new_jsons[game_name]["images"].append(img_data)
        new_jsons[game_name]["annotations"].extend(coco.loadAnns(coco.getAnnIds(img_id)))

    for key, value in new_jsons.items():
        save_json_path = os.path.join(annotations_folder, key + ".json")
        print("saved", key, "at", save_json_path, "with", len(value["images"]), "images and", len(value["annotations"]), "annotations")
        with open(save_json_path, 'w') as outfile:
            json.dump(value, outfile)

In [47]:
# Merge the raw manual annotation files into one COCO file and remove
# overlapping regions from it in place.
# exist_ok keeps this cell re-runnable; os.mkdir raised FileExistsError on the second run.
os.makedirs("annotations", exist_ok=True)
raw_manual_annotations_root_path = os.path.join(dataset_root_path, "raw_manual_annotations")
json_filenames = ["upper_00000_00500.json", "upper_00500_01000.json", "upper_01000_01500.json"]
json_paths = [
    os.path.join(raw_manual_annotations_root_path, json_filename)
    for json_filename in json_filenames
]

save_combined_annotation_json(json_paths, anns_manual_all, categories)
save_non_overlapping_annotations_json(anns_manual_all, anns_manual_all)

loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
loading annotations into memory...
Done (t=0.06s)
creating index...
index created!
loading annotations into memory...
Done (t=0.16s)
creating index...
index created!
loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
removed overlapping for 1179 / 1179 images


In [49]:
# Write one COCO file per game for the manual annotations.
split_annotations_per_games(
    json_path=anns_manual_all,
    annotations_folder=anns_manual_pergame_dir,
)

loading annotations into memory...
Done (t=0.35s)
creating index...
index created!
saved GermanOpen2019_HULKs at annotations/manual_pergame/GermanOpen2019_HULKs.json with 464 images and 2624 annotations
saved RoboCup2019_rUNSWift at annotations/manual_pergame/RoboCup2019_rUNSWift.json with 326 images and 1505 annotations
saved RoboCup2019_BandB at annotations/manual_pergame/RoboCup2019_BandB.json with 233 images and 1427 annotations
saved RoboCup2019_Team-Team at annotations/manual_pergame/RoboCup2019_Team-Team.json with 151 images and 786 annotations
saved RoboCup2019_HTWK-Leipzig at annotations/manual_pergame/RoboCup2019_HTWK-Leipzig.json with 5 images and 14 annotations


In [50]:
# Split the manual annotations by game: one complete game is held out for
# validation, all other games are used for training.
# The paths are derived from `anns_manual_pergame_dir` exactly like
# split_annotations_per_games builds them when writing the files, so changing
# the configured folder cannot leave stale literals behind.
train_manual_paths = [
    os.path.join(anns_manual_pergame_dir, "GermanOpen2019_HULKs.json"),
    os.path.join(anns_manual_pergame_dir, "RoboCup2019_rUNSWift.json"),
    os.path.join(anns_manual_pergame_dir, "RoboCup2019_BandB.json"),
    os.path.join(anns_manual_pergame_dir, "RoboCup2019_HTWK-Leipzig.json"),
]
val_manual_paths = [
    os.path.join(anns_manual_pergame_dir, "RoboCup2019_Team-Team.json"),
]
save_combined_annotation_json(train_manual_paths, anns_manual_train, categories)
save_combined_annotation_json(val_manual_paths, anns_manual_val, categories)

loading annotations into memory...
Done (t=0.14s)
creating index...
index created!
loading annotations into memory...
Done (t=0.09s)
creating index...
index created!
loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.04s)
creating index...
index created!


In [52]:
# Merge the two raw auto-label files into one COCO file and remove overlapping
# regions from it in place.
raw_autolabel_annotations_root_path = os.path.join(dataset_root_path, "raw_autolabel_annotations")
join_auto_label_paths = [
    os.path.join(raw_autolabel_annotations_root_path, autolabel_filename)
    for autolabel_filename in ("auto_label_br.json", "auto_label_lcgp.json")
]

save_combined_annotation_json(
    coco_json_paths=join_auto_label_paths,
    output_json_path=anns_autolabel_all,
    new_categories=categories,
)
save_non_overlapping_annotations_json(
    json_path=anns_autolabel_all,
    save_json_path=anns_autolabel_all,
)

loading annotations into memory...
Done (t=2.67s)
creating index...
index created!
loading annotations into memory...
Done (t=2.92s)
creating index...
index created!
loading annotations into memory...
Done (t=3.40s)
creating index...
index created!
removed overlapping for 8821 / 8821 images


In [53]:
# Write one COCO file per game for the auto-labelled annotations.
split_annotations_per_games(
    json_path=anns_autolabel_all,
    annotations_folder=anns_autolabel_pergame_dir,
)

loading annotations into memory...
Done (t=3.27s)
creating index...
index created!
saved RoboCup2019_Dutch-Nao at annotations/autolabel_pergame/RoboCup2019_Dutch-Nao.json with 1294 images and 8876 annotations
saved RoboCup2019_TJark at annotations/autolabel_pergame/RoboCup2019_TJark.json with 1127 images and 7803 annotations
saved RoboCup2019_HULKs at annotations/autolabel_pergame/RoboCup2019_HULKs.json with 3503 images and 23619 annotations
saved RoboCup2019_SwiftArk at annotations/autolabel_pergame/RoboCup2019_SwiftArk.json with 908 images and 7993 annotations
saved RoboCup2019_NTU-RoboPal at annotations/autolabel_pergame/RoboCup2019_NTU-RoboPal.json with 1168 images and 8058 annotations
saved GermanOpen2019_HULKs at annotations/autolabel_pergame/GermanOpen2019_HULKs.json with 135 images and 852 annotations
saved RoboCup2019_HTWK-Leipzig at annotations/autolabel_pergame/RoboCup2019_HTWK-Leipzig.json with 500 images and 2862 annotations
saved RoboCup2019_BandB at annotations/autolabel

In [54]:
# Build the auto-label training set from the listed games.
# The paths are derived from `anns_autolabel_pergame_dir` exactly like
# split_annotations_per_games builds them when writing the files.
train_autolabel_paths = [
    os.path.join(anns_autolabel_pergame_dir, "RoboCup2019_HULKs.json"),
    os.path.join(anns_autolabel_pergame_dir, "RoboCup2019_NTU-RoboPal.json"),
    os.path.join(anns_autolabel_pergame_dir, "RoboCup2019_TJark.json"),
    os.path.join(anns_autolabel_pergame_dir, "RoboCup2019_HTWK-Leipzig.json"),
    os.path.join(anns_autolabel_pergame_dir, "RoboCup2019_Dutch-Nao.json"),
]
save_combined_annotation_json(train_autolabel_paths, anns_autolabel_train, categories)

loading annotations into memory...
Done (t=1.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.55s)
creating index...
index created!
loading annotations into memory...
Done (t=0.35s)
creating index...
index created!
loading annotations into memory...
Done (t=0.13s)
creating index...
index created!
loading annotations into memory...
Done (t=0.65s)
creating index...
index created!


In [55]:
# Merge the auto-label and the manual training sets into one training file.
# Reuses the path constants the two input files were written to, instead of
# repeating their values as literals.
train_autolabel_and_manual_paths = [
    anns_autolabel_train,
    anns_manual_train,
]
save_combined_annotation_json(train_autolabel_and_manual_paths, anns_autolabel_and_manual_train, categories)

loading annotations into memory...
Done (t=2.69s)
creating index...
index created!
loading annotations into memory...
Done (t=0.30s)
creating index...
index created!


In [58]:
# @title Analyse Dataset

def analyse_annotations(json_path):
    """Collect basic statistics about one COCO annotation file.

    Args:
        json_path: path of the COCO json file.

    Returns:
        Dict with "file_path", "image_count" and "annotation_count" and, for
        every category that occurs at least once, "<name>_counts" (number of
        annotations) and "<name>_counts_percent". Despite its name the latter
        is the share of all annotations as a fraction, rounded to two
        decimals. Categories without annotations get no entries.
    """
    coco = COCO(json_path)
    info = {
        "file_path": json_path,
        "image_count": len(coco.imgs),
        "annotation_count": len(coco.anns),
    }

    cat_id_to_name = {cat['id']: cat['name'] for cat in coco.cats.values()}

    # Count every annotation, including the first one of each category
    # (the original started at 0 on first sight, so all counts were one too low).
    cat_counts = {}
    for ann in coco.anns.values():
        cat_id = ann["category_id"]
        cat_counts[cat_id] = cat_counts.get(cat_id, 0) + 1

    for cat_id, count in cat_counts.items():
        cat_name = cat_id_to_name[cat_id]
        info[cat_name + "_counts"] = count
        info[cat_name + "_counts_percent"] = round(count/info["annotation_count"]*100)/100

    return info

def analyse_annotations_list(list_of_annotations_jsons):
    """Run `analyse_annotations` on every file and return the results keyed by file path."""
    return {
        anns_path: analyse_annotations(anns_path)
        for anns_path in list_of_annotations_jsons
    }

def analyse_annotations_list_to_text(list_of_annotations_jsons):
    """Return a text overview of the given annotation files.

    Each file contributes one block produced by `annotation_info_to_text`
    (using the notebook-level `categories` list); blocks are separated by an
    empty line.
    """
    per_file_info = analyse_annotations_list(list_of_annotations_jsons)
    blocks = [
        annotation_info_to_text(file_info, categories)
        for file_info in per_file_info.values()
    ]
    return "\n\n".join(blocks)

def annotation_info_to_text(info, categories):
    """Format one statistics dict as three lines of text.

    For every name in `categories` a missing "<name>_counts" entry is added to
    `info` with the value None (so `info` is modified in place) and shows up
    as "None" in the text.

    Args:
        info: dict as returned by `analyse_annotations`.
        categories: category names whose count entries are filled in if missing.

    Returns:
        "filepath: ...", "images: ..., annotations: ..." and the per-class
        counts, joined with newlines.
    """
    for name in categories:
        info.setdefault(name + "_counts", None)

    # (label in the text, category name) in output order.
    labelled = (
        ("lines", "line"),
        ("balls", "ball"),
        ("robots", "robot"),
        ("centercircles", "centercircle"),
        ("goals", "goal"),
        ("penaltycrosses", "penaltycross"),
    )
    path_line = f'filepath: {info["file_path"]}'
    totals_line = f'images: {info["image_count"]}, annotations: {info["annotation_count"]}'
    counts_line = ", ".join(
        [f'{label}: {info[name + "_counts"]}' for label, name in labelled]
    )

    return "\n".join([path_line, totals_line, counts_line])


# Analyse every annotation file: those directly in annotations/ first, then
# those one folder level below (without recursive=True, "**" matches like "*").
# glob returns files in arbitrary order, so each group is sorted to keep
# overview.txt identical between runs.
all_anns_files = sorted(glob.glob("annotations/*.json")) + sorted(glob.glob("annotations/**/*.json"))
text = analyse_annotations_list_to_text(all_anns_files)

with open("annotations/overview.txt", "w") as f:
    f.write(text)

loading annotations into memory...
Done (t=2.97s)
creating index...
index created!
loading annotations into memory...
Done (t=0.04s)
creating index...
index created!
loading annotations into memory...
Done (t=2.73s)
creating index...
index created!
loading annotations into memory...
Done (t=0.27s)
creating index...
index created!
loading annotations into memory...
Done (t=0.33s)
creating index...
index created!
loading annotations into memory...
Done (t=3.32s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.26s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.32s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.41s)
creating index...
index created!
load

In [59]:
# Pack the content of annotations/ (recursively) into annotations.zip next to the folder.
!cd "annotations"; zip -r ../annotations.zip *
# Report the disk usage of the resulting archive.
!du annotations.zip

  adding: autolabel_all.json (deflated 72%)
  adding: autolabel_and_manual_train.json (deflated 71%)
  adding: autolabel_pergame/ (stored 0%)
  adding: autolabel_pergame/RoboCup2019_BandB.json (deflated 72%)
  adding: autolabel_pergame/RoboCup2019_SwiftArk.json (deflated 72%)
  adding: autolabel_pergame/RoboCup2019_Team-Team.json (deflated 72%)
  adding: autolabel_pergame/RoboCup2019_TJark.json (deflated 72%)
  adding: autolabel_pergame/RoboCup2019_rUNSWift.json (deflated 71%)
  adding: autolabel_pergame/RoboCup2019_Dutch-Nao.json (deflated 72%)
  adding: autolabel_pergame/RoboCup2019_HTWK-Leipzig.json (deflated 72%)
  adding: autolabel_pergame/GermanOpen2019_HULKs.json (deflated 72%)
  adding: autolabel_pergame/RoboCup2019_NTU-RoboPal.json (deflated 72%)
  adding: autolabel_pergame/RoboCup2019_HULKs.json (deflated 72%)
  adding: autolabel_train.json (deflated 71%)
  adding: manual_all.json (deflated 72%)
  adding: manual_pergame/ (stored 0%)
  adding: manual_pergame/RoboCup2019_BandB.