# Setup

In [1]:
# Build and install pycocotools from the PythonAPI subdirectory of the
# cocodataset/cocoapi GitHub repository.
# NOTE(review): the URL is not pinned to a commit, so a rerun may build a
# different revision — consider appending @<commit-sha> after ".git".
!pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-v63g3kxy
  Running command git clone -q https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-v63g3kxy
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... [?25l[?25hdone
  Created wheel for pycocotools: filename=pycocotools-2.0-cp37-cp37m-linux_x86_64.whl size=263921 sha256=074b4c6db75f73953a705eab5e2ddcd2bc50f14a1baa288a0ffe80f6aba91f0a
  Stored in directory: /tmp/pip-ephem-wheel-cache-_88ll_l6/wheels/e2/6b/1d/344ac773c7495ea0b85eb228bc66daec7400a143a92d36b7b1
Successfully built pycocotools
Installing collected packages: pycocotools
  Attempting uninstall: pycocotools
    Found existing installation: pycocotools 2.0.3
    Uninstalling pycocotools-2.0.3:
      Successfully uninstalled pycocotools-2.0.3
Successfully installed pycocotools-2.0


# Load Ground Truths

In [2]:
!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!unzip annotations_trainval2017.zip

--2021-12-04 00:29:29--  http://images.cocodataset.org/annotations/annotations_trainval2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.217.129.209
Connecting to images.cocodataset.org (images.cocodataset.org)|52.217.129.209|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 252907541 (241M) [application/zip]
Saving to: ‘annotations_trainval2017.zip’


2021-12-04 00:29:37 (31.8 MB/s) - ‘annotations_trainval2017.zip’ saved [252907541/252907541]

Archive:  annotations_trainval2017.zip
  inflating: annotations/instances_train2017.json  
  inflating: annotations/instances_val2017.json  
  inflating: annotations/captions_train2017.json  
  inflating: annotations/captions_val2017.json  
  inflating: annotations/person_keypoints_train2017.json  
  inflating: annotations/person_keypoints_val2017.json  


In [3]:
import json

# Parse the val2017 instance annotations into a plain Python dict.
with open("./annotations/instances_val2017.json") as annotations_file:
    ground_truths = json.load(annotations_file)

In [4]:
# Pick the first 10 *distinct* image ids, in annotation order. dict.fromkeys
# de-duplicates while keeping first-seen order, so several annotations that
# belong to the same image can no longer shrink the sample below 10 images
# (which would also make len(img_ids) over-count the images).
img_ids = list(
    dict.fromkeys(gt["image_id"] for gt in ground_truths["annotations"])
)[:10]

In [5]:
# Keep only the annotations that belong to one of the sampled images; every
# other top-level entry of the ground-truth dict is left as loaded.
ground_truths["annotations"] = list(
    filter(
        lambda ann: ann["image_id"] in img_ids,
        ground_truths["annotations"],
    )
)

In [6]:
# Write the reduced ground truth to disk; the evaluation cell loads it back
# from this path.
with open("mutated_ground_truths.json", mode="w") as gt_out:
    json.dump(ground_truths, fp=gt_out)

In [7]:
import random


def mutate_bounding_box(bounding_box, rng=None):
    """Return a randomly jittered copy of an ``[x, y, width, height]`` box.

    The origin is shifted by up to a tenth of the matching side length in
    either direction, and each side length is scaled by a factor drawn
    uniformly from [0.9, 1.1].

    Args:
        bounding_box: sequence whose first four items are read as
            ``x, y, width, height``.
        rng: optional object exposing ``uniform(a, b)``, e.g.
            ``random.Random(seed)``, for reproducible jitter. When omitted the
            module-level ``random`` generator is used, exactly as before.

    Returns:
        A new four-element list; the input sequence is not modified.
    """
    if rng is None:
        rng = random

    def shift(x, xw):
        # Move x by a random fraction (in [-1, 1]) of the allowed offset xw.
        return xw * rng.uniform(-1, 1) + x

    # The draw order (x, y, width, height) is kept stable so a seeded
    # generator always yields the same box.
    return [
        shift(bounding_box[0], bounding_box[2] / 10),
        shift(bounding_box[1], bounding_box[3] / 10),
        rng.uniform(0.9, 1.1) * bounding_box[2],
        rng.uniform(0.9, 1.1) * bounding_box[3],
    ]


# Build one synthetic detection per ground-truth annotation of the sampled
# images and store the list as a COCO-style results file.
# Seeding the global generator makes the jittered boxes and the scores
# reproducible across reruns.
random.seed(0)

results = []
for gt in ground_truths["annotations"]:
    if gt["image_id"] not in img_ids:
        continue
    results.append(
        {
            "image_id": gt["image_id"],
            "area": gt["area"],
            # COCO annotations keep the box under "bbox", and pycocotools'
            # loadRes looks for the same key on detections.
            "bbox": mutate_bounding_box(gt["bbox"]),
            "id": gt["id"],
            "category_id": gt["category_id"],
            "score": random.uniform(0, 1),
        }
    )

with open("coco_results.json", "w") as f:
    json.dump(results, f)

In [8]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Reload the detections from disk so `results` holds exactly what was written
# to coco_results.json.
with open("coco_results.json", "r") as f:
    results = json.load(f)
coco = COCO("./mutated_ground_truths.json")

cocoDt = coco.loadRes(results)
# pycocotools accepts only "segm", "bbox" or "keypoints" as iouType; any other
# value raises "iouType not supported".
cocoEval = COCOeval(coco, cocoDt, "bbox")
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bounding_box*
DONE (t=5.57s).
Accumulating evaluation results...
DONE (t=1.68s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.661
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.793
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.651
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.676
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.671
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.504
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.686
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all |

# Creating y_true, y_pred

In [9]:
import numpy as np
from collections import defaultdict

num_imgs = len(img_ids)

# Collect, per image, one row of box values followed by the category id for
# every ground-truth annotation. COCO annotations keep the box under "bbox".
groups = defaultdict(list)
for annotation in ground_truths["annotations"]:
    row = list(annotation["bbox"]) + [int(annotation["category_id"])]
    groups[annotation["image_id"]].append(row)

# Sort by image id so the order of the per-image arrays is deterministic.
imgs = sorted(groups)

# One (num_boxes, 5) array per image.
result = [np.array(groups[img]) for img in imgs]

# Largest number of boxes on any single image; used as the padded row count.
m = max(r.shape[0] for r in result)


def to_shape(a, shape):
    """Pad the 2-D array ``a`` with -1 on its bottom and right edges.

    ``shape`` is the ``(rows, columns)`` size of the returned array; ``a``
    stays in the top-left corner and every added cell holds -1.
    """
    target_rows, target_cols = shape
    rows, cols = a.shape
    pad_width = ((0, target_rows - rows), (0, target_cols - cols))
    return np.pad(a, pad_width, mode="constant", constant_values=-1)


# Pad every per-image array to (m, 5) rows/columns, then stack them into a
# single float array with one leading entry per image.
result = [to_shape(per_image, (m, 5)) for per_image in result]
y_true = np.array(result, dtype=float)

print(y_true.shape)

(10, 25, 5)


In [10]:
# Show the padded ground-truth array; rows filled with -1 are padding.
print(y_true)

[[[ 2.7210e+02  2.0023e+02  1.5197e+02  2.7977e+02  1.8000e+01]
  [ 1.8123e+02  8.6280e+01  2.7440e+01  7.3530e+01  4.4000e+01]
  [ 1.7474e+02  0.0000e+00  2.6104e+02  2.2079e+02  7.0000e+01]
  ...
  [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]
  [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]
  [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]]

 [[ 1.9797e+02  1.1722e+02  1.7045e+02  2.2207e+02  1.8000e+01]
  [ 5.8450e+01  8.1690e+01  1.3153e+02  1.2590e+02  6.4000e+01]
  [ 9.2900e+00  1.6000e+02  4.6658e+02  3.1484e+02  3.0000e+00]
  ...
  [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]
  [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]
  [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]]

 [[ 4.7307e+02  3.9593e+02  3.8650e+01  2.8670e+01  1.8000e+01]
  [ 2.0401e+02  2.3508e+02  6.0840e+01  1.7736e+02  1.0000e+00]
  [ 4.3000e-01  4.9979e+02  3.3979e+02  1.0645e+02  1.5000e+01]
  ...
  [-1.0000e+00 -1.

In [11]:
import numpy as np
from collections import defaultdict

num_imgs = len(img_ids)

# Collect, per image, one row of box values followed by the category id and
# the confidence score for every detection.
groups = defaultdict(list)
for annotation in results:
    # COCO-format detections keep the box under "bbox"; fall back to a
    # "bounding_box" key for result dicts that use that name instead.
    box = (
        annotation["bbox"]
        if "bbox" in annotation
        else annotation["bounding_box"]
    )
    row = list(box) + [int(annotation["category_id"]), annotation["score"]]
    groups[annotation["image_id"]].append(row)

# Sort by image id so the order of the per-image arrays is deterministic.
imgs = sorted(groups)

# One (num_boxes, 6) array per image.
result = [np.array(groups[img]) for img in imgs]

# Largest number of boxes on any single image; used as the padded row count.
m = max(r.shape[0] for r in result)


def to_shape(a, shape):
    """Grow the 2-D array ``a`` to ``shape`` by appending rows/columns of -1.

    Padding is added only after the existing data (bottom and right), so the
    original values keep their indices.
    """
    want_rows, want_cols = shape
    have_rows, have_cols = a.shape
    extra_rows = want_rows - have_rows
    extra_cols = want_cols - have_cols
    padding = ((0, extra_rows), (0, extra_cols))
    return np.pad(a, padding, mode="constant", constant_values=-1)


# Pad every per-image array to (m, 6) rows/columns, then stack them into a
# single array with one leading entry per image.
result = [to_shape(per_image, (m, 6)) for per_image in result]
y_pred = np.array(result)

print(y_pred.shape)

(10, 25, 6)


In [12]:
# Show the padded prediction array; rows filled with -1 are padding.
print(y_pred)

[[[ 2.83170164e+02  1.98574781e+02  1.43679031e+02  2.79637175e+02
    1.80000000e+01  8.60476676e-01]
  [ 1.78782770e+02  8.57070874e+01  2.57205260e+01  7.17351988e+01
    4.40000000e+01  8.67949953e-01]
  [ 1.60398593e+02 -1.26983180e+01  2.77554052e+02  2.16501984e+02
    7.00000000e+01  9.40993483e-02]
  ...
  [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00
   -1.00000000e+00 -1.00000000e+00]
  [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00
   -1.00000000e+00 -1.00000000e+00]
  [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00
   -1.00000000e+00 -1.00000000e+00]]

 [[ 2.05261670e+02  1.06865086e+02  1.71921347e+02  2.08470932e+02
    1.80000000e+01  2.33589777e-02]
  [ 5.23691118e+01  7.91802126e+01  1.21694503e+02  1.36141918e+02
    6.40000000e+01  8.40724888e-01]
  [-2.03978956e+01  1.42724445e+02  4.20246262e+02  2.87332323e+02
    3.00000000e+00  1.98790481e-01]
  ...
  [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000

In [13]:
# Bundle both arrays into one .npz archive. They are passed positionally, so
# NumPy stores them under its default names "arr_0" (y_true) and "arr_1"
# (y_pred); readers of the file must use those keys.
outfile = "sample_boxes.npz"
np.savez(outfile, y_true, y_pred)

In [15]:
from google.colab import files

# Hand the saved archive to the Colab download helper (Colab-only API).
files.download(outfile)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>