In [1]:
# Mount Google Drive at /content/gdrive so files stored in Drive are readable from this runtime.
# The google.colab package is only available inside a Colab kernel.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!cp /content/gdrive/MyDrive/canada/annotations.zip /content/

!unzip annotations.zip
!mkdir house/train house/val house/test
!rm -r annotations.zip

Archive:  annotations.zip
   creating: house/
   creating: house/originals/
  inflating: house/originals/1000_PNG.rf.62e4dbbada8e507ddf382e1e60f5560e.jpg  
  inflating: house/originals/1000_PNG.rf.7f446088d00fc8d0853e7f1e5d897666.jpg  
  inflating: house/originals/1000_PNG.rf.85d8eb88d99766e3b89dcfca62a82862.jpg  
  inflating: house/originals/100_PNG.rf.095020f2270482fe3f8dc647629b2c55.jpg  
  inflating: house/originals/100_PNG.rf.4f63b9ac1240bcada575df9d508b0cb1.jpg  
  inflating: house/originals/100_PNG.rf.e893a1c7104bec122dbddacb6ccdd250.jpg  
  inflating: house/originals/101_PNG.rf.560182138e098f1ba47e493afae8bf27.jpg  
  inflating: house/originals/101_PNG.rf.b8ba0a6b280d3879c4de763fb4a6be35.jpg  
  inflating: house/originals/101_PNG.rf.e1f91255ce0e8154df8cd8334a2ead8d.jpg  
  inflating: house/originals/102_PNG.rf.9311204ea47b13cf590f71c284b377e3.jpg  
  inflating: house/originals/102_PNG.rf.ef3b3174041f2789f8f98e0751bd8def.jpg  
  inflating: house/originals/102_PNG.rf.f49c5b9575bb

In [3]:
!pip install sklearn
!pip install funcy

Collecting funcy
  Downloading funcy-1.17-py2.py3-none-any.whl (33 kB)
Installing collected packages: funcy
Successfully installed funcy-1.17


In [4]:
import funcy
from sklearn.model_selection import train_test_split
import json
import shutil
from shutil import copyfile

from os.path import exists


def save_coco(file, info, licenses, images, annotations, categories):
    """Write the five COCO sections to ``file`` as pretty-printed JSON.

    The output is UTF-8, indented by two spaces, with keys sorted, so the
    top-level sections appear in alphabetical order regardless of the
    argument order. An existing file at ``file`` is overwritten.
    """
    payload = dict(
        info=info,
        licenses=licenses,
        images=images,
        annotations=annotations,
        categories=categories,
    )
    with open(file, 'wt', encoding='UTF-8') as out:
        json.dump(payload, out, indent=2, sort_keys=True)

def filter_annotations(annotations, images):
    """Return the annotations that belong to one of ``images``.

    Args:
        annotations: iterable of COCO annotation dicts (each has 'image_id').
        images: iterable of COCO image dicts (each has 'id').

    Returns:
        A new list, in the original order, of the annotations whose
        ``image_id`` equals the ``id`` of some image. Both sides are compared
        after ``int()`` conversion.
    """
    # A set gives constant-time membership tests; scanning a list for every
    # annotation is quadratic in the dataset size.
    wanted_ids = {int(image['id']) for image in images}
    return [ann for ann in annotations if int(ann['image_id']) in wanted_ids]

def split_coco(args):
    """Randomly split one COCO annotation file into two COCO files.

    ``args`` is a dict with these keys:
        'annotations'         path of the COCO JSON to read
        'train'               path that receives the first part of the split
        'test'                path that receives the remainder
        'split'               passed to ``train_test_split`` as ``train_size``
        'having_annotations'  if truthy, images without any annotation are
                              dropped before splitting
        'random_state'        optional; passed to ``train_test_split`` so the
                              split can be reproduced. When the key is absent
                              None is passed, which is the library default.

    Both output files reuse the input's info, licenses and categories and
    contain only the annotations of their own images. A one-line summary is
    printed at the end.
    """
    # Read everything first so the input file is closed before any output is written.
    with open(args['annotations'], 'rt', encoding='UTF-8') as source:
        coco = json.load(source)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if args['having_annotations']:
        # Ids are int-normalised on both sides, the same way filter_annotations
        # compares them, and kept in a set for constant-time lookups.
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if int(image['id']) in annotated_ids]

    x, y = train_test_split(
        images,
        train_size=args['split'],
        random_state=args.get('random_state'),
    )

    save_coco(args['train'], info, licenses, x, filter_annotations(annotations, x), categories)
    save_coco(args['test'], info, licenses, y, filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(len(x), args['train'], len(y), args['test']))

def organize_coco_images(args):
    """Copy every image listed in a COCO file from one directory to another.

    ``args`` is a dict with these keys:
        'annotations'  path of the COCO JSON whose 'images' entries are copied
        'origin'       directory that currently holds the image files
        'destination'  directory to copy them into (created if missing)

    Directory values work with or without a trailing slash. Each file name is
    printed before it is copied.
    """
    import os  # local import: the import block above only brings in os.path.exists

    # Only the file names are needed, so the JSON is closed before copying starts.
    with open(args['annotations'], 'rt', encoding='UTF-8') as source:
        coco = json.load(source)
    file_names = [image['file_name'] for image in coco['images']]

    os.makedirs(args['destination'], exist_ok=True)
    for file_name in file_names:
        print(file_name)
        # os.path.join inserts the separator only when it is missing; plain
        # concatenation silently produced wrong paths without a trailing slash.
        copyfile(os.path.join(args['origin'], file_name),
                 os.path.join(args['destination'], file_name))

In [5]:
# Stage 1: 70 % of the annotated images go to train; the remaining 30 % are parked in tmp.json.
split_coco_attributes = {
    'having_annotations': True,
    'split': 0.7,
    'train': '/content/house/train/cocos.json',
    'test': '/content/house/tmp.json',
    'annotations': '/content/house/originals/coco_annotations.json'
}
split_coco(split_coco_attributes)
# Stage 2: tmp.json is split 70/30 again into val and test, i.e. roughly 21 % and 9 % of the
# full set (626 and 269 images in the recorded run). No random seed is supplied in either
# stage, so the assignment of images to splits differs from run to run.
split_coco_attributes = {
    'having_annotations': True,
    'split': 0.7,
    'train': '/content/house/val/cocos.json',
    'test': '/content/house/test/cocos.json',
    'annotations': '/content/house/tmp.json'
}
split_coco(split_coco_attributes)
# tmp.json was only the intermediate input of stage 2.
!rm -r house/tmp.json

Saved 2087 entries in /content/house/train/cocos.json and 895 in /content/house/tmp.json
Saved 626 entries in /content/house/val/cocos.json and 269 in /content/house/test/cocos.json


In [6]:
organize_coco_images_dict = {
    'annotations': '/content/house/train/cocos.json',
    'origin': '/content/house/originals/',
    'destination': '/content/house/train/'
}
organize_coco_images(organize_coco_images_dict)

organize_coco_images_dict = {
    'annotations': '/content/house/test/cocos.json',
    'origin': '/content/house/originals/',
    'destination': '/content/house/test/'
}
organize_coco_images(organize_coco_images_dict)

organize_coco_images_dict = {
    'annotations': '/content/house/val/cocos.json',
    'origin': '/content/house/originals/',
    'destination': '/content/house/val/'
}
organize_coco_images(organize_coco_images_dict)

#remove original folder, we don't need it anymore
!rm -r house/originals/

917_PNG.rf.6e2df33f7e371bf7039e16895ab7f583.jpg
943_PNG.rf.8da3bc39dfaccd8a786f1a132d62483f.jpg
581_PNG.rf.29db66a8d93c2b9c93a18718d76df165.jpg
245_PNG.rf.632f31b7b06451be65549719307de926.jpg
338_PNG.rf.9e3b8cd36a1ca29c57e9aa99f0fe323a.jpg
71_PNG.rf.c23eb36f27a0e2d243f31fc1e61fd74e.jpg
440_PNG.rf.ff489efc40ce9835d492449779a38851.jpg
628_PNG.rf.fcfd7cfd5ccf3f21a8e698eb2e934f09.jpg
954_PNG.rf.e80abfb40ec31c90387fc5f4e1d83bdb.jpg
575_PNG.rf.22cf8361d620f288cc80ebf335f3bfa9.jpg
103_PNG.rf.ce979ba04e8d6e02b9e6733903b99cfe.jpg
595_PNG.rf.4c51d33a1131939d3048eb07646b8bf7.jpg
915_PNG.rf.97601fd5d23b26c98409278f9c339d5c.jpg
28_PNG.rf.cdbfee920245698586877b43e450fd96.jpg
565_PNG.rf.011571e7699b9aa26b685572b56e5433.jpg
281_PNG.rf.86852818a7cc6e61f943957b08b2cac9.jpg
527_PNG.rf.7a1d8d16258cd45c4d61e9bed44b2319.jpg
676_PNG.rf.d4f49254374ed12e366687b1ade375c5.jpg
178_PNG.rf.6c33be242428ab3807893b1c0645eaa7.jpg
325_PNG.rf.107988e144ce7d9c80e7a8806bcdb26b.jpg
859_PNG.rf.3e9d428870f8cb9b7312e5e1ecae1a9

In [7]:
# Fetch the COCO API sources. No tag or commit is pinned, so this clones whatever the
# repository's default branch points to at the time the cell runs.
!git clone https://github.com/cocodataset/cocoapi.git

Cloning into 'cocoapi'...
remote: Enumerating objects: 975, done.[K
remote: Total 975 (delta 0), reused 0 (delta 0), pack-reused 975[K
Receiving objects: 100% (975/975), 11.72 MiB | 18.56 MiB/s, done.
Resolving deltas: 100% (576/576), done.


In [8]:
# Build the pycocotools C extension inside the clone (the recorded output shows
# `python setup.py build_ext --inplace`).
# NOTE(review): this cell neither installs the package nor adds cocoapi/PythonAPI to
# sys.path - confirm that the later `from pycocotools.coco import COCO` resolves to this build.
!cd cocoapi/PythonAPI/ && make

python setup.py build_ext --inplace
running build_ext
cythoning pycocotools/_mask.pyx to pycocotools/_mask.c
  tree = Parsing.p_module(s, pxd, full_module_name)
building 'pycocotools._mask' extension
creating build
creating build/common
creating build/temp.linux-x86_64-3.7
creating build/temp.linux-x86_64-3.7/pycocotools
x86_64-linux-gnu-gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fdebug-prefix-map=/build/python3.7-pX47U3/python3.7-3.7.12=. -fstack-protector-strong -Wformat -Werror=format-security -g -fdebug-prefix-map=/build/python3.7-pX47U3/python3.7-3.7.12=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/local/lib/python3.7/dist-packages/numpy/core/include -I../common -I/usr/include/python3.7m -c ../common/maskApi.c -o build/temp.linux-x86_64-3.7/../common/maskApi.o -Wno-cpp -Wno-unused-function -std=c99
[01m[K../common/maskApi.c:[m[K In function ‘[01m[KrleDecode[m[K’:
       [01;35

In [9]:
import cv2
import numpy as np
import os

from pycocotools.coco import COCO

def create_masks(gt_dir, ann_file, mask_value=9):
    """Write one single-channel mask image per COCO image of the 'window' category.

    Args:
        gt_dir: output directory, created if missing. Each mask is written to
            ``{gt_dir}/{file_name}`` using the image's own ``file_name``.
        ann_file: path of the COCO annotation JSON to read.
        mask_value: pixel value stored where an annotation covers the image
            (background stays 0). Defaults to 9, the value that used to be
            hard-coded. It is multiplied into a uint8 array, so keep it
            within 1-255.

    Raises:
        OSError: if OpenCV reports that a mask file could not be written.
    """
    os.makedirs(gt_dir, exist_ok=True)

    coco = COCO(ann_file)
    cat_ids = coco.getCatIds(catNms=['window'])
    img_ids = coco.getImgIds(catIds=cat_ids)
    imgs = coco.loadImgs(img_ids)

    for img in imgs:
        mask_img = np.zeros((img['height'], img['width']), dtype=np.uint8)
        anns_ids = coco.getAnnIds(imgIds=img['id'], catIds=cat_ids, iscrowd=None)
        for ann in coco.loadAnns(anns_ids):
            # NOTE(review): the id 1 is hard-coded even though cat_ids was looked
            # up by name above. If 'window' has a different id in ann_file, every
            # mask comes out empty - confirm against the dataset's categories.
            if ann['category_id'] == 1:
                mask_img |= coco.annToMask(ann)

        out_path = f"{gt_dir}/{img['file_name']}"
        # NOTE(review): OpenCV picks the encoder from the file extension, so a
        # '.jpg' file_name yields a JPEG mask. JPEG is lossy even at quality 100,
        # so stored pixels may not be exactly 0 or mask_value - confirm the
        # consumer tolerates that before relying on exact label values.
        written = cv2.imwrite(out_path, mask_img * mask_value, [cv2.IMWRITE_JPEG_QUALITY, 100])
        if not written:
            # imwrite signals some failures (e.g. unwritable path) by returning False only.
            raise OSError(f"could not write mask image {out_path}")


In [10]:
# Masks for the test split, written to house/masks_test/.
create_masks("house/masks_test", "house/test/cocos.json")

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [11]:
# Masks for the validation split, written to house/masks_val/.
create_masks("house/masks_val", "house/val/cocos.json")

loading annotations into memory...
Done (t=0.02s)
creating index...
index created!


In [12]:
# Masks for the training split, written to house/masks_train/.
create_masks("house/masks_train", "house/train/cocos.json")

loading annotations into memory...
Done (t=0.04s)
creating index...
index created!


In [13]:
import json

from pathlib import Path
from tqdm import tqdm
from itertools import groupby


def convert_to_odgt(coco_json_path, odgt_path, dataset_path, mask_path):
    """Convert a COCO annotation file into an .odgt file (one JSON object per line).

    One line is written for every image that has at least one annotation and
    whose file exists under ``dataset_path``. Each line carries the image
    path, the mask path, the image size and one ``gtboxes`` entry per
    annotation. The number of skipped (missing) image files is printed at
    the end.

    Args:
        coco_json_path: ``pathlib.Path`` of the COCO JSON to read.
        odgt_path: ``pathlib.Path`` of the output file; overwritten if present.
        dataset_path: ``pathlib.Path`` directory joined with each image's
            ``file_name`` to locate the image file.
        mask_path: ``pathlib.Path`` directory joined with each image's
            ``file_name`` to form ``fpath_segm``; its existence is not checked.

    Raises:
        KeyError: if an annotation refers to an image id or category id that
            is not declared in the COCO file.
    """
    with coco_json_path.open() as fi:
        coco = json.load(fi)

    # Index images by id once instead of scanning the list for every group.
    # setdefault keeps the first entry for a duplicated id, like a front-to-back scan.
    images_by_id = {}
    for image in coco['images']:
        images_by_id.setdefault(image['id'], image)

    # Resolve category names by id, not by list position: COCO ids are not
    # guaranteed to start at 1, be contiguous, or follow the list order.
    tag_by_id = {category['id']: category['name'] for category in coco['categories']}

    # groupby only merges adjacent items, so the annotations must be sorted first.
    annos = sorted(coco['annotations'], key=lambda a: a['image_id'])

    img_not_found_count = 0

    with odgt_path.open(mode='w') as fo:
        for image_id, objects in tqdm(groupby(annos, key=lambda a: a['image_id'])):
            image_info = images_by_id[image_id]

            img_path = dataset_path / image_info['file_name']
            if not img_path.exists():
                img_not_found_count += 1
                continue

            gtboxes = [
                {
                    'box'  : obj['bbox'],
                    'occ'  : 0,
                    'tag'  : tag_by_id[obj['category_id']],
                    'extra': {'ignore': 0}
                }
                for obj in objects
            ]

            # A fresh dict per image, so no field can leak from the previous line.
            od_line = {
                'gtboxes': gtboxes,
                'fpath_img': str(img_path),
                'fpath_segm': str(mask_path / image_info['file_name']),
                # Plain string; a stray trailing comma here used to emit ["COCO"].
                'dbName': "COCO",
                'dbInfo': {"vID": "window_train", "frameID": -1},
                'width': image_info['width'],
                'height': image_info['height'],
                'ID': image_info['file_name'],
            }
            fo.write(f'{json.dumps(od_line)}\n')
    print(f'Can\'t find {img_not_found_count} images')


In [14]:
# Training split: house/train/cocos.json to house/training.odgt.
convert_to_odgt(Path("house/train/cocos.json"), Path("house/training.odgt"), Path("house/train"), Path("house/masks_train"))

2087it [00:00, 6538.34it/s]

Can't find 0 images





In [15]:
# Validation split: house/val/cocos.json to house/validation.odgt.
# Note: convert_to_odgt writes dbInfo vID "window_train" on every line, for this split too.
convert_to_odgt(Path("house/val/cocos.json"), Path("house/validation.odgt"), Path("house/val"), Path("house/masks_val"))

626it [00:00, 12729.55it/s]

Can't find 0 images





In [16]:
# Test split: house/test/cocos.json to house/test.odgt.
# Note: convert_to_odgt writes dbInfo vID "window_train" on every line, for this split too.
convert_to_odgt(Path("house/test/cocos.json"), Path("house/test.odgt"), Path("house/test"), Path("house/masks_test"))

269it [00:00, 11524.70it/s]

Can't find 0 images





In [17]:
# Archive house/masks_train recursively into masks_train.zip in the current directory.
!zip -r masks_train.zip house/masks_train

  adding: house/masks_train/ (stored 0%)
  adding: house/masks_train/799_PNG.rf.40141ef5b72fc20f415beb346f68f026.jpg (deflated 23%)
  adding: house/masks_train/435_PNG.rf.252dfc099d1fca57734fca1a9333199a.jpg (deflated 51%)
  adding: house/masks_train/500_PNG.rf.50139b253a067b58b90fc69fb536199b.jpg (deflated 40%)
  adding: house/masks_train/393_PNG.rf.a92fb25bbb75d0fa3078769b07b7dafe.jpg (deflated 43%)
  adding: house/masks_train/403_PNG.rf.b5934fe15390e1c177fdab1bb1c7a433.jpg (deflated 44%)
  adding: house/masks_train/366_PNG.rf.499e25a73cb654a9f8b3e6fc8e0945a4.jpg (deflated 45%)
  adding: house/masks_train/325_PNG.rf.85213ac4803959fabb34900f1d07091e.jpg (deflated 40%)
  adding: house/masks_train/409_PNG.rf.f299f651b186836d1dfcadb1bd33f838.jpg (deflated 39%)
  adding: house/masks_train/708_PNG.rf.a18b017c44107190f2e9893e1ef7ddd7.jpg (deflated 55%)
  adding: house/masks_train/126_PNG.rf.8f8963f4a1cbce306f6d9c033f4e22ee.jpg (deflated 47%)
  adding: house/masks_train/894_PNG.rf.38d78899a0

In [18]:
# Archive house/masks_val recursively into masks_val.zip in the current directory.
!zip -r masks_val.zip house/masks_val/

  adding: house/masks_val/ (stored 0%)
  adding: house/masks_val/761_PNG.rf.83bfc2b881482bb79f0fa95fcd3042cc.jpg (deflated 40%)
  adding: house/masks_val/127_PNG.rf.515d12075802397a4e76712f47af22c0.jpg (deflated 41%)
  adding: house/masks_val/99_PNG.rf.6a751872438d8f7738012ee0f1d9f17f.jpg (deflated 31%)
  adding: house/masks_val/765_PNG.rf.993a14c74ef6231af258591159b188e8.jpg (deflated 49%)
  adding: house/masks_val/571_PNG.rf.9b90f4b6ec7f3bba8fd5c97d8e24a34d.jpg (deflated 36%)
  adding: house/masks_val/536_PNG.rf.63eed4ca7903e5bb5720f79e28966f10.jpg (deflated 38%)
  adding: house/masks_val/566_PNG.rf.a4355c39c26aeca14a7fe0d0e5fdac05.jpg (deflated 52%)
  adding: house/masks_val/787_PNG.rf.734915d9b7fa64d7c5dbac7292a25264.jpg (deflated 47%)
  adding: house/masks_val/120_PNG.rf.0a7541273a0ad05ab5f32a1d17da568c.jpg (deflated 39%)
  adding: house/masks_val/333_PNG.rf.7a48ccd1a3533f3d903196c175325ca7.jpg (deflated 36%)
  adding: house/masks_val/807_PNG.rf.91b257a56c0d445aa05745d77c6b83d8.jp

In [19]:
# Archive house/train (images plus cocos.json) recursively into train.zip.
!zip -r train.zip house/train

  adding: house/train/ (stored 0%)
  adding: house/train/799_PNG.rf.40141ef5b72fc20f415beb346f68f026.jpg (deflated 2%)
  adding: house/train/435_PNG.rf.252dfc099d1fca57734fca1a9333199a.jpg (deflated 1%)
  adding: house/train/500_PNG.rf.50139b253a067b58b90fc69fb536199b.jpg (deflated 0%)
  adding: house/train/393_PNG.rf.a92fb25bbb75d0fa3078769b07b7dafe.jpg (deflated 1%)
  adding: house/train/403_PNG.rf.b5934fe15390e1c177fdab1bb1c7a433.jpg (deflated 1%)
  adding: house/train/366_PNG.rf.499e25a73cb654a9f8b3e6fc8e0945a4.jpg (deflated 1%)
  adding: house/train/325_PNG.rf.85213ac4803959fabb34900f1d07091e.jpg (deflated 1%)
  adding: house/train/409_PNG.rf.f299f651b186836d1dfcadb1bd33f838.jpg (deflated 1%)
  adding: house/train/708_PNG.rf.a18b017c44107190f2e9893e1ef7ddd7.jpg (deflated 1%)
  adding: house/train/126_PNG.rf.8f8963f4a1cbce306f6d9c033f4e22ee.jpg (deflated 1%)
  adding: house/train/894_PNG.rf.38d78899a00e465afacbdc12fc1120aa.jpg (deflated 1%)
  adding: house/train/692_PNG.rf.6cba7f98

In [20]:
# Archive house/val (images plus cocos.json) recursively into validation.zip.
!zip -r validation.zip house/val

  adding: house/val/ (stored 0%)
  adding: house/val/761_PNG.rf.83bfc2b881482bb79f0fa95fcd3042cc.jpg (deflated 2%)
  adding: house/val/127_PNG.rf.515d12075802397a4e76712f47af22c0.jpg (deflated 1%)
  adding: house/val/99_PNG.rf.6a751872438d8f7738012ee0f1d9f17f.jpg (deflated 3%)
  adding: house/val/765_PNG.rf.993a14c74ef6231af258591159b188e8.jpg (deflated 1%)
  adding: house/val/571_PNG.rf.9b90f4b6ec7f3bba8fd5c97d8e24a34d.jpg (deflated 1%)
  adding: house/val/536_PNG.rf.63eed4ca7903e5bb5720f79e28966f10.jpg (deflated 1%)
  adding: house/val/566_PNG.rf.a4355c39c26aeca14a7fe0d0e5fdac05.jpg (deflated 1%)
  adding: house/val/787_PNG.rf.734915d9b7fa64d7c5dbac7292a25264.jpg (deflated 2%)
  adding: house/val/120_PNG.rf.0a7541273a0ad05ab5f32a1d17da568c.jpg (deflated 1%)
  adding: house/val/333_PNG.rf.7a48ccd1a3533f3d903196c175325ca7.jpg (deflated 2%)
  adding: house/val/807_PNG.rf.91b257a56c0d445aa05745d77c6b83d8.jpg (deflated 2%)
  adding: house/val/480_PNG.rf.55e93bacc77a50a8ecf7821e52da1278.jp

In [21]:
# Archive house/masks_test recursively into masks_test.zip in the current directory.
!zip -r masks_test.zip house/masks_test/

  adding: house/masks_test/ (stored 0%)
  adding: house/masks_test/536_PNG.rf.8d4efa752fba5b796c9d182c7627953c.jpg (deflated 38%)
  adding: house/masks_test/784_PNG.rf.3ab4de95788beafa45a7e4a5d78615f3.jpg (deflated 38%)
  adding: house/masks_test/743_PNG.rf.4087e7b99660f939a0389d8b0597d2ff.jpg (deflated 57%)
  adding: house/masks_test/598_PNG.rf.c0c5f6657e3a5a8ccea7d7973233a9b5.jpg (deflated 45%)
  adding: house/masks_test/702_PNG.rf.ff34f5dead94bdc15e6aac969b260849.jpg (deflated 39%)
  adding: house/masks_test/301_PNG.rf.16abaec314b7cc6d7a155a685bc45153.jpg (deflated 47%)
  adding: house/masks_test/289_PNG.rf.78f2ea5f9453ea041d81cd5a5ec7e9ef.jpg (deflated 20%)
  adding: house/masks_test/959_PNG.rf.a3468a49c546e35fc7bebc27a4ae356a.jpg (deflated 45%)
  adding: house/masks_test/856_PNG.rf.fcea007497cd35f3120a29c7661ac29d.jpg (deflated 40%)
  adding: house/masks_test/73_PNG.rf.01d091eb826349f007d312e3e7cb3b41.jpg (deflated 36%)
  adding: house/masks_test/25_PNG.rf.e63c28d0af11f251bae08ad3

In [22]:
# Archive house/test (images plus cocos.json) recursively into test.zip.
!zip -r test.zip house/test

  adding: house/test/ (stored 0%)
  adding: house/test/536_PNG.rf.8d4efa752fba5b796c9d182c7627953c.jpg (deflated 1%)
  adding: house/test/784_PNG.rf.3ab4de95788beafa45a7e4a5d78615f3.jpg (deflated 2%)
  adding: house/test/743_PNG.rf.4087e7b99660f939a0389d8b0597d2ff.jpg (deflated 1%)
  adding: house/test/598_PNG.rf.c0c5f6657e3a5a8ccea7d7973233a9b5.jpg (deflated 1%)
  adding: house/test/702_PNG.rf.ff34f5dead94bdc15e6aac969b260849.jpg (deflated 1%)
  adding: house/test/301_PNG.rf.16abaec314b7cc6d7a155a685bc45153.jpg (deflated 0%)
  adding: house/test/289_PNG.rf.78f2ea5f9453ea041d81cd5a5ec7e9ef.jpg (deflated 1%)
  adding: house/test/959_PNG.rf.a3468a49c546e35fc7bebc27a4ae356a.jpg (deflated 1%)
  adding: house/test/856_PNG.rf.fcea007497cd35f3120a29c7661ac29d.jpg (deflated 1%)
  adding: house/test/73_PNG.rf.01d091eb826349f007d312e3e7cb3b41.jpg (deflated 2%)
  adding: house/test/25_PNG.rf.e63c28d0af11f251bae08ad349574d2d.jpg (deflated 1%)
  adding: house/test/175_PNG.rf.b365d4e6c3a8e27354f8e57