In [1]:
import os
import json
from glob import glob
from pprint import pprint
from multiprocessing import Pool
from collections import defaultdict
from pathlib import Path
from tqdm import tqdm
import cv2

import numpy as np
import pandas as pd

# Input locations, relative to the notebook's working directory.
PATH_IMGS = './imgs/'  # source JPEG images (ADE_val_<id>.jpg)
PATH_JSON = './jsons/'  # per-image annotation files (ADE_val_<id>.json)
PATH_IMASK = './instance_mask'  # per-object instance mask PNGs
# Target size handed to cv2.resize for both the images and the label maps.
INPUT_SHAPE = (416, 416)

In [2]:
# Each split file holds one image id per line; surrounding whitespace
# (including the trailing newline) is stripped from every entry.
with open('train.txt') as fh_train:
    ls_train_id = [row.strip() for row in fh_train]

with open('val.txt') as fh_val:
    ls_val_id = [row.strip() for row in fh_val]

with open('test.txt') as fh_test:
    ls_test_id = [row.strip() for row in fh_test]

# Report the size of each split as a quick sanity check.
print(len(ls_train_id), len(ls_val_id), len(ls_test_id))

1400 200 400


In [3]:
# Class names to keep, one per line in the 'SceneParse150' file.
ls_SceneParse150 = []
with open('SceneParse150') as fh_classes:
    ls_SceneParse150 = [entry.strip() for entry in fh_classes]

In [4]:
# Demo of the auto-numbering trick: looking up a missing key stores it with
# the mapping's current size as its value, so keys get 0, 1, 2, ... in order.
dict_test = defaultdict(lambda: len(dict_test))
for fruit in ('apple', 'banana'):
    dict_test[fruit]
print(dict_test.items())

dict_items([('apple', 0), ('banana', 1)])


In [5]:
class LabelEncoder:
    """Assign consecutive integer ids to labels in first-seen order.

    ``label2id`` maps label -> id and ``id2label`` is the reverse mapping,
    refreshed by ``build_id2label``. Ids start at 0 and grow by one for every
    previously unseen label.
    """

    def __init__(self):
        # Looking up an unseen key stores it with the mapping's current size
        # as its value, which yields ids 0, 1, 2, ... in first-seen order.
        self.label2id = defaultdict(lambda : len(self.label2id))
        # self.label2id['bg']  # (disabled) would give 'bg' the id 0
        self.id2label = dict()

    def build_id2label(self):
        """Copy every (label, id) pair of ``label2id`` into ``id2label``."""
        for key, value in self.label2id.items():
            self.id2label[value] = key

    def fit(self, ls_label):
        """Register every label of the iterable ``ls_label``; returns None."""
        for label in ls_label:
            # The bare lookup is intentional: it inserts unseen labels.
            self.label2id[label]
        self.build_id2label()

    def transform(self, ls_label):
        """Return the ids of ``ls_label`` as a list.

        Raises:
            KeyError: if a label was never fitted. Unknown labels are NOT
                added to the encoder.
        """
        # Take ONE plain-dict snapshot per call (instead of one per label):
        # a plain dict raises KeyError on unknown labels rather than
        # silently inserting them the way the defaultdict would.
        known = dict(self.label2id)
        return [known[label] for label in ls_label]

    def fit_transform(self, ls_label):
        """Register unseen labels and return the ids of ``ls_label``."""
        ls_result = [self.label2id[label] for label in ls_label]
        self.build_id2label()
        return ls_result

    def get_map(self):
        """Return the live label -> id mapping (the defaultdict itself)."""
        return self.label2id

    def get_reverse_map(self):
        """Return the id -> label mapping built by ``build_id2label``."""
        return self.id2label


# Shared encoder instance used by the cells below.
lbec = LabelEncoder()

In [6]:
# Register every class name that is listed in ls_SceneParse150 and occurs in
# any split, in first-seen order, so all splits share one label -> id mapping.
set_scene150 = set(ls_SceneParse150)  # constant-time membership tests
for img_id in ls_train_id + ls_val_id + ls_test_id:
    with open(Path(PATH_JSON) / f"ADE_val_{img_id}.json") as f:
        ls_obj = json.load(f)['annotation']['object']
    # Rewrite each object name (', ' and ' ' -> ';', lower-case) so it can be
    # compared with the entries of ls_SceneParse150.
    ls_name = [obj['name'].replace(', ', ';').replace(' ', ';').lower()
               for obj in ls_obj]
    # One fit() per file rather than per object: every fit() call rebuilds
    # the encoder's reverse map.
    lbec.fit(name for name in ls_name if name in set_scene150)

print(len(lbec.get_map()))

150


In [8]:
# lbec is only read here (membership tests and transform() never add labels),
# so it must already be fitted before this function runs.
def get_ground_truth(img_id):
    """Load one image and build its per-pixel label-id map.

    Args:
        img_id: id string used in the file names ``ADE_val_<img_id>.*``.

    Returns:
        Tuple ``(f_img, np_full_seg)``: the image and the label map, both
        resized to ``INPUT_SHAPE`` with nearest-neighbour interpolation.

    Raises:
        OSError: if the image cannot be read by ``cv2.imread``.

    Side effects:
        Writes a preview of the label map to ``cv2_tmp/ADE_val_<img_id>.png``
        and prints a message for every mask file that is missing or unreadable.
    """
    # X
    img_path = Path(PATH_IMGS) / f"ADE_val_{img_id}.jpg"
    f_img = cv2.imread(str(img_path))
    if f_img is None:
        # cv2.imread reports failure by returning None; fail here with the
        # offending path instead of an opaque error inside cv2.resize.
        raise OSError(f"cv2 could not read image: {img_path}")
    f_img = cv2.resize(f_img, INPUT_SHAPE, interpolation=cv2.INTER_NEAREST)
    # f_img = f_img / 255.0

    # y
    # `with` guarantees the handle is closed even if the JSON is malformed.
    with open(Path(PATH_JSON) / f'ADE_val_{img_id}.json') as f_json:
        dict_img = json.load(f_json)['annotation']
    ls_img_obj = dict_img['object']
    img_size = dict_img['imsize']

    # Keep only objects whose rewritten name is known to the encoder.
    ls_obj_label = []
    ls_obj_mask_file = []
    for obj in ls_img_obj:
        name = obj['name'].replace(', ', ';').replace(' ', ';').lower()
        if name not in lbec.label2id:
            continue
        ls_obj_label.append(name)
        ls_obj_mask_file.append(str(Path(PATH_IMASK) / obj['instance_mask']))

    ls_obj_label_id = lbec.transform(ls_obj_label)
    # assumes imsize starts with (height, width) matching the mask PNGs — TODO confirm
    # NOTE(review): uncovered pixels stay 0, which is also the id of the first
    # fitted label, so the two cannot be told apart in the output.
    np_full_seg = np.zeros(shape=img_size[:2])
    for label, mask_file in zip(ls_obj_label_id, ls_obj_mask_file):
        if not os.path.exists(mask_file):
            print('can\'t find mask file: ', label, lbec.id2label[label], mask_file)
            continue

        mask_img = cv2.imread(mask_file)
        if mask_img is None:
            # File exists but could not be decoded; skip it like a missing one.
            print('can\'t read mask file: ', label, lbec.id2label[label], mask_file)
            continue
        # Pure-white mask pixels belong to this object; objects processed
        # later overwrite earlier ones where their masks overlap.
        np_full_seg[np.all(mask_img == (255, 255, 255), axis=2)] = label

    np_full_seg = cv2.resize(np_full_seg, INPUT_SHAPE, interpolation=cv2.INTER_NEAREST)
    # Preview image; the 1.7 factor presumably stretches ids towards 255 — TODO confirm.
    cv2.imwrite(f'cv2_tmp/ADE_val_{img_id}.png', np_full_seg*1.7)
    return f_img, np_full_seg

# get_ground_truth(ls_train_id[5])
# get_ground_truth('00000471')

In [9]:
# img = cv2.imread('instance_mask/ADE_val_00000001/instance_000_ADE_val_00000001.png')
# print(img.shape)
# img = cv2.resize(img, (416, 416), interpolation =cv2.INTER_NEAREST)
# cv2.imwrite('test.png', img)

In [9]:
# The context manager shuts the 80 worker processes down once every result
# has been collected, instead of leaving the pool alive for the whole session.
with Pool(80) as pool:
    ls_output = list(tqdm(pool.imap(get_ground_truth, ls_train_id), total=len(ls_train_id), desc='training preparing'))
# ls_output holds (image, label_map) pairs; split them into two stacked arrays.
train_X, train_y = zip(*ls_output)
train_X = np.array(train_X)
train_y = np.array(train_y)

training preparing: 100%|██████████| 1400/1400 [00:27<00:00, 50.19it/s]


In [10]:
# The context manager shuts the 80 worker processes down once every result
# has been collected, instead of leaving the pool alive for the whole session.
with Pool(80) as pool:
    ls_output = list(tqdm(pool.imap(get_ground_truth, ls_val_id), total=len(ls_val_id), desc='val preparing'))
# ls_output holds (image, label_map) pairs; split them into two stacked arrays.
val_X, val_y = zip(*ls_output)
val_X = np.array(val_X)
val_y = np.array(val_y)

val preparing: 100%|██████████| 200/200 [00:07<00:00, 25.96it/s]


In [11]:
# The context manager shuts the 80 worker processes down once every result
# has been collected, instead of leaving the pool alive for the whole session.
with Pool(80) as pool:
    ls_output = list(tqdm(pool.imap(get_ground_truth, ls_test_id), total=len(ls_test_id), desc='test preparing'))
# ls_output holds (image, label_map) pairs; split them into two stacked arrays.
test_X, test_y = zip(*ls_output)
test_X = np.array(test_X)
test_y = np.array(test_y)

test preparing: 100%|██████████| 400/400 [00:17<00:00, 22.23it/s]


In [12]:
# Bundle the six arrays into a single compressed archive; each dict key
# becomes the name of the corresponding array inside the file.
dict_arrays = {
    'train_X': train_X,
    'train_y': train_y,
    'val_X': val_X,
    'val_y': val_y,
    'test_X': test_X,
    'test_y': test_y,
}
np.savez_compressed('ADE20K_DL_seg', **dict_arrays)

In [16]:
# Sanity check: largest label id present in the test label maps.
np.max(test_y)

148.0