# Convert to PASCAL
This script selects scenes and objectds of interest from the ADE20K dataset and creates a PASCAL-like directory structure, where each image segmentation is a one-channel mask with all of the semantic labels encoded pixel-by-pixel.

In [5]:
import os
import shutil
from pathlib import Path
import json

# Ignore warnings about data types.
import warnings; warnings.simplefilter("ignore")

import numpy as np
from matplotlib import pyplot as plt
from skimage.io import imread, imsave
from skimage import img_as_ubyte
from scipy.io import loadmat

import pandas as pd
import cv2

from preprocess import filter_ade20k_dataset

In [10]:
# TODO: Set the directory here for the unprocessed ADE20K full dataset.
SRC_DIR = "/home/mxs8x15/datasets/ADE20K_2016_07_26/"
# TODO: Set a destination directory for the formatted ADE20K dataset.
DEST_DIR = "./tutorial_ade20k_pascal"

# TODO: Select scenes of interest to keep.
required_keywords = [
    'bedroom', 'childs_room', 'dorm_room'
]

reject_keywords = ["outdoor"]

grouped_labels = [
    ('background', ),
    ('door', 'doorframe, doorcase', 'double door'),
    ('bed', 'bedpost'),
    ('wardrobe, closet, press', ),
    ('lamp', ),
    ('chest of drawers, chest, bureau, dresser', ),
    ('pillow', ),
    ('armchair', 'chair', 'swivel chair'),
]

In [11]:
meta = loadmat(os.path.join(SRC_DIR, 'index_ade20k.mat'), squeeze_me=True)
# Only the 'index' field stores the data
meta_index = meta['index']
# Create a dict of all the metadata fields
ade_metadata = {name: meta_index[name][()] for name in meta_index.dtype.names}
objectnames = ade_metadata['objectnames'].tolist()

# Map ALL text labels to integer values
label_to_old_id = {label: i for (i, label) in enumerate(objectnames)}
# Create a reverse mapping -- integers to text labels.
old_id_to_label = {i: label for (label, i) in label_to_old_id.items()}

# Map individual labels to their new_ids
label_to_new_id = {}
# Map new_ids to a string representing the label group.
new_id_to_label = {}

# Mape old_ids to new_ids if the original label is kept
old_id_to_new_id = {}
old_id_to_new_id[0] = 0  # Background maps to same value.

for new_id, group in enumerate(grouped_labels):
    for label in group:
        label_to_new_id[label] = new_id
        
        if label in label_to_old_id:
            old_id = label_to_old_id[label]
            old_id_to_new_id[old_id] = new_id
    
    # The value at new_id will be the group label combined into
    # a single string.
    new_id_to_label[new_id] = ' | '.join(group)

In [12]:
new_id_to_label

{0: 'background',
 1: 'door | doorframe, doorcase | double door',
 2: 'bed | bedpost',
 3: 'wardrobe, closet, press',
 4: 'lamp',
 5: 'chest of drawers, chest, bureau, dresser',
 6: 'pillow',
 7: 'armchair | chair | swivel chair'}

In [None]:
# Map all old category ids to semantic labels
oldid_to_label = {idx: objectnames[idx].strip() for idx in range(len(objectnames))}

# Map all semantic labels to old category ids
name_to_oldid = {name: oldid for oldid, name in oldid_to_label.items()}

# Store all the kept old_ids in one place
keep_oldids = [oldid for oldid in oldid_to_label
                   if objectnames[oldid] in keep_categories]

# Map old category ids to new category ids
old2new = {old_id: (new_id + 1) for (new_id, old_id) in enumerate(keep_oldids)}

# Map new category ids to labels
newid_to_label = {new_id: oldid_to_label[old_id] for old_id, new_id in old2new.items()}
newid_to_label[0] = "background/ignore"

In [22]:
objectcounts = ade_metadata['objectcounts'].tolist()
objectnames = ade_metadata['objectnames'].tolist()

print(objectnames[:5], objectcounts[:5])

['-', 'aarm panel', 'abacus', 'accordion, piano accordion, squeeze box', 'acropolis'] [1.0, 0.0, 1.0, 1.0, 11.0]


In [25]:
np.argmax(objectcounts)

3049

In [26]:
objectnames[3049]

'window'

In [19]:
objectnames = ade_metadata['objectnames'].tolist()
objectnames = sorted(list(set(objectnames)))
objectnames

[' ',
 '-',
 'aarm panel',
 'abacus',
 'accordion, piano accordion, squeeze box',
 'acropolis',
 'ad, advertisement, advertizement, advertising, advertizing, advert',
 'adding machine',
 'adding machine, totalizer, totaliser',
 'advertisement board',
 'aerial',
 'air conditioner, air conditioning',
 'air hockey table',
 'air machine',
 'aircraft carrier',
 'aircraft carrier, carrier, flattop, attack aircraft carrier',
 'airplane, aeroplane, plane',
 'airport cart',
 'alarm',
 'alarm clock',
 'alarm clock, alarm',
 'alembic',
 'alga',
 'alga, algae',
 'algae',
 "altar, communion table, Lord's table",
 "altar, communion table, lord's table",
 'altarpiece',
 'altarpiece, reredos',
 'amphitheater',
 'amphitheater, amphitheatre, coliseum',
 'amphora',
 'anchor',
 'anchor, ground tackle',
 'andiron',
 'andiron, firedog, dog, dog-iron',
 'andirons',
 'animal toy',
 'animal, animate being, beast, brute, creature, fauna',
 'animals',
 'antenna',
 'antenna, aerial, transmitting aerial',
 'antler

In [18]:
objectnames = sorted(list(set(objectnames)))
len(objectnames)

3148

## Build Mappings of Semantic Labels to Category Ids
Build mappings associating semantic labels, old category ids and new category ids.

In [9]:
len(level1)

744

In [8]:
level1 = ade_metadata['wordnet_level1'].tolist()
level1 = [label for label in level1 if not isinstance(label, np.ndarray)]
level1

['arm',
 'abacus',
 'accordion, piano accordion, squeeze box',
 'acropolis',
 'ad, advertisement, advertizement, advertising, advertizing, advert',
 'adding machine, totalizer, totaliser',
 'ad, advertisement, advertizement, advertising, advertizing, advert',
 'antenna, aerial, transmitting aerial',
 'air conditioner, air conditioning',
 'table game',
 'machine',
 'aircraft carrier, carrier, flattop, attack aircraft carrier',
 'airplane, aeroplane, plane',
 'alarm clock, alarm',
 'alembic',
 'alga, algae',
 'alga, algae',
 'altarpiece, reredos',
 'amphitheater, amphitheatre, coliseum',
 'amphora',
 'anchor, ground tackle',
 'andiron, firedog, dog, dog-iron',
 'andiron, firedog, dog, dog-iron',
 'plaything, toy',
 'animal, animate being, beast, brute, creature, fauna',
 'animal, animate being, beast, brute, creature, fauna',
 'antenna, aerial, transmitting aerial',
 'antenna, aerial, transmitting aerial',
 'antler',
 'antler',
 'anvil',
 'aperture',
 'machine',
 'apparel, wearing appare

In [12]:
level1 = ade_metadata['wordnet_level1'].tolist()
level1 = sorted(set([label for label in level1 if not isinstance(label, np.ndarray)]))
with open('all_ade20k_labels.txt', 'w') as wf:
    for name in level1:
        wf.write(name + "\n")

In [15]:
ade_metadata.keys()

dict_keys(['filename', 'folder', 'typeset', 'objectIsPart', 'objectPresence', 'objectcounts', 'objectnames', 'proportionClassIsPart', 'scene', 'wordnet_found', 'wordnet_level1', 'wordnet_synset', 'wordnet_hypernym', 'wordnet_gloss', 'wordnet_synonyms', 'wordnet_frequency'])

In [15]:
# Semantic labels
object_labels = ade_metadata['objectnames']

# Map all old category ids to semantic labels
oldid_to_label = {idx: object_labels[idx].strip() for idx in range(len(object_labels))}

# Map all semantic labels to old category ids
name_to_oldid = {name: oldid for oldid, name in oldid_to_label.items()}

# Store all the kept old_ids in one place
keep_oldids = [oldid for oldid in oldid_to_label
                   if object_labels[oldid] in keep_categories]

# Map old category ids to new category ids
old2new = {old_id: (new_id + 1) for (new_id, old_id) in enumerate(keep_oldids)}

# Map new category ids to labels
newid_to_label = {new_id: oldid_to_label[old_id] for old_id, new_id in old2new.items()}
newid_to_label[0] = "background/ignore"

## Build PASCAL Directory Structure
This is the desired directory structure
```
.
└── VOCdevkit
    └── VOC2012
        ├── Annotations
        ├── ImageSets
        │   ├── Action
        │   ├── Layout
        │   ├── Main
        │   └── Segmentation
        ├── JPEGImages
        ├── SegmentationClass
        └── SegmentationObject
```

In [18]:
assert len(keep_categories) == (len(newid_to_label) - 1)

In [19]:
# Create the same directory structure as PASCAL VOC 2012.
VOC_DIR = os.path.join(DEST_DIR, 'VOCdevkit', 'VOC2012')

# Create the destination image and segmentation directory if it
# doesn't exist.
DST_IMG_DIR = os.path.join(VOC_DIR, 'JPEGImages')
DST_ANNOT_DIR = os.path.join(VOC_DIR, 'SegmentationClass')
DST_IMGSET_DIR = os.path.join(VOC_DIR, 'ImageSets')
DST_IMGSET_SEG_DIR = os.path.join(DST_IMGSET_DIR, 'Segmentation')

Path(DST_IMG_DIR).mkdir(parents=True, exist_ok=True)
Path(DST_ANNOT_DIR).mkdir(parents=True, exist_ok=True)
Path(DST_IMGSET_DIR).mkdir(parents=True, exist_ok=True)

# Create the other directories that PASCAL uses, even if we
# leave them empty.
Path(os.path.join(VOC_DIR, 'Annotations')).mkdir(parents=True, exist_ok=True)
Path(os.path.join(VOC_DIR, 'SegmentationObject')).mkdir(parents=True, exist_ok=True)

# These directories are all under the ImageSets Directory
Path(os.path.join(DST_IMGSET_DIR, 'Action')).mkdir(parents=True, exist_ok=True)
Path(os.path.join(DST_IMGSET_DIR, 'Layout')).mkdir(parents=True, exist_ok=True)
Path(os.path.join(DST_IMGSET_DIR, 'Main')).mkdir(parents=True, exist_ok=True)
Path(os.path.join(DST_IMGSET_DIR, 'Segmentation')).mkdir(parents=True, exist_ok=True)

## Labelmap
Prints out the labelmap, also saves it to the `DEST_DIR`.

In [20]:
labelmap = {v: k for (k, v) in newid_to_label.items()}
labelmap_filepath = os.path.join(DEST_DIR, "labelmap.json")

print(f"Here is the label mapping: \n{json.dumps(labelmap, indent=4)}")
print(f"Saving to {labelmap_filepath}")

with open(labelmap_filepath, "w") as wf:
    json.dump(labelmap, wf, indent=4)

Here is the label mapping: 
{
    "airplane, aeroplane, plane": 1,
    "animal, animate being, beast, brute, creature, fauna": 2,
    "apparel, wearing apparel, dress, clothes": 3,
    "arcade machine": 4,
    "armchair": 5,
    "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin": 6,
    "awning, sunshade, sunblind": 7,
    "bag": 8,
    "ball": 9,
    "bannister, banister, balustrade, balusters, handrail": 10,
    "bar": 11,
    "barrel, cask": 12,
    "base, pedestal, stand": 13,
    "baseboard, mopboard, skirting board": 14,
    "basket, handbasket": 15,
    "bathtub, bathing tub, bath, tub": 16,
    "bed": 17,
    "bench": 18,
    "bicycle, bike, wheel, cycle": 19,
    "blanket, cover": 20,
    "blind, screen": 21,
    "boat": 22,
    "book": 23,
    "bookcase": 24,
    "booth, cubicle, stall, kiosk": 25,
    "bottle": 26,
    "box": 27,
    "bridge, span": 28,
    "buffet, counter, sideboard": 29,
    "building, edifice": 30,
    

## Create Helpers for Processing Each Image

In [21]:
def is_sparse_mask(mask, threshold=.70):
    """
    This informs us if a mask has too many "background/ignore" pixels.
    
    If more than the threshold ratio of the image has the semantic label
    '0', then we consider the mask to be sparse. 
    
    Args:
        :mask: - a 1-channel numpy array
        :threshold: - a value from 0 - 1.0
    
    Returns:
        :is_sparse: - True if the mask is more than threshold percent
            background pixels, false otherwise
    """
    h, w = mask.shape
    n_pixels = h * w
    n_zeros = n_pixels - np.count_nonzero(mask)
    ratio = n_zeros / n_pixels
    is_sparse = (ratio >= threshold)
    
    return is_sparse

In [22]:
def process_image(seg_mask):
    """
    Take as input a segmentation mask from ADE20k dataset,
    return a new segmentation mask with everything but the 
    categories of interest removed.
    """
    # Create a new one-channel image mask
    h, w, _ = seg_mask.shape
    new_seg_mask = np.zeros((h, w)).astype(np.uint8)
    green_channel_vals = np.unique(seg_mask[:, :, 1])
    
    for green_channel_val in green_channel_vals:
        if green_channel_val == 0:
            continue
        
        pixels = (seg_mask[:, :, 1] == green_channel_val)
        instance_idx = green_channel_val
        red_channel_val = seg_mask[pixels][0, 0]
        category_id = red_channel_val // 10 * 256 + instance_idx - 1
        category = oldid_to_label[category_id]
        
        # If we're not keeping this category in new dataset, discard it.
        if category_id not in old2new:
            continue

        newid = old2new[category_id]
        color = newid  # The color is the newid itself.
        new_seg_mask[pixels] = color
        
    # Convert from float32 to uint8
    new_annot_uint8 = img_as_ubyte(new_seg_mask)
    unique_labels = np.unique(new_annot_uint8).tolist()
    return new_annot_uint8, unique_labels

In [23]:
def handle_partition(partition, samples_fname='train.txt'):
    """
    Given a partition name, this helper takes care of
    processing all image masks in the partition and saving
    to the appropriate directory.
    """
    PARTITION = partition
    PARTITION_DIR = os.path.join(TMP_DIR, PARTITION)
    IMG_DIR = os.path.join(PARTITION_DIR, "images")
    ANNOT_DIR = os.path.join(PARTITION_DIR, "annotations")
    
    # Load all metadata from the flattened dataset we created.
    with open(os.path.join(TMP_DIR, "ade20k.json"), 'r') as rf:
        dataset = json.load(rf)
                
    kept_samples = []
    all_unique_labels = set()
    
    print(f"Processing {PARTITION} partition")
    for i, img_metadata in enumerate(dataset[PARTITION]):
        if (i+1) % 500 == 0:
            print(f"Processing {i+1} of {len(dataset[PARTITION])}")
            
        img_name = os.path.join(IMG_DIR, img_metadata['img_name'])
        seg_name = os.path.join(ANNOT_DIR, img_metadata['seg_name'])

        annot_img = imread(seg_name)
        new_annot_img, unique_labels = process_image(annot_img)
        
        # If mask is 70% or more of background, don't keep this sample
        if is_sparse_mask(new_annot_img, threshold=0.70):
            continue
        
        # Update tracking of all unique labels
        all_unique_labels.update(unique_labels)        
            
        # Keep the image. Add to the list of images in our dataset.
        sample_name = os.path.basename(img_name).replace(".jpg", "")
        kept_samples.append(sample_name)
        
        # Save the annotation and the new segmentation in appropriate
        # directories.
        new_img_name = os.path.join(DST_IMG_DIR, f'{sample_name}.jpg')
        new_seg_name = os.path.join(DST_ANNOT_DIR, f'{sample_name}.png')
        
        shutil.copy(img_name, new_img_name)
        imsave(new_seg_name, new_annot_img)
        

    # Store the list of sample names in this partition
    samples_filepath = os.path.join(DST_IMGSET_SEG_DIR, samples_fname)
    print(f"Saving to {samples_filepath}")
    
    with open(samples_filepath, 'w') as wf:
        for sample_name in kept_samples:
            wf.write(sample_name)
            wf.write("\n")
        
    print(f"Completed {PARTITION} partition")
    print(f"Kept a total of {len(kept_samples)} of the {len(dataset[PARTITION])} images.")
    
    return all_unique_labels

In [24]:
%%time
partition = "validation"
all_unique_labels = handle_partition(partition, samples_fname="val.txt")

Processing validation partition
Processing 500 of 503
Saving to /home/mxs8x15/datasets/subset_ade20k_baseboards_v2/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt
Completed validation partition
Kept a total of 503 of the 503 images.
CPU times: user 2min 38s, sys: 1.49 s, total: 2min 40s
Wall time: 2min 40s


In [25]:
print(f"Unique labels: {len(all_unique_labels)} - {all_unique_labels}")
print(f"Unique labels: {[newid_to_label[label] for label in all_unique_labels]}")

Unique labels: 112 - {0, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 20, 21, 23, 24, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 53, 54, 55, 57, 60, 61, 62, 63, 64, 66, 68, 70, 71, 75, 77, 78, 80, 81, 82, 83, 84, 85, 87, 88, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 102, 103, 104, 107, 109, 110, 111, 113, 114, 116, 117, 118, 119, 120, 122, 123, 124, 125, 126, 127, 128, 131, 132, 134, 136, 137, 140, 141, 142, 145, 146, 147, 148, 149, 151}
Unique labels: ['background/ignore', 'animal, animate being, beast, brute, creature, fauna', 'apparel, wearing apparel, dress, clothes', 'arcade machine', 'armchair', 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin', 'bag', 'ball', 'bannister, banister, balustrade, balusters, handrail', 'bar', 'base, pedestal, stand', 'baseboard, mopboard, skirting board', 'basket, handbasket', 'bathtub, bathing tub, bath, tub', 'bed', 'bench', 'blanket, cover'

In [26]:
%%time
partition = "training"
all_unique_labels = handle_partition(partition, samples_fname="train.txt")

Processing training partition
Processing 500 of 5029
Processing 1000 of 5029
Processing 1500 of 5029
Processing 2000 of 5029
Processing 2500 of 5029
Processing 3000 of 5029
Processing 3500 of 5029
Processing 4000 of 5029
Processing 4500 of 5029
Processing 5000 of 5029
Saving to /home/mxs8x15/datasets/subset_ade20k_baseboards_v2/VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt
Completed training partition
Kept a total of 5026 of the 5029 images.
CPU times: user 25min 3s, sys: 16.2 s, total: 25min 19s
Wall time: 25min 21s
