# Open Images Challenge 2019

## Training PSPNet with GluonCV

Required input files (N is a single hexadecimal digit, [0-9A-F], identifying the data split):

BASE_DIR/classes-segmentation.txt   …download from https://storage.googleapis.com/openimages/v5/classes-segmentation.txt

BASE_DIR/train-images-N/*.jpg   …training images, extracted from s3://open-images-dataset/tar/train_N.tar.gz

BASE_DIR/mask-images-N/*.png   …mask images, extracted from https://storage.googleapis.com/openimages/v5/train-masks/train-masks-N.zip

TEST_DIR/*.jpg   …test images to run prediction on

Temporary directories (intermediate results are written here):

TEMP_DIR/join-masks-N/
TEMP_DIR/output-images/

In [1]:
import os
from PIL import Image
import numpy as np
import pandas as pd
import mxnet as mx
from tqdm import tqdm_notebook as tqdm

In [2]:
# Root locations, given relative to the current working directory.
TEMP_DIR = '../'
BASE_DIR = '../'
TEST_DIR = BASE_DIR+'/test-images'

# Label table: index 0 is a synthetic background entry, followed by one
# entry per line of classes-segmentation.txt (surrounding whitespace removed).
with open(BASE_DIR+"classes-segmentation.txt") as f:
    CLASSES = ["__background__"] + [line.strip() for line in f]
NUM_CLASS = len(CLASSES)

# CLOP_SIZE is the side length of the square crops fed to the network;
# BASE_SIZE is the minimum side length used by _create_resize_image.
CLOP_SIZE = 480
BASE_SIZE = 520

In [3]:
# Training configuration.
BATCH_SIZE = 5  # samples per DataLoader batch; also passed to optimizer.step()
NUM_WORKERS = 2  # DataLoader worker count
NUM_EPOCHS = 1  # number of passes over the training set
NUM_GPUS = 1  # one mx.gpu(i) context is created per GPU
# Suffixes N of the train-images-N / mask-images-N / join-masks-N folders to use.
USING_SPLITS = ["z"]

In [4]:
# Create the working directories.
# makedirs(exist_ok=True) is used instead of an isdir() guard so that the
# sub-directories are created even when TEMP_DIR itself already exists.
os.makedirs(TEMP_DIR, exist_ok=True)
for _split in USING_SPLITS:
    # One merged-mask folder per configured split (join-masks-<split>),
    # matching the path that _make_openimage2019_mask() writes to.
    os.makedirs(os.path.join(TEMP_DIR, "join-masks-" + _split), exist_ok=True)
# Destination of the per-image prediction arrays.
os.makedirs(os.path.join(TEMP_DIR, "output-images"), exist_ok=True)

In [5]:
def _mask_filepart_classname(name):
    """Turn the class part of a mask file name into a label name.

    A name that begins with "m" gets that first character replaced by
    "/m/"; every other name is returned unchanged.
    """
    return "/m/" + name[1:] if name.startswith("m") else name

In [6]:
import math
def _create_resize_image(img, ismask=False, tosize=None):
    """Return a resized copy of a PIL image.

    Without ``tosize`` the longer side becomes ``2 * BASE_SIZE`` and the
    shorter side is scaled by the same ratio (rounded up), but never below
    ``BASE_SIZE``. With ``tosize`` that (width, height) is used verbatim.

    Masks are resampled with NEAREST so label values are not blended;
    regular images use BILINEAR.
    """
    long_side = BASE_SIZE * 2
    landscape = img.height < img.width

    # Ratio between the original long side and its target length.
    ratio = (img.width if landscape else img.height) / long_side
    short_source = img.height if landscape else img.width
    short_side = max(BASE_SIZE, math.ceil(short_source / ratio))

    # PIL sizes are (width, height).
    computed = (long_side, short_side) if landscape else (short_side, long_side)
    target = computed if tosize is None else tosize
    resample = Image.NEAREST if ismask else Image.BILINEAR
    return img.resize(target, resample)

In [7]:
def _make_openimage2019_mask(split_name):
    """Merge the per-object mask PNGs of one split into per-image label maps.

    Mask files in ``BASE_DIR/mask-images-<split_name>`` are named
    ``<image>_<class>_...png``. All masks that belong to the same image are
    painted into a single int32 array with the size of the resized training
    image. Each pixel holds the index of the mask's class in ``CLASSES``;
    pixels no mask covers stay 0, and a later mask overwrites an earlier one
    where they overlap. The array is saved as
    ``TEMP_DIR/join-masks-<split_name>/<image>.npy``.

    Masks whose image file is missing, or whose class is not in ``CLASSES``,
    are skipped.

    Args:
        split_name: suffix of the ``train-images-`` / ``mask-images-`` /
            ``join-masks-`` directories to process.
    """
    img_folder = os.path.join(BASE_DIR, 'train-images-'+split_name)
    mask_folder = os.path.join(BASE_DIR, 'mask-images-'+split_name)
    join_folder = os.path.join(TEMP_DIR, 'join-masks-'+split_name)
    # np.save() does not create missing directories.
    os.makedirs(join_folder, exist_ok=True)

    # Class name -> first index in CLASSES (same result as CLASSES.index,
    # without a linear scan for every mask).
    class_index = {}
    for idx, classname in enumerate(CLASSES):
        class_index.setdefault(classname, idx)

    # Group mask files by the image they belong to. The dict is keyed by the
    # image file name; setdefault() keeps every mask of an image instead of
    # overwriting the list each time.
    image_mask = {}
    for filename in tqdm(os.listdir(mask_folder)):
        if not filename.endswith(".png"):
            continue
        basename, _ = os.path.splitext(filename)
        imagename = basename.split("_")[0] + '.jpg'
        imagepath = os.path.join(img_folder, imagename)
        if os.path.isfile(imagepath):
            image_mask.setdefault(imagename, []).append(filename)
        else:
            print('cannot find the image:', imagepath)

    for imagename, masknames in tqdm(image_mask.items()):
        # Only the resized dimensions of the training image are needed here.
        with Image.open(os.path.join(img_folder, imagename)) as src:
            width, height = _create_resize_image(src).size
        mask = np.zeros((height, width), dtype=np.int32)

        for filename in masknames:
            basename, _ = os.path.splitext(filename)
            parts = basename.split("_")
            if len(parts) < 2:
                print('cannot parse the mask name:', filename)
                continue
            class_id = class_index.get(_mask_filepart_classname(parts[1]))
            if class_id is None:
                continue
            with Image.open(os.path.join(mask_folder, filename)) as src:
                maskflag = _create_resize_image(
                    src, ismask=True, tosize=(width, height)).convert('1')
            # Convert explicitly to a boolean array before using it as an index.
            mask[np.array(maskflag, dtype=bool)] = class_id

        basename, _ = os.path.splitext(imagename)
        np.save(os.path.join(join_folder, basename), mask)

In [8]:
# Build the merged label maps for every configured split.
for split_name in USING_SPLITS:
    _make_openimage2019_mask(split_name)

HBox(children=(IntProgress(value=0, max=2000), HTML(value='')))




HBox(children=(IntProgress(value=0, max=2000), HTML(value='')))




In [9]:
from gluoncv.data.segbase import SegmentationDataset
import random
class OpenImage2019Segmentation(SegmentationDataset):
    """Training dataset pairing resized images with their merged label maps.

    Each item is a random CLOP_SIZE x CLOP_SIZE crop, randomly mirrored,
    returned as ``(image, mask)`` with the image in channel-first layout.
    """

    def __init__(self, splits=USING_SPLITS, **kwargs):
        """Collect the (image, .npy mask) path pairs of every split in ``splits``."""
        super(OpenImage2019Segmentation, self).__init__(
            BASE_DIR, split='train', mode=None, transform=None, **kwargs)
        self.images = []
        self.masks = []
        for split_name in splits:
            image_paths, mask_paths = _get_openimage2019_pairs(split_name)
            self.images += image_paths
            self.masks += mask_paths

    def __transform(self, img, mask):
        """Apply the same random horizontal flip to both, then convert them.

        The conversion uses the inherited ``_img_transform`` /
        ``_mask_transform`` helpers (presumably producing NDArrays —
        TODO confirm against gluoncv's SegmentationDataset).
        """
        mirror = random.random() < 0.5
        if mirror:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            mask = mask[:, ::-1]
        return self._img_transform(img), self._mask_transform(mask)

    def __getitem__(self, index):
        """Return one randomly cropped and mirrored (image, mask) sample."""
        img = _create_resize_image(Image.open(self.images[index])).convert('RGB')
        # The crop origin is drawn before the flip decision, so the sequence
        # of random numbers consumed per sample stays the same.
        left = random.randint(0, img.width - CLOP_SIZE)
        top = random.randint(0, img.height - CLOP_SIZE)
        mask = np.load(self.masks[index])
        img, mask = self.__transform(img, mask)
        rows = slice(top, top + CLOP_SIZE)
        cols = slice(left, left + CLOP_SIZE)
        # Image goes from (H, W, C) to (C, H, W); the mask stays (H, W).
        return img[rows, cols, :].transpose((2, 0, 1)), mask[rows, cols]

    def __len__(self):
        """Number of image/mask pairs."""
        return len(self.images)

    @property
    def classes(self):
        """Label names, background first."""
        return CLASSES

    @property
    def pred_offset(self):
        """Constant offset of 1."""
        return 1


def _get_openimage2019_pairs(split_name):
    """List the training images of a split that have a merged mask file.

    For every ``*.jpg`` in ``BASE_DIR/train-images-<split_name>`` the
    matching ``TEMP_DIR/join-masks-<split_name>/<name>.npy`` is looked up.
    Images without a mask file are reported on stdout and left out.

    Args:
        split_name: suffix of the ``train-images-`` / ``join-masks-`` folders.

    Returns:
        A tuple ``(img_paths, join_paths)`` of two equally long lists; entry
        ``i`` of each list refers to the same image.
    """
    img_folder = os.path.join(BASE_DIR, 'train-images-'+split_name)
    join_folder = os.path.join(TEMP_DIR, 'join-masks-'+split_name)
    img_paths = []
    join_paths = []
    for filename in os.listdir(img_folder):
        if not filename.endswith(".jpg"):
            continue
        basename, _ = os.path.splitext(filename)
        joinpath = os.path.join(join_folder, basename + '.npy')
        if os.path.isfile(joinpath):
            img_paths.append(os.path.join(img_folder, filename))
            join_paths.append(joinpath)
        else:
            # Report the path that was actually looked up; the previous code
            # referenced an undefined name here and raised NameError.
            print('cannot find the mask:', joinpath)

    return img_paths, join_paths

In [10]:
from mxnet import gluon, autograd
from mxnet.gluon.data.vision import transforms

import gluoncv
from gluoncv.loss import *
from gluoncv.utils import LRScheduler
from gluoncv.model_zoo.segbase import *
from gluoncv.model_zoo import get_model
from gluoncv.utils.parallel import *
from gluoncv.data import get_segmentation_dataset
from gluoncv.model_zoo.pspnet import PSPNet

In [11]:
class Trainer(object):
    """Bundles the data loader, PSPNet model, loss, LR schedule and optimizer.

    Everything is configured from the module-level constants (NUM_GPUS,
    BATCH_SIZE, NUM_WORKERS, NUM_CLASS, NUM_EPOCHS).
    """

    def __init__(self):
        """Build the training pipeline; takes no arguments."""
        # One GPU context per configured device.
        ctx_list = [mx.gpu(i) for i in range(NUM_GPUS)]
        trainset = OpenImage2019Segmentation()
        self.train_data = gluon.data.DataLoader(
            trainset, BATCH_SIZE, shuffle=True, last_batch='rollover', num_workers=NUM_WORKERS)
        
        # PSPNet on a ResNet-50 backbone with pretrained backbone weights;
        # the parameters are cast to float16.
        model = PSPNet(NUM_CLASS, backbone='resnet50', pretrained_base=True, ctx=ctx_list)
        model.cast("float16")
        
        self.net = DataParallelModel(model, ctx_list)

        # Loss with an auxiliary term weighted by 0.2.
        criterion = MixSoftmaxCrossEntropyLoss(aux=True, mixup=False, aux_weight=0.2)
        self.criterion = DataParallelCriterion(criterion, ctx_list, False)
        
        # 'poly' schedule starting at 1e-3, spanning NUM_EPOCHS epochs of
        # len(self.train_data) iterations each.
        self.lr_scheduler = LRScheduler(mode='poly', base_lr=1e-3,
                                        nepochs=NUM_EPOCHS,
                                        iters_per_epoch=len(self.train_data),
                                        power=0.9)
        kv = mx.kv.create('device')
        # SGD with momentum and weight decay; multi_precision is enabled
        # alongside the float16 model above.
        optimizer_params = {'lr_scheduler': self.lr_scheduler,
                            'wd':0.0001,
                            'momentum': 0.9,
                            'learning_rate': 1e-3,
                            'multi_precision': True
                           }

        self.optimizer = gluon.Trainer(self.net.module.collect_params(), 'sgd',
                                       optimizer_params, kvstore = kv)

    def training(self, epoch):
        """Run one pass over the training data.

        Args:
            epoch: epoch number; only used in the progress-bar text.

        Returns:
            The wrapped network (``self.net.module``).
        """
        tbar = tqdm(self.train_data)
        train_loss = 0.0
        alpha = 0.2  # NOTE(review): not used anywhere in this method
        for i, (data, target) in enumerate(tbar):
            with autograd.record(True):
                # Inputs are cast to float16 to match the model's dtype.
                outputs = self.net(data.astype("float16", copy=False))
                losses = self.criterion(outputs, target)
                # Wait for all pending MXNet operations before backward.
                mx.nd.waitall()
                autograd.backward(losses)
            self.optimizer.step(BATCH_SIZE)
            # Add the mean of this batch's losses to the running total.
            for loss in losses:
                train_loss += np.mean(loss.asnumpy()) / len(losses)
            # The progress bar shows the running average loss per iteration.
            tbar.set_description('Epoch %d, training loss %.3f'%\
                (epoch, train_loss/(i+1)))
            mx.nd.waitall()

        return self.net.module
    

In [12]:
# Train for NUM_EPOCHS epochs and write one parameter file per epoch.
# `net` stays bound to the trained network and is used for prediction below.
trainer = Trainer()
for epoch in range(NUM_EPOCHS):
    net = trainer.training(epoch)
    net.save_parameters("checkpoint_%d"%epoch)

self.crop_size 480


HBox(children=(IntProgress(value=0, max=400), HTML(value='')))




In [13]:
import base64
import numpy as np
from pycocotools import _mask as coco_mask
import typing as t
import zlib

def encode_binary_mask(mask: np.ndarray) -> bytes:
    """Converts a binary mask into OID challenge encoding ascii text.

    The mask is RLE-encoded with the COCO API, zlib-compressed and then
    base64-encoded.

    Args:
        mask: boolean array that is two-dimensional once singleton
            dimensions are squeezed out.

    Returns:
        The base64 encoding as an ASCII ``bytes`` object; call ``.decode()``
        on it to obtain a ``str``.

    Raises:
        ValueError: if ``mask`` is not of boolean dtype or is not 2-D after
            squeezing.
    """

    # check input mask --
    # np.bool_ is the boolean scalar type; the plain `np.bool` alias was
    # removed in NumPy 1.24.
    if mask.dtype != np.bool_:
        raise ValueError("encode_binary_mask expects a binary mask, received dtype == %s" % mask.dtype)

    mask = np.squeeze(mask)
    if mask.ndim != 2:
        # The shape is wrapped in a 1-tuple: a bare tuple on the right of %
        # is unpacked as the argument list and raises TypeError instead.
        raise ValueError("encode_binary_mask expects a 2d mask, received shape == %s" % (mask.shape,))

    # convert input mask to expected COCO API input --
    mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
    mask_to_encode = mask_to_encode.astype(np.uint8)
    mask_to_encode = np.asfortranarray(mask_to_encode)

    # RLE encode mask --
    encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

    # compress and base64 encoding --
    binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
    base64_str = base64.b64encode(binary_str)
    return base64_str

In [14]:
# Run the trained network on every test image and store the raw class
# scores as TEMP_DIR/output-images/<image>.npy.
test_images = os.listdir(TEST_DIR)

output_folder = os.path.join(TEMP_DIR, 'output-images')
# np.save() does not create missing directories.
os.makedirs(output_folder, exist_ok=True)

for filename in tqdm(test_images):
    test_filename = os.path.join(TEST_DIR, filename)
    basename, _ = os.path.splitext(filename)
    # Close the file as soon as the resized RGB copy exists.
    with Image.open(test_filename) as org_img:
        img = _create_resize_image(org_img).convert('RGB')
    data = np.array(img).transpose((2,0,1))  # (H, W, C) -> (C, H, W)
    height, width = data.shape[1], data.shape[2]

    # (row, col) origins of the four CLOP_SIZE x CLOP_SIZE corner tiles.
    # NOTE(review): the resized long side is 2 * BASE_SIZE, which is larger
    # than 2 * CLOP_SIZE, so a central strip is not covered by any tile and
    # keeps all-zero scores.
    tiles = [(0, 0),
             (height - CLOP_SIZE, 0),
             (0, width - CLOP_SIZE),
             (height - CLOP_SIZE, width - CLOP_SIZE)]

    indata = np.zeros((len(tiles),3,CLOP_SIZE,CLOP_SIZE), dtype=np.float16)
    for k, (y0, x0) in enumerate(tiles):
        indata[k] = data[:, y0:y0+CLOP_SIZE, x0:x0+CLOP_SIZE]
    with mx.Context(mx.gpu()):
        indata = mx.nd.array(indata).astype("float16")
    out = net(indata)

    # Copy the main output to host memory once instead of once per tile.
    scores = out[0].asnumpy()
    outdata = np.zeros((NUM_CLASS,img.height,img.width))
    # Tiles are written in the same order as they were cut, so where tiles
    # overlap the later tile wins.
    for k, (y0, x0) in enumerate(tiles):
        outdata[:, y0:y0+CLOP_SIZE, x0:x0+CLOP_SIZE] = scores[k]
    outputpath = os.path.join(output_folder, basename)
    np.save(outputpath, outdata)

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))




In [19]:
# Build one submission row per test image from the saved score arrays.
empty_submission_df = pd.DataFrame({"ImageID":[filename.split(".")[0] for filename in os.listdir(TEST_DIR)]})
#empty_submission_df = pd.read_csv("../input/open-images-2019-instance-segmentation/sample_empty_submission.csv")
ImageID_list = []
ImageWidth_list = []
ImageHeight_list = []
PredictionString_list = []

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x))."""
    return 1.0 / (1.0 + np.exp(-x))

for num, row in tqdm(empty_submission_df.iterrows(), total=len(empty_submission_df)):
    image_id = row["ImageID"]
    test_filename = os.path.join(TEST_DIR, image_id + ".jpg")
    # Only the original dimensions are needed; do not keep the file open.
    with Image.open(test_filename) as org_img:
        org_width, org_height = org_img.size

    outputpath = os.path.join(output_folder, image_id + ".npy")
    output = np.load(outputpath)
    # Per-pixel winning class index.
    img_out = output.argmax(axis=0)

    predictions = []
    # Index 0 is the background label and is never reported.
    for i in range(1, NUM_CLASS):
        pred_mask = (img_out == i)
        if not pred_mask.any():
            continue
        # Confidence: sigmoid of the mean score of class i over its pixels.
        confidence = sigmoid(output[i][pred_mask].mean())
        if not confidence > 0.5:
            continue

        # Resize and encode only masks that pass the confidence threshold.
        pred_mask_sized = Image.fromarray(pred_mask.astype(np.uint8)).resize(
            (org_width, org_height), Image.NEAREST)
        pred_mask_sized = np.array(pred_mask_sized) != 0
        encoded_mask = encode_binary_mask(pred_mask_sized)

        # "<label> <confidence> <mask> " — the trailing space separates
        # consecutive predictions.
        predictions.append(
            CLASSES[i] + " " + str(confidence) + " " + encoded_mask.decode() + " ")

    PredictionString = "".join(predictions)

    ImageID_list.append(image_id)
    ImageWidth_list.append(org_width)
    ImageHeight_list.append(org_height)
    PredictionString_list.append(PredictionString)

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))




In [20]:
# Assemble the submission table from the lists filled above (one row per test image).
results=pd.DataFrame({"ImageID":ImageID_list,
                      "ImageWidth":ImageWidth_list,
                      "ImageHeight":ImageHeight_list,
                      "PredictionString":PredictionString_list
                     })

In [21]:
# Preview the first rows of the submission table.
results.head()

Unnamed: 0,ImageID,ImageWidth,ImageHeight,PredictionString
0,a806afcdc63693e3,1600,1200,
1,a806afcdc63693e9,1600,1200,
2,a806afcdc63693e1,1600,1200,
3,a806afcdc63693e2,1600,1200,


In [22]:
# Write the submission file; index=False leaves the DataFrame index out of the CSV.
results.to_csv("submission007.csv", index=False)