# Thanks
Thanks to @Draconda, we found that prior mask information is very effective for this task. On the one hand, we can use the mask to directly regress the values of yaw, pitch, roll, x, y, z, and the mask, as @Draconda described. On the other hand, we can also concatenate the mask with the original image and feed both to whatever network structure you have designed for prediction. Here, we share a simpler way to get accurate masks using detectron2.

## Requirements for Detectron2
* Python ≥ 3.6
* PyTorch ≥ 1.3
* torchvision that matches the PyTorch installation. You can install them together at pytorch.org to make sure of this.
* OpenCV, optional, needed by demo and visualization
* pycocotools: pip install cython; pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
* gcc & g++ ≥ 4.9

In [None]:
pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

In [None]:
pip install 'git+https://github.com/facebookresearch/detectron2.git'

## Visualization of masks

In [None]:
import torch

from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

# Run on the first GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print("-> Loading model")

# Start from the default config and overlay the Mask R-CNN (R50-FPN, 3x) settings.
cfg = get_cfg()
cfg.merge_from_file("../input/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")

# Pretrained weights that the predictor loads.
cfg.MODEL.WEIGHTS = "../input/parameters/model_final_f10217.pkl"

# Inference-time overrides: target device, RPN NMS threshold and the minimum
# score a detection needs to be kept.
cfg.MODEL.DEVICE = str(device)
cfg.MODEL.RPN.NMS_THRESH = 0.1
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

# `model` is used by every prediction cell and helper below.
model = DefaultPredictor(cfg)

In [None]:
import PIL.Image as Image

from torchvision import transforms

default_transform = transforms.Compose([transforms.ToTensor()])

def load_image(path, transform=default_transform):
    """Open the image stored at ``path`` and return it after ``transform``.

    Args:
        path: Location of the image file, handed to ``PIL.Image.open``.
        transform: Callable applied to the opened PIL image. Defaults to the
            module-level ``default_transform``.

    Returns:
        Whatever ``transform`` produces for the opened image.
    """
    return transform(Image.open(path))

In [None]:
# Sample training image that the following cells read and run the predictor on.
image_path = '../input/pku-autonomous-driving/train_images/ID_7f6f07350.jpg'

In [None]:
import cv2

# cv2.imread yields a BGR array, which is the format the predictor is fed here.
image = cv2.imread(image_path)
if image is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with a clear message rather than deep
    # inside the predictor.
    raise FileNotFoundError("Can not read image: {}".format(image_path))
outputs = model(image)

In [None]:
from detectron2.data import MetadataCatalog
from matplotlib import pyplot as plt

from detectron2.utils.visualizer import ColorMode
from detectron2.utils.visualizer import Visualizer

# `image` comes from cv2.imread (BGR); reverse the channel axis to hand the
# Visualizer an RGB image.
visualizer = Visualizer(image[:, :, ::-1], metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=0.8, instance_mode=ColorMode.IMAGE_BW)
drawn = visualizer.draw_instance_predictions(outputs["instances"].to("cpu"))

# get_image() already returns RGB, which is what matplotlib expects. Flipping
# it back to BGR (only needed for cv2 display functions) would swap the red and
# blue channels of the overlay colours.
v = drawn.get_image()

plt.imshow(v)
plt.show()

## Only need one-channel mask
Some kagglers find that the npy file is large. This is because the mask I store has shape (number of instances, image height, image width). If you only care about the binary instance-vs-background result, you can reduce over the instance axis, marking a pixel as foreground if any instance covers it:

In [None]:
# Collapse the per-instance masks along the first axis: a pixel is foreground
# when at least one instance covers it.
mask = outputs["instances"].pred_masks.sum(dim=0).gt(0)

## Only need instances
Some kagglers only want to use the detected instances as input to the model (without concatenating the mask with the original image). In that case you can use OpenCV to keep only the instance pixels and zero out the background:

In [None]:
import numpy as np

# Repeat the binary mask along a new last axis so it lines up with the
# three-channel image, then convert it to a 0/1 uint8 array for OpenCV.
mask = torch.stack([mask, mask, mask], dim=2)
mask = mask.cpu().numpy().astype("uint8")

# Multiplying by the 0/1 mask keeps instance pixels and zeroes the background.
instances = cv2.multiply(image, mask)

# `instances` is still in OpenCV's BGR order; matplotlib expects RGB, so reverse
# the channel axis for display only (the stored array is left untouched).
plt.imshow(instances[:, :, ::-1])
plt.show()

## Make mask predictions

In [None]:
import os
import cv2
import pdb
import glob
import argparse

import numpy as np

In [None]:
def make_multi_channel_masks(source_dir='../input/pku-autonomous-driving/train_images',
                dist_dir='../input/pku-autonomous-driving/train_images_mask',
                ext='jpg'):
    """Save the per-instance masks of every image in a folder as ``.npy`` files.

    Each ``*.<ext>`` file found in ``source_dir`` is read with OpenCV and passed
    to the module-level ``model``. The predicted ``pred_masks`` (one mask per
    detected instance) are written to ``<dist_dir>/<image stem>.npy``.
    Images that OpenCV cannot read are reported and skipped.

    Args:
        source_dir: Directory containing the input images.
        dist_dir: Directory the ``.npy`` files are written to; created when it
            does not exist yet.
        ext: File extension (without the leading dot) of the images to process.

    Raises:
        NotADirectoryError: If ``source_dir`` is not an existing directory.
    """
    if not os.path.isdir(source_dir):
        raise NotADirectoryError("Can not find source_dir: {}".format(source_dir))

    # glob returns matches in arbitrary order; sort for a reproducible run.
    paths = sorted(glob.glob(os.path.join(source_dir, '*.{}'.format(ext))))
    output_directory = dist_dir

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_directory, exist_ok=True)

    print("-> Predicting on {:d} images".format(len(paths)))

    for idx, image_path in enumerate(paths):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None (no exception) for unreadable files.
            print("   Skipped {:d} of {:d} images - could not read {}".format(
                    idx + 1, len(paths), image_path))
            continue
        outputs = model(image)

        output_name = os.path.splitext(os.path.basename(image_path))[0]
        name_dest_npy = os.path.join(output_directory, "{}.npy".format(output_name))
        # Keep every instance mask; move to CPU before converting to NumPy.
        mask = outputs['instances'].pred_masks.cpu().numpy()
        np.save(name_dest_npy, mask)

        print("   Processed {:d} of {:d} images - saved prediction to {}".format(
                idx + 1, len(paths), name_dest_npy))

    print('-> Done!')

In [None]:
def make_single_channel_masks(source_dir='../input/pku-autonomous-driving/train_images',
                dist_dir='../input/pku-autonomous-driving/train_images_mask',
                ext='jpg'):
    """Save one binary instance-vs-background mask per image as ``.npy`` files.

    Each ``*.<ext>`` file found in ``source_dir`` is read with OpenCV and passed
    to the module-level ``model``. The per-instance ``pred_masks`` are merged
    into a single mask that is set wherever at least one instance is present,
    converted to float, given a leading axis of length one and written to
    ``<dist_dir>/<image stem>.npy``. Images that OpenCV cannot read are reported
    and skipped.

    Args:
        source_dir: Directory containing the input images.
        dist_dir: Directory the ``.npy`` files are written to; created when it
            does not exist yet.
        ext: File extension (without the leading dot) of the images to process.

    Raises:
        NotADirectoryError: If ``source_dir`` is not an existing directory.
    """
    if not os.path.isdir(source_dir):
        raise NotADirectoryError("Can not find source_dir: {}".format(source_dir))

    # glob returns matches in arbitrary order; sort for a reproducible run.
    paths = sorted(glob.glob(os.path.join(source_dir, '*.{}'.format(ext))))
    output_directory = dist_dir

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_directory, exist_ok=True)

    print("-> Predicting on {:d} images".format(len(paths)))

    for idx, image_path in enumerate(paths):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None (no exception) for unreadable files.
            print("   Skipped {:d} of {:d} images - could not read {}".format(
                    idx + 1, len(paths), image_path))
            continue
        outputs = model(image)

        output_name = os.path.splitext(os.path.basename(image_path))[0]
        name_dest_npy = os.path.join(output_directory, "{}.npy".format(output_name))
        # A pixel is foreground when any instance mask covers it.
        mask = outputs["instances"].pred_masks.sum(0) > 0
        # Store as float with an explicit leading axis of length one.
        mask = mask.float().unsqueeze(0)
        mask = mask.cpu().numpy()
        np.save(name_dest_npy, mask)

        print("   Processed {:d} of {:d} images - saved prediction to {}".format(
                idx + 1, len(paths), name_dest_npy))

    print('-> Done!')

In [None]:
def make_instances(source_dir='../input/pku-autonomous-driving/train_images',
                dist_dir='../input/pku-autonomous-driving/train_images_mask',
                ext='jpg'):
    """Save a copy of every image in a folder with the background zeroed out.

    Each ``*.<ext>`` file found in ``source_dir`` is read with OpenCV and passed
    to the module-level ``model``. The per-instance ``pred_masks`` are merged
    into one binary mask, the image is multiplied by that mask so only pixels
    belonging to a detected instance keep their value, and the result is
    written to ``<dist_dir>/<image stem>.jpg``. Images that OpenCV cannot read
    are reported and skipped.

    Args:
        source_dir: Directory containing the input images.
        dist_dir: Directory the ``.jpg`` files are written to; created when it
            does not exist yet.
        ext: File extension (without the leading dot) of the images to process.

    Raises:
        NotADirectoryError: If ``source_dir`` is not an existing directory.
    """
    if not os.path.isdir(source_dir):
        raise NotADirectoryError("Can not find source_dir: {}".format(source_dir))

    # glob returns matches in arbitrary order; sort for a reproducible run.
    paths = sorted(glob.glob(os.path.join(source_dir, '*.{}'.format(ext))))
    output_directory = dist_dir

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_directory, exist_ok=True)

    print("-> Predicting on {:d} images".format(len(paths)))

    for idx, image_path in enumerate(paths):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None (no exception) for unreadable files.
            print("   Skipped {:d} of {:d} images - could not read {}".format(
                    idx + 1, len(paths), image_path))
            continue
        outputs = model(image)

        output_name = os.path.splitext(os.path.basename(image_path))[0]
        name_dest_jpg = os.path.join(output_directory, "{}.jpg".format(output_name))
        # A pixel is foreground when any instance mask covers it.
        mask = outputs["instances"].pred_masks.sum(0) > 0
        # Repeat the mask along a new last axis to match the 3-channel image.
        mask = torch.stack([mask, mask, mask], dim=2)
        mask = mask.cpu().numpy().astype("uint8")

        # Multiplying by the 0/1 mask keeps instance pixels, zeroes the rest.
        instances = cv2.multiply(image, mask)
        cv2.imwrite(name_dest_jpg, instances)

        print("   Processed {:d} of {:d} images - saved prediction to {}".format(
                idx + 1, len(paths), name_dest_jpg))

    print('-> Done!')

Choose a generator based on your needs:

In [None]:
# make_multi_channel_masks()
# make_single_channel_masks()
# make_instances()