# Segmentation Model (Version 1)


Steps:
* Import Libraries + other dependencies
* Load Image from URL
* Load pre-trained model
* Run inference on images
* Extract inference labels (from the panoptic predictions)
* Save inference masks (from the instance predictions)

Citation:
@misc{wu2019detectron2,
  author =       {Yuxin Wu and Alexander Kirillov and Francisco Massa and
                  Wan-Yen Lo and Ross Girshick},
  title =        {Detectron2},
  howpublished = {\url{https://github.com/facebookresearch/detectron2}},
  year =         {2019}
}


In [1]:
# Install Torch and Torchvision
# -> Make sure to use correct version for instance used

# Install detectron2
# -> Make sure to use correct version for instance
# -> See https://detectron2.readthedocs.io/tutorials/install.html for instructions

# pip install pyyaml==5.1    #or => pip3 install pyyaml==5.1
# pip install pycocotools    #or => pip3 install pycocotools
# pip install opencv-python  #or => pip3 install opencv-python


# Import libraries and Load Dependencies:

In [2]:
# Report the installed PyTorch version and whether CUDA is usable on this machine.
import torch
import torchvision

print(f"Torch Version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

Torch Version: 1.5.1
CUDA available: False


In [3]:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()  # called once here, before any other detectron2 utility is used

# import detectron2 utilities
# model_zoo        -> resolves the config file and checkpoint URL of the pre-trained model
# DefaultPredictor -> wraps the configured model for single-image inference
# get_cfg          -> creates the config object the model is built from
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog, DatasetCatalog

# import common libraries
# cv2 / numpy decode the downloaded image bytes; requests (plus its HTTPError and
# Timeout exceptions) is used by httpGetRequest.
# NOTE(review): os, json, random, MetadataCatalog and DatasetCatalog are not referenced
# in the cells visible here -- confirm they are still needed.
import os, json, random
import cv2
import numpy as np
import requests
from requests.exceptions import HTTPError, Timeout

# Load test Images:

In [4]:
def httpGetRequest(url):
    """Issue an HTTP GET for ``url`` and return the response.

    Args:
        url: Address to fetch.

    Returns:
        The ``requests`` response object when the request completes with a
        non-error status. ``None`` when the server answers with an HTTP error
        status or the request times out; in both cases the error is printed
        rather than raised.
    """
    try:
        # 15 second timeout so a stalled server cannot hang the notebook
        result = requests.get(url, timeout=15)
        # raise_for_status() has to be called (with parentheses) so that 4xx/5xx
        # responses raise HTTPError instead of being returned as if they succeeded.
        result.raise_for_status()
        print(result)
        return result
    except HTTPError as err:
        print("Error: {0}".format(err))
    except Timeout as err:
        print("Request time out {0}".format(err))
    # Reached only after one of the handled errors above.
    return None

In [5]:
# load image
IMAGE_URL = "https://storage.googleapis.com/segmentation-testing/testing_images1/bikes.jpeg"
image_response = httpGetRequest(IMAGE_URL)
# httpGetRequest returns None when it catches an HTTP error or a timeout; stop here
# with a clear message instead of failing on `.content` below.
if image_response is None:
    raise RuntimeError("Could not download image from {0}".format(IMAGE_URL))

# get image as numpy array (raw, still-encoded bytes)
image_NumpyArray = np.frombuffer(image_response.content, np.uint8)
# decode the bytes into a 3-channel colour image
image = cv2.imdecode(image_NumpyArray, cv2.IMREAD_COLOR)
# cv2.imdecode reports undecodable data by returning None rather than raising
if image is None:
    raise ValueError("Downloaded data from {0} could not be decoded as an image".format(IMAGE_URL))

<Response [200]>


# LOAD PRE-TRAINED MODEL:

Then, we create a detectron2 config and a detectron2 `DefaultPredictor` to run inference on this image.

* Use cfg.MODEL.DEVICE='cpu' only if you don't have a GPU.

In [6]:
# ---------- build model: ----------
# Model-zoo entry used for both the config file and the pre-trained weights
panoptic_config_name = "COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml"

cfg = get_cfg()
# start from the model-zoo config of the chosen model
cfg.merge_from_file(model_zoo.get_config_file(panoptic_config_name))
# point the config at the matching pre-trained weights
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(panoptic_config_name)
# score threshold for this model: a prediction must score above 50% to be kept
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# ***Keep the line below only if inference will be made on a CPU:
cfg.MODEL.DEVICE = 'cpu'
# build the predictor from the finished config (inference itself happens in a later cell)
predictor = DefaultPredictor(cfg)

# Make predictions / run inference on images:


In [8]:
# Run the predictor on the decoded image. The result is indexed in the following
# cells by the keys "instances" and "panoptic_seg".
# takes about 10-15 seconds on CPU
outputs = predictor(image)

In [10]:
# Output values this notebook needs to return:
#   - predicted class id of every detected instance
#   - one boolean mask per detected instance
#   - the per-segment info of the panoptic prediction (element [1] of "panoptic_seg",
#     a list of dicts with 'id', 'isthing', 'category_id', ...), which is metadata
#     rather than a mask, hence the label below.
print(">>> Instance segmentation classes prediction:\n", outputs["instances"].pred_classes)
print(">>> Instance segmentation mask prediction:\n", outputs["instances"].pred_masks)
print(">>> Panoptic segmentation segments info:\n", outputs["panoptic_seg"][1])

>>> Instance segmentation classes prediction:
 tensor([ 0,  0,  1,  1, 39])
>>> Instance segmentation mask prediction:
 tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False

# Extract labels (Panoptic Segmentation Inference):

* Get the image labels from the panoptic segmentation prediction

In [39]:
# load classes
# Lookup tables from a `category_id` to a human-readable class name. The cells below
# index `thing_classes` when a segment has isthing == True and `stuff_classes` otherwise.
# NOTE(review): presumably these mirror the class metadata of the loaded panoptic
# model -- confirm against the model's metadata before changing the order.

# 80 "thing" class names; list position == category_id
thing_classes=['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

# 54 "stuff" class names; list position == category_id, and position 0 is the
# placeholder entry 'things'
stuff_classes=['things', 'banner', 'blanket', 'bridge', 'cardboard', 'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit', 'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform', 'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea', 'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone', 'wall-tile', 'wall-wood', 'water', 'window-blind', 'window', 'tree', 'fence', 'ceiling', 'sky', 'cabinet', 'table', 'floor', 'pavement', 'mountain', 'grass', 'dirt', 'paper', 'food', 'building', 'rock', 'wall', 'rug']

In [40]:
# For every panoptic segment, record its class ID (category_id) together with the
# flag telling which class table it belongs to (isthing).
panoptic_segments = outputs["panoptic_seg"][1]

# show the raw segment descriptions first ...
for segment in panoptic_segments:
    print(segment)

# ... then the condensed [category_id, isthing] pairs
classID = [[segment['category_id'], segment['isthing']] for segment in panoptic_segments]

print("")
print(classID)
print("")

{'id': 1, 'isthing': True, 'score': 0.999495267868042, 'category_id': 0, 'instance_id': 0}
{'id': 2, 'isthing': True, 'score': 0.9988834261894226, 'category_id': 0, 'instance_id': 1}
{'id': 3, 'isthing': True, 'score': 0.9974486231803894, 'category_id': 1, 'instance_id': 2}
{'id': 4, 'isthing': True, 'score': 0.9939423203468323, 'category_id': 1, 'instance_id': 3}
{'id': 5, 'isthing': False, 'category_id': 21, 'area': 122946}
{'id': 6, 'isthing': False, 'category_id': 23, 'area': 81396}
{'id': 7, 'isthing': False, 'category_id': 24, 'area': 34879}
{'id': 8, 'isthing': False, 'category_id': 37, 'area': 6769}
{'id': 9, 'isthing': False, 'category_id': 40, 'area': 281030}

[[0, True], [0, True], [1, True], [1, True], [21, False], [23, False], [24, False], [37, False], [40, False]]



In [57]:
# Translate every [category_id, isthing] pair into its class name, keeping "thing"
# and "stuff" labels in separate lists (order of appearance is preserved).
# TODO: Labels might need to be a dictionary to be the corresponding value of an imageID key
labels_things = [thing_classes[category_id] for category_id, is_thing in classID if is_thing]
labels_stuff = [stuff_classes[category_id] for category_id, is_thing in classID if not is_thing]

print(">>>things class:", labels_things)
# the stuff labels may be the only result needed as input for the GAN models
print(">>>stuff class:", labels_stuff)

>>>things class: ['person', 'person', 'bicycle', 'bicycle']
>>>stuff class: ['road', 'sand', 'sea', 'tree', 'sky']


# Save Masks (Instance segmentation):

* Get the masks from the Instance segmentation prediction
* We use these masks because they can overlap, which is what we need as input to the GAN model

In [47]:
# Convert the instance predictions from Torch tensors to NumPy arrays.
# .cpu() comes first because Tensor.numpy() only works on CPU tensors; it changes
# nothing when the model already runs on the CPU, and it keeps this cell working
# when cfg.MODEL.DEVICE is switched to a GPU.
maskClass = outputs["instances"].pred_classes.cpu().numpy()    # class id per instance
masksInferences = outputs["instances"].pred_masks.cpu().numpy()  # one mask per instance

In [48]:
# Copy the predicted class ID of every instance into a plain Python list
maskClassID = list(maskClass)

print(maskClassID)

[0, 0, 1, 1, 39]


In [49]:
# Translate each instance class ID into its class name (instances are always "things").
# TODO: might need to store as dictionaries and then convert to json maybe (depends on database)
maskInference_labels = [thing_classes[class_id] for class_id in maskClassID]

# note: instance segmentation can report more things than the panoptic prediction does
print(maskInference_labels)

['person', 'person', 'bicycle', 'bicycle', 'bottle']


In [45]:
# TODO: Find out how we will store the masks, which are multidimensional numpy arrays (Database???)
# Print every instance mask under the label of the class it was predicted as.
for mask_label, mask in zip(maskInference_labels, masksInferences):
    print("---------------------------")
    print("Label:", mask_label)
    print(mask)
    

---------------------------
Label: person
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
---------------------------
Label: person
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
---------------------------
Label: bicycle
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
---------------------------
Label: bicycle
[[False False False ... False False Fa