<a href="https://colab.research.google.com/github/komazawa-deep-learning/komazawa-deep-learning.github.io/blob/master/2021notebooks/2021_1126semantic_segmentation_pytorch_deeplabv3_resnet50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image

import torch
import torchvision
import torchvision.transforms as transforms
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Download the DeepLabV3 model together with its pretrained weights.
model = torchvision.models.segmentation.deeplabv3_resnet50(
    pretrained=True,
)

In [None]:
# Colour map used to paint the semantic-segmentation result.
# The position of each (R, G, B) tuple in the list is the class index
# that is drawn with that colour.
label_map = [
    (0, 0, 0),        #  0: background
    (128, 0, 0),      #  1: aeroplane
    (0, 128, 0),      #  2: bicycle
    (128, 128, 0),    #  3: bird
    (0, 0, 128),      #  4: boat
    (128, 0, 128),    #  5: bottle
    (0, 128, 128),    #  6: bus
    (128, 128, 128),  #  7: car
    (64, 0, 0),       #  8: cat
    (192, 0, 0),      #  9: chair
    (64, 128, 0),     # 10: cow
    (192, 128, 0),    # 11: dining table
    (64, 0, 128),     # 12: dog
    (192, 0, 128),    # 13: horse
    (64, 128, 128),   # 14: motorbike
    (192, 128, 128),  # 15: person
    (0, 64, 0),       # 16: potted plant
    (128, 64, 0),     # 17: sheep
    (0, 192, 0),      # 18: sofa
    (128, 192, 0),    # 19: train
    (0, 64, 128),     # 20: tv/monitor
]


In [None]:
# Preprocessing pipeline: turn the image into a tensor, then subtract the
# per-channel mean and divide by the per-channel standard deviation.
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose([_to_tensor, _normalize])


def get_segment_labels(image, model, device):
    """Run the segmentation model on a single image and return its raw output.

    Args:
        image: Image accepted by the module-level ``transform`` pipeline
            (a PIL image in this notebook). Inputs exposing a non-RGB
            ``mode`` string (e.g. RGBA, L, P) are converted to RGB first.
        model: Callable network. The caller is responsible for putting it in
            eval mode and moving it to ``device`` beforehand.
        device: torch device on which the input batch is placed.

    Returns:
        Whatever ``model`` returns for the one-image batch, unmodified.
        The forward pass runs without gradient tracking.
    """
    # The normalisation step uses three per-channel constants, so anything
    # that is not already 3-channel RGB must be converted before `transform`.
    mode = getattr(image, 'mode', None)
    if isinstance(mode, str) and mode != 'RGB':
        image = image.convert('RGB')

    # Add a leading batch dimension: the model is called with a batch of one.
    batch = transform(image).to(device).unsqueeze(0)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        return model(batch)

def draw_segmentation_map(outputs, palette=None):
    """Convert per-class scores into an RGB image coloured by predicted class.

    Args:
        outputs: torch tensor of class scores shaped (C, H, W), or
            (1, C, H, W) for a batch holding a single image.
        palette: Optional sequence of (R, G, B) colours indexed by class
            number. Defaults to the module-level ``label_map``.

    Returns:
        A uint8 NumPy array of shape (H, W, 3) in RGB order. Pixels whose
        predicted class has no palette entry are left black.

    Raises:
        ValueError: If ``outputs`` is neither (C, H, W) nor (1, C, H, W).
    """
    if palette is None:
        palette = label_map
    # Build the lookup table once instead of once per class and channel.
    palette = np.asarray(palette, dtype=np.uint8).reshape(-1, 3)

    # Drop only the batch axis. A bare squeeze() would also remove a height
    # or width of 1 and break the (H, W) layout of the result.
    if outputs.dim() == 4 and outputs.shape[0] == 1:
        outputs = outputs[0]
    elif outputs.dim() != 3:
        raise ValueError(
            'outputs must have shape (C, H, W) or (1, C, H, W), got '
            + str(tuple(outputs.shape))
        )

    # The highest-scoring class for every pixel.
    labels = torch.argmax(outputs, dim=0).detach().cpu().numpy()

    # Start from black and colour every pixel whose class is in the palette.
    segmentation_map = np.zeros(labels.shape + (3,), dtype=np.uint8)
    known = labels < len(palette)
    segmentation_map[known] = palette[labels[known]]
    return segmentation_map

def image_overlay(image, segmented_image, alpha=1, beta=0.8, gamma=0):
    """Blend a segmentation colour map on top of the original image.

    Args:
        image: Original picture in RGB order; a PIL image or anything
            ``np.array`` can turn into an (H, W, 3) array. Inputs exposing a
            non-RGB ``mode`` string (e.g. RGBA, L, P) are converted to RGB.
        segmented_image: RGB colour map with the same height and width
            as ``image``.
        alpha: Weight applied to the original image.
        beta: Weight applied to the segmentation map.
        gamma: Scalar added to each weighted sum.

    Returns:
        The blended image as a NumPy array in BGR channel order (OpenCV's
        convention). Convert it with ``cv2.COLOR_BGR2RGB`` before passing it
        to an RGB consumer such as ``plt.imshow``.
    """
    # The RGB->BGR conversion below needs exactly three channels.
    mode = getattr(image, 'mode', None)
    if isinstance(mode, str) and mode != 'RGB':
        image = image.convert('RGB')

    overlay_bgr = cv2.cvtColor(segmented_image, cv2.COLOR_RGB2BGR)
    image_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    # result = image_bgr * alpha + overlay_bgr * beta + gamma
    return cv2.addWeighted(image_bgr, alpha, overlay_bgr, beta, gamma)



In [None]:
from google.colab import files

# Upload an image file from your own PC.
files.upload()

img_filename = input('アップロードしたファイル名を入力してください: ')
img = plt.imread(img_filename)

# Preview the uploaded picture without axis ticks or frame.
fig, ax = plt.subplots(figsize=(8, 6))
ax.axis('off')
ax.imshow(img)

In [None]:
# Switch the model to evaluation mode and move it to the selected device.
model.eval().to(device)

# Force 3-channel RGB: uploaded files may be RGBA, greyscale or palette
# images, which the 3-channel normalisation and the RGB->BGR conversion
# downstream cannot handle.
image = Image.open(img_filename).convert('RGB')
outputs = get_segment_labels(image, model, device)

# The per-class score map is stored under the `out` key of the model output.
outputs = outputs['out']
segmented_image = draw_segmentation_map(outputs)
image_segmented = image_overlay(image, segmented_image)

# `matplotlib` itself is never imported in this notebook, so go through
# `plt.rcParams`, which is the same settings object.
plt.rcParams['figure.figsize'] = 12, 9
# image_overlay returns BGR (OpenCV order) while imshow expects RGB;
# without this conversion the red and blue channels are swapped on screen.
plt.imshow(cv2.cvtColor(image_segmented, cv2.COLOR_BGR2RGB))
plt.show()

# To save the overlay, uncomment the two lines below. cv2.imwrite expects
# BGR, which is exactly what image_overlay returns.
#save_filename = 'img_semantic_segmented.jpg'
#cv2.imwrite(save_filename, image_segmented)