from torchvision import models
import torchvision.transforms as T  # transformations applied before inference
from PIL import Image
import matplotlib.pyplot as plt
import torch
import numpy as np
import cv2
# Define the helper function: colorize the label map, then composite the
# segmented foreground over a white background using an alpha matte
def decode_segmap(image, source, nc=21):
    label_colors = np.array([(0, 0, 0),  # 0=background
                             # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
                             (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
                             # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
                             (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
                             # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
                             (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
                             # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
                             (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    for l in range(nc):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]
    rgb = np.stack([r, g, b], axis=2)

    # Load the foreground input image
    foreground = cv2.imread(source)
    # Convert the foreground image to RGB and resize it
    # to match the shape of the R-band in the RGB output map
    foreground = cv2.cvtColor(foreground, cv2.COLOR_BGR2RGB)
    foreground = cv2.resize(foreground, (r.shape[1], r.shape[0]))

    # Create a background array of white pixels
    # with the same size as the RGB output map
    background = 255 * np.ones_like(rgb).astype(np.uint8)

    # Convert uint8 to float
    foreground = foreground.astype(float)
    background = background.astype(float)

    # Create a binary mask of the RGB output map using the threshold value 0
    _, alpha = cv2.threshold(rgb, 0, 255, cv2.THRESH_BINARY)

    # Apply a slight blur to the mask to soften edges
    alpha = cv2.GaussianBlur(alpha, (7, 7), 0)

    # Normalize the alpha mask to keep intensity between 0 and 1
    alpha = alpha.astype(float) / 255

    # Multiply the foreground with the alpha matte
    foreground = cv2.multiply(alpha, foreground)

    # Multiply the background with (1 - alpha)
    background = cv2.multiply(1.0 - alpha, background)

    # Add the masked foreground and background
    out_image = cv2.add(foreground, background)

    # Return a normalized output image for display
    return out_image / 255
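
# A hedged variant sketch, not part of the original script: the same alpha
# blend against an arbitrary background photo instead of flat white.
# 'blend_with_background' and 'bg_path' are hypothetical names; it expects the
# float foreground and the normalized [0, 1] alpha computed as in decode_segmap.
def blend_with_background(foreground, alpha, bg_path):
    # Load the replacement background and match the foreground's size and RGB order
    background = cv2.imread(bg_path)
    background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
    background = cv2.resize(background, (foreground.shape[1], foreground.shape[0]))
    background = background.astype(float)
    # Weighted sum: alpha keeps the segmented subject, (1 - alpha) admits the new scene
    return cv2.add(cv2.multiply(alpha, foreground),
                   cv2.multiply(1.0 - alpha, background)) / 255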
def segment(net, path, show_orig=True, dev='cuda' if torch.cuda.is_available() else 'cpu'):
    # Force 3-channel RGB so RGBA or grayscale inputs do not break the transforms
    img = Image.open(path).convert('RGB')
    if show_orig:
        plt.imshow(img)
        plt.axis('off')
        plt.show()
    # Comment out the Resize and CenterCrop for better inference results
    trf = T.Compose([T.Resize(450),
                     # T.CenterCrop(224),
                     T.ToTensor(),
                     T.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])
    inp = trf(img).unsqueeze(0).to(dev)
    # Run inference without tracking gradients
    with torch.no_grad():
        out = net.to(dev)(inp)['out']
    om = torch.argmax(out.squeeze(), dim=0).cpu().numpy()
    rgb = decode_segmap(om, path)
    plt.imshow(rgb)
    plt.axis('off')
    plt.show()
    # Return the composite so callers can save or reuse it
    return rgb
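
# A hedged sketch, not in the original script: isolate a single VOC class from
# the raw label map instead of colorizing every class. 'class_mask' is a
# hypothetical helper; 15 is the VOC label for 'person'.
def class_mask(label_map, cls=15):
    # 255 where the pixel was predicted as the requested class, 0 elsewhere
    return (label_map == cls).astype(np.uint8) * 255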
dlab = models.segmentation.deeplabv3_resnet101(pretrained=True).eval()
segment(dlab, 'img_test/1.jpg', show_orig=True)
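
# A minimal follow-on sketch; 'out.png' and the use of segment()'s return value
# are assumptions, not part of the original script. The composite comes back
# normalized to [0, 1], so rescale to 8-bit and convert RGB back to OpenCV's
# BGR channel order before writing.
result = segment(dlab, 'img_test/1.jpg', show_orig=False)
cv2.imwrite('out.png', cv2.cvtColor((result * 255).astype(np.uint8), cv2.COLOR_RGB2BGR))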