# transformers: Semantic segmentation

In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import requests
from PIL import Image
from transformers import (
    pipeline,
    AutoImageProcessor,
    AutoModelForSemanticSegmentation
)

## Load image

In [None]:
# load image
url = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/segmentation_input.jpg'

# Use a timeout so the cell cannot hang forever, and fail loudly on HTTP
# errors so an error page is never handed to PIL as if it were image data.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()

# let urllib3 undo any gzip/deflate content encoding before PIL reads the bytes
response.raw.decode_content = True

image = Image.open(response.raw)

In [None]:
# show image: one 6x4 inch figure holding a single axes
fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(1, 1, 1)

# draw the PIL image as an array and keep the pixels square
ax.imshow(np.asarray(image))
ax.set_aspect('equal', adjustable='box')

fig.tight_layout()

## Load model

In [None]:
# set device: first CUDA GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [None]:
# set model name
model_name = 'nvidia/segformer-b1-finetuned-cityscapes-1024-1024'

# create pre- and post-processor
processor = AutoImageProcessor.from_pretrained(model_name)

# load model and place it explicitly on the device chosen in the "set device"
# cell; this keeps the placement predictable and avoids the extra `accelerate`
# dependency that `device_map='auto'` requires
model = AutoModelForSemanticSegmentation.from_pretrained(model_name)
model = model.to(device)

# inference only: switch off dropout / batch-norm updates
model = model.eval()

In [None]:
# load pipeline (preprocessor, model and postprocessor)
# `device` is the torch.device chosen in the "set device" cell, so the pipeline
# runs on the same hardware as the standalone model
pipe = pipeline('image-segmentation', model=model_name, device=device)

## Run model

In [None]:
# preprocess images
preprocessed_images = processor([image], return_tensors='pt')

# the processor returns CPU tensors; move the batch to wherever the model lives
# so the forward pass does not fail with a device mismatch on GPU machines
x = preprocessed_images['pixel_values'].to(model.device)

# run model (no gradients are needed for inference)
with torch.no_grad():
    outputs = model(x)

logits = outputs.logits

# postprocess outputs: one label map per image, resized to the original image size
segmentations = processor.post_process_semantic_segmentation(
    outputs,
    target_sizes=[(image.height, image.width)]
)

# bring the label maps back to the CPU so later cells can call `.numpy()` on them
segmentations = [segmentation.cpu() for segmentation in segmentations]

print(f'Images shape: {x.shape}')
print(f'Logits shape: {logits.shape}')

In [None]:
# pick the class to visualise
target_label = 'car'

# look up the id the model config assigns to that label, then flag every
# position of the first segmentation map that carries this id (1 = match, 0 = other)
target_idx = model.config.label2id[target_label]
target_mask = torch.eq(segmentations[0], target_idx).to(torch.int32)

In [None]:
# show predictions: one 6x4 inch figure holding a single axes
fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(1, 1, 1)

# draw the mask tensor as an array and keep the pixels square
ax.imshow(target_mask.numpy())
ax.set_aspect('equal', adjustable='box')

ax.set_title(f'Predictions: {target_label}')
fig.tight_layout()

## Run pipeline

In [None]:
# run pipeline
# The pipeline takes the PIL image directly. The following cells read the result
# as a sequence of dicts, using each entry's 'label' and 'mask' keys.
results = pipe(image)

# dump the raw result structure for inspection
print(results)

In [None]:
# get specific predictions
# collect the label of every entry returned by the pipeline, in result order
pipe_labels = [d['label'] for d in results]

# fail with an actionable message instead of the bare "'car' is not in list"
# that `list.index` raises when the requested label is missing from the results
if target_label not in pipe_labels:
    raise ValueError(
        f'Label {target_label!r} not found in pipeline results; '
        f'available labels: {pipe_labels}'
    )

# NOTE: here `target_idx` is a position in `results`, not a class id from the model config
target_idx = pipe_labels.index(target_label)
target_mask = results[target_idx]['mask'] # PIL image

In [None]:
# show predictions: one 6x4 inch figure holding a single axes
fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(1, 1, 1)

# draw the pipeline mask (a PIL image) as an array and keep the pixels square
ax.imshow(np.asarray(target_mask))
ax.set_aspect('equal', adjustable='box')

ax.set_title(f'Predictions: {target_label}')
fig.tight_layout()