# Silhouette

This notebook shows how to extract the silhouette of a person from a video using Detectron2's Mask R-CNN instance segmentation: first on a single frame, then on every frame of the video.

In [None]:
from respiration.dataset import VitalCamSet

# Recording to process, identified by its subject and scenario names
subject, scenario = 'Proband16', '101_natural_lighting'

# Dataset handle used to load the video of that recording
dataset = VitalCamSet()

In [None]:
# Load the frames of the selected recording together with its metadata
# (presumably RGB frames, judging by the loader's name -- TODO confirm).
frames, meta = dataset.get_video_rgb(subject, scenario, show_progress=True)

In [None]:
# Show the first frame as a quick visual check of the loaded video
import matplotlib.pyplot as plt

plt.imshow(frames[0])
plt.show()

In [None]:
# Use the first frame as the sample image for the single-frame steps below
im = frames[0]

In [None]:
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

# Start from a fresh detectron2 config object
cfg = get_cfg()
# Model-zoo config to use. The commented-out line is an alternative Mask R-CNN
# config that can be swapped in instead:
# model = "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
model = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
config_path = model_zoo.get_config_file(model)

# Merge the model's config file first; the assignments below then override
# individual settings on top of it.
cfg.merge_from_file(config_path)
# Use the checkpoint that the model zoo provides for the selected config
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # score threshold used for this model at test time

In [None]:
# Build the predictor from the configuration assembled above
predictor = DefaultPredictor(cfg)
# `DefaultPredictor` expects its input image in BGR channel order. The frames
# come from `get_video_rgb` and are displayed unmodified with matplotlib, so
# they are assumed to be RGB (confirm against the dataset loader); reverse the
# channel axis before running inference.
outputs = predictor(im[:, :, ::-1])

In [None]:
# Display the raw prediction result of the first frame
outputs

In [None]:
# Pull the per-instance masks and class ids out of the prediction result
prediction_masks, prediction_classes = (
    outputs["instances"].pred_masks,
    outputs["instances"].pred_classes,
)

# Show the shapes of both tensors as the cell output
(prediction_masks.shape, prediction_classes.shape)

In [None]:
# We can use `Visualizer` to draw the predictions on the image.
# `Visualizer` expects an RGB image and `get_image()` returns RGB as well. The
# frames are assumed to already be RGB (they come from `get_video_rgb` and are
# shown unmodified with matplotlib -- confirm against the dataset loader), so
# no channel flip is needed on the way in or out. Flipping, as done for BGR
# images read with OpenCV, would draw the overlay with swapped colours.
v = Visualizer(im, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
# The predictions are moved to the CPU before they are drawn
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

plt.imshow(out.get_image())
plt.show()

In [None]:
# Get the mask of the person
# Look up the class id of "person" in the metadata of the config's training dataset
person_class = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes.index("person")

# Keep only the instances classified as "person". Fail with a clear message
# instead of a bare IndexError when the model did not detect one.
person_masks = prediction_masks[prediction_classes == person_class]
if len(person_masks) == 0:
    raise RuntimeError("No person was detected in the first frame")

# Use the first person instance and convert its mask to a NumPy array on the CPU
person_mask = person_masks[0].cpu().numpy()

In [None]:
# Display just the person: the mask gets a trailing axis so that it broadcasts
# over the colour channels, which zeroes (blackens) every pixel outside of it.
plt.imshow(im * person_mask[..., None])
plt.show()

## Extract the silhouette from the video

In [None]:
import numpy as np
from tqdm.auto import tqdm

# Run prediction on all frames.
# `masks` receives exactly one person mask per frame so that it stays
# index-aligned with `frames`.
masks = []
# Indices of the frames in which no person was detected
frames_without_person = []

for frame_idx, frame in enumerate(tqdm(frames)):
    # `DefaultPredictor` expects BGR input; the frames are assumed to be RGB
    # (loaded via `get_video_rgb` -- confirm), hence the channel reversal.
    outputs = predictor(frame[:, :, ::-1])
    prediction_masks = outputs["instances"].pred_masks
    prediction_classes = outputs["instances"].pred_classes
    person_masks = prediction_masks[prediction_classes == person_class]

    if len(person_masks) > 0:
        # Use the first person instance of this frame
        person_mask = person_masks[0].cpu().numpy()
    else:
        # A single frame without a detection must not abort the whole run with
        # an IndexError: store an empty mask (fully black frame) and remember
        # the frame index so it can be reported afterwards.
        person_mask = np.zeros(frame.shape[:2], dtype=bool)
        frames_without_person.append(frame_idx)

    masks.append(person_mask)

if frames_without_person:
    print(f"No person detected in {len(frames_without_person)} frame(s): {frames_without_person}")

In [None]:
# Create a video with the silhouette
import cv2
import numpy as np

fourcc = cv2.VideoWriter_fourcc(*'XVID')
# `VideoWriter` takes the frame size as (width, height)
out = cv2.VideoWriter('silhouette.avi', fourcc, meta.fps, (im.shape[1], im.shape[0]))
# `VideoWriter` does not raise when the file or codec cannot be opened, so
# check explicitly instead of silently writing nothing.
if not out.isOpened():
    raise RuntimeError("Could not open 'silhouette.avi' for writing")

try:
    for frame, mask in tqdm(zip(frames, masks), total=len(masks)):
        # Black out everything outside the person mask; the mask gets a
        # trailing axis so that it broadcasts over the colour channels.
        silhouette = np.uint8(frame * mask[:, :, None])
        # OpenCV writes frames in BGR order, while the frames are assumed to be
        # RGB (loaded via `get_video_rgb` -- confirm); convert so the colours
        # in the video are not swapped.
        out.write(cv2.cvtColor(silhouette, cv2.COLOR_RGB2BGR))
finally:
    # Always release the writer so the file is finalised even if a frame fails
    out.release()