# Prerequisites
- We will run Mask R-CNN through Detectron2, loading the pre-trained Mask R-CNN model
- The model predicts bounding boxes, classes and instance masks for many object classes; we keep only the detections of class "person" (class index 0)
- We will then replace the pixels under each predicted mask (`pred_masks`) with random gray noise

In [None]:
# Install the pinned PyYAML release first, before Detectron2's own requirements are installed below.
!python -m pip install pyyaml==5.1
# NOTE(review): distutils is no longer part of the standard library on Python 3.12+ - confirm the runtime's Python version.
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities.
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone https://github.com/facebookresearch/detectron2
# Execute the cloned setup.py through distutils to obtain its distribution object;
# only its `install_requires` list is used below.
dist = distutils.core.run_setup("./detectron2/setup.py")
# Install every listed requirement; each spec is wrapped in single quotes before being passed to the shell.
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Put the cloned source tree at the front of the import path so `import detectron2` resolves to it.
sys.path.insert(0, os.path.abspath('./detectron2'))

# Properly install detectron2. (Please do not install twice in both ways)
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

In [None]:
from google.colab.patches import cv2_imshow
import cv2

!wget https://i.ibb.co/d6msyFW/input.jpg -q -O input.jpg
im = cv2.imread("./input.jpg")
cv2_imshow(im)

In [3]:
import random

import numpy as np
from PIL import Image
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
import cv2
import time

# Gray out persons for images
- Configure and load the Detectron2 Mask R-CNN model
- Mask the segmented predictions with gray noise
- Save the resulting masked image as masked_image.jpg

In [None]:
# Build the Detectron2 configuration and predictor for Mask R-CNN.
# The same model-zoo entry names both the config file and the checkpoint.
MASK_RCNN_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MASK_RCNN_CONFIG))
# Weights are taken from the checkpoint URL of the same model-zoo entry
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MASK_RCNN_CONFIG)
# Set threshold for the object detection
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
predictor = DefaultPredictor(cfg)

def get_frame_prediction(image, target_class=0):
    """Return a copy of `image` with every detected person replaced by gray noise.

    Runs the module-level `predictor` on `image`, keeps the instances whose
    predicted class equals `target_class`, and overwrites the pixels under
    their masks with random noise that has the same value in every channel.

    Args:
        image: H x W x C array handed to `predictor` unchanged.
            NOTE(review): Detectron2's DefaultPredictor reads the channel
            order named by cfg.INPUT.FORMAT (BGR unless overridden) - confirm
            that callers pass images in that order.
        target_class: Class index whose instances are hidden. The default 0
            is the index this notebook uses for "person".

    Returns:
        A new array with the same shape and dtype as `image`; the input array
        is not modified.
    """
    # Move the predictions to the CPU once instead of once per detected person.
    instances = predictor(image)["instances"].to("cpu")

    # Select the instances of the requested class and merge their masks into a
    # single H x W boolean mask (all False when nothing was detected).
    selected = instances.pred_classes == target_class
    hidden = instances.pred_masks[selected].numpy().astype(bool).any(axis=0)

    # One random gray level per hidden pixel; the trailing axis of length 1
    # broadcasts that level to every channel.
    gray = np.random.randint(0, 256, (int(hidden.sum()), 1), dtype=np.uint8)

    masked = image.copy()
    masked[hidden] = gray
    return masked

In [14]:
# Load the image with OpenCV so it is in BGR channel order: that is the order
# DefaultPredictor reads by default and the order cv2_imshow expects.
image = cv2.imread("./input.jpg")
if image is None:
    # cv2.imread returns None instead of raising on a missing/undecodable file
    raise RuntimeError("Could not read ./input.jpg - run the image download cell first.")

# Run detectron2 on the image once, so the image shown below is exactly the
# one that gets saved (the noise is random, a second run would differ)
masked_bgr = get_frame_prediction(image)

# Show the resulting image
cv2_imshow(masked_bgr)

# Save the resulting masked image; PIL expects RGB, so convert from BGR first
masked_image = Image.fromarray(cv2.cvtColor(masked_bgr, cv2.COLOR_BGR2RGB))
masked_image.save('masked_image.jpg')

# Video input for the model
- Downloads the ForBiggerEscapes.mp4 sample video (its URL comes from a list of free sample videos published as a GitHub gist)
- Applies our algorithm and saves the results as masked_video.mp4

In [None]:
## Thanks to sample free video urls on github https://gist.github.com/deepakpk009/99fd994da714996b296f11c3c371d5ee
!wget http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerEscapes.mp4

--2023-05-06 11:18:05--  http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerEscapes.mp4
Resolving commondatastorage.googleapis.com (commondatastorage.googleapis.com)... 74.125.142.128, 2607:f8b0:400e:c08::80
Connecting to commondatastorage.googleapis.com (commondatastorage.googleapis.com)|74.125.142.128|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2299653 (2.2M) [video/mp4]
Saving to: ‘ForBiggerEscapes.mp4.1’


2023-05-06 11:18:05 (121 MB/s) - ‘ForBiggerEscapes.mp4.1’ saved [2299653/2299653]



In [18]:
# NOTE: this cell reuses `predictor` and `get_frame_prediction` from the image
# section instead of loading the model a second time and duplicating the
# masking code, so the cell that defines them must have been run first.

# Load the video that the download cell fetched
VIDEO_PATH = "ForBiggerEscapes.mp4"
video = cv2.VideoCapture(VIDEO_PATH)
if not video.isOpened():
    # VideoCapture does not raise on a missing or unreadable file; without this
    # check the loop below would end immediately and write an empty video.
    raise FileNotFoundError(f"Could not open video file: {VIDEO_PATH}")

# Get the codec and frame size of the video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Keep the frame rate as reported: truncating e.g. 29.97 to 29 would change
# the duration of the output video
fps = video.get(cv2.CAP_PROP_FPS)
frame_size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))

# Create a writer object to save the masked video
out = cv2.VideoWriter("masked_video.mp4", fourcc, fps, frame_size)

# Define number of frames for progress estimation
max_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
length = 0

# Define the time when the code started running
start_time = time.time()

try:
    while True:
        # Read a frame from the video (OpenCV returns frames in BGR order)
        ret, frame = video.read()

        if not ret:
            # Break out of the loop if no more frames left
            break

        # Replace every detected person with gray noise and append the frame
        # to the output video file
        out.write(get_frame_prediction(frame))
        length = length + 1

        print(f"\rProgress: {length}/" + str(max_frames), end="", flush=True)

    # end the timer
    end_time = time.time()
finally:
    # Release the video capture and writer objects even if a frame fails, so
    # the output file is finalized and the input handle is closed
    video.release()
    out.release()

# calculate the time taken
elapsed_time = end_time - start_time
print()
print(f"The code took {elapsed_time:.3f} seconds to run.")

Progress: 360/360
The code took 62.507 seconds to run.
