# Install detectron2

In [3]:
# Install dependencies: pin PyYAML to 5.1, then report the runtime's
# PyTorch version, whether CUDA is available, and the gcc version.
!pip install pyyaml==5.1
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab

Collecting pyyaml==5.1
[?25l  Downloading https://files.pythonhosted.org/packages/9f/2c/9417b5c774792634834e730932745bc09a7d36754ca00acf1ccd1ac2594d/PyYAML-5.1.tar.gz (274kB)
[K     |█▏                              | 10kB 22.4MB/s eta 0:00:01[K     |██▍                             | 20kB 29.7MB/s eta 0:00:01[K     |███▋                            | 30kB 22.5MB/s eta 0:00:01[K     |████▉                           | 40kB 26.4MB/s eta 0:00:01[K     |██████                          | 51kB 24.6MB/s eta 0:00:01[K     |███████▏                        | 61kB 27.5MB/s eta 0:00:01[K     |████████▍                       | 71kB 17.7MB/s eta 0:00:01[K     |█████████▋                      | 81kB 18.8MB/s eta 0:00:01[K     |██████████▊                     | 92kB 17.5MB/s eta 0:00:01[K     |████████████                    | 102kB 17.5MB/s eta 0:00:01[K     |█████████████▏                  | 112kB 17.5MB/s eta 0:00:01[K     |██████████████▍                 | 122kB 17.5MB/s eta 

In [4]:
# Install detectron2 from the prebuilt wheel index for CUDA 10.1 + torch 1.7
# (the combination Colab provided when this notebook was written).
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
import torch
# Fail fast when the runtime's torch is not 1.7.x: the wheel index below is
# specific to that torch version.
assert torch.__version__.startswith("1.7")
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/index.html
# exit(0)  # After installation, you need to "restart runtime" in Colab. This line can also restart runtime

Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/index.html
Collecting detectron2
[?25l  Downloading https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/detectron2-0.3%2Bcu101-cp36-cp36m-linux_x86_64.whl (6.8MB)
[K     |████████████████████████████████| 6.8MB 611kB/s 
Collecting Pillow>=7.1
[?25l  Downloading https://files.pythonhosted.org/packages/5f/19/d4c25111d36163698396f93c363114cf1cddbacb24744f6612f25b6aa3d0/Pillow-8.0.1-cp36-cp36m-manylinux1_x86_64.whl (2.2MB)
[K     |████████████████████████████████| 2.2MB 16.3MB/s 
Collecting yacs>=0.1.6
  Downloading https://files.pythonhosted.org/packages/38/4f/fe9a4d472aa867878ce3bb7efb16654c5d63672b86dc0e6e953a67018433/yacs-0.1.8-py3-none-any.whl
Collecting fvcore>=0.1.2
  Downloading https://files.pythonhosted.org/packages/56/79/88b76017e62a96e303617109a11817c8ce41a80b2bd25ce2130f69b72fdb/fvcore-0.1.2.post20201204.tar.gz
Collecting portalocker
  Downloading https://files.pythonhosted.org/pa

In [5]:
# Mount Google Drive at /content/gdrive; the model weights and the test
# videos used further down are read from (and written to) paths under it.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Set up model and load weights

In [6]:
# import some common detectron2 utilities
import detectron2
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg

In [7]:
# Start from detectron2's default configuration.
cfg = get_cfg()
# Initialize from detectron2: merge in the model-zoo config file
# COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml.
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml"))
# Load weights from best model path (a checkpoint stored on the mounted
# Google Drive, so the Drive mount cell must have run first).
cfg.MODEL.WEIGHTS = "/content/gdrive/My Drive/APS360 Project/code/outputs/fasterrcnn_model_final.pth"

In [16]:
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2  # 2 class labels (face_with_mask, face_no_mask; see the metadata cell)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.8   # set a custom testing threshold
# Build the predictor from the finished config; runOnVideo calls this
# notebook-level object once per sampled frame.
predictor = DefaultPredictor(cfg)

# Set up metadata

In [10]:
from detectron2.data import Metadata
# Metadata handed to the visualizer: the class names plus one fixed colour per
# class, index-aligned with thing_classes ([0, 255, 0] for face_with_mask,
# [255, 0, 0] for face_no_mask; green / red when read as RGB).
metadata = Metadata(evaluator_type='coco', name='video_test', thing_classes=['face_with_mask', 'face_no_mask'],
                thing_colors=[[0, 255, 0], [255, 0, 0]])


# Run mask detection on recorded video

In [12]:
from detectron2.utils.video_visualizer import _DetectedInstance, VideoVisualizer
from detectron2.utils.visualizer import (ColorMode, Visualizer, 
                                         _create_text_labels, _PanopticPrediction)

class CustomVideoVisualizer(VideoVisualizer):
    """VideoVisualizer variant that draws every detection in its class's fixed colour.

    The colours are taken from ``metadata.thing_colors``, indexed by predicted
    class id, and each channel is divided by 255 before being handed to the
    drawing code.
    """

    def __init__(self, metadata):
        # The base class is always constructed in ColorMode.IMAGE; the fixed
        # per-class colours are injected in new_draw_instance_predictions.
        super().__init__(metadata, instance_mode=ColorMode.IMAGE)
        # thing_colors is indexed by class id while drawing, so it must be set.
        assert metadata.thing_colors

    def new_draw_instance_predictions(self, frame, predictions):
        """
        Render instance-level predictions on top of a single frame.

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            predictions (Instances): the output of an instance detection/segmentation
                model. The fields read are "pred_boxes", "pred_classes", "scores",
                "pred_masks" and "pred_keypoints". "pred_boxes" and "pred_classes"
                are indexed for every instance, so they must be present whenever
                there is at least one prediction, and their tensors must already
                be convertible with ``.numpy()``.

        Returns:
            output (VisImage): image object with visualizations.
        """
        canvas = Visualizer(frame, self.metadata)
        instance_count = len(predictions)
        if not instance_count:
            # Nothing detected: hand back the untouched frame.
            return canvas.output

        # Pull out whichever optional fields this prediction carries.
        boxes = scores = classes = keypoints = masks = None
        if predictions.has("pred_boxes"):
            boxes = predictions.pred_boxes.tensor.numpy()
        if predictions.has("scores"):
            scores = predictions.scores
        if predictions.has("pred_classes"):
            classes = predictions.pred_classes.numpy()
        if predictions.has("pred_keypoints"):
            keypoints = predictions.pred_keypoints
        if predictions.has("pred_masks"):
            # Masks are only drawn; they are not RLE-encoded for mask IoU here.
            masks = predictions.pred_masks

        # Seed every instance with its class colour, scaled from 0-255 to 0-1.
        palette = self.metadata.thing_colors
        detected = []
        for idx in range(instance_count):
            class_id = classes[idx]
            box = boxes[idx]
            unit_rgb = [channel / 255 for channel in palette[class_id]]
            detected.append(
                _DetectedInstance(class_id, box, mask_rle=None, ttl=8, color=unit_rgb)
            )
        colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None))

        alpha = 1.0  # Changed from 0.5
        if self._instance_mode == ColorMode.IMAGE_BW:
            # any() returns uint8 tensor
            foreground = (masks.any(dim=0) > 0).numpy() if masks is not None else None
            canvas.output.img = canvas._create_grayscale_image(foreground)
            alpha = 0.3

        # boxes are a bit distracting, so they are only drawn when there are no masks
        boxes_to_draw = boxes if masks is None else None
        canvas.overlay_instances(
            boxes=boxes_to_draw,
            masks=masks,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )

        return canvas.output
    

In [21]:
import cv2
import time

def runOnVideo(video_path, save_path, fps=30, max_frames=None):
    """Run the mask detector over a video and save an annotated copy.

    Reads ``video_path`` frame by frame, runs the notebook-level ``predictor``
    on every sampled frame, draws the detections with ``CustomVideoVisualizer``
    (built from the notebook-level ``metadata``) and writes the annotated
    frames to ``save_path`` with the ``mp4v`` codec. The average time spent in
    ``predictor`` per processed frame is printed at the end.

    Args:
        video_path (str): path of the input video.
        save_path (str): path of the annotated output video.
        fps (float): target output frame rate. It is capped at the source
            frame rate, and every ``round(source_fps / fps)``-th frame is
            kept. The file is written at ``source_fps / stride`` so that
            playback stays real-time even when the ratio is not an integer.
        max_frames (int, optional): stop after this many frames have been
            read from the input (skipped frames count too). ``None`` or ``0``
            processes the whole video.

    Returns:
        None. The result is the video file written to ``save_path``.

    Raises:
        ValueError: if ``fps`` is not a positive number.
        IOError: if the input video cannot be opened or the output writer
            cannot be created.
    """
    if fps is None or fps <= 0:
        raise ValueError("fps must be a positive number, got {!r}".format(fps))

    video = cv2.VideoCapture(video_path)
    video_writer = None
    inference_time, count = 0.0, 0
    try:
        if not video.isOpened():
            raise IOError("Could not open input video: {}".format(video_path))

        # Get video properties
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        video_fps = video.get(cv2.CAP_PROP_FPS)

        if video_fps and video_fps > 0:
            # Never sample faster than the source; keep every `stride`-th frame.
            stride = max(1, round(video_fps / min(video_fps, fps)))
            # Write at the rate the kept frames really represent, otherwise the
            # output plays too fast or too slow for non-integer ratios.
            output_fps = video_fps / stride
        else:
            # Source frame rate unknown: keep every frame at the requested rate.
            stride = 1
            output_fps = float(fps)

        # Initialize video writer
        video_writer = cv2.VideoWriter(save_path, fourcc=cv2.VideoWriter_fourcc(*"mp4v"),
                                       fps=float(output_fps), frameSize=(width, height), isColor=True)
        if not video_writer.isOpened():
            raise IOError("Could not open output video for writing: {}".format(save_path))

        # Initialize visualizer
        v = CustomVideoVisualizer(metadata)

        frames_read = 0
        # The loop ends at end-of-stream or once max_frames frames were read;
        # the container's reported frame count is deliberately not relied on.
        while not max_frames or frames_read < max_frames:
            has_frame, frame = video.read()
            if not has_frame:
                break

            if frames_read % stride == 0:
                # Time only the predictor call.
                # NOTE(review): if the predictor runs on a GPU, some work may
                # still be in flight when it returns; confirm whether an
                # explicit synchronisation is wanted for exact timings.
                start = time.perf_counter()
                outputs = predictor(frame)
                inference_time += time.perf_counter() - start
                count += 1

                # OpenCV decodes to BGR; the visualizer expects RGB.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Draw the predictions using the video visualizer
                visualization = v.new_draw_instance_predictions(rgb_frame, outputs["instances"].to("cpu"))

                # Convert the visualizer's RGB image back to OpenCV BGR and write it
                video_writer.write(cv2.cvtColor(visualization.get_image(), cv2.COLOR_RGB2BGR))

            frames_read += 1
    finally:
        # Release resources even when inference or I/O fails part-way through.
        video.release()
        if video_writer is not None:
            video_writer.release()

    # Print average inference time (guarding against an empty video)
    if count:
        print('Average instance time: {:.2f} ms'.format(1000 * inference_time / count))
    else:
        print('No frames were processed.')

    return

In [22]:
# Define video paths: the input recording and the annotated output both live
# on the mounted Google Drive.
video_path = '/content/gdrive/My Drive/APS360 Project/code/Test videos/demo2.mov'
save_path = '/content/gdrive/My Drive/APS360 Project/code/Test videos/demo2_with_predictions.mp4'
# save_path =  'out.mp4'

# Run detection on video, requesting an output frame rate of 15 fps
# (max_frames is left at its default, so the whole video is processed).
runOnVideo(video_path, save_path, fps=15)

Average instance time: 164.74 ms
