# VIDEO SEGMENTATION

## SETUP

#### MOUNT GOOGLE DRIVE

In [0]:
from google.colab import drive
# Mount Google Drive at /content/drive so the project folder on Drive is reachable from this runtime
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#### NAVIGATE TO THE PROJECT DIRECTORY

In [0]:
cd '/content/drive/My Drive/segmentation'

/content/drive/My Drive/segmentation


#### ENVIRONMENT SETUP + IMPORT LIBRARIES AND MODULES

In [0]:
# environment setup
%matplotlib inline 
# import mrcnn modules  
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize

# import environment modules
import os
import sys
import random
import math
import numpy as np
import skimage.io
import matplotlib
import matplotlib.pyplot as plt
import imageio
from skimage.transform import rescale
from skimage import img_as_ubyte

Using TensorFlow backend.


#### SETUP PROJECT DIRECTORIES AND PATHS

In [0]:
# Project layout: every path below is anchored at the current working directory,
# which the earlier cell set to the project folder.
root_dir = os.getcwd()
sys.path.append(root_dir)  # lets modules that live in the project folder be imported
# logs -> model directory, input -> source videos, unlooped -> intermediate (not yet looped) videos
model_dir, vid_dir, unlooped_vid_dir = (
    os.path.join(root_dir, sub_dir) for sub_dir in ("logs", "input", "unlooped")
)
# weights file loaded into the model in the model-setup cell
weights_path = os.path.join(root_dir, "mrcnn/mask_rcnn_coco.h5")

#### FUNCTIONS TO READ AND WRITE VIDEOS

In [0]:
def get_frames(dir_path, vid): 
    """
    Read every frame of a video file into memory.
    dir_path: path to the directory that contains the video file
    vid: video file name including its extension (eg. 'video.mp4')
    returns: a list with one entry per frame yielded by the imageio reader, in order
    """
    # build the path to the file
    vid_path = os.path.join(dir_path, os.path.normpath(vid))
    # The reader keeps the file open; the context manager closes it even when
    # decoding fails part-way through.
    with imageio.get_reader(vid_path) as reader:
        # plain iteration on purpose: this never asks the reader for its length
        return [im for im in reader]
    
def generate_videos(frames, file_name, do_loop = 0, fps = 15): 
    """
    Write a list of frames to a video file.
    frames: a list of frames in np.array format
    file_name: output file name and its extension (eg. 'video.mp4'); may include a
               directory part (eg. 'output/video.mp4')
    do_loop: bool - when true the video is written as a "boomerang" loop (forward, then
             backward) and 'looped_' is prefixed to the file name (not to the directory)
    fps: frames per second passed to the writer (defaults to 15)
    """
    # turn list into np.array
    frames = np.array(frames)
    if do_loop:
        # Prefix the base name only, so 'output/KA.mp4' becomes 'output/looped_KA.mp4'
        # rather than the non-existent directory 'looped_output/'.
        out_dir, base_name = os.path.split(file_name)
        file_name = os.path.join(out_dir, 'looped_' + base_name)
        # frames[-2:0:-1] is the clip reversed without its last and first frame, so the
        # turning point and the loop seam are not duplicated. For clips of one or two
        # frames the slice is empty and the clip is written unchanged.
        frames = np.concatenate((frames, frames[-2:0:-1]))
    # write video file
    imageio.mimwrite(file_name, frames, fps = fps, macro_block_size = None)
    print('Generating '+ file_name + ':Success!')

  
def calculate_offset(pad_y,pad_x,vid = 1):
    """
    Build a pad-width tuple that spreads the given padding over both sides of each axis.
    pad_y: total padding to add along the height, in px
    pad_x: total padding to add along the width, in px
    vid: bool - when true a leading (0, 0) entry is added, giving four axis entries
         instead of three
    returns: a tuple of (before, after) pairs; the last pair is always (0, 0). An even
             amount is split in half, an odd amount puts the extra pixel on the "after" side.
    """
    def split(total):
        # even totals split exactly; odd totals go floor / ceil
        half = total/2
        if total%2 == 0:
            return (round(half), round(half))
        return (round(half - 0.5), round(half + 0.5))

    per_frame = (split(pad_y), split(pad_x), (0, 0))
    if vid:
        return ((0, 0),) + per_frame
    return per_frame
  

#### MISCELLANEOUS HELPERS

In [0]:
def progress_bar(i,total):
    """
    Prints a ten-segment progress bar with a percentage to the console, overwriting
    the current line.
    i: current position (number of items done)
    total: position that counts as 100%
    """
    # total <= 0 happens when a caller passes len(items) - 1 for a single item;
    # report that as finished instead of dividing by zero
    progg = i/total if total > 0 else 1.0
    # keep the bar inside its ten segments even if i runs outside [0, total]
    progg = min(max(progg, 0.0), 1.0)
    perc_done = int(progg*10)
    perc_remain = 10 - perc_done
    # the leading "\r" returns to the start of the line so the bar redraws in place
    print("\r["+"#"*perc_done+"-"*perc_remain+"] "+ str(int(progg * 100)) +"% complete" , end="")

#### MODEL SETUP (FROM: https://github.com/matterport/Mask_RCNN/blob/master/samples/demo.ipynb)


In [0]:
# Import and set up the COCO configuration.
# The directory is appended to sys.path first so that `import coco` can be resolved
# (presumably coco.py lives in that directory - verify against the project layout).
sys.path.append(os.path.join(root_dir, "mrcnn/coco_config/coco/")) # make the COCO config importable
import coco # COCO configuration module
# Inference configuration: coco.CocoConfig with GPU_COUNT and IMAGES_PER_GPU overridden to 1.
# get_activations passes a single frame per call.
class InferenceConfig(coco.CocoConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
config = InferenceConfig()
# config.display()
model = modellib.MaskRCNN(mode="inference", model_dir=model_dir, config=config) # Create model in inference mode
model.load_weights(weights_path, by_name=True) # Load weights from weights_path (by_name=True)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


In [0]:
# Print the layer-by-layer summary of the underlying Keras model
model.keras_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_image (InputLayer)        (None, None, None, 3 0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, None, None, 3 0           input_image[0][0]                
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, None, None, 6 9472        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
bn_conv1 (BatchNorm)            (None, None, None, 6 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

#### FUNCTIONS TO EXTRACT INFORMATION FROM THE MODEL

In [0]:
def run_model(frames):
    """
    Wrapper that runs the model on every frame and collects the 'person' detections.
    frames: a list of frames from the video.
    Returns (boxes, maps), two lists with one entry per frame:
    boxes[f]: denormalised boxes [y1,x1,y2,x2] of the detections whose class id is 1
    maps[f]: activation maps from the 'mrcnn_mask' layer for those same detections,
             in the same order, so boxes[f][k] and maps[f][k] describe one instance.
    """
    boxes = []
    maps = []
    n_frames = len(frames)
    print('processing ' + str(n_frames) +' frames') 
    for i, frame in enumerate(frames):
        # get activations of the detections and mrcnn_mask layers for the current frame
        activations = get_activations(frame)
        # class ids of all detections (zero padding removed)
        ids = get_ids(activations["detections"])
        # get_boxes returns one row per detection slot (assumed to be a numpy array)
        frame_boxes = get_boxes(activations["detections"], frame.shape, frame)
        # Keep only the class-1 ('person') rows. get_maps filters the masks the same way,
        # and the downstream code pairs boxes[f][k] with maps[f][k]; without this filter a
        # non-person detection ahead of a person shifts every later mask onto the wrong box.
        person_boxes = frame_boxes[:len(ids)][ids == 1]
        boxes.append(person_boxes)
        # activation maps for the class-1 detections
        maps.append(get_maps(activations["mrcnn_mask"], person_boxes, ids))
        # i + 1 of n_frames done; avoids a zero total when there is a single frame
        progress_bar(i + 1, n_frames)
    print("",end = '\n')
    print('processing complete')
    return boxes, maps

def get_activations(frame):
    """
    Run the model graph on one frame and fetch two layer outputs.
    frame: a single frame of the video.
    Returns what model.run_graph returns for the outputs labelled "detections"
    (layer "mrcnn_detection") and "mrcnn_mask" (layer "mrcnn_mask").
    """
    keras_model = model.keras_model
    # (label in the result, name of the Keras layer whose output is requested)
    wanted = (("detections", "mrcnn_detection"), ("mrcnn_mask", "mrcnn_mask"))
    requested_outputs = [
        (label, keras_model.get_layer(layer_name).output)
        for label, layer_name in wanted
    ]
    return model.run_graph([frame], requested_outputs)
  
def get_ids(detections):
    """
    detections: array of detections from the detection layer; column 4 of
                detections[0] is read as the class id.
    Returns the class ids (int32) up to, but not including, the first 0 entry.
    When there is no 0 entry at all, every id is returned.
    """
    ids = detections[0, :, 4].astype(np.int32) # all predicted class ids, including 0 padding
    padding = np.where(ids == 0)[0] # positions of the 0 entries
    # With every detection slot filled there is no padding; indexing padding[0]
    # unconditionally would raise IndexError in that case.
    if padding.size:
        ids = ids[:padding[0]] # discard 0 padding
    return ids

def get_windows(frame):
    """
    Resize a frame with the settings from the model config.
    frame: the current frame of the video.
    Returns the first two values produced by utils.resize_image:
    the molded (resized) frame and the image window within it.
    """
    resize_options = dict(
        min_dim=config.IMAGE_MIN_DIM,
        min_scale=config.IMAGE_MIN_SCALE,
        max_dim=config.IMAGE_MAX_DIM,
        mode=config.IMAGE_RESIZE_MODE,
    )
    # resize_image yields five values; only the first two are needed here
    molded_frame, window, _scale, _padding, _crop = utils.resize_image(frame, **resize_options)
    return molded_frame, window
  
  
def get_boxes(detections, frame_shape, frame):
    """
    Map the detection boxes from the molded-image window back onto the original frame.
    detections: array of detections from the detection layer; the first four columns
                of detections[0] are read as the box coordinates.
    frame_shape: dims of the current frame
    frame: the current frame (used to obtain the molded frame and its window)
    Returns denormalised boxes [y1,x1,y2,x2].
    """
    # molded frame and the window the original image occupies inside it
    molded_frame, window = get_windows(frame)
    raw_boxes = detections[0, :, :4]
    # normalise the window so it lives in the same coordinate system as the boxes
    top, left, bottom, right = utils.norm_boxes(window, molded_frame.shape[:2])
    window_height = bottom - top
    window_width = right - left
    offset = np.array([top, left, top, left])
    extent = np.array([window_height, window_width, window_height, window_width])
    # boxes in normalised coordinates relative to the window
    window_relative = np.divide(raw_boxes - offset, extent)
    # boxes in pixel coordinates of the original frame
    return utils.denorm_boxes(window_relative, frame_shape[:2])

def get_maps(masks, boxes, ids):
    """
    Pick the activation maps that belong to class id 1.
    masks: array of activation maps from mrcnn_mask; masks[0] is indexed as
           [detection, row, column, class].
    boxes: denormalised boxes (not used by this function).
    ids: class id of each detection.
    Returns an array [num_instances, map rows, map columns] with one map per
    detection whose class id is 1.
    """
    frame_masks = masks[0] # masks of the current image
    person_maps = []
    for detection_idx, class_id in enumerate(ids):
        if class_id == 1: # keep the people class only
            person_maps.append(frame_masks[detection_idx, :, :, class_id])
    return np.array(person_maps)

In [0]:
def get_frame_maps(frames, boxes, maps, apply_vid = 1):
    """
    Build a per-frame "confusion" heatmap and either apply it to the frame or render it.
    Each unmolded activation value p is turned into 2*p (p < 0.5) or 2*(1-p) (p >= 0.5),
    i.e. 0 where p is 0 or 1 and 1 where p is 0.5. The instances of a frame are summed
    and the sum is capped at 1.
    frames: list of frames (np.arrays); they are not modified.
    boxes: per-frame denormalised boxes; boxes[f][k] is used for maps[f][k].
    maps: per-frame activation maps from the 'mrcnn_mask' layer.
    apply_vid: bool - when true every channel of the frame is multiplied by the heatmap,
               otherwise the heatmap is rendered with the jet colormap as uint8.
    Returns (frame_maps, confusion_idxs): the new frames and, per frame, the heatmap sum
    divided by the pixel count of the first frame.
    """ 
    # Fresh output list: writing into `frames` would overwrite the caller's video
    frame_maps = []
    confusion_idxs = []
    if len(frames) == 0:
        return frame_maps, confusion_idxs
    maximum_confusion = frames[0].shape[0]*frames[0].shape[1]
    for idx, frame in enumerate(frames):
        heatmap = np.zeros(frame.shape[:2]) # accumulator for the final heatmap
        for k, activation in enumerate(maps[idx]): # for all instances in this frame
            prob = unmold(activation, boxes[idx][k], frame.shape) # map at frame size
            heatmap += np.where(prob >= 0.5, 1 - prob, prob) * 2
        np.minimum(heatmap, 1, out=heatmap) # overlapping instances are capped at 1
        confusion_idxs.append(np.sum(heatmap)/maximum_confusion) # confusion index of the frame
        if apply_vid: # apply the map to the original frame
            # cast back to the frame dtype, which is what assigning into the frame did
            frame_maps.append((frame * heatmap[:, :, np.newaxis]).astype(frame.dtype))
        else: # return the heatmap only
            cmap = plt.cm.jet
            frame_maps.append((cmap(heatmap) * 255).astype(np.uint8))
    return frame_maps, confusion_idxs
  
def get_rescaled(frames, c_idxs):
    """
    Shrink every frame by a factor of 0.4 + its confusion index and pad it back to
    the original height and width with zeros.
    frames: list of frames (np.arrays)
    c_idxs: list of confusion indices, one per frame.
    Returns a list of rescaled and padded frames converted with img_as_ubyte.
    """ 
    scaled_frames = []
    for idx, original in enumerate(frames):
        confusion = c_idxs[idx]
        assert confusion <= 0.6 # a larger index would scale the frame above its original size
        shrunk = rescale(original, 0.4 + confusion)
        # how much is missing on each axis after shrinking
        height_gap = original.shape[0] - shrunk.shape[0]
        width_gap = original.shape[1] - shrunk.shape[1]
        # pad widths with the odd pixel (if any) handled by calculate_offset
        padding = calculate_offset(height_gap, width_gap, vid = 0)
        padded = np.pad(shrunk, pad_width=padding, mode='constant', constant_values=0)
        scaled_frames.append(img_as_ubyte(padded))
    return scaled_frames
  
def unmold(mask, bbox, image_shape):
    """
    Adapted from utils.unmold_mask; the difference is that the resized activation
    values are kept per pixel instead of being thresholded into a binary mask.
    mask: raw activation map from the mrcnn_mask layer.
    bbox: denormalised box [y1,x1,y2,x2] corresponding to the activation map.
    image_shape: shape of the original image
    Returns an array with the height and width of the original image that holds the
    resized map inside the box and zeros elsewhere. A box with no area yields all zeros.
    """
    y1, x1, y2, x2 = bbox # box corners
    full_mask = np.zeros(image_shape[:2]) # zeros everywhere outside the box
    # A box that collapsed to zero height or width covers no pixels; resizing to a
    # zero-sized target is not meaningful, so return the empty map directly.
    if y2 <= y1 or x2 <= x1:
        return full_mask
    # resize the map to the box dims and paste it into place
    full_mask[y1:y2, x1:x2] = utils.resize(mask, (y2 - y1, x2 - x1))
    return full_mask

### GET DATA FROM THE MODEL

In [0]:
# Read all frames of the input clip, then run the model on every frame
frames = get_frames(vid_dir, 'KA.mp4')
boxes, maps = run_model(frames)

processing 111 frames
[##########] 100% complete
processing complete


### GENERATE UNLOOPED VIDEO

In [0]:
# Apply the heatmaps to the frames, shrink/pad each frame according to its confusion
# index, and write the result as a plain (not looped) video
frame_maps, c_idxs = get_frame_maps(frames, boxes, maps, apply_vid = 1)
rescaled_frame_maps = get_rescaled(frame_maps, c_idxs)
generate_videos(rescaled_frame_maps, 'unlooped/final_KA.mp4', do_loop = 0)


  warn('The default multichannel argument (None) is deprecated.  Please '
  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
  .format(dtypeobj_in, dtypeobj_out))


Generating final_KA.mp4:Success!


### LOOP GENERATED VIDEO

In [0]:
# Read the unlooped video back from disk and write a boomerang-looped version of it.
# NOTE: this rebinds `frames`; any later cell that uses `frames` sees this clip,
# not the frames read from the input directory.
frames = get_frames(unlooped_vid_dir, 'final_KA.mp4')
generate_videos(frames, 'output/KA.mp4', do_loop = 1)

Generating looped_KA.mp4:Success!


#### EXTRAS / OLD FEATURES: FUNCTIONS TO GENERATE INSTANCE SEGMENTATION MASKS

In [0]:
def get_segments(frames, boxes, maps):
    """
    Render the instance masks of every frame as a colour image.
    frames: list of np.array frames
    boxes: per-frame denormalised boxes; boxes[f][k] is used for maps[f][k].
    maps: per-frame activation maps
    Returns a list of uint8 images, one per frame, in which each instance mask is
    painted with a value from get_colors and mapped through the gnuplot colormap
    (values normalised to the range 0..20).
    """
    colormap = plt.cm.gnuplot
    normalize = plt.Normalize(vmin=0, vmax=20)
    rendered = []
    for idx, frame in enumerate(frames):
        frame_maps = maps[idx]
        # binary masks at frame size, [num_instances, height, width]
        binary_masks = np.array([
            utils.unmold_mask(activation, boxes[idx][k], frame.shape)
            for k, activation in enumerate(frame_maps)
        ])
        instance_count = frame_maps.shape[0]
        # one colour value per instance in the frame
        palette = get_colors(instance_count)
        # start from an all-zero canvas with the frame's height and width
        canvas = np.zeros([frame.shape[0], frame.shape[1]])
        for k in range(instance_count):
            canvas = apply(canvas, binary_masks[k, :, :], palette[k])
        rendered.append((colormap(normalize(canvas)) * 255).astype(np.uint8))
    return rendered

def get_colors(num_instances):
    """
    num_instances: number of people detected in the frame
    Returns a list with one colour value per instance: 4, 6, 8, ...
    (the spacing was chosen to work on the gnuplot cmap).
    """
    return [4 + 2 * instance for instance in range(num_instances)]
  
def apply(image, mask, color, alpha=1.):
    """
    Add `color` to the image wherever the mask equals 1.
    image: array to paint on (not modified; a new array is returned)
    mask: array compared against 1 to select the pixels to paint
    color: value added to the selected pixels
    alpha: accepted but not used
    """
    selected = mask == 1
    painted = image + color
    return np.where(selected, painted, image)

In [0]:
# Get segments: render the instance masks for every frame and write them as a looped video.
# NOTE(review): `frames`, `boxes` and `maps` are whatever earlier cells last assigned;
# confirm `frames` still matches the clip that `boxes` and `maps` were computed from.
segments = get_segments(frames, boxes, maps)
generate_videos(segments, 'masks_KA.mp4', do_loop = 1)


Generating looped_masks_KA.mp4:Success!
