In [1]:
import os
print(os.environ['CONDA_DEFAULT_ENV'])
import keras
import tensorflow as tf
config = tf.ConfigProto(device_count={"CPU": 4})
keras.backend.tensorflow_backend.set_session(tf.Session(config=config))

base


Using TensorFlow backend.


In [2]:
import cv2
import numpy as np
import os
import sys
from samples import coco
from mrcnn import utils
from mrcnn import model as modellib
import matplotlib.pyplot as plt

# Prepare Image

First, load the original image from disk and show it in window by using OpenCV.

In [3]:
# Input the original image name

original_image = 'vv.png'

In [4]:
# Use OpenCV to read and show the original image
image = cv2.imread(original_image)
# cv2.imshow('original_image', image)

# print("Read original image successfully! The original image shape is:")
# print(image.shape)
# print("Press ESC to exit or press s to save and exit.")

# # Wait for keys to exit or save
# k = cv2.waitKey(0)
# if k == 27:                 
#     cv2.destroyAllWindows()
# elif k == ord('s'):        
#     cv2.imwrite('original_image.jpg', image)
#     cv2.destroyAllWindows()

Next, use cvtColor to accomplish image transformation from RGB image to gray image.

**Attention: the original image is an colorful image which has 3 channels(RGB), while the gray image is a grayscale, which only has 2D format.**

In [5]:
# Use cvtColor to accomplish image transformation from RGB image to gray image
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# cv2.imshow('gray_image', gray_image)

# print("Change gray image successfully! The gray image shape is:")
# print(gray_image.shape)
# print("Press ESC to exit or press s to save and exit.")

# # Wait for keys to exit or save
# k = cv2.waitKey(0)
# if k == 27:                 
#     cv2.destroyAllWindows()
# elif k == ord('s'):        
#     cv2.imwrite('gray_image.jpg', image)
#     cv2.destroyAllWindows()

# Prepare Model File and Configuration Information

Now load the pre-trained model data (Mask-RCNN trained by COCO dataset).

In [6]:
# Load the pre-trained model data
ROOT_DIR = os.getcwd()
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

The original configuration information is saved in config.py file. It can be changed if necessary. 

It's better to use the default value, but you can also change the GPU information to suit the personal GPU well.

In [7]:
# Change the config infermation
class InferenceConfig(coco.CocoConfig):
    GPU_COUNT = 1
    
    # Number of images to train with on each GPU. A 12GB GPU can typically
    # handle 2 images of 1024x1024px.
    # Adjust based on your GPU memory and image sizes. Use the highest
    # number that your GPU can handle for best performance.
    IMAGES_PER_GPU = 1
    
config = InferenceConfig()

In [8]:
# COCO dataset object names
model = modellib.MaskRCNN(
    mode="inference", model_dir=MODEL_DIR, config=config
)
model.load_weights(COCO_MODEL_PATH, by_name=True)
class_names = [
    'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
    'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
    'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard',
    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
    'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
    'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
    'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
    'teddy bear', 'hair drier', 'toothbrush'
]

# Define The Image Process Function

Now define two image process functions. 

apply_mask is used to change the background information to grayscale.

display_instances is used to show the object detection result in original image.

In [9]:
def apply_mask(img, mask, back_image):
    image = np.copy(img)
    image[:, :, 0] = np.where(
        mask == 0,
        back_image[:, :,0],
        image[:, :, 0]
    )
    image[:, :, 1] = np.where(
        mask == 0,
        back_image[:, :,1],
        image[:, :, 1]
    )
    image[:, :, 2] = np.where(
        mask == 0,
        back_image[:, :,2],
        image[:, :, 2]
    )
    return image


def make_mask_black(img,mask):
    image = np.copy(img)
    image[:, :, 0] = np.where(
        mask == 0,
        255,
        0
    )
    image[:, :, 1] = np.where(
        mask == 0,
        255,
        0
    )
    image[:, :, 2] = np.where(
        mask == 0,
        255,
        0
    )
    return image

def apply_white(img,bw):
    image = np.copy(img)
    image[:, :, 0] = np.where(
        bw[:,:,0] == 255,
        255,
        image[:, :, 0]
    )
    image[:, :, 1] = np.where(
        bw[:,:,1] == 255,
        255,
        image[:, :, 1]
    )
    image[:, :, 2] = np.where(
       bw[:,:,2] == 255,
        255,
        image[:, :, 2]
    )
    return image


# This function is used to show the object detection result in original image.
def get_largest_mask(image, boxes, masks, ids, names, scores):
    # max_area will save the largest object for all the detection results
    max_area = 0
    
    # n_instances saves the amount of all objects
    n_instances = boxes.shape[0]

    if not n_instances:
        print('NO INSTANCES TO DISPLAY')
    else:
        assert boxes.shape[0] == masks.shape[-1] == ids.shape[0]
    main_box = 0
    for i in range(n_instances):
        if not np.any(boxes[i]):
            continue

        # compute the square of each object
        y1, x1, y2, x2 = boxes[i]
        square = (y2 - y1) * (x2 - x1)

        # use label to select person object from all the 80 classes in COCO dataset
        label = names[ids[i]]
        if label == 'person':
            # save the largest object in the image as main character
            # other people will be regarded as background
            if square > max_area:
                max_area = square
                main_box = boxes[i]
                mask = masks[:, :, i]
            else:
                continue
        else:
            continue
    return mask


def bound_point(image,mask,y,x,r):
    h,w = mask.shape
    for y1 in range(y-r//2,y+r//2):
        for x1 in range(x-r//2,x+r//2):
            if(y1>0 and x1>0 and y1<h and x1<w and mask[y1][x1]==1 and abs((y-y1)**2+(x-x1)**2)<=r*r):
                image[y1,x1] = np.array([255,255,255])
                
def blur_around_mask(image,mask,r):
    h,w = mask.shape
    y = 0
    while(y<h):
        x = 0
        while(x<w):
            if(x<w-1 and mask[y][x]==0 and mask[y][x+1]==1):
                bound_point(image,mask,y,x,r)
#                 for x1 in range(x,x+r):
#                     if(x1>=w):
#                         continue
#                     if(mask[y][x1]==1):
#                         image[y,x1]=np.array([255,255,255])
            elif(x<w-1 and mask[y][x]==1 and mask[y][x+1]==0):
                bound_point(image,mask,y,x,r)
#                 for x1 in range(x-r+1,x+1):
#                     if(x1<0): 
#                         continue
#                     if(mask[y][x1]==1):
#                         image[y,x1]=np.array([255,255,255])
            x+=1
        y+=1
    return image

def convert_image(image,background):
    results = model.detect([image], verbose=1)
    r = results[0]
    mask = get_largest_mask(image, r['rois'], r['masks'], r['class_ids'], class_names, r['scores'])
    imgWithBackground = apply_mask(image,mask,background)
    blackImage = make_mask_black(image,mask)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4))
    (thresh, binRed) = cv2.threshold(blackImage, 128, 255, cv2.THRESH_BINARY)
    opening = cv2.morphologyEx(blackImage, cv2.MORPH_GRADIENT, kernel, iterations=3)
    bimg = apply_white(imgWithBackground,opening)
    return bimg

In [10]:
# This function is used to show the object detection result in original image.
def display_instances(image, boxes, masks, ids, names, scores, back_image):
    # max_area will save the largest object for all the detection results
    max_area = 0
    
    # n_instances saves the amount of all objects
    n_instances = boxes.shape[0]

    if not n_instances:
        print('NO INSTANCES TO DISPLAY')
    else:
        assert boxes.shape[0] == masks.shape[-1] == ids.shape[0]
    main_box = 0
    for i in range(n_instances):
        if not np.any(boxes[i]):
            continue

        # compute the square of each object
        y1, x1, y2, x2 = boxes[i]
        square = (y2 - y1) * (x2 - x1)

        # use label to select person object from all the 80 classes in COCO dataset
        label = names[ids[i]]
        if label == 'person':
            # save the largest object in the image as main character
            # other people will be regarded as background
            if square > max_area:
                max_area = square
                main_box = boxes[i]
                mask = masks[:, :, i]
            else:
                continue
        else:
            continue

        # apply mask for the image
    # by mistake you put apply_mask inside for loop or you can write continue in if also
    image = apply_mask(image, mask, back_image)
    y1, x1, y2, x2 = main_box
#     cv2.rectangle(image, (x1, y1), (x2, y2), (255,0,0), 2)
    return image

In [None]:
# image = cv2.imread("vv.png")
# back_img = cv2.imread("back.jpeg")
# back_img = cv2.resize(back_img, dsize=(image.shape[1], image.shape[0]), interpolation=cv2.INTER_CUBIC)
# bimg = convert_image(image,back_img)
# plt_imshow(bimg)

In [None]:
input_video = '5.mp4'
capture = cv2.VideoCapture(input_video)

# these 2 lines can be removed if you dont have a 1080p camera.
# capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
# capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)



import time
t1 = time.time()
# Recording Video
fps = 25.0
width = int(capture.get(3))
height = int(capture.get(4))
length = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
fcc = cv2.VideoWriter_fourcc('D', 'I', 'V', 'X')
out = cv2.VideoWriter("new_video.avi", fcc, fps, (width, height))
i = 0

ret,image = capture.read()
back_img = cv2.imread("back.jpg")
back_img = cv2.resize(back_img, dsize=(image.shape[1], image.shape[0]), interpolation=cv2.INTER_CUBIC)
# back_img = cv2.blur(back_img,(15,15))

result = None
while True:
    ret, frame = capture.read()
    frame = convert_image(frame,back_img)
    out.write(frame)
    i+=1
    if(i>length-2):
        break
t2 = time.time()
print(t2-t1,i,length)

Processing 1 images
image                    shape: (360, 640, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 1024, 1024, 3)    min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 93)               min:    0.00000  max: 1024.00000  float64
anchors                  shape: (1, 261888, 4)        min:   -0.35390  max:    1.29134  float32


  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


Processing 1 images
image                    shape: (360, 640, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 1024, 1024, 3)    min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 93)               min:    0.00000  max: 1024.00000  float64
anchors                  shape: (1, 261888, 4)        min:   -0.35390  max:    1.29134  float32
Processing 1 images
image                    shape: (360, 640, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 1024, 1024, 3)    min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 93)               min:    0.00000  max: 1024.00000  float64
anchors                  shape: (1, 261888, 4)        min:   -0.35390  max:    1.29134  float32
Processing 1 images
image                    shape: (360, 640, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 1024, 1024, 3)    min: -123.70000  max:  151.1