In [3]:
# Mount Google Drive at /content/gdrive so later cells can read and write files there.
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [4]:
# Root folder on the mounted Drive for input images and generated outputs.
# Keep the trailing slash: later cells build paths by plain string concatenation.
data_path = '/content/gdrive/MyDrive/Colab Notebooks/daimler/'

In [5]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import argparse
import time
import glob
import re



In [6]:
# Class names indexed by the integer label the detection model predicts;
# get_prediction looks each predicted label up in this list by position, so the
# order of the entries must not change.
# NOTE(review): the 'N/A' entries look like placeholders for label ids that have
# no class — confirm against the torchvision COCO label map.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
# Class names that instance_segmentation draws; detections of any other class
# are skipped when rendering masks, boxes and labels.
objects = [
     'person', 'bicycle', 'car', 'motorcycle', 'bus',
    'train', 'truck',  'traffic light', 'stop sign'
]

In [7]:
# Use the first CUDA GPU when one is available, otherwise run on the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print('we are running on your ' + str(device))

# Pre-trained Mask R-CNN with a ResNet-50 FPN backbone, moved to the chosen
# device and switched to inference mode. The bare `model.eval()` expression is
# kept last so the cell still displays the model summary.
model = models.detection.maskrcnn_resnet50_fpn(pretrained=True)
model = model.to(device)
model.eval()

we are running on your cuda:0


MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
          (downsample): 

In [10]:
def get_prediction(img_path, threshold):
  """Run the Mask R-CNN model on one image and keep the confident detections.

  Args:
    img_path: path of the image file to analyse.
    threshold: a detection is kept only when its score is strictly greater
      than this value.

  Returns:
    A tuple ``(masks, pred_boxes, pred_class)`` with one entry per kept
    detection:
      masks: boolean numpy array of shape (N, H, W); mask probabilities are
        binarised at 0.5.
      pred_boxes: list of N boxes, each ``[(x1, y1), (x2, y2)]`` holding the
        box corners as produced by the model (floating point values).
      pred_class: list of N class names from COCO_INSTANCE_CATEGORY_NAMES.
    All three are empty (N == 0) when no detection exceeds the threshold.
  """
  # Force three channels so greyscale or RGBA files still match the model input.
  with Image.open(img_path) as pil_img:
    img = transforms.ToTensor()(pil_img.convert('RGB')).to(device)

  # Inference only: no autograd graph is needed, which saves memory and time.
  with torch.no_grad():
    pred = model([img])[0]

  scores = pred['scores'].cpu().numpy()
  # Boolean selection instead of "index of the last good score": this does not
  # raise when nothing passes the threshold and does not depend on score order.
  keep = scores > threshold

  # The model returns masks as (N, 1, H, W). Dropping only axis 1 keeps the
  # detection axis even when N == 1; a bare squeeze() would collapse it.
  masks = (pred['masks'] > 0.5).squeeze(1).cpu().numpy()[keep]
  labels = pred['labels'].cpu().numpy()[keep]
  boxes = pred['boxes'].cpu().numpy()[keep]

  pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in labels]
  pred_boxes = [[(b[0], b[1]), (b[2], b[3])] for b in boxes]
  return masks, pred_boxes, pred_class



def random_colour_masks(image):
  """Paint a binary mask with one randomly chosen palette colour.

  Args:
    image: 2-D mask (array-like); elements equal to 1 / True mark the object.

  Returns:
    uint8 numpy array of shape ``image.shape + (3,)``: masked pixels carry the
    chosen colour, every other pixel is black.
  """
  colours = [[0, 255, 0],[0, 0, 255],[255, 0, 0],[0, 255, 255],[255, 255, 0],[255, 0, 255],[80, 70, 180],[250, 80, 190],[245, 145, 50],[70, 150, 250],[50, 190, 190]]
  # random.choice covers the whole palette; the previous hard-coded
  # randrange(0, 10) could never select the last of the eleven colours.
  colour = random.choice(colours)
  mask = np.asarray(image) == 1
  coloured_mask = np.zeros(mask.shape + (3,), dtype=np.uint8)
  coloured_mask[mask] = colour
  return coloured_mask


def instance_segmentation(img_path, vis = True, path_seg = '/', path_mask = '/', threshold=0.5, rect_th=3, text_size=1, text_th=3):
  """Segment one image and either display the masks or save the results.

  Only detections whose class name is listed in `objects` are rendered.

  Args:
    img_path: path of the image to segment.
    vis: when True, show the mask-only image with matplotlib; when False,
      write the annotated image and the mask-only image to disk.
    path_seg: output file for the annotated image (used when vis is False).
    path_mask: output file for the mask-only image (used when vis is False).
    threshold: score threshold passed to get_prediction.
    rect_th: bounding-box line thickness.
    text_size: font scale of the class label.
    text_th: stroke thickness of the class label.

  Raises:
    FileNotFoundError: if OpenCV cannot read `img_path`.
    OSError: if one of the output images cannot be written.
  """
  masks, boxes, pred_cls = get_prediction(img_path, threshold)
  img = cv2.imread(img_path)
  if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError('could not read image: ' + str(img_path))
  # The annotated image stays in OpenCV's BGR channel order so cv2.imwrite
  # stores it with correct colours.
  img0 = np.zeros((img.shape[0], img.shape[1], 3), dtype = "uint8")
  for i in range(len(masks)):
    if pred_cls[i] not in objects:
      continue
    rgb_mask = random_colour_masks(masks[i])
    img0 = cv2.addWeighted(img0, 1, rgb_mask, 0.5, 0)
    img = cv2.addWeighted(img, 1, rgb_mask, 0.5, 0)
    # The model returns float box corners; OpenCV drawing calls need ints.
    top_left = tuple(int(round(float(v))) for v in boxes[i][0])
    bottom_right = tuple(int(round(float(v))) for v in boxes[i][1])
    cv2.rectangle(img, top_left, bottom_right, color=(0, 255, 0), thickness=rect_th)
    cv2.putText(img, pred_cls[i], top_left, cv2.FONT_HERSHEY_SIMPLEX, text_size, (0,255,0), thickness=text_th)
  if vis:
    plt.figure(figsize=(20,30))
    plt.imshow(img0)
    plt.xticks([])
    plt.yticks([])
    plt.show()
  else:
    # cv2.imwrite reports failure through its return value; surface it so a
    # missing output folder does not silently lose results.
    if not cv2.imwrite(path_seg, img):
      raise OSError('could not write image: ' + str(path_seg))
    if not cv2.imwrite(path_mask, img0):
      raise OSError('could not write image: ' + str(path_mask))



def numericalSort(value):
    """Sort key that orders embedded numbers numerically rather than as text.

    The string is split on runs of digits; the digit runs are converted to
    ints and the text in between is kept as-is, e.g. 'img10.png' becomes
    ['img', 10, '.png']. Without this key, file names would not be processed
    in their numeric order.
    """
    pieces = re.split(r'(\d+)', value)
    # With a capturing group, re.split places the digit runs at odd positions.
    return [int(piece) if pos % 2 else piece for pos, piece in enumerate(pieces)]

def instance_segmentation_folder(path, threshold=0.5, rect_th=3, text_size=1, text_th=3):
  """Segment every PNG in a folder and save the results under `data_path`.

  Two time-stamped output folders are created: 'mask_folder<ts>/' for the
  mask-only images and 'segmented_folder<ts>/' for the annotated images.
  Output files keep the name of their input file. Images are processed in
  natural numeric order of their file names.

  Args:
    path: folder containing the '*.png' images.
    threshold, rect_th, text_size, text_th: forwarded to
      instance_segmentation for every image.
  """
  # One shared stamp so both output folders of a run carry the same suffix.
  stamp = str(time.time())
  path_mask = data_path + 'mask_folder' + stamp + '/'
  path_segmented = data_path + 'segmented_folder' + stamp + '/'
  # Create both folders unconditionally. The previous checks were crossed, so
  # the mask folder was never created and mask images could not be written.
  os.makedirs(path_mask, exist_ok=True)
  os.makedirs(path_segmented, exist_ok=True)

  img_paths = sorted(glob.glob(os.path.join(path, '*.png')), key=numericalSort)
  for img_path in img_paths:
    name = os.path.basename(img_path)
    instance_segmentation(
      img_path,
      vis=False,
      path_seg=path_segmented + name,
      path_mask=path_mask + name,
      threshold=threshold,
      rect_th=rect_th,
      text_size=text_size,
      text_th=text_th,
    )

def instance_segmentation_video(path, threshold=0.5, rect_th=3, text_size=1, text_th=3):
  """Prepare the output folder for segmenting a video (processing not implemented).

  Currently this only creates '<data_path><video name><timestamp>/'. The
  remaining parameters are accepted but not used yet.

  Args:
    path: path of the video file.
    threshold, rect_th, text_size, text_th: reserved for the per-frame
      segmentation; currently unused.
  """
  # splitext/basename handle any extension length, unlike slicing off the
  # last four characters.
  name = os.path.splitext(os.path.basename(path))[0]
  output_path = data_path + name + str(time.time()) + '/'
  os.makedirs(output_path, exist_ok=True)

  # TODO: read the video, segment each frame and write the masks to output_path.
  





In [11]:
def instance_segmentation_api(source, path):
  """Dispatch a segmentation request according to the kind of input.

  Args:
    source: 'file' for a single image, 'folder' for a folder of PNG images,
      or 'video' for a video file.
    path: location of the image, folder or video.

  Raises:
    ValueError: if `source` is not one of the supported values (previously
      an unknown value was silently ignored).
  """
  if source == 'file':
    instance_segmentation(path)
  elif source == 'folder':
    instance_segmentation_folder(path)
  elif source == 'video':
    instance_segmentation_video(path)
  else:
    raise ValueError("unknown source %r; expected 'file', 'folder' or 'video'" % (source,))

In [12]:
path_file = data_path + 'pic_1.png'
path_folder = data_path + 'stuttgart'
video_path = data_path + 'stuttgart.avi'

instance_segmentation_api('folder', path_folder)

KeyboardInterrupt: ignored