In [26]:
pip install pytorch-lightning



In [27]:
from __future__ import division

import torch 
import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable
import numpy as np
import pytorch_lightning as pl
import cv2 
import random

In [28]:
class EmptyLayer(pl.LightningModule):
  """
  Parameter-free placeholder module.

  create_modules() registers one of these for every "route" and "shortcut"
  block so the module list keeps one entry per cfg block; the actual
  routing / residual arithmetic is performed inline by Darknet.forward().
  """

  def __init__(self):
    super().__init__()

In [29]:
class DetectionLayer(pl.LightningModule):
  """
  Marker module for a "yolo" block.

  It performs no computation of its own; it only stores the anchors chosen
  for that block so Darknet.forward() can read them back through
  ``module_list[i][0].anchors``.
  """

  def __init__(self, anchors):
    super().__init__()
    # anchors: the (width, height) pairs selected by the block's mask
    self.anchors = anchors

In [30]:
class Darknet(pl.LightningModule):
  """
  Network assembled from a Darknet-style cfg file.

  ``self.blocks`` holds the parsed cfg blocks (block 0 is the [net] block),
  ``self.net`` is that [net] block and ``self.module_list`` has one module
  per remaining block, in cfg order.
  """

  def __init__(self, cfg_file):
    super(Darknet, self).__init__()
    self.blocks = parse_cfg(cfg_file)
    self.net, self.module_list = create_modules(self.blocks)

  def forward(self, x, CUDA):
    """
    Run the network on ``x`` and return the concatenated detections.

    Every "yolo" block's feature map is passed through predict_transform()
    and the results are concatenated along dimension 1.

    ``CUDA`` is forwarded unchanged to predict_transform().

    Raises:
      RuntimeError: if the cfg contains no "yolo" block, i.e. there is
        nothing to return.
    """
    modules = self.blocks[1:]  # skip first element of blocks, which is net info
    outputs = {}               # output of every block, keyed by block index
    detections = None

    for i, module in enumerate(modules):
      module_type = module["type"]

      if module_type in ("convolutional", "upsample", "maxpool"):
        x = self.module_list[i](x)
      elif module_type == "route":
        # create_modules() has already split module["layers"] into a list
        layers = [int(a) for a in module["layers"]]

        # positive values are absolute block indices; make them relative to i
        if layers[0] > 0:
          layers[0] -= i

        if len(layers) == 1:
          x = outputs[i + layers[0]]
        else:
          if layers[1] > 0:
            layers[1] -= i

          map1 = outputs[i + layers[0]]
          map2 = outputs[i + layers[1]]
          x = torch.cat((map1, map2), 1)
      elif module_type == "shortcut":
        # residual connection: previous output plus the output "from" blocks back
        f = int(module["from"])
        x = outputs[i - 1] + outputs[i + f]
      elif module_type == "yolo":
        anchors = self.module_list[i][0].anchors   # anchors
        input_dim = int(self.net["height"])        # input dimension
        num_classes = int(module["classes"])       # number of classes

        # transform the feature map into one row per bounding box
        x = x.data
        x = predict_transform(x, input_dim, anchors, num_classes, CUDA)
        if detections is None:
          detections = x
        else:
          detections = torch.cat((detections, x), 1)

      outputs[i] = x

    if detections is None:
      raise RuntimeError("cfg contains no 'yolo' block: no detections to return")
    return detections

  def load_weight(self, file_path):
    """
    Copy the weights stored in ``file_path`` into the convolutional blocks.

    The file is read as 5 int32 header values followed by float32 values.
    For every convolutional block, in cfg order, the values are consumed as:
    with batch norm -> bn bias, bn weight, bn running mean, bn running var;
    without batch norm -> conv bias; and in both cases the conv weights last.
    """
    # the context manager closes the file even if reading fails
    with open(file_path, "rb") as weights_file:
      # first 5 items in weight file are header information
      # major ver, minor ver, subversion, images seen by the network
      header = np.fromfile(weights_file, dtype=np.int32, count=5)
      weights = np.fromfile(weights_file, dtype=np.float32)

    self.header = torch.from_numpy(header)
    self.network_seen = self.header[3]

    n = 0  # read position inside ``weights``
    for i in range(len(self.module_list)):
      block = self.blocks[i + 1]
      # if not convolutional, ignore
      if block["type"] != "convolutional":
        continue

      module = self.module_list[i]
      batch_normalize = int(block.get("batch_normalize", 0))
      convol_layer = module[0]

      if batch_normalize:
        batch_norm_layer = module[1]
        num_biases = batch_norm_layer.bias.numel()

        # load weights
        bnl_biases = torch.from_numpy(weights[n: n + num_biases])
        n += num_biases

        bnl_weights = torch.from_numpy(weights[n: n + num_biases])
        n += num_biases

        bnl_running_mean = torch.from_numpy(weights[n: n + num_biases])
        n += num_biases

        bnl_running_var = torch.from_numpy(weights[n: n + num_biases])
        n += num_biases

        # cast weights into dimensions of model weights
        bnl_biases = bnl_biases.view_as(batch_norm_layer.bias.data)
        bnl_weights = bnl_weights.view_as(batch_norm_layer.weight.data)
        bnl_running_mean = bnl_running_mean.view_as(batch_norm_layer.running_mean)
        bnl_running_var = bnl_running_var.view_as(batch_norm_layer.running_var)

        # copy data to model
        batch_norm_layer.bias.data.copy_(bnl_biases)
        batch_norm_layer.weight.data.copy_(bnl_weights)
        batch_norm_layer.running_mean.copy_(bnl_running_mean)
        batch_norm_layer.running_var.copy_(bnl_running_var)
      else:
        # no batch norm: the convolution carries its own bias
        num_biases = convol_layer.bias.numel()

        convol_biases = torch.from_numpy(weights[n: n + num_biases])
        n += num_biases

        convol_biases = convol_biases.view_as(convol_layer.bias.data)
        convol_layer.bias.data.copy_(convol_biases)

      # weights of the convolutional layer
      num_weights = convol_layer.weight.numel()
      convol_weights = torch.from_numpy(weights[n: n + num_weights])
      n += num_weights
      convol_weights = convol_weights.view_as(convol_layer.weight.data)
      convol_layer.weight.data.copy_(convol_weights)

In [31]:
def predict_transform(predict, input_dim, anchors, num_classes, CUDA = True):
  """
  Turn a detection feature map into one row per bounding box.

  Args:
    predict: tensor of size (batch, len(anchors) * (num_classes + 5), grid, grid).
      It must be contiguous, because it is reshaped with ``view``.
    input_dim: side length of the network input; ``input_dim // grid`` is the stride.
    anchors: list of (width, height) pairs, in input-image pixels.
    num_classes: number of class scores per box.
    CUDA: when true, the helper tensors built here are moved to the GPU.

  Returns:
    Tensor of size (batch, grid * grid * len(anchors), num_classes + 5) whose
    columns are center x, center y, width, height, objectness, class scores.
    The first four columns are scaled by the stride.
  """
  batch_size = predict.size(0)
  stride = input_dim // predict.size(2)
  grid_size = input_dim // stride
  bounding_box_attrs = num_classes + 5
  num_anchors = len(anchors)

  predict = predict.view(batch_size, bounding_box_attrs * num_anchors, grid_size ** 2)
  predict = predict.transpose(1, 2).contiguous()
  predict = predict.view(batch_size, grid_size ** 2 * num_anchors, bounding_box_attrs)

  # anchors are given in input pixels; express them in grid-cell units
  anchors = [(a[0] / stride, a[1] / stride) for a in anchors]

  # sigmoid x, y coordinates and objectness score
  predict[:, :, 0] = torch.sigmoid(predict[:, :, 0])
  predict[:, :, 1] = torch.sigmoid(predict[:, :, 1])
  predict[:, :, 4] = torch.sigmoid(predict[:, :, 4])

  # add center offsets (the column / row index of each grid cell)
  grid = np.arange(grid_size)
  x, y = np.meshgrid(grid, grid)
  x_offset = torch.from_numpy(x).float().view(-1, 1)
  y_offset = torch.from_numpy(y).float().view(-1, 1)

  # ``anchors`` is still a Python list at this point, so only the offset
  # tensors are moved here; the anchor tensor is moved once it exists below.
  if CUDA:
    x_offset = x_offset.cuda()
    y_offset = y_offset.cuda()

  xy_offset = torch.cat((x_offset, y_offset), 1).repeat(1, num_anchors).view(-1, 2).unsqueeze(0)
  predict[:, :, :2] += xy_offset

  # apply anchors to dimensions of bounding box
  anchors = torch.FloatTensor(anchors)
  if CUDA:
    anchors = anchors.cuda()

  anchors = anchors.repeat(grid_size ** 2, 1).unsqueeze(0)
  predict[:, :, 2: 4] = torch.exp(predict[:, :, 2: 4]) * anchors

  # apply sigmoid to class scores
  predict[:, :, 5: num_classes + 5] = torch.sigmoid(predict[:, :, 5: num_classes + 5])
  # resize detections map to size of input image
  predict[:, :, :4] *= stride

  return predict

In [32]:
def parse_cfg(file):
  """
  Parse a Darknet-style config file into a list of blocks.

  Every ``[name]`` header starts a new dict whose "type" entry is ``name``;
  the ``key=value`` lines that follow are stored in that dict as strings.
  Blank lines, whitespace-only lines and lines whose first non-blank
  character is ``#`` are ignored. Only the first ``=`` on a line separates
  key from value.

  Args:
    file: path of the config file.

  Returns:
    List of dicts, one per block, in file order.
  """
  # the context manager closes the file even when parsing fails
  with open(file, 'r') as cfg:
    raw_lines = cfg.read().split('\n')

  # strip BEFORE filtering so indented comments and blank-looking lines are dropped
  lines = [line.strip() for line in raw_lines]
  lines = [line for line in lines if line and not line.startswith('#')]

  block = {}
  blocks = []

  for line in lines:
    if line.startswith("["):          # new block header
      if block:                       # flush the previous block, if any
        blocks.append(block)
        block = {}
      block["type"] = line[1:-1].rstrip()
    else:
      key, value = line.split("=", 1)  # values may themselves contain "="
      block[key.rstrip()] = value.lstrip()

  blocks.append(block)
  return blocks

In [33]:
def create_modules(blocks):
  """
  Build one module per cfg block.

  Args:
    blocks: list of block dicts as returned by parse_cfg(); ``blocks[0]`` is
      the net block. For "route" blocks the "layers" string is replaced in
      place by its comma-split list, which Darknet.forward() relies on.

  Returns:
    Tuple ``(net, modules)``: the net block and an ``nn.ModuleList`` with one
    entry per remaining block, in order.
  """
  net = blocks[0]                  # net info about the input and pre-processing
  modules = nn.ModuleList()
  in_channels = 3
  # channel count produced by the current block; seeded with the input depth
  # so a cfg whose first block is not convolutional still has a defined width
  filters = in_channels
  output_filters = []

  for i, x in enumerate(blocks[1:]):
    module = nn.Sequential()
    module_type = x["type"]

    if module_type == "convolutional":
      activation = x["activation"]
      batch_normalize = int(x.get("batch_normalize", 0))
      # batch norm supplies the shift, so the conv only needs a bias without it
      # (this also covers an explicit batch_normalize=0)
      bias = not batch_normalize

      filters = int(x["filters"])
      padding = int(x.get("pad", 0))
      kernel_size = int(x["size"])
      stride = int(x["stride"])
      pad = (kernel_size - 1) // 2 if padding else 0

      convol_layer = nn.Conv2d(
          in_channels=in_channels,
          out_channels=filters,
          kernel_size=kernel_size,
          stride=stride,
          padding=pad,
          bias=bias
      )
      module.add_module("conv_{}".format(i), convol_layer)

      if batch_normalize:
        batch_norm_layer = nn.BatchNorm2d(num_features=filters)
        module.add_module("batch_norm_{}".format(i), batch_norm_layer)

      if activation == "leaky":      # anything else is treated as linear
        leaky_layer = nn.LeakyReLU(0.1, inplace=True)
        module.add_module("leaky_{}".format(i), leaky_layer)
    elif module_type == "maxpool":
      kernel_size = int(x["size"])
      stride = int(x["stride"])

      maxpool = nn.MaxPool2d(
          kernel_size=kernel_size,
          stride=stride,
          padding=int((kernel_size - 1) // 2)
      )

      if kernel_size == 2 and stride == 1:
        # pad right/bottom by one before the size-2 stride-1 pool
        module.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1)))
        module.add_module('MaxPool2d', maxpool)
      else:
        module = maxpool
    elif module_type == "upsample":
      stride = int(x["stride"])
      # honour the stride from the cfg instead of a hard-coded factor of 2
      upsample = nn.Upsample(scale_factor=stride, mode="nearest")
      module.add_module("upsample_{}".format(i), upsample)
    elif module_type == "route":
      # keep the split list on the block (forward() reads it); the isinstance
      # guard makes a second call on the same blocks harmless
      if isinstance(x["layers"], str):
        x["layers"] = x["layers"].split(",")
      layers = x["layers"]

      start = int(layers[0])
      end = int(layers[1]) if len(layers) > 1 else 0

      # positive values are absolute block indices; make them relative to i
      if start > 0:
        start -= i
      if end > 0:
        end -= i

      module.add_module("route_{}".format(i), EmptyLayer())
      if end < 0:
        # two sources are concatenated along channels
        filters = output_filters[i + start] + output_filters[i + end]
      else:
        filters = output_filters[i + start]
    elif module_type == "shortcut":
      module.add_module("shortcut_{}".format(i), EmptyLayer())
    elif module_type == "yolo":
      mask = [int(m) for m in x["mask"].split(",")]

      values = [int(a) for a in x["anchors"].split(",")]
      pairs = [(values[k], values[k + 1]) for k in range(0, len(values), 2)]
      anchors = [pairs[m] for m in mask]

      module.add_module("Detection_{}".format(i), DetectionLayer(anchors))

    modules.append(module)
    in_channels = filters
    output_filters.append(filters)
  return (net, modules)

In [34]:
def test_input(file_path, img_size):
    """
    Load one image from disk as a network input tensor.

    Args:
        file_path: path of the image to read.
        img_size: size passed to ``cv2.resize``.

    Returns:
        Float tensor with a leading dimension of 1, channels reversed from
        OpenCV's order and values divided by 255.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``file_path``.
    """
    img = cv2.imread(file_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise FileNotFoundError("Could not read image: {}".format(file_path))

    img = cv2.resize(img, img_size)
    img_result = img[:, :, ::-1].transpose((2, 0, 1))     # BGR -> RGB, HWC -> CHW
    img_result = img_result[np.newaxis, :, :, :]/255.0    # add a leading batch axis, scale to [0, 1]
    img_result = torch.from_numpy(img_result).float()     # Convert to float
    img_result = Variable(img_result)                     # Convert to Variable
    return img_result

In [35]:
def get_result(prediction, confidence, num_classes, nms_conf=0.4):
  """
  Filter raw detections by objectness and apply per-class NMS.

  Args:
    prediction: tensor of size (batch, boxes, 5 + num_classes) with columns
      center x, center y, width, height, objectness, class scores.
      It is modified in place.
    confidence: boxes whose objectness is not above this value are zeroed.
    num_classes: number of class-score columns.
    nms_conf: a box is suppressed when its IoU with a higher-objectness box
      of the same class is not below this value.

  Returns:
    Tensor with one row per kept detection and 8 columns: image index in the
    batch, the two box corners (x1, y1, x2, y2), objectness, best class score,
    best class index. Returns the integer 0 when nothing was detected.
  """
  # zero every box whose objectness is at or below the threshold
  conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
  prediction *= conf_mask

  # (center x, center y, width, height) -> (x1, y1, x2, y2) corners
  box = prediction.new(prediction.shape)
  box[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
  box[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
  box[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
  box[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
  prediction[:, :, :4] = box[:, :, :4]

  output = None

  # the number of detections differs per image, so thresholding and NMS
  # are done one image at a time
  for i in range(prediction.size(0)):
    image_prediction = prediction[i]

    # keep only the best class per box: its score and its index
    best_score, best_class = torch.max(image_prediction[:, 5: num_classes + 5], 1)
    best_score = best_score.float().unsqueeze(1)
    best_class = best_class.float().unsqueeze(1)
    image_prediction = torch.cat((image_prediction[:, :5], best_score, best_class), 1)

    # rows zeroed by the confidence mask have objectness 0; drop them
    non_zero = torch.nonzero(image_prediction[:, 4]).view(-1)
    if non_zero.numel() == 0:
      continue
    image_prediction_ = image_prediction[non_zero].view(-1, 7)

    # classes present in this image
    for c in get_unique(image_prediction_[:, -1]):
      # detections of class c (rows whose class score is exactly 0 are dropped too)
      class_mask = image_prediction_ * (image_prediction_[:, -1] == c).float().unsqueeze(1)
      class_mask_index = torch.nonzero(class_mask[:, -2]).view(-1)
      image_prediction_class = image_prediction_[class_mask_index].view(-1, 7)

      # highest objectness first
      order = torch.sort(image_prediction_class[:, 4], descending=True)[1]
      image_prediction_class = image_prediction_class[order]

      # NMS: the row count shrinks while iterating, so test against the
      # CURRENT size instead of relying on an IndexError to stop
      idx = 0
      while idx < image_prediction_class.size(0):
        ious = get_bounding_boxes_iou(image_prediction_class[idx].unsqueeze(0), image_prediction_class[idx + 1:])

        # zero out every later box that overlaps box idx too much
        iou_mask = (ious < nms_conf).float().unsqueeze(1)
        image_prediction_class[idx + 1:] *= iou_mask

        # keep only the rows that survived
        keep = torch.nonzero(image_prediction_class[:, 4]).view(-1)
        image_prediction_class = image_prediction_class[keep].view(-1, 7)
        idx += 1

      # prepend the index of the image inside the batch
      batch_index = image_prediction_class.new(image_prediction_class.size(0), 1).fill_(i)
      rows = torch.cat((batch_index, image_prediction_class), 1)
      output = rows if output is None else torch.cat((output, rows))

  # callers test ``type(result) == int`` to detect "no detections"
  return 0 if output is None else output

In [36]:
def get_unique(tensor):
  """
  Return the distinct values of ``tensor`` in ascending order.

  The values are de-duplicated with NumPy on the CPU and then copied into a
  new tensor created from ``tensor`` (via ``tensor.new``).
  """
  distinct = torch.from_numpy(np.unique(tensor.cpu().numpy()))
  holder = tensor.new(distinct.shape)
  return holder.copy_(distinct)

In [37]:
def get_bounding_boxes_iou(b1, b2):
  """
  Intersection-over-union between the boxes in ``b1`` and ``b2``.

  Both arguments are 2-D tensors whose first four columns are the corners
  (x1, y1, x2, y2); the columns are combined with broadcasting. Widths and
  heights are measured inclusively (``x2 - x1 + 1``).
  """

  def _extent(low, high):
    # inclusive length of the interval [low, high]
    return high - low + 1

  # corners of the intersection rectangle
  left = torch.max(b1[:, 0], b2[:, 0])
  top = torch.max(b1[:, 1], b2[:, 1])
  right = torch.min(b1[:, 2], b2[:, 2])
  bottom = torch.min(b1[:, 3], b2[:, 3])

  # clamp at zero so disjoint boxes contribute no overlap
  overlap = torch.clamp(_extent(left, right), min=0) * torch.clamp(_extent(top, bottom), min=0)

  first_area = _extent(b1[:, 0], b1[:, 2]) * _extent(b1[:, 1], b1[:, 3])
  second_area = _extent(b2[:, 0], b2[:, 2]) * _extent(b2[:, 1], b2[:, 3])

  union = first_area + second_area - overlap
  return overlap / union

In [38]:
def resize_image(img, input_dim):
    """
    Fit ``img`` inside a ``(w, h) = input_dim`` canvas without distortion.

    The image is scaled by the largest factor that still fits, resized with
    cubic interpolation and pasted in the middle of a canvas filled with 128.
    """
    src_height, src_width = img.shape[0], img.shape[1]
    w, h = input_dim

    # one scale for both axes keeps the aspect ratio
    scale = min(w / src_width, h / src_height)
    new_width = int(src_width * scale)
    new_height = int(src_height * scale)
    resized_image = cv2.resize(img, (new_width, new_height), interpolation = cv2.INTER_CUBIC)

    # top-left corner that centers the resized image on the canvas
    top = (h - new_height) // 2
    left = (w - new_width) // 2

    canvas = np.full((input_dim[1], input_dim[0], 3), 128)
    canvas[top: top + new_height, left: left + new_width, :] = resized_image
    return canvas

In [39]:
def pre_image(img, input_dim):
  """
  Convert an image array into a network input tensor.

  The image is letterboxed to ``input_dim`` x ``input_dim`` by resize_image(),
  its channel order is reversed, the channel axis is moved to the front,
  values are divided by 255 and a leading axis of size 1 is added.
  """
  letterboxed = resize_image(img, (input_dim, input_dim))
  # copy() turns the reversed/transposed view into a plain array for from_numpy
  channels_first = letterboxed[:, :, ::-1].transpose((2, 0, 1)).copy()
  as_tensor = torch.from_numpy(channels_first).float()
  return as_tensor.div(255.0).unsqueeze(0)

In [40]:
def draw_result(x, results, colors, classes):
  """
  Draw one detection (box and class label) on its image.

  Args:
    x: one detection row; ``x[0]`` is the index into ``results``,
      ``x[1:3]`` and ``x[3:5]`` are the two box corners and ``x[-1]`` is
      the class index.
    results: sequence of images, indexed by ``int(x[0])``.
    colors: sequence of colors; one is picked at random for every call.
    classes: sequence of class names, indexed by the class index.

  Returns:
    The image that was drawn on (modified in place).
  """
  # OpenCV expects plain Python ints for point coordinates, not 0-d tensors
  top_left = tuple(int(v) for v in x[1: 3])
  bottom_right = tuple(int(v) for v in x[3: 5])
  img = results[int(x[0])]
  text_font = cv2.FONT_HERSHEY_PLAIN
  cls = int(x[-1])
  color = random.choice(colors)
  label = "{}".format(classes[cls])

  # outline of the detection
  cv2.rectangle(img, top_left, bottom_right, color, 1)

  # filled background behind the label, anchored at the box's first corner
  text_size = cv2.getTextSize(label, text_font, 1, 1)[0]
  label_corner = (top_left[0] + text_size[0] + 3, top_left[1] + text_size[1] + 4)
  cv2.rectangle(img, top_left, label_corner, color, -1)

  text_pos = (top_left[0], top_left[1] + text_size[1] + 4)
  cv2.putText(img, label, text_pos, text_font, 1, [255, 255, 255], 1)
  return img

In [41]:
def load_dataset(file_path):
  """
  Read class names from ``file_path``, one name per line.

  Returns the lines in file order. The position of a name in the list is
  its class index, so interior lines are never dropped or reordered. The
  last name is kept whether or not the file ends with a newline.
  """
  # the context manager closes the file; splitlines() does not invent a
  # trailing empty entry, so nothing has to be sliced off afterwards
  with open(file_path, "r") as names_file:
    return names_file.read().splitlines()

In [42]:
from __future__ import division
import time
import torch 
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2 
import argparse
import os 
import os.path as osp
import pickle as pkl
import pandas as pd
import random

def parse_arg():
  """
  Parse the command-line arguments of the image detection module.

  Unknown arguments are ignored (``parse_known_args``). ``--bs`` is parsed
  as an int and ``--confidence`` / ``--nms`` as floats, so values given on
  the command line have the same types as the defaults.

  Returns:
    The ``argparse.Namespace`` with the parsed values.
  """

  parser = argparse.ArgumentParser(description="reYOLO Detection Module")
  parser.add_argument("--images", dest="images", default="/content/dog-cycle-car.png", type=str, help="Image path or directory containing images to perform detection")
  parser.add_argument("--det", dest="det", default="det", type=str, help="Imgage path or directory to store detections")
  parser.add_argument("--bs", dest="bs", default=1, type=int, help="Batch size")
  parser.add_argument("--confidence", dest="confidence", default=0.5, type=float, help="Object confidence to filter predictions")
  parser.add_argument("--nms", dest="nms", default=0.4, type=float, help="NMS Threshold")
  parser.add_argument("--cfg", dest="cfg_file", default="/content/yolov3.cfg", type=str, help="Config file path")
  parser.add_argument("--weights", dest="weights_file", default="/content/yolov3.weights", type=str, help="Weights file path")
  parser.add_argument("--dataset", dest="dataset", default="/content/coco.names", type=str, help="Dataset file path")
  parser.add_argument("--colors", dest="colors_file", default="/content/pallete", type=str, help="Colors file path")

  # the remaining (unknown) arguments are deliberately discarded
  args, _unknown = parser.parse_known_args()
  return args

class ImageDetect():
  """
  Runs the detector on one image or on every file of a directory, prints
  the detected classes and writes annotated copies to the ``det`` directory.
  """

  def __init__(self):
    args = parse_arg()
    self.images = args.images
    self.cfg_file = args.cfg_file
    self.weights_file = args.weights_file
    self.det = args.det
    self.batch_size = int(args.bs)
    self.confidence = float(args.confidence)
    self.nms = float(args.nms)
    self.CUDA = torch.cuda.is_available()
    self.classes = load_dataset(args.dataset)
    self.num_classes = len(self.classes)
    self.colors_file = args.colors_file

  def load_network(self):
    """
    Setup neural network
    """
    self.model = Darknet(self.cfg_file)
    self.model.load_weight(self.weights_file)
    self.input_dim = int(self.model.net["height"])
    assert self.input_dim % 32 == 0
    assert self.input_dim > 32

  def get_detections(self):
    """
    Detect objects in ``self.images`` and write the annotated images.

    Raises:
      SystemExit: when ``self.images`` does not exist or when no detection
        was made at all.
    """
    self.load_network()
    if self.CUDA:         # if cuda available
      self.model.cuda()

    self.model.eval()       # set model in evaluation mode
    read_time = time.time()

    try:
      image_list = [osp.join(osp.realpath("."), self.images, img) for img in os.listdir(self.images)]
    except NotADirectoryError:
      # a single image file was given instead of a directory
      image_list = [osp.join(osp.realpath("."), self.images)]
    except FileNotFoundError:
      print("No file or directory with name {}".format(self.images))
      # raise instead of calling exit(), so the method really stops here
      raise SystemExit

    if not os.path.exists(self.det):
      os.makedirs(self.det)

    load_batch_time = time.time()

    # cv2.imread returns None for files it cannot decode; skip those so a
    # stray non-image file does not crash the preprocessing
    readable_paths = []
    loaded_img_list = []
    for path in image_list:
      img = cv2.imread(path)
      if img is None:
        print("Skipping unreadable image {}".format(path))
        continue
      readable_paths.append(path)
      loaded_img_list.append(img)
    image_list = readable_paths

    # one input tensor per image
    img_batches = [pre_image(img, self.input_dim) for img in loaded_img_list]
    # dimensions of original images as (width, height, width, height) rows
    img_dim_list = [(img.shape[1], img.shape[0]) for img in loaded_img_list]
    img_dim_list = torch.FloatTensor(img_dim_list).repeat(1, 2)

    # create batches
    if self.batch_size != 1:
      left_over = 1 if len(image_list) % self.batch_size else 0
      num_batches = len(image_list) // self.batch_size + left_over
      img_batches = [
          torch.cat(img_batches[k * self.batch_size: min((k + 1) * self.batch_size, len(img_batches))])
          for k in range(num_batches)
      ]

    if self.CUDA:
      img_dim_list = img_dim_list.cuda()

    start_detect_loop_time = time.time()
    output = None

    # detection loop
    for i, batch in enumerate(img_batches):
      start = time.time()
      if self.CUDA:
        batch = batch.cuda()
      with torch.no_grad():
        prediction = self.model(batch, self.CUDA)

      prediction = get_result(prediction, self.confidence, self.num_classes, nms_conf=self.nms)

      end = time.time()
      batch_images = image_list[i * self.batch_size: min((i + 1) * self.batch_size, len(image_list))]

      # get_result() returns the int 0 when the batch has no detections
      if type(prediction) == int:
        for image in batch_images:
          print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start) / self.batch_size))
          print("{0:20s} {1:s}".format("Objects Detected:", ""))
          print("*********************************************")
        continue

      # transform attr from index in batch to index in image list
      prediction[:, 0] += i * self.batch_size
      if output is None:           # initialize output
        output = prediction
      else:
        output = torch.cat((output, prediction))

      for img_num, image in enumerate(batch_images):
        img_id = i * self.batch_size + img_num
        objects = [self.classes[int(x[-1])] for x in output if int(x[0]) == img_id]
        print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start) / self.batch_size))
        print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objects)))
        print("*********************************************")

      if self.CUDA:
        torch.cuda.synchronize()

    if output is None:
      print("No detection were made")
      raise SystemExit

    # map the boxes from the padded network input back to each original image
    img_dim_list = torch.index_select(img_dim_list, 0, output[:, 0].long())
    scale_factor = torch.min(self.input_dim / img_dim_list, 1)[0].view(-1, 1)
    output[:, [1, 3]] -= (self.input_dim - scale_factor * img_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (self.input_dim - scale_factor * img_dim_list[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scale_factor

    # clip boxes to the image borders
    for i in range(output.shape[0]):
      output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, img_dim_list[i, 0])
      output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, img_dim_list[i, 1])

    output_recast_time = time.time()
    class_load_time = time.time()
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point --colors at a trusted palette file.
    with open(self.colors_file, "rb") as colors_fh:
      colors = pkl.load(colors_fh)
    draw_time = time.time()

    # draw bounding boxes on images
    for detection in output:
      draw_result(detection, loaded_img_list, colors, self.classes)

    detect_names = ["{}/detect_{}".format(self.det, path.split("/")[-1]) for path in image_list]
    for name, img in zip(detect_names, loaded_img_list):
      cv2.imwrite(name, img)

    end = time.time()
    print("Results")
    print("*********************************************")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print("{:25s}: {:2.3f}".format("Reading", load_batch_time - read_time))
    print("{:25s}: {:2.3f}".format("Loading batch", start_detect_loop_time - load_batch_time))
    print("{:25s}: {:2.3f}".format("Detection (" + str(len(image_list)) +  " images)", output_recast_time - start_detect_loop_time))
    print("{:25s}: {:2.3f}".format("Output processing", class_load_time - output_recast_time))
    print("{:25s}: {:2.3f}".format("Drawing boxes", end - draw_time))
    print("{:25s}: {:2.3f}".format("Average time per img", (end - load_batch_time) / len(image_list)))

    torch.cuda.empty_cache()

# Build the image detector (arguments fall back to the defaults declared in
# parse_arg) and run detection.
test = ImageDetect()
test.get_detections()

dog-cycle-car.png    predicted in  2.362 seconds
Objects Detected:    bicycle truck dog
*********************************************
Results
*********************************************
Task                     : Time Taken (in seconds)
Reading                  : 0.000
Loading batch            : 0.022
Detection (1 images)     : 2.364
Output processing        : 0.000
Drawing boxes            : 0.017
Average time per img     : 2.403


In [51]:
from google.colab.patches import cv2_imshow

from __future__ import division
import time
import torch 
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2 
import argparse
import os 
import os.path as osp
import pickle as pkl
import pandas as pd
import random

def parse_arg():
  """
  Parse the command-line arguments of the video detection module.

  Unknown arguments are ignored (``parse_known_args``). ``--bs`` is parsed
  as an int and ``--confidence`` / ``--nms`` as floats, so values given on
  the command line have the same types as the defaults.

  Returns:
    The ``argparse.Namespace`` with the parsed values.
  """

  parser = argparse.ArgumentParser(description="reYOLO Detection Module")
  # the help text used to describe images; this option is the video source
  parser.add_argument("--video", dest="video_file", default="/content/videoplayback.mp4", type=str, help="Video file path to perform detection on")
  parser.add_argument("--bs", dest="bs", default=1, type=int, help="Batch size")
  parser.add_argument("--confidence", dest="confidence", default=0.5, type=float, help="Object confidence to filter predictions")
  parser.add_argument("--nms", dest="nms", default=0.4, type=float, help="NMS Threshold")
  parser.add_argument("--cfg", dest="cfg_file", default="/content/yolov3.cfg", type=str, help="Config file path")
  parser.add_argument("--weights", dest="weights_file", default="/content/yolov3.weights", type=str, help="Weights file path")
  parser.add_argument("--dataset", dest="dataset", default="/content/coco.names", type=str, help="Dataset file path")
  parser.add_argument("--colors", dest="colors_file", default="/content/pallete", type=str, help="Colors file path")

  # the remaining (unknown) arguments are deliberately discarded
  args, _unknown = parser.parse_known_args()
  return args

class VideoDetect():
  """
  Runs the detector frame by frame on a video source and displays every
  frame with its detections drawn on it.
  """

  def __init__(self):
    args = parse_arg()
    self.video_file = args.video_file
    # cast like ImageDetect does, so the values have numeric types
    self.batch_size = int(args.bs)
    self.confidence = float(args.confidence)
    self.nms = float(args.nms)
    self.cfg_file = args.cfg_file
    self.weights_file = args.weights_file
    self.classes = load_dataset(args.dataset)
    self.num_classes = len(self.classes)
    self.colors_file = args.colors_file
    self.CUDA = torch.cuda.is_available()

  def load_network(self):
    """
    Setup neural network
    """
    self.model = Darknet(self.cfg_file)
    self.model.load_weight(self.weights_file)
    self.input_dim = int(self.model.net["height"])
    assert self.input_dim % 32 == 0
    assert self.input_dim > 32

  def get_detections(self):
    """
    Read frames from ``self.video_file`` until the source ends (or "q" is
    pressed), run detection on each frame and display the annotated frame.

    Raises:
      AssertionError: if the video source cannot be opened.
    """
    self.load_network()
    if self.CUDA:         # if cuda available
      self.model.cuda()

    self.model.eval()     # set model in evaluation mode

    # the drawing step needs the palette; load it once before the loop
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point --colors at a trusted palette file.
    with open(self.colors_file, "rb") as colors_fh:
      self.colors = pkl.load(colors_fh)

    # get video capture from source (file/webcam)
    cap = cv2.VideoCapture(self.video_file)
    # cap = cv2.VideoCapture(0)   # webcam
    assert cap.isOpened(), 'Cannot captutre video source'

    frames = 0
    start = time.time()
    try:
      while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
          break

        image = pre_image(frame, self.input_dim)
        # frame size as one (width, height, width, height) row
        img_dim = torch.FloatTensor((frame.shape[1], frame.shape[0])).repeat(1, 2)

        if self.CUDA:
          img_dim = img_dim.cuda()
          image = image.cuda()

        # no_grad() already disables autograd; the removed ``volatile`` flag is not needed
        with torch.no_grad():
          prediction = self.model(image, self.CUDA)
        prediction = get_result(prediction, self.confidence, self.num_classes, nms_conf=self.nms)

        # get_result() returns the int 0 when nothing was detected
        if type(prediction) == int:
          frames += 1
          print("FPS: {:5.4f}".format(frames / (time.time() - start)))
          # cv2.imshow("frame", frame)
          cv2_imshow(frame)
          key = cv2.waitKey(1)
          if key & 0xFF == ord('q'):    # exit if press q
            break
          continue

        # map the boxes from the padded network input back to the frame
        img_dim = img_dim.repeat(prediction.size(0), 1)
        scale_factor = torch.min(self.input_dim / img_dim, 1)[0].view(-1, 1)
        prediction[:, [1, 3]] -= (self.input_dim - scale_factor * img_dim[:, 0].view(-1, 1)) / 2
        prediction[:, [2, 4]] -= (self.input_dim - scale_factor * img_dim[:, 1].view(-1, 1)) / 2
        prediction[:, 1: 5] /= scale_factor

        # clip boxes to the frame borders
        for i in range(prediction.shape[0]):
          prediction[i, [1, 3]] = torch.clamp(prediction[i, [1, 3]], 0.0, img_dim[i, 0])
          prediction[i, [2, 4]] = torch.clamp(prediction[i, [2, 4]], 0.0, img_dim[i, 1])

        # draw_result() indexes its image list with the detection's batch
        # index, so the single frame is wrapped in a one-element list
        for detection in prediction:
          draw_result(detection, [frame], self.colors, self.classes)

        # cv2.imshow("frame", frame)
        cv2_imshow(frame)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
          break
        frames += 1
        t = time.time() - start   # time elapsed since the first frame
        print("Predicted in {0:6.3f} seconds".format(t))
        print("FPS: {:5.2f}".format(frames / (time.time() - start)))
    finally:
      # always hand the capture device / file back to OpenCV
      cap.release()

# Build the video detector (arguments fall back to the defaults declared in
# parse_arg) and run detection on the configured video source.
test = VideoDetect()
test.get_detections()

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
curl: (51) SSL: no alternative certificate subject name matches target host name 'www.sample-videos.com'


AssertionError: ignored