<a href="https://colab.research.google.com/github/rdesarz/cnnmot/blob/develop/notebook/yolov3_object_detection_colab_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Object detection based on Yolo V3
This notebook is inspired by the following tutorial: https://machinelearningmastery.com/how-to-perform-object-detection-with-yolov3-in-keras/. The main goal is to use the YOLOv3 model to detect and classify objects in a scene.

## Environment setup
The next steps are required to properly configure the environment.



In [0]:
from google.colab import drive
from google.colab import files
import os

# Upload the SSH key from the local machine.
# The working directory is switched to /content first, presumably so the
# uploaded file lands there -- confirm against google.colab.files.upload.
# NOTE(review): no visible cell reads `uploaded` or the key afterwards, and the
# cnnmot install below uses an HTTPS URL -- confirm this step is still needed.
os.chdir("/content")
uploaded = files.upload()

Install the `cnnmot` package directly from its GitHub repository.

In [0]:
!pip3 install --upgrade git+https://github.com/rdesarz/cnnmot.git

In [0]:
from IPython.display import HTML, Audio
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np
import io
from PIL import Image

# HTML/JS snippet rendered in the cell output to grab a single webcam frame.
#
# The template is filled in with `%` formatting and expects, in order:
#   %d  video width, %d  video height, %f  JPEG quality for canvas.toDataURL.
# Any literal percent sign added to the snippet must therefore be written `%%`.
#
# The script exposes a promise named `data`:
#   * it resolves with a JPEG data URL once the user clicks the video preview;
#   * it rejects if camera access fails (permission denied, no device), so code
#     awaiting `data` is not left waiting forever on a promise that never settles.
# Clicks made before the stream has been attached to the <video> are ignored
# rather than throwing on a null `srcObject`.
webcam_input_html = """
<video autoplay
 width=%d height=%d style='cursor: pointer;'></video>
<script>

var video = document.querySelector('video')

var stream_ready = navigator.mediaDevices.getUserMedia({ video: true })
  .then(stream=> video.srcObject = stream)

var data = new Promise((resolve, reject)=>{
  // Surface a denied or missing camera instead of leaving the promise pending.
  stream_ready.catch(reject)
  video.onclick = ()=>{
    // Ignore clicks made before the stream is attached.
    if (!video.srcObject) return
    var canvas = document.createElement('canvas')
    var [w,h] = [video.offsetWidth, video.offsetHeight]
    canvas.width = w
    canvas.height = h
    canvas.getContext('2d')
          .drawImage(video, 0, 0, w, h)
    video.srcObject.getVideoTracks()[0].stop()
    video.replaceWith(canvas)
    resolve(canvas.toDataURL('image/jpeg', %f))
  }
})
</script>
"""

def get_camera_input(filename='photo.jpg', quality=0.8, size=(800,600)):
  """Show a webcam preview in the cell output and return the clicked frame.

  Renders `webcam_input_html`, then waits on the JavaScript value `data`,
  which that snippet resolves to a JPEG data URL when the video is clicked.

  Args:
    filename: accepted but not used by this function; nothing is written to disk.
    quality: value substituted into the snippet as the JPEG quality.
    size: indexable pair; size[0] and size[1] become the video width and height.

  Returns:
    The decoded image as a numpy array.
  """
  width, height = size[0], size[1]
  snippet = webcam_input_html % (width, height, quality)
  display(HTML(snippet))
  data_url = eval_js("data")
  # A data URL has the form "<header>,<base64 payload>"; keep only the payload.
  encoded_jpeg = data_url.split(',')[1]
  jpeg_stream = io.BytesIO(b64decode(encoded_jpeg))
  return np.asarray(Image.open(jpeg_stream))

# Capture one webcam frame with the default size and quality; `frame` holds the
# image array returned by get_camera_input and is the input to the detection cell.
frame = get_camera_input()

In [0]:
import cnnmot.yolo.model as yolo_model

# Download the weights of yolov3 model
!wget  https://pjreddie.com/media/files/yolov3.weights
model = yolo_model.make_yolov3_model()
# load the model weights
weight_reader = yolo_model.WeightReader('yolov3.weights')
# set the model weights into the model
weight_reader.load_weights(model)

In [0]:
from numpy import expand_dims
from keras.models import load_model
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from cnnmot.yolo.preprocessing import process_image
from cnnmot.yolo.postprocessing import do_nms, correct_yolo_boxes, decode_netout, get_boxes
from cnnmot.yolo.output import draw_boxes

# Setup model variables
# Network input size; passed whole to process_image and element-wise
# ([0], then [1]) to decode_netout and correct_yolo_boxes.
input_shape = (416, 416)
# Score threshold handed to both decode_netout and get_boxes.
threshold = 0.6
# The 80 class names handed to get_boxes.
# NOTE(review): order presumably has to match the class indices of the
# pretrained weights -- do not reorder without confirming.
labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
          "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
          "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
          "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
          "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
          "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
          "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
          "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
          "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
          "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]
# One anchor list per network output: anchors[i] is used to decode yhat[i].
# Each list presumably holds three (width, height) pairs flattened -- confirm
# against decode_netout.
anchors = [[116, 90, 156, 198, 373, 326], [30, 61, 62, 45, 59, 119], [10, 13, 16, 30, 33, 23]]

# Turn the captured frame into the network input; process_image also returns
# two sizes (presumably the frame's own width and height) used for rescaling below.
image, image_w, image_h = process_image(frame, input_shape)
# Run the network on the prepared input.
yhat = model.predict(image)
# Decode each network output with the anchor list at the same index and pool
# all candidate boxes into one list.
boxes = []
for i, netout in enumerate(yhat):
    boxes.extend(decode_netout(netout[0], anchors[i], threshold, input_shape[0], input_shape[1]))
# Adjust the box sizes for the shape of the captured image.
correct_yolo_boxes(boxes, image_h, image_w, input_shape[0], input_shape[1])
# Suppress non-maximal boxes.
do_nms(boxes, 0.5)
# Collect the boxes, labels and scores of the detected objects.
v_boxes, v_labels, v_scores = get_boxes(boxes, labels, threshold)
# Draw the resulting prediction on the captured frame.
draw_boxes(frame, v_boxes, v_labels, v_scores)