# **Object Detection with OpenCV**
![대체 텍스트](https://www.pyimagesearch.com/wp-content/uploads/2017/09/example06_result.jpg)

we’ll discuss how to apply object detection using deep learning and OpenCV.

When it comes to deep learning-based object detection there are three primary object detection methods that you’ll likely encounter:



1.   Faster R-CNNs (Girshick et al., 2015) - 7 FPS.
2.   You Only Look Once (YOLO) (Redmon and Farhadi, 2015) - 40-90 FPS.
3.   Single Shot Detectors (SSDs) (Liu et al., 2015) - 22-46 FPS.

object detection networks we normally use an existing network architecture



1.   VGG
2.   ResNet
3.   MobileNets

these network architectures(VGG, ResNet) can be very large in the order of 200-500MB. could be unsuitable for resource constrained devices.
“MobileNets", they are designed for resource constrained devices such as your smartphone.  



**MobileNet architecture and the Single Shot Detector (SSD) framework, we arrive at a fast, efficient deep learning-based method to object detection.**











# Data Load of image, video and model

In [1]:
%%shell
mkdir objectDetection
cd objectDetection
mkdir model
curl https://raw.githubusercontent.com/djmv/MobilNet_SSD_opencv/master/MobileNetSSD_deploy.prototxt > ./model/MobileNetSSD_deploy.prototxt
curl https://github.com/djmv/MobilNet_SSD_opencv/blob/master/MobileNetSSD_deploy.caffemodel > ./model/MobileNetSSD_deploy.caffemodel
mkdir images
wget https://raw.githubusercontent.com/djmv/MobilNet_SSD_opencv/master/img.jpeg -P./images

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100 29353  100 29353    0     0   139k      0 --:--:-- --:--:-- --:--:--  139k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 80275    0 80275    0     0   237k      0 --:--:-- --:--:-- --:--:--  237k
--2020-08-04 12:29:22--  https://raw.githubusercontent.com/djmv/MobilNet_SSD_opencv/master/img.jpeg
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 337503 (330K) [image/jpeg]
Saving to: ‘./images/img.jpeg’


2020-08-04 



In [2]:
# import the necessary packages
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
import os

In [27]:
# Root directory of the project
ROOT_DIR = os.path.abspath("./objectDetection/")
# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "model")
# Directory of images/video to run detection on
IMAGE_DIR = os.path.join(ROOT_DIR, "images")

prototxtPath = os.path.sep.join([MODEL_DIR, "MobileNetSSD_deploy.prototxt"])  
weightsPath = os.path.sep.join([MODEL_DIR, "MobileNetSSD_deploy.caffemodel"]) 
objectNet = cv2.dnn.readNetFromCaffe(prototxtPath, weightsPath)

In [4]:
# initialize the list of class labels MobileNet SSD was trained to
# detect, then generate a set of bounding box colors for each class
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat","bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
	          "dog", "horse", "motorbike", "person", "pottedplant", "sheep","sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
conf_threshold = 0.25     

# **1**.Detection model demo by picture

In [6]:
# load the input image
image_list = []
for file in os.listdir(IMAGE_DIR):
  file_path = os.path.join(IMAGE_DIR, file)
  if(os.path.isfile(file_path)): 
    image = cv2.imread(file_path)
    print("The image of {} : {}".format(file, image.shape))
    image_list.append(image)   

The image of img.jpeg : (1058, 1880, 3)


In [7]:
# construct an input blob for the image
blob_list = []
for image in image_list:
  blob = cv2.dnn.blobFromImage(image, 0.007843, (300, 300), 127.5)   
  blob_list.append(blob)

print("The Blob : {}".format(np.array(blob_list).shape))

The Blob : (1, 1, 3, 300, 300)


In [8]:
# pass the blob through the network and obtain the detections and predictions
detections_list = []
for blob in blob_list:
  objectNet.setInput(blob)
  detections = objectNet.forward()    
  detections_list.append(detections)
print("The Detections: {}".format(np.array(detections_list).shape))

The Detections: (1, 1, 1, 100, 7)


In [10]:
# loop over the detections
for i, detections in enumerate(detections_list):
	for j in np.arange(0, detections.shape[2]):
		confidence = detections[0, 0, j, 2]

		if confidence > conf_threshold:
			idx = int(detections[0, 0, j, 1])
			box = detections[0, 0, j, 3:7]* np.array([image_list[i].shape[1], image_list[i].shape[0], image_list[i].shape[1], image_list[i].shape[0]])    
			(startX, startY, endX, endY) = box.astype("int")

			label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
			cv2.rectangle(image_list[i], (startX, startY), (endX, endY), COLORS[idx], 2)
			y = startY - 15 if startY - 15 > 15 else startY + 15
			cv2.putText(image_list[i], label, (startX, y),cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)


In [11]:
# show the output image
for image in image_list:
  cv2_imshow(image)

Output hidden; open in https://colab.research.google.com to view.

# **2**.Detection model demo by video

In [None]:
%%shell
cd objectDetection
mkdir videos
# wget https://github.com/Tony607/blog_statics/releases/download/v1.0/trailer1.mp4 -P ./videos
cd videos
mkdir save

In [13]:
VIDEO_DIR = os.path.join(ROOT_DIR, "videos")
VIDEO_SAVE_DIR = os.path.join(VIDEO_DIR, "save")

## Processing Funtions

In [14]:
import base64, logging
import numpy as np
from PIL import Image
from io import BytesIO

def data_uri_to_img(uri):
  try:
    image = base64.b64decode(uri.split(',')[1], validate=True)
    image = Image.open(BytesIO(image))
    image = np.array(image, dtype=np.uint8); 
    return image
  except Exception as e:
    logging.exception(e);print('\n')
    return None

def video_to_data_url(filename):
    ext = filename.split('.')[-1]
    prefix = 'data:video/{};base64,'.format(ext)
    with open(filename, 'rb') as f:
        vidoe = f.read()
    return prefix + base64.b64encode(vidoe).decode()

In [15]:
def detect_and_predict_object(frame):
  height, width = frame.shape[:2]
  
  blob = cv2.dnn.blobFromImage(frame, 0.007843, (300, 300), 127.5)  
  objectNet.setInput(blob)
  detections = objectNet.forward()  

  for i in np.arange(0, detections.shape[2]):
    confidence = detections[0, 0, i, 2]

    if confidence > conf_threshold:           
      idx = int(detections[0, 0, i, 1])
      box = detections[0, 0, i, 3:7]* np.array([width, height, width, height])    
      (startX, startY, endX, endY) = box.astype("int")

      label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
      cv2.rectangle(frame, (startX, startY), (endX, endY), COLORS[idx], 2)
      y = startY - 15 if startY - 15 > 15 else startY + 15
      cv2.putText(frame, label, (startX, y),cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)



## Videdo Capture
Using a webcam to capture images for processing on the runtime.
Source: https://colab.research.google.com/notebooks/snippets/advanced_outputs.ipynb#scrollTo=2viqYx97hPMi




In [16]:
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode

# playing webcam or video with javascript
def take_photo(filename='photo.jpg', quality=0.8):
  js = Javascript('''     
    async function takePhoto(filename, quality) {
                  
      const div = document.createElement('div');
      document.body.appendChild(div);

      const exit = document.createElement('button');
      exit.textContent = 'Exit';
      div.appendChild(exit);

      const video = document.createElement('video');
      video.style.display = 'block';
           
      if('photo.jpg' == filename){
        const stream = await navigator.mediaDevices.getUserMedia({video: true}); 
        video.srcObject = stream;   
      }else{
        video.src = filename;
        video.type="video/mp4"
      }
      await video.play();  
      div.appendChild(video);       
                   

      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
      
      let jsLog = function(abc) {
        document.querySelector("#output-area").appendChild(document.createTextNode(`${abc}... `));
      }

      // when Exit button is clicked.   
      var isOpened = true; 
      var exitPromise = new Promise((resolve) => {exit.onclick = resolve});   
      exitPromise.then(()=>{isOpened = false; stream.getVideoTracks()[0].stop();});
      
      //when end of video
      var endPromise = new Promise((resolve) => {video.onended = resolve});   
      endPromise.then(()=>{isOpened = false; video.stop();});

      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;

      for (let i = 0; isOpened; i++) {
        canvas.getContext("2d").clearRect(0, 0, canvas.width, canvas.height);
        canvas.getContext('2d').drawImage(video, 0, 0);
        img = canvas.toDataURL('image/jpeg', quality);

        // jsLog(i + "sending");
        // Call a python function and send this image
        google.colab.kernel.invokeFunction('notebook.run_objectDetection', [img], {});
        // jsLog(i + "SENT");

        // wait for X miliseconds second, before next capture
        await new Promise(resolve => setTimeout(resolve, 250));        
      }       
      div.remove();      
    }    
    ''')  
  # make the provided HTML, part of the cell
  display(js)
  #call the takePhoto() JavaScript function
  eval_js('takePhoto({},{})'.format("'"+filename+"'", quality)) 

In [17]:
from google.colab import output
frame_count = 0
writer = None

# InvokeFunction
def run_objectDetection(uri): 
  global frame_count, writer

  image = data_uri_to_img(uri)   
  if writer is None:		
      fourcc = cv2.VideoWriter_fourcc(*'DIVX')  
      writer = cv2.VideoWriter(outVideo, fourcc, 2, (image.shape[1], image.shape[0]), True)
  try:    
    detect_and_predict_object(image)    

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 
    frame_count+=1    
    name = '{0}.jpg'.format(frame_count)
    name = os.path.join(VIDEO_SAVE_DIR, name)
    cv2.imwrite(name, image)

    if writer is not None:
      writer.write(image)
  except Exception as e:
    logging.exception(e)
    print('\n')

# register this function, so JS code could call this
output.register_callback('notebook.run_objectDetection', run_objectDetection)

## Apply Detection model

In [26]:
 %%shell
 cd objectDetection
 rm ./videos/save/* 
 rm ./videos/out.avi



In [21]:
inVideo = os.path.join(VIDEO_DIR, "pedestrian.mp4")
outVideo= os.path.join(VIDEO_DIR, "out.avi")

In [None]:
data_url = video_to_data_url(inVideo)
try: 
  # put the JS code in cell and run it
  take_photo()  
  if writer is not None:
    writer.release()
except Exception as e:
  logging.exception(e)
  print('\n')

## Downlod  to our local machine

In [23]:
from google.colab import files
files.download(outVideo)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>