<a href="https://colab.research.google.com/github/tonyscan6003/CE6003/blob/master/Example_5_2_CNN_Object_det.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Example 5_2: Simple Object Detection in Video

This example notebook demonstrates object detection on YouTube videos using the TensorFlow Object Detection API and OpenCV. It can be used as a starting point for object detection and tracking projects.
![link text](https://github.com/tonyscan6003/CE6003/blob/master/images/plan_land_example_5_2.JPG?raw=true)


Note that this notebook only demonstrates raw object detection with the pre-trained TensorFlow object detection models on a video stream. [Object tracking](https://arxiv.org/abs/1907.12740) would also need to be implemented to ensure consistent identification of an object from frame to frame. (The notebook is configured to display detections of just one object type. See Step 4 to change the detected object type.)

In Step 3 below, the object detection model can be chosen from a selection of models available in the TensorFlow Object Detection API, including SSD and R-CNN implementations.

In Step 4, the test video can be changed, along with the target object, the number of frames processed and the starting frame in the video. (Note that 1 second of video may correspond to 30 or 60 frames, depending on the frame rate of the source video.)


Mount Google Drive to store the output video.

In [None]:
# Mount Google Drive at /content/gdrive so that the output video can later be
# copied into it.
# Reference: https://stackoverflow.com/questions/46986398/import-data-into-google-colaboratory?rq=1
from google.colab import drive
drive.mount('/content/gdrive')

# Step 1:
Obtain access to a YouTube video for object detection, using [Pafy](https://pypi.org/project/pafy/), a Python library for downloading YouTube content and retrieving metadata.

In [None]:
# Install pafy and upgrade youtube_dl to its latest release
# (NOTE(review): youtube_dl is presumably the backend pafy relies on - confirm).
!pip install pafy
!pip install --upgrade youtube_dl
import cv2, pafy
# Colab-specific replacement for cv2.imshow.
# NOTE(review): cv2_imshow is not used by any cell visible in this notebook.
from google.colab.patches import cv2_imshow


# Step 2:
Set up the [TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection).

In [None]:
import os
import pathlib
from tqdm import tqdm  
import matplotlib
import matplotlib.pyplot as plt

import io
import scipy.misc
import numpy as np
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont
from six.moves.urllib.request import urlopen

import tensorflow as tf
import tensorflow_hub as hub

tf.get_logger().setLevel('ERROR')
print(tf.__version__)
import numpy as np

In [None]:
# Clone the tensorflow models repository into ./models.
# --depth 1 makes a shallow clone (latest commit only) to keep the download small.
!git clone --depth 1 https://github.com/tensorflow/models

In [None]:
%%bash
# Build and install the TensorFlow Object Detection API from the cloned repo.
# Abort on the first failing command: without this, a failed `cd` would let the
# protoc / cp / pip steps below run in the wrong directory.
set -e
sudo apt install -y protobuf-compiler
cd models/research/
# Compile the protobuf definitions into Python modules next to their sources.
protoc object_detection/protos/*.proto --python_out=.
# Use the TF2 packaging script, then install the package into the current Python.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

In [None]:
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops

%matplotlib inline


In [None]:
def load_image_into_numpy_array(path):
  """Load an image from a URL or a file into a batched numpy array.

  Puts the image into a numpy array to feed into the tensorflow graph.
  The image is always converted to 3-channel RGB, so grayscale, palette and
  RGBA inputs no longer break the reshape to (1, height, width, 3).

  Args:
    path: the file path of the image, or a URL starting with 'http'

  Returns:
    uint8 numpy array with shape (1, img_height, img_width, 3); the leading
    axis is a batch dimension of size 1.
  """
  if path.startswith('http'):
    # Close the HTTP response as soon as the bytes have been read.
    with urlopen(path) as response:
      image_data = response.read()
  else:
    # Context manager closes the file handle (the original leaked it).
    with tf.io.gfile.GFile(path, 'rb') as image_file:
      image_data = image_file.read()

  # Force RGB so the channel count is always 3, whatever the source format.
  image = Image.open(BytesIO(image_data)).convert('RGB')

  # np.array copies the pixel data, so the returned array is writable.
  return np.array(image, dtype=np.uint8)[np.newaxis, ...]


# Display name -> TensorFlow Hub handle for every selectable detection model.
# Entries are grouped by model family; insertion order is unchanged.
ALL_MODELS = {
    # CenterNet
    'CenterNet HourGlass104 512x512': 'https://tfhub.dev/tensorflow/centernet/hourglass_512x512/1',
    'CenterNet HourGlass104 Keypoints 512x512': 'https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1',
    'CenterNet HourGlass104 1024x1024': 'https://tfhub.dev/tensorflow/centernet/hourglass_1024x1024/1',
    'CenterNet HourGlass104 Keypoints 1024x1024': 'https://tfhub.dev/tensorflow/centernet/hourglass_1024x1024_kpts/1',
    'CenterNet Resnet50 V1 FPN 512x512': 'https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512/1',
    'CenterNet Resnet50 V1 FPN Keypoints 512x512': 'https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512_kpts/1',
    'CenterNet Resnet101 V1 FPN 512x512': 'https://tfhub.dev/tensorflow/centernet/resnet101v1_fpn_512x512/1',
    'CenterNet Resnet50 V2 512x512': 'https://tfhub.dev/tensorflow/centernet/resnet50v2_512x512/1',
    'CenterNet Resnet50 V2 Keypoints 512x512': 'https://tfhub.dev/tensorflow/centernet/resnet50v2_512x512_kpts/1',
    # EfficientDet
    'EfficientDet D0 512x512': 'https://tfhub.dev/tensorflow/efficientdet/d0/1',
    'EfficientDet D1 640x640': 'https://tfhub.dev/tensorflow/efficientdet/d1/1',
    'EfficientDet D2 768x768': 'https://tfhub.dev/tensorflow/efficientdet/d2/1',
    'EfficientDet D3 896x896': 'https://tfhub.dev/tensorflow/efficientdet/d3/1',
    'EfficientDet D4 1024x1024': 'https://tfhub.dev/tensorflow/efficientdet/d4/1',
    'EfficientDet D5 1280x1280': 'https://tfhub.dev/tensorflow/efficientdet/d5/1',
    'EfficientDet D6 1280x1280': 'https://tfhub.dev/tensorflow/efficientdet/d6/1',
    'EfficientDet D7 1536x1536': 'https://tfhub.dev/tensorflow/efficientdet/d7/1',
    # SSD (MobileNet and ResNet/RetinaNet backbones)
    'SSD MobileNet v2 320x320': 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/2',
    'SSD MobileNet V1 FPN 640x640': 'https://tfhub.dev/tensorflow/ssd_mobilenet_v1/fpn_640x640/1',
    'SSD MobileNet V2 FPNLite 320x320': 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_320x320/1',
    'SSD MobileNet V2 FPNLite 640x640': 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_640x640/1',
    'SSD ResNet50 V1 FPN 640x640 (RetinaNet50)': 'https://tfhub.dev/tensorflow/retinanet/resnet50_v1_fpn_640x640/1',
    'SSD ResNet50 V1 FPN 1024x1024 (RetinaNet50)': 'https://tfhub.dev/tensorflow/retinanet/resnet50_v1_fpn_1024x1024/1',
    'SSD ResNet101 V1 FPN 640x640 (RetinaNet101)': 'https://tfhub.dev/tensorflow/retinanet/resnet101_v1_fpn_640x640/1',
    'SSD ResNet101 V1 FPN 1024x1024 (RetinaNet101)': 'https://tfhub.dev/tensorflow/retinanet/resnet101_v1_fpn_1024x1024/1',
    'SSD ResNet152 V1 FPN 640x640 (RetinaNet152)': 'https://tfhub.dev/tensorflow/retinanet/resnet152_v1_fpn_640x640/1',
    'SSD ResNet152 V1 FPN 1024x1024 (RetinaNet152)': 'https://tfhub.dev/tensorflow/retinanet/resnet152_v1_fpn_1024x1024/1',
    # Faster R-CNN
    'Faster R-CNN ResNet50 V1 640x640': 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_640x640/1',
    'Faster R-CNN ResNet50 V1 1024x1024': 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_1024x1024/1',
    'Faster R-CNN ResNet50 V1 800x1333': 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_800x1333/1',
    'Faster R-CNN ResNet101 V1 640x640': 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_640x640/1',
    'Faster R-CNN ResNet101 V1 1024x1024': 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_1024x1024/1',
    'Faster R-CNN ResNet101 V1 800x1333': 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_800x1333/1',
    'Faster R-CNN ResNet152 V1 640x640': 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_640x640/1',
    'Faster R-CNN ResNet152 V1 1024x1024': 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_1024x1024/1',
    'Faster R-CNN ResNet152 V1 800x1333': 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_800x1333/1',
    'Faster R-CNN Inception ResNet V2 640x640': 'https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1',
    'Faster R-CNN Inception ResNet V2 1024x1024': 'https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_1024x1024/1',
    # Mask R-CNN
    'Mask R-CNN Inception ResNet V2 1024x1024': 'https://tfhub.dev/tensorflow/mask_rcnn/inception_resnet_v2_1024x1024/1',
}


In [None]:
# Location of the MS COCO label map inside the cloned tensorflow/models repo,
# relative to the notebook's working directory.
PATH_TO_LABELS = './models/research/object_detection/data/mscoco_label_map.pbtxt'
# Build the category index from the label map, using display names.
# NOTE(review): presumably maps numeric class ids to category entries - confirm
# against label_map_util. It is not referenced by any other cell visible here.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

# Step 3: Select Model
In the code cell below, the object detection model can be selected from the list above.

In [None]:
# Choose the detector by display name. The name must be one of the keys of
# ALL_MODELS; an unknown name raises KeyError on the lookup below.
model_display_name = 'CenterNet HourGlass104 512x512'
model_handle = ALL_MODELS[model_display_name]

# Echo the selection so the notebook output records which model was used.
print('Selected model:'+ model_display_name)
print('Model Handle at TensorFlow Hub: {}'.format(model_handle))

In [None]:
# Load the selected detector from its TensorFlow Hub handle.
# NOTE(review): presumably downloads the model on first use, so this cell may
# take a while - confirm.
print('loading model...')
hub_model = hub.load(model_handle)
print('model loaded!')

# Step 4
Run the model and process the video frame by frame, writing an output video with the bounding boxes added. (Note that more information, including the class, is available from the object detector and can be annotated onto the video.)

Note that the object to be detected in the video must be selected from the [MS COCO labels](https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/). Set `obj_label` to the corresponding label value from the list to identify the object of interest.





In [None]:
# Configuration: source video, frame window to process, and target object class.
url = "https://www.youtube.com/watch?v=3FXUw98rrUY"   # Recognising pedestrians (set obj_label = 1 for Person)
#url = "https://www.youtube.com/watch?v=Psw6uL8x8Ak"     # Airport take-off (set obj_label = 5 for Airplane)

no_frames = 250         # Maximum number of frames to process
strt_frame = 300         # Index of the first frame to process
obj_label = 1           # Object class to be detected (MS COCO label id)

In [None]:

# Helper function to add bounding boxes to the current frame.
# Only detections of a single object class are drawn.
greenColor = (0, 255, 0)   # Box colour (green in both BGR and RGB channel order)
lineThickness = 2          # Box outline thickness in pixels

def box_2_pixel(bb, frame, min_score=0.5, target_label=None):
    """Draw boxes for confident detections of one object class onto `frame`.

    Args:
      bb: the detection result of the model for one frame. Despite its name it
        is the whole result mapping (the caller passes `results`), and must
        provide 'detection_boxes', 'detection_scores' and 'detection_classes',
        each with a leading batch axis of which only element 0 is used.
        Each box is [ymin, xmin, ymax, xmax] in normalized image coordinates.
      frame: image array of shape (height, width, ...) that is modified in
        place by cv2.rectangle.
      min_score: a detection is drawn only if its score is strictly greater
        than this value (default 0.5, the previously hard-coded threshold).
      target_label: class id to draw. When None, the module-level `obj_label`
        configuration value is used, as before.

    Returns:
      None. The boxes are drawn directly on `frame`.
    """
    if target_label is None:
        target_label = obj_label

    # Read the detections from the argument (the original overwrote `bb` with
    # the global `results`) and convert them to numpy once, instead of issuing
    # several tensor casts per box inside the loop.
    boxes = np.asarray(bb['detection_boxes'][0]).reshape(-1, 4)
    scores = np.asarray(bb['detection_scores'][0]).reshape(-1)
    classes = np.asarray(bb['detection_classes'][0]).reshape(-1).astype(np.int32)

    frame_h, frame_w = np.shape(frame)[:2]

    # Keep only confident detections of the requested class.
    keep = (scores > min_score) & (classes == target_label)

    # Clip to [0, 1]: boxes slightly outside the image would otherwise yield
    # negative pixel values (which wrapped around in the old unsigned cast).
    for ymin, xmin, ymax, xmax in np.clip(boxes[keep], 0.0, 1.0):
        top_left = (int(xmin * frame_w), int(ymin * frame_h))
        bottom_right = (int(xmax * frame_w), int(ymax * frame_h))
        # Add bounding box rectangle to current frame
        cv2.rectangle(frame, top_left, bottom_right, greenColor, lineThickness)

In [None]:
# Use Pafy to read video frames, apply the tensorflow model and write the
# annotated frames to a video file using the OpenCV writer functions.
writer = None
video = pafy.new(url)
frame_no = 0            # Frame counter initialised to 0
print(video.title)
streams = video.streams

# List the available streams (resolution, container, size and direct URL).
for s in streams:
    print(s.resolution, s.extension, s.get_filesize(), s.url)
best  = video.getbest(preftype="mp4")
capture = cv2.VideoCapture(best.url)

try:
  if not capture.isOpened():
    raise RuntimeError('Could not open the video stream for: ' + url)

  # Seek to the configured starting frame (named constant instead of the bare 1).
  capture.set(cv2.CAP_PROP_POS_FRAMES, strt_frame)

  # Write the output at the source frame rate so playback speed is preserved;
  # fall back to 30 fps when the backend cannot report it (0, negative or NaN).
  fps = capture.get(cv2.CAP_PROP_FPS)
  if not (fps > 0):
    fps = 30

  for vals in tqdm(range(no_frames)):
    # Read in frames
    check, frame = capture.read()

    # Stop at the end of the stream or on a failed read.
    if not check or frame is None:
      break

    # OpenCV decodes frames as BGR; convert to RGB for the detector input only.
    # The drawing and the video writer below keep using the BGR frame.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process with tracker/object detector (input needs a leading batch axis).
    results = hub_model(np.expand_dims(rgb_frame, axis=0))
    frame_no += 1

    # Draw the final bounding boxes (drawn in place on `frame`)
    box_2_pixel(results, frame)

    # Build a frame of our output video
    if writer is None:
      # Initialize our video writer lazily, once the frame size is known
      fourcc = cv2.VideoWriter_fourcc(*'VP80') #codec
      writer = cv2.VideoWriter('video.webm', fourcc, fps, (frame.shape[1], frame.shape[0]), True)

    # Write to video
    writer.write(frame)
finally:
  # Always free the decoder, and finalise the output file if one was created.
  # (The original called writer.release() unconditionally, which raised
  # AttributeError when no frame had been read.)
  capture.release()
  if writer is not None:
    writer.release()

if frame_no == 0:
  print('No frames were processed; video.webm was not written.')


In [None]:
# Copy the output video into the Google Drive folder mounted under /content/gdrive.
!cp video.webm /content/gdrive/MyDrive/video.webm
# Uncomment to list the contents of the Drive folder:
#!ls /content/gdrive/MyDrive

In [None]:
from IPython.display import HTML
from base64 import b64encode

# Read the output video; the with-block closes the file handle, which the
# original open(...).read() left open.
with open('video.webm','rb') as video_file:
    webm = video_file.read()

# Embed the video bytes as a base64 data URL so the player needs no file server.
data_url = "data:video/webm;base64," + b64encode(webm).decode()

# Last expression of the cell: rendered inline as an HTML5 video player.
HTML("""
<video width=400 controls>
      <source src="%s" type="video/webm">
</video>
""" % data_url)