# OpenCV Faster R-CNN

> ## Video Object Detection

* Pretrained 모델 파일 로드 후 OpenCV 기반 Object Detection 수행

In [1]:
import warnings
# Install an 'ignore' filter with no category/module restriction, i.e. suppress all warnings.
warnings.filterwarnings('ignore')

# I. Video File Download

* matrix.mp4 다운로드

In [2]:
!wget https://raw.githubusercontent.com/rusita-ai/pyData/master/image/matrix.mp4

--2023-04-17 04:04:05--  https://raw.githubusercontent.com/rusita-ai/pyData/master/image/matrix.mp4
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 386522 (377K) [application/octet-stream]
Saving to: ‘matrix.mp4’


2023-04-17 04:04:05 (80.2 MB/s) - ‘matrix.mp4’ saved [386522/386522]



* matrix.mp4 확인

In [3]:
import io
import base64
from IPython.display import HTML

# Read the clip as raw bytes. The context manager closes the file handle as soon as
# the read finishes, and read-only binary mode ('rb') is enough because nothing is
# written back to the file.
with io.open('/content/matrix.mp4', 'rb') as video_file:
    video = video_file.read()

# Base64-encode the bytes so they can be embedded in a data: URI below.
encoded = base64.b64encode(video)

# The HTML object is the cell's last expression, so the notebook renders the player.
HTML(data='''<video width = "70%" controls>
    <source src = "data:video/mp4;base64,{0}" type = "video/mp4"/>
    </video>'''.format(encoded.decode('ascii')))

# II. Video Frame Information

> ## 1) VideoCapture( )

* cv2.CAP_PROP_FRAME_COUNT

In [4]:
import cv2

# Open the downloaded clip and report the frame count taken from CAP_PROP_FRAME_COUNT.
video_input_path = '/content/matrix.mp4'
cap = cv2.VideoCapture(video_input_path)

frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f'Frame Count: {frame_cnt}')

Frame Count: 134


> ## 2) VideoWriter( )

* Encoding Codec and FPS and Size

In [5]:
# Source clip and destination for the annotated copy.
video_input_path = '/content/matrix.mp4'
video_output_path = '/content/matrix_rcnn.mp4'
cap = cv2.VideoCapture(video_input_path)

# Take the frame rate and (width, height) from the source so the writer is
# configured with the same values.
vid_fps = cap.get(cv2.CAP_PROP_FPS)
vid_size = tuple(
    round(cap.get(prop))
    for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
)

# Four-character code selecting the encoder passed to the writer.
codec = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')
vid_writer = cv2.VideoWriter(video_output_path, codec, vid_fps, vid_size)

print(f'FPS Count: {vid_fps}')
print(f'Video Size: {vid_size}')

FPS Count: 30.0
Video Size: (640, 360)


# III. Object Detection API Download

> ## 1) Pretrained Model Download

* https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API
* http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz
* faster_rcnn_resnet50_coco_2018_01_28.tar.gz 다운로드

In [6]:
!wget http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz

--2023-04-17 04:06:53--  http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz
Resolving download.tensorflow.org (download.tensorflow.org)... 172.253.62.128, 2607:f8b0:4004:c1b::80
Connecting to download.tensorflow.org (download.tensorflow.org)|172.253.62.128|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 381355771 (364M) [application/x-tar]
Saving to: ‘faster_rcnn_resnet50_coco_2018_01_28.tar.gz’


2023-04-17 04:06:54 (272 MB/s) - ‘faster_rcnn_resnet50_coco_2018_01_28.tar.gz’ saved [381355771/381355771]



* faster_rcnn_resnet50_coco_2018_01_28.tar.gz 압축해제

In [7]:
# Extract the archive (x: extract, v: list each extracted entry, f: archive file name).
!tar -xvf faster_rcnn_resnet50_coco_2018_01_28.tar.gz

faster_rcnn_resnet50_coco_2018_01_28/
faster_rcnn_resnet50_coco_2018_01_28/model.ckpt.index
faster_rcnn_resnet50_coco_2018_01_28/checkpoint
faster_rcnn_resnet50_coco_2018_01_28/pipeline.config
faster_rcnn_resnet50_coco_2018_01_28/model.ckpt.data-00000-of-00001
faster_rcnn_resnet50_coco_2018_01_28/model.ckpt.meta
faster_rcnn_resnet50_coco_2018_01_28/saved_model/
faster_rcnn_resnet50_coco_2018_01_28/saved_model/saved_model.pb
faster_rcnn_resnet50_coco_2018_01_28/saved_model/variables/
faster_rcnn_resnet50_coco_2018_01_28/frozen_inference_graph.pb


* 결과 확인
  * frozen_inference_graph.pb

In [8]:
# List the extracted directory to confirm frozen_inference_graph.pb is present.
!ls -l /content/faster_rcnn_resnet50_coco_2018_01_28

total 296068
-rw-r--r-- 1 345018 5000        77 Feb  1  2018 checkpoint
-rw-r--r-- 1 345018 5000 120549957 Feb  1  2018 frozen_inference_graph.pb
-rw-r--r-- 1 345018 5000 176914228 Feb  1  2018 model.ckpt.data-00000-of-00001
-rw-r--r-- 1 345018 5000     14460 Feb  1  2018 model.ckpt.index
-rw-r--r-- 1 345018 5000   5675175 Feb  1  2018 model.ckpt.meta
-rw-r--r-- 1 345018 5000      3240 Feb  1  2018 pipeline.config
drwxr-xr-x 3 345018 5000      4096 Feb  1  2018 saved_model


> ## 2) Model Configuration Download

* https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/faster_rcnn_resnet50_coco_2018_01_28.pbtxt

In [9]:
!wget https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/faster_rcnn_resnet50_coco_2018_01_28.pbtxt

--2023-04-17 04:13:13--  https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/faster_rcnn_resnet50_coco_2018_01_28.pbtxt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 90893 (89K) [text/plain]
Saving to: ‘faster_rcnn_resnet50_coco_2018_01_28.pbtxt’


2023-04-17 04:13:13 (31.6 MB/s) - ‘faster_rcnn_resnet50_coco_2018_01_28.pbtxt’ saved [90893/90893]



# IV. Model and Configuration load

* cv2.dnn.readNetFromTensorflow()

In [10]:
# Load the network from the frozen TensorFlow graph (.pb, first argument) and the
# downloaded .pbtxt graph description (second argument).
cv_net = cv2.dnn.readNetFromTensorflow('/content/faster_rcnn_resnet50_coco_2018_01_28/frozen_inference_graph.pb',
                                       '/content/faster_rcnn_resnet50_coco_2018_01_28.pbtxt')

# V. Class ID Setting

* COCO Dataset

> ## 1) OpenCV - TensorFlow

* Faster R-CNN

In [11]:
# Class-id -> class-name lookup used when captioning detections.
# The position of each name in the list is its class id (0 .. 90).
labels_to_names_0 = dict(enumerate([
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'street sign', 'stop sign', 'parking meter', 'bench',
    'bird', 'cat', 'dog', 'horse', 'sheep',
    'cow', 'elephant', 'bear', 'zebra', 'giraffe',
    'hat', 'backpack', 'umbrella', 'shoe', 'eye glasses',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate',
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    'mirror', 'dining table', 'window', 'desk', 'toilet',
    'door', 'tv', 'laptop', 'mouse', 'remote',
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
    'sink', 'refrigerator', 'blender', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
    'hair brush',
]))

# VI. Object Detection

* 약 20분
* 총 Frame 별로 iteration 하면서 Object Detection 수행. 개별 frame별로 단일 이미지 Object Detection과 유사

In [12]:
%%time

import time

# Colour tuples handed to the OpenCV drawing calls (box colour / caption colour).
green_color = (0, 255, 0)
red_color = (0, 0, 255)

# This cell consumes `cap` and `vid_writer` created in the VideoWriter cell, plus
# `cv_net` and `labels_to_names_0`; re-run those cells first when re-running this one,
# because both handles are released at the end.
try:
  while True:
    hasFrame, img_frame = cap.read()
    if not hasFrame:
      print('No more Frames')
      break

    rows = img_frame.shape[0]
    cols = img_frame.shape[1]
    cv_net.setInput(cv2.dnn.blobFromImage(img_frame, swapRB = True))
    # Time only the forward pass plus the drawing that follows it.
    start = time.time()
    cv_out = cv_net.forward()

    # Each row is read as [_, class_id, score, left, top, right, bottom]; the box
    # values are scaled by the frame width/height to obtain pixel coordinates.
    for detection in cv_out[0, 0, :, :]:
      score = float(detection[2])
      class_id = int(detection[1])
      if score > 0.5:
        left = detection[3] * cols
        top = detection[4] * rows
        right = detection[5] * cols
        bottom = detection[6] * rows
        caption = "{}: {:.4f}".format(labels_to_names_0[class_id], score)
        cv2.rectangle(img_frame, (int(left), int(top)), (int(right), int(bottom)), color = green_color, thickness = 2)
        cv2.putText(img_frame, caption, (int(left), int(top - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, red_color, 1)
    print('Detection Time:', round(time.time() - start, 2), '초')
    vid_writer.write(img_frame)
finally:
  # Release both handles even when a frame fails, so the output file is closed
  # and the capture is not left open.
  vid_writer.release()
  cap.release()

Detection Time: 7.49 초
Detection Time: 7.71 초
Detection Time: 7.91 초
Detection Time: 6.84 초
Detection Time: 8.05 초
Detection Time: 6.52 초
Detection Time: 7.84 초
Detection Time: 6.89 초
Detection Time: 7.85 초
Detection Time: 7.83 초
Detection Time: 6.59 초
Detection Time: 7.85 초
Detection Time: 6.75 초
Detection Time: 7.89 초
Detection Time: 6.61 초
Detection Time: 7.92 초
Detection Time: 7.57 초
Detection Time: 6.9 초
Detection Time: 7.85 초
Detection Time: 6.69 초
Detection Time: 7.84 초
Detection Time: 6.63 초
Detection Time: 7.99 초
Detection Time: 6.78 초
Detection Time: 7.51 초
Detection Time: 7.97 초
Detection Time: 6.61 초
Detection Time: 7.9 초
Detection Time: 6.69 초
Detection Time: 7.86 초
Detection Time: 6.61 초
Detection Time: 7.87 초
Detection Time: 7.5 초
Detection Time: 6.93 초
Detection Time: 7.93 초
Detection Time: 6.68 초
Detection Time: 7.86 초
Detection Time: 6.68 초
Detection Time: 7.92 초
Detection Time: 7.25 초
Detection Time: 7.14 초
Detection Time: 7.88 초
Detection Time: 6.64 초
Detection Time

# VII. Define Object Detection Function

> ## 1) get_detected_img( ) 정의

In [13]:
import time
def get_detected_img(cv_net, img_array, score_threshold, use_copied_array = True, is_print = True):
  """Run the detector on one image and draw the boxes that pass the threshold.

  Args:
    cv_net: network object exposing setInput() and forward().
    img_array: image array; shape[0] is used as the height and shape[1] as the width.
    score_threshold: only detections with score strictly greater than this are drawn.
    use_copied_array: when True, draw on a copy and leave img_array untouched;
      when False, draw directly on img_array.
    is_print: when True, print the elapsed detection time.

  Returns:
    The image that was drawn on (the copy, or img_array itself).

  Note:
    Inference and drawing run for both values of use_copied_array. Previously they
    were nested under the `else:` branch, so the default (use_copied_array=True)
    returned an unannotated copy without running the network.
  """
  rows = img_array.shape[0]
  cols = img_array.shape[1]

  # Pick the drawing target only; everything below runs in both cases.
  draw_img = img_array.copy() if use_copied_array else img_array

  cv_net.setInput(cv2.dnn.blobFromImage(img_array, swapRB = True))
  start = time.time()
  cv_out = cv_net.forward()
  green_color = (0, 255, 0)
  red_color = (0, 0, 255)

  # Each row is read as [_, class_id, score, left, top, right, bottom]; the box
  # values are scaled by the image width/height to obtain pixel coordinates.
  for detection in cv_out[0, 0, :, :]:
    score = float(detection[2])
    class_id = int(detection[1])

    # Keep only detections scoring above score_threshold.
    if score > score_threshold:
      left = detection[3] * cols
      top = detection[4] * rows
      right = detection[5] * cols
      bottom = detection[6] * rows
      caption = "{}: {:.4f}".format(labels_to_names_0[class_id], score)
      print(caption)
      cv2.rectangle(draw_img, (int(left), int(top)), (int(right), int(bottom)), color = green_color, thickness = 2)
      cv2.putText(draw_img, caption, (int(left), int(top - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, red_color, 1)

  if is_print:
    print('Detection Time:',round(time.time() - start, 2), '초')
  return draw_img

> ## 2) do_detected_video( ) 정의

In [14]:
def do_detected_video(cv_net, input_path, output_path, score_threshold, is_print):
  """Run get_detected_img on every frame of a video and write the annotated frames.

  Args:
    cv_net: network object forwarded to get_detected_img.
    input_path: path of the video to read.
    output_path: path of the video to write; it is created with the 'XVID' fourcc
      and the frame rate and frame size reported by the input capture.
    score_threshold: forwarded to get_detected_img.
    is_print: forwarded to get_detected_img.

  Returns:
    None. Prints the frame count up front and 'No more Frames' when the input ends.
  """
  cap = cv2.VideoCapture(input_path)
  vid_writer = None
  try:
    codec = cv2.VideoWriter_fourcc(*'XVID')
    vid_size = (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    vid_fps = cap.get(cv2.CAP_PROP_FPS)
    vid_writer = cv2.VideoWriter(output_path, codec, vid_fps, vid_size)
    frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print('Frame Count:', frame_cnt)
    while True:
      hasFrame, img_frame = cap.read()
      if not hasFrame:
        print('No more Frames')
        break
      # use_copied_array = False: the frame is annotated in place, no extra copy per frame.
      img_frame = get_detected_img(cv_net, img_frame, score_threshold = score_threshold, use_copied_array = False, is_print = is_print)
      vid_writer.write(img_frame)
  finally:
    # Always close the writer and the capture, including when detection raises mid-video.
    if vid_writer is not None:
      vid_writer.release()
    cap.release()

> ## 3) korea2.mp4 Download

In [15]:
!wget https://raw.githubusercontent.com/rusita-ai/pyData/master/image/korea2.mp4

--2023-04-17 04:34:58--  https://raw.githubusercontent.com/rusita-ai/pyData/master/image/korea2.mp4
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1447138 (1.4M) [application/octet-stream]
Saving to: ‘korea2.mp4’


2023-04-17 04:34:58 (79.4 MB/s) - ‘korea2.mp4’ saved [1447138/1447138]



* korea2.mp4 확인

In [16]:
import io
import base64
from IPython.display import HTML

# Read the clip as raw bytes. The context manager closes the file handle as soon as
# the read finishes, and read-only binary mode ('rb') is enough because nothing is
# written back to the file.
with io.open('/content/korea2.mp4', 'rb') as video_file:
    video = video_file.read()

# Base64-encode the bytes so they can be embedded in a data: URI below.
encoded = base64.b64encode(video)

# The HTML object is the cell's last expression, so the notebook renders the player.
HTML(data='''<video width = "70%" controls>
  <source src = "data:video/mp4;base64,{0}" type = "video/mp4"/>
  </video>'''.format(encoded.decode('ascii')))

> ## 4) do_detected_video( ) 실행

* 약 33분

In [17]:
%%time
# Annotate korea2.mp4 using a 0.3 score threshold, with is_print turned off.
do_detected_video(
    cv_net,
    input_path = '/content/korea2.mp4',
    output_path = '/content/korea2_rcnn.mp4',
    score_threshold = 0.3,
    is_print = False,
)

Frame Count: 251
person: 0.8919
backpack: 0.7089
person: 0.9235
backpack: 0.5941
person: 0.9297
backpack: 0.6627
backpack: 0.3945
person: 0.9039
backpack: 0.7465
backpack: 0.3817
person: 0.9426
backpack: 0.5906
backpack: 0.3256
person: 0.9157
backpack: 0.4152
person: 0.9155
backpack: 0.4344
person: 0.9353
backpack: 0.4038
person: 0.9500
person: 0.9259
backpack: 0.3012
person: 0.8858
person: 0.9180
person: 0.9040
person: 0.9653
person: 0.9690
person: 0.8520
person: 0.4351
person: 0.6908
person: 0.6078
person: 0.5596
car: 0.9960
car: 0.9851
car: 0.8598
person: 0.7734
person: 0.7609
person: 0.6983
person: 0.5518
person: 0.5120
person: 0.4585
person: 0.4038
person: 0.3625
person: 0.3454
person: 0.3250
car: 0.9976
car: 0.9840
car: 0.8092
person: 0.6140
person: 0.5295
person: 0.5170
person: 0.5027
person: 0.4947
person: 0.4889
person: 0.4738
person: 0.4284
person: 0.3892
person: 0.3745
person: 0.3655
person: 0.3012
car: 0.9991
car: 0.9567
car: 0.6855
person: 0.5087
car: 0.5040
person: 0.4972