In [None]:
import os
import sys
sys.path.append('../')

import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
%matplotlib inline

import cv2
import tensorflow as tf
from tf2_yolov4.anchors import YOLOV4_ANCHORS
from tf2_yolov4.model import YOLOv4

from app.tools import draw_boxes

### Configure camera resolution

In [None]:
# Capture resolutions (width, height) listed as possible for the NOVIGA webcam
CAMERA_RESOLUTIONS = (
    (640, 360),
    (640, 480),
    (800, 600),
    (1280, 720),
    (1280, 960),
)
# Select the capture resolution by its position in the table above
WIDTH, HEIGHT = CAMERA_RESOLUTIONS[1]
WIDTH, HEIGHT

### Configure CNN input shape

In [None]:
# Size of the image fed to the network, written as multiples of 32.
# Other settings kept for reference:
#   32 * 16, 32 * 12  -> rated "Good enough+"
#   WIDTH, HEIGHT     -> use the camera resolution unchanged
CNN_SIZE_UNIT = 32
WIDTH_CNN = CNN_SIZE_UNIT * 8   # rated "Good enough"
HEIGHT_CNN = CNN_SIZE_UNIT * 6
WIDTH_CNN, HEIGHT_CNN

### Load YOLOv4 model

In [None]:
# Value handed to the model as `yolo_max_boxes`
max_objects = 20

# Constructor arguments collected in one place so they are easy to tune
yolo_settings = dict(
    input_shape=(HEIGHT_CNN, WIDTH_CNN, 3),  # rows, columns, channels
    anchors=YOLOV4_ANCHORS,
    num_classes=80,
    training=False,
    yolo_max_boxes=max_objects,
    yolo_iou_threshold=0.5,
    yolo_score_threshold=0.6,
)
model = YOLOv4(**yolo_settings)

# Load the weights from disk, then print the layer overview
model.load_weights("../binaries/yolov4.h5")
model.summary()

In [None]:
# COCO class names; the position in the list is the class id used to look up a label.
# Ref https://gist.github.com/AruniRC/7b3dadd004da04c80198557db5da4bda
CLASSES = [
    # people
    "person",
    # vehicles
    "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    # street objects
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # animals
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    # accessories
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # sports
    "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket",
    # kitchenware
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # food
    "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza",
    "donut", "cake",
    # furniture
    "chair", "couch", "potted plant", "bed", "dining table", "toilet",
    # electronics
    "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    # appliances
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # household items
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]

### Detect objects from an image file (jpg, png)

In [None]:
# Read the raw image; OpenCV loads pixels in BGR channel order
image_path = '../images/dip3.png'
image_bgr_raw = cv2.imread(image_path)
# cv2.imread reports a missing or unreadable file by returning None instead of raising,
# so fail here with a clear message rather than deep inside the resize call
if image_bgr_raw is None:
    raise FileNotFoundError('Could not read image: {}'.format(image_path))
# Convert to RGB before inference. Assumes the YOLOv4 weights expect RGB input,
# as in the tf2_yolov4 usage example (tf.image.decode_image) — TODO confirm.
image_rgb_input = cv2.cvtColor(image_bgr_raw, cv2.COLOR_BGR2RGB)
# Resize image to the expected CNN input (height, width)
image_resized = tf.image.resize(image_rgb_input, (HEIGHT_CNN, WIDTH_CNN))
# Add a leading batch dimension and normalize pixel values (/255)
images_array = tf.expand_dims(image_resized, axis=0) / 255.0

In [None]:
# detect objects
# images_array holds a single image behind a leading batch axis, and the cells below
# read each returned array at index [0] to get the results for that image.
boxes, scores, classes, valid_detections = model.predict(images_array)

In [None]:
# Build one "<label> <score as percent>" caption per detection slot of the first image
class_ids = classes[0].astype(int)   # integer ids usable as indices into CLASSES
rounded_scores = scores.round(3)[0]
texts = [
    '{} {:.2%}'.format(CLASSES[class_ids[slot]], rounded_scores[slot])
    for slot in range(classes.shape[1])
]

In [None]:
# Add boxes and text based on model output
# The boxes are drawn on the original-resolution BGR image, not the resized CNN input;
# relative_coordinates=True tells draw_boxes the coordinates are relative.
# NOTE(review): whether draw_boxes modifies image_bgr_raw in place or returns a copy
# is not visible here — confirm before treating image_bgr_raw as untouched.
new_img = draw_boxes(
    img=image_bgr_raw,
    rec_coordinates=boxes[0],
    texts=texts,
    colors=None, relative_coordinates=True,
    rec_thickness=3)

In [None]:
scores

In [None]:
# plot raw image
# The image was loaded with OpenCV (BGR), so convert it to RGB for plt.imshow
image_rgb_raw = cv2.cvtColor(image_bgr_raw, cv2.COLOR_BGR2RGB)
plt.imshow(image_rgb_raw)

In [None]:
# plot image with detected objects
# new_img is converted from BGR to RGB, the same way as the raw image, before plotting
new_img_rgb = cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB)
plt.imshow(new_img_rgb)

In [None]:
# Open one OpenCV window per image: the raw input and the annotated result
windows = (
    ('Raw image', image_bgr_raw),
    ('Detected objects in image', new_img),
)
for window_title, window_image in windows:
    cv2.imshow(window_title, window_image)

# Block until any key is pressed, then close every window
cv2.waitKey(0)
cv2.destroyAllWindows()

### Detect from streaming video

In [None]:
# Configure camera (device 0) with the requested capture resolution
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, HEIGHT)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open camera 0')

    while True:
        # Capture frame-by-frame; `ret` is False when no frame could be grabbed
        ret, bgr_frame_i = cap.read()
        if not ret:
            print('Could not read a frame from the camera, stopping...')
            break

        # Convert BGR to RGB for the network. Assumes the YOLOv4 weights expect
        # RGB input, as in the tf2_yolov4 usage example — TODO confirm.
        rgb_frame_i = cv2.cvtColor(bgr_frame_i, cv2.COLOR_BGR2RGB)

        # Resize image to match YOLOv4 input
        image_resized = tf.image.resize(rgb_frame_i, (HEIGHT_CNN, WIDTH_CNN))

        # Expand array dimension by 1 axis and normalize the array (/255)
        frame_i = tf.expand_dims(tf.cast(image_resized, tf.float32), axis=0) / 255.0

        # Model predictions
        boxes_i, scores_i, classes_i, valid_detections_i = model.predict(frame_i)

        # Create text fields containing label and score; the conversions are done
        # once per frame instead of once per detection slot
        class_ids_i = classes_i[0].astype(int)
        rounded_scores_i = scores_i.round(3)[0]
        texts_i = [
            '{} {:.2%}'.format(CLASSES[class_ids_i[slot]], rounded_scores_i[slot])
            for slot in range(classes_i.shape[1])
        ]

        # Draw the detections on the original-resolution BGR frame (OpenCV displays BGR)
        new_img = draw_boxes(
            img=bgr_frame_i,
            rec_coordinates=boxes_i[0],
            texts=texts_i,
            relative_coordinates=True)

        # Display the resulting frames
        cv2.imshow('Raw Image', bgr_frame_i)
        cv2.imshow('New Image', new_img)

        # Wait for the user to press 'q' to quit the application
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print('We reached the end...')
finally:
    # Always release the capture and close the windows, even if a frame fails
    # or the kernel is interrupted, so the camera is not left locked
    cap.release()
    cv2.destroyAllWindows()