# 范例3. 使用CNN进行物件侦测
### 修改自 [Turning any CNN image classifier into an object detector with Keras, TensorFlow, and OpenCV - PyImageSearch](https://www.pyimagesearch.com/2020/06/22/turning-any-cnn-image-classifier-into-an-object-detector-with-keras-tensorflow-and-opencv/)

In [1]:
# 载入套件，需额外安装 imutils 套件
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications import imagenet_utils
from imutils.object_detection import non_max_suppression
import numpy as np
import imutils
import time
import cv2

In [2]:
# Parameter settings
image_path = './images_Object_Detection/bike.jpg'  # image to run detection on
WIDTH = 600              # target width passed to imutils.resize (only the width is given, not a fixed 600x600 size)
PYR_SCALE = 1.5          # factor by which the width shrinks between image-pyramid levels
WIN_STEP = 16            # sliding-window stride, used for both the x and y directions
ROI_SIZE = (250, 250)    # sliding-window size as (width, height)
INPUT_SIZE = (224, 224)  # size each window crop is resized to before it is fed to the CNN

In [3]:
# Load the ResNet50 model with ImageNet weights; include_top=True keeps the
# classification head so the model outputs ImageNet class predictions
model = ResNet50(weights="imagenet", include_top=True)

In [4]:
# Read the image to run detection on
orig = cv2.imread(image_path)
if orig is None:
    # cv2.imread does not raise on failure; it returns None, which would
    # otherwise surface later as an unrelated AttributeError in the resize.
    raise OSError(f"Could not read image (missing or unreadable): {image_path}")

# Resize to the configured width and remember the resulting dimensions,
# which are used to map pyramid-level coordinates back onto this image
orig = imutils.resize(orig, width=WIDTH)
(H, W) = orig.shape[:2]

In [5]:
# 定义滑动视窗与影像金字塔函数

# Sliding window
def sliding_window(image, step, ws):
    """Yield every window of size *ws* that fits entirely inside *image*.

    Windows are produced row by row: x advances by *step* along a row, then
    y advances by *step* to the next row.

    Args:
        image: Array indexed as ``image[row, column, ...]``.
        step: Stride in pixels, used for both directions.
        ws: Window size as ``(width, height)``.

    Yields:
        ``(x, y, window)`` where ``(x, y)`` is the top-left corner of the
        window and ``window`` is the slice ``image[y:y + height, x:x + width]``.
    """
    win_w, win_h = ws[0], ws[1]

    # The "+ 1" keeps the last position whose window ends exactly on the
    # bottom/right edge; without it that window is skipped, and an image
    # exactly the size of the window produces no windows at all.
    for y in range(0, image.shape[0] - win_h + 1, step):      # move down by `step`
        for x in range(0, image.shape[1] - win_w + 1, step):  # move right by `step`
            # Return the cropped window together with its position
            yield (x, y, image[y:y + win_h, x:x + win_w])

# Image pyramid
def image_pyramid(image, scale=1.5, minSize=(224, 224)):
    """Yield *image* followed by successively smaller copies of it.

    Args:
        image: Source image; it is yielded unchanged as the first level.
        scale: Factor by which the width is divided at every level. Must be
            greater than 1, otherwise the image never shrinks and the
            pyramid would never terminate.
        minSize: Smallest allowed level as ``(width, height)``; generation
            stops at the first level that is smaller in either dimension.

    Yields:
        The original image, then each resized level in decreasing size.

    Raises:
        ValueError: If *scale* is not greater than 1. Because this is a
            generator, the error is raised when iteration starts.
    """
    if scale <= 1:
        raise ValueError("scale must be greater than 1, got {!r}".format(scale))

    # The first level is the original image
    yield image

    # Keep producing smaller levels until the minimum size is reached
    while True:
        # Compute the reduced width and resize to it
        w = int(image.shape[1] / scale)
        image = imutils.resize(image, width=w)

        # Stop once the level is smaller than the minimum size
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
            break

        # Return the reduced image
        yield image

In [6]:
# Build the candidate regions that will be classified
rois = []    # preprocessed window crops, one per candidate
locs = []    # matching boxes (startX, startY, endX, endY) in `orig` coordinates
SHOW_BOX = False  # set to True to display every window while scanning

# Generate the image pyramid
pyramid = image_pyramid(orig, scale=PYR_SCALE, minSize=ROI_SIZE)
# Scan every pyramid level with the sliding window
for image in pyramid:
    # Ratio between the original width and this level's width; used to map
    # window coordinates on this level back onto the original image
    scale = W / float(image.shape[1])

    # The window size in original-image coordinates is the same for every
    # window on this level, so compute it once per level
    w = int(ROI_SIZE[0] * scale)
    h = int(ROI_SIZE[1] * scale)

    # Slide the window over this level
    for (x, y, roiOrig) in sliding_window(image, WIN_STEP, ROI_SIZE):
        # Top-left corner of the candidate box in original-image coordinates
        x = int(x * scale)
        y = int(y * scale)

        # Resize the crop to the model's input size
        roi = cv2.resize(roiOrig, INPUT_SIZE)
        # cv2.imread yields BGR pixels, while the Keras ResNet
        # preprocess_input expects RGB (it performs the RGB->BGR swap
        # itself); convert first so the channels are not swapped twice
        roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        roi = img_to_array(roi)
        roi = preprocess_input(roi)

        # Store the crop and its box
        rois.append(roi)
        locs.append((x, y, x + w, y + h))

        # Optionally visualise the window currently being examined
        if SHOW_BOX:
            clone = orig.copy()
            cv2.rectangle(clone, (x, y), (x + w, y + h),
                (0, 255, 0), 2)

            # Show the box on the full image and the raw (BGR) crop;
            # waits for a key press before moving to the next window
            cv2.imshow("Visualization", clone)
            cv2.imshow("ROI", roiOrig)
            cv2.waitKey(0)

cv2.destroyAllWindows()

In [7]:
# Classify every candidate region and keep the confident ones
MIN_CONFIDENCE = 0.9  # minimum top-1 probability for a window to be kept

rois = np.array(rois, dtype="float32")
preds = imagenet_utils.decode_predictions(model.predict(rois), top=1)

# Group the confident detections by class label:
# label -> list of (box, probability)
labels = {}
for box, top in zip(locs, preds):
    # Only the single best prediction of each region is considered
    _, label, prob = top[0]
    if prob < MIN_CONFIDENCE:
        continue
    labels.setdefault(label, []).append((box, prob))

# Handle one class at a time
for label, detections in labels.items():
    # Draw every raw detection of this class on a copy of the image
    before = orig.copy()
    for (startX, startY, endX, endY), _ in detections:
        cv2.rectangle(before, (startX, startY), (endX, endY),
            (0, 255, 0), 2)

    # Show the boxes before NMS (non-maxima suppression)
    cv2.imshow("Before NMS", before)

    # Collapse overlapping boxes with NMS
    boxes = np.array([box for box, _ in detections])
    proba = np.array([prob for _, prob in detections])
    kept = non_max_suppression(boxes, proba)

    after = orig.copy()
    for (startX, startY, endX, endY) in kept:
        # Draw the surviving box and its class label; the text goes above
        # the box unless that would place it too close to the top edge
        cv2.rectangle(after, (startX, startY), (endX, endY), (0, 255, 0), 2)
        if startY - 10 > 10:
            text_y = startY - 10
        else:
            text_y = startY + 10
        cv2.putText(after, label, (startX, text_y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)

    # Show the result and wait for a key press before the next class
    cv2.imshow("After NMS", after)
    cv2.waitKey(0)

cv2.destroyAllWindows()    # close all windows