In [None]:
# import the necessary packages
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications import imagenet_utils
from imutils.object_detection import non_max_suppression
from pyimagesearch.detection_helpers import sliding_window
from pyimagesearch.detection_helpers import image_pyramid
import numpy as np
import argparse
import imutils
import time
import cv2

In [30]:
def image_pyramid(image, scale=1.5, minSize=(224, 224)):
    yield image
    while True:
        w = int(image.shape[1] / scale)
        image = imutils.resize(image, width=w)
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
            break
        yield image

- image : input image
- scale : 축소할 비율 (resize)
- minSize : 일단 pre-training 모델인 (ResNet50)을 사용하기 때문에 ResNet50에 기본 size 224, 224이하가 되면 축소를 멈춤.

In [31]:
def sliding_window(image, step, ws):
# slide a window across the image
    for y in range(0, image.shape[0] - ws[1], step):
        for x in range(0, image.shape[1] - ws[0], step):
            # yield the current window
            yield (x, y, image[y:y + ws[1], x:x + ws[0]])

- image : image Pyramids를 통과해 추출된 이미지들 
- step : window(kernel)이 움직이는 sliding step
- ws : window(kernel) 크기
- Line 2, 3은 행과 열로 sliding하기 위한 코드
- 각 sliding별로 x, y 및 이미지의 커널 크기만큼의 array값들 추출

In [37]:
img = cv2.imread('cameraman.tif')
img.shape

(256, 256, 3)

In [38]:
WIDTH = 600
PYR_SCALE = 1.5
WIN_STEP = 16
ROI_SIZE = (56, 56)
INPUT_SIZE = (100, 100)
(H, W) = img.shape[:2]

In [1]:
rois = []
locs = []
start = time.time()
# image pyramid 적용
pyramid = image_pyramid(img, scale=PYR_SCALE, minSize=ROI_SIZE)  

# Generator로 하나씩 이미지를 불음
# 오리지널 이미지 W를 pyramid로 불러온 이미지를 나눠 scale 정의
# 나중에 object bounding box를 upscale 할 때 필요 
for image in pyramid:
    scale = W / float(image.shape[1])
    print(scale)
    for (x, y, roiOrig) in sliding_window(image, WIN_STEP, ROI_SIZE):
        # Generator로 하나씩 sliding할 때의 sliding window 결과값을 불러와
        # Scale 비율로 각 sliding window(x, y, w, h)를 증가 시킴
        x = int(x * scale)
        y = int(y * scale)
        w = int(ROI_SIZE[0] * scale)
        h = int(ROI_SIZE[1] * scale)   
        print(x, y, w, h)
        # sliding window 사이즈(scale비율로 증가)로 이미지를 분류하기 위해
        # ResNet50모델의 기본 사이즈인 224,224 비율로 resize해서 분류기에 넣겠다는 것임
        roi = cv2.resize(roiOrig, INPUT_SIZE)
        roi = img_to_array(roi)
        roi = preprocess_input(roi)
        # roi값과 각 좌표정보를 저장함.
        rois.append(roi)
        locs.append((x, y, x + w, y + h))
        
        # 오리지널 이미지 위에 bounding box sliding되고 scale되는 것을 
        # 보여주고 해당 sliding window이미지를 보여줌 
        clone = img.copy()
        cv2.rectangle(clone, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # show the visualization and current ROI
        cv2.imshow("Visualization", clone)
        cv2.imshow("ROI", roiOrig)
        cv2.waitKey(0)
        
# image pyramid와 sliding window의 loop시간 측정을 위해
end = time.time()
print("[INFO] looping over pyramid/windows took {:.5f} seconds".format(
    end - start))

# 넘파이어레이로 변경
rois = np.array(rois, dtype="float32")
# 모델 예측
print("[INFO] classifying ROIs...")
start = time.time()
preds = model.predict(rois)
end = time.time()
print("[INFO] classifying ROIs took {:.5f} seconds".format(end - start))

# 
preds = imagenet_utils.decode_predictions(preds, top=1)
labels = {}

# loop over the predictions
for (i, p) in enumerate(preds):
    (imagenetID, label, prob) = p[0]
    if prob >= args["min_conf"]:
        box = locs[i]
        L = labels.get(label, [])
        L.append((box, prob))
        labels[label] = L
        
        
for label in labels.keys():
    print("[INFO] showing results for '{}'".format(label))
    clone = orig.copy()
    for (box, prob) in labels[label]:
        (startX, startY, endX, endY) = box
        cv2.rectangle(clone, (startX, startY), (endX, endY),
            (0, 255, 0), 2)
        
    cv2.imshow("Before", clone)
    clone = orig.copy()
    
    boxes = np.array([p[0] for p in labels[label]])
    proba = np.array([p[1] for p in labels[label]])
    boxes = non_max_suppression(boxes, proba)

    for (startX, startY, endX, endY) in boxes:
        cv2.rectangle(clone, (startX, startY), (endX, endY),
            (0, 255, 0), 2)
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.putText(clone, label, (startX, y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)
    cv2.imshow("After", clone)
    cv2.waitKey(0)

NameError: name 'time' is not defined