In [10]:
import torch
from net import Net
import cv2
from image_pyramid import image_pyramid
from sliding_window import sliding_window
import torchvision.transforms as transforms
import warnings
import torchvision.ops as ops
import imutils
import torch.nn.functional as F

# Suppress warnings
# NOTE(review): this installs a blanket "ignore" filter, so every Python
# warning raised after this point is hidden -- including deprecation notices
# from the libraries imported above. Consider narrowing it (category= /
# module=) once the specific noisy warning is known.
warnings.filterwarnings("ignore")

In [11]:
# Load the trained model weights for inference.
PATH = './saved_model.pth'
net = Net()
# Switch to evaluation mode *before* loading the weights so that
# load_state_dict stays the last expression of the cell (its result is what
# the notebook displays). eval() turns off training-only behaviour such as
# dropout / batch-norm statistic updates, if Net contains such layers.
net.eval()
# map_location='cpu' lets a checkpoint that was saved from a GPU be loaded on
# a CPU-only machine; no cell visible here moves the model to another device.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source.
net.load_state_dict(torch.load(PATH, map_location='cpu'))

<All keys matched successfully>

In [12]:
# Tunable parameters for the image-pyramid / sliding-window detection pass.
MAX_WIDTH = 1600        # images wider than this are resized down to this width
INPUT_SIZE = (36, 36)   # size every window is resized to by `transform`
ROI_SIZE = (128, 128)   # window size for sliding_window; also the pyramid's min_size
WIN_STEP = 16           # step argument passed to sliding_window
PYR_SCALE = 1.5         # scale argument passed to image_pyramid

In [13]:
# Load the test image as a single-channel (grayscale) array.
FILE_NAME = 'detection_test2.jpg'
FILE = './face_detection_images/' + FILE_NAME
original_image = cv2.imread(FILE, cv2.IMREAD_GRAYSCALE)
# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would otherwise surface below as an opaque AttributeError on `.shape`.
if original_image is None:
    raise FileNotFoundError("Could not read image: " + FILE)
# Cap the working width at MAX_WIDTH before running detection.
if original_image.shape[1] > MAX_WIDTH:
    original_image = imutils.resize(original_image, width=MAX_WIDTH)
# Height and width of the (possibly resized) image; W is the reference width
# used to map pyramid-layer coordinates back to this image.
(H, W) = original_image.shape[:2]

In [14]:
# Preprocessing applied to every sliding-window crop before it is fed to the
# network. The steps run in the order listed.
transform = transforms.Compose([
    transforms.ToPILImage(),         # crop -> PIL image
    transforms.ToTensor(),           # PIL image -> tensor
    transforms.Resize(INPUT_SIZE),   # resize the tensor to INPUT_SIZE
])

In [15]:
# Accumulators filled while scanning the pyramid:
#   rois -- preprocessed regions of interest
#   locs -- location at which each region of interest was found
rois, locs = [], []
# Multi-scale view of the image; min_size=ROI_SIZE is handed to image_pyramid
# together with the per-layer scale factor.
pyramid = image_pyramid(original_image, min_size=ROI_SIZE, scale=PYR_SCALE)

In [16]:
# Collect every sliding-window crop, together with its bounding box expressed
# in the coordinates of the original image, across all pyramid layers.
#
# The pyramid and both accumulators are (re)created here so that this cell
# can be re-run on its own. Otherwise a second run would append duplicate
# entries to the lists and -- presumably, if image_pyramid is a generator --
# iterate over an already exhausted pyramid.
pyramid = image_pyramid(original_image, scale=PYR_SCALE, min_size=ROI_SIZE)
rois = []  # preprocessed window tensors
locs = []  # (startX, startY, endX, endY) per window, original-image coordinates

for image in pyramid:
    # scale factor between the *original* image width and the width of the
    # *current* pyramid layer
    scale = W / float(image.shape[1])
    # The window size is fixed within a layer, so its size in original-image
    # coordinates does not change between window positions.
    w = int(ROI_SIZE[0] * scale)
    h = int(ROI_SIZE[1] * scale)
    for (x, y, roiOrig) in sliding_window(image, WIN_STEP, ROI_SIZE):
        # map the window's top-left corner back to the original image
        x = int(x * scale)
        y = int(y * scale)
        # preprocess the crop and record it with its bounding box
        rois.append(transform(roiOrig))
        locs.append((x, y, x + w, y + h))

# torch.stack fails with an unhelpful message on an empty list, so report the
# likely cause explicitly.
if not rois:
    raise ValueError(
        "No sliding windows were produced; the image may be smaller than "
        "ROI_SIZE or the pyramid may already have been consumed."
    )

# Turn the list of tensors into a single batch tensor to feed into the network
rois_stacked = torch.stack(rois, dim=0)

In [17]:
# Minimum probability (class index 1) for a window to be kept as a detection,
# and the IoU above which a lower-scoring overlapping box is suppressed.
CONF_THRESHOLD = 0.95
NMS_IOU_THRESHOLD = 0.1

# Classify every ROI in a single batch. This is inference only, so gradient
# tracking is disabled to avoid building an autograd graph for the whole batch.
with torch.no_grad():
    output = net(rois_stacked)
# Turn the raw network outputs into per-class probabilities.
probs = F.softmax(output, dim=1)
probs_list = probs.tolist()

# Keep the windows whose class-1 probability clears the threshold.
# NOTE(review): index 1 is treated as the positive class here -- presumably
# "face"; confirm against the label order used in training.
labels = {'confidence': [], 'boxes': []}
for i, class_probs in enumerate(probs_list):
    confidence = class_probs[1]
    if confidence >= CONF_THRESHOLD:
        box = locs[i]
        labels['confidence'].append(confidence)
        labels['boxes'].append(box)

# reshape(-1, 4) keeps the (N, 4) layout that ops.nms requires even when no
# window passed the threshold (an empty list would otherwise give shape (0,)).
tensor_boxes = torch.tensor(labels['boxes']).float().reshape(-1, 4)
tensor_probs = torch.Tensor(labels['confidence'])

# Indices (into labels['boxes']) of the boxes that survive non-maximum
# suppression.
valid_box = ops.nms(tensor_boxes, tensor_probs, iou_threshold=NMS_IOU_THRESHOLD)

In [18]:
# Reload the image in colour for display and apply the same width cap that
# was used for detection, so the stored box coordinates line up with it.
# NOTE: this rebinds `original_image` from the grayscale array to a colour
# one; re-run the grayscale loading cell before re-running detection.
original_image = cv2.imread(FILE)
# cv2.imread returns None (it does not raise) when the file cannot be read.
if original_image is None:
    raise FileNotFoundError("Could not read image: " + FILE)
if original_image.shape[1] > MAX_WIDTH:
    original_image = imutils.resize(original_image, width=MAX_WIDTH)

# Draw every box that survived non-maximum suppression. tolist() yields plain
# Python ints, which index the list of boxes directly.
for i in valid_box.tolist():
    box = labels['boxes'][i]
    (startX, startY, endX, endY) = box
    # green (BGR) rectangle, 2 px thick
    cv2.rectangle(original_image, (startX, startY), (endX, endY), (0, 255, 0), 2)

# Show the annotated image in a native OpenCV window; waitKey(0) blocks until
# a key is pressed, after which the window is closed.
# NOTE(review): this needs a GUI-capable environment -- confirm it is not run
# on a headless notebook server.
cv2.imshow("Output", original_image)
cv2.waitKey(0)
cv2.destroyAllWindows()