In [172]:
# import the necessary packages
import torch
import torchvision.transforms as transforms
import torchvision.ops as ops
from net import Net
import cv2
import time
import imutils

import warnings
#suppress warning
warnings.filterwarnings('ignore')

In [173]:
# Use the GPU when one is available, otherwise the CPU. `device` is only used
# as the map_location for the checkpoint below; the network itself is not
# moved to it in this cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the classifier and switch it to evaluation mode. This notebook only
# runs inference, and layers such as dropout or batch normalisation (if Net
# uses any) behave differently in training mode.
face_detection_net = Net()
face_detection_net.eval()

# NOTE(review): torch.load unpickles the file -- only load checkpoints that
# come from a trusted source.
# load_state_dict is kept as the last expression so the notebook still
# displays the key-matching result.
face_detection_net.load_state_dict(torch.load("./saved_model.pth", map_location=device))

<All keys matched successfully>

In [174]:
# Widest image (in pixels) that is processed without downscaling.
MAX_WIDTH = 1500


def resize(image):
    """Return ``image`` limited to at most ``MAX_WIDTH`` pixels in width.

    Args:
        image: array-like image whose ``shape[1]`` is its width in pixels
            (e.g. the array returned by ``cv2.imread``).

    Returns:
        The input object itself when it is already ``MAX_WIDTH`` wide or
        narrower; otherwise the result of ``imutils.resize`` with
        ``width=MAX_WIDTH``.
    """
    # Images that already fit are passed through untouched. Previously this
    # case fell off the end of the function and returned None.
    if image.shape[1] <= MAX_WIDTH:
        return image

    # resize the image to a fixed width, while ensuring the aspect
    # ratio is maintained
    return imutils.resize(image, width=MAX_WIDTH)

In [175]:
image_path = "./face_detection_images/detection_test2.jpg"

# cv2.imread reports a missing or unreadable file by returning None instead
# of raising, so fail here with a clear message rather than later with an
# AttributeError on `None.shape`.
original_image = cv2.imread(image_path)
if original_image is None:
    raise FileNotFoundError("could not read image: {}".format(image_path))

# Cap the width at MAX_WIDTH before running region proposals on it.
original_image = resize(original_image)

In [176]:
# Create OpenCV's selective-search segmenter (cv2.ximgproc) and register the
# loaded image as the one region proposals will be generated for.
segmentation = cv2.ximgproc.segmentation
ss = segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(original_image)

In [177]:
# Which selective-search variant to run: "fast" or "quality".
# The log line below is derived from this value so it always matches the
# mode that was actually selected.
SELECTIVE_SEARCH_MODE = "quality"

if SELECTIVE_SEARCH_MODE == "fast":
    ss.switchToSelectiveSearchFast()
elif SELECTIVE_SEARCH_MODE == "quality":
    ss.switchToSelectiveSearchQuality()
else:
    raise ValueError(
        "SELECTIVE_SEARCH_MODE must be 'fast' or 'quality', got {!r}".format(SELECTIVE_SEARCH_MODE)
    )

print("[INFO] using *{}* selective search".format(SELECTIVE_SEARCH_MODE))

[INFO] using *quality* selective search


In [178]:
# Run selective search and report how long it took and how many region
# proposals it produced.
t_begin = time.time()
rects = ss.process()
elapsed = time.time() - t_begin

print("[INFO] selective search took {:.4f} seconds".format(elapsed))
print(len(rects))

[INFO] selective search took 93.5325 seconds
35293


In [179]:
# Preprocessing applied to every region of interest, in this order:
# array -> PIL image -> single-channel grayscale -> tensor -> 36x36 resize.
preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Resize((36, 36)),
]
transform = transforms.Compose(preprocessing_steps)

In [180]:
# Image height and width, used to express each proposal's size as a fraction
# of the whole image.
H, W = original_image.shape[:2]

rois = []  # preprocessed tensor for every proposal that is kept
locs = []  # matching (x1, y1, x2, y2) corner coordinates

for (x, y, w, h) in rects:
    # keep a proposal only if it spans at least 5% of the image in both
    # width and height; smaller regions are likely false positives
    spans_enough = w / float(W) >= 0.05 and h / float(H) >= 0.05
    if not spans_enough:
        continue

    # crop the region out of the image and convert it from BGR to RGB
    # channel ordering
    patch_rgb = cv2.cvtColor(original_image[y:y + h, x:x + w], cv2.COLOR_BGR2RGB)

    # run the preprocessing pipeline and record the tensor together with the
    # region's corner coordinates
    rois.append(transform(patch_rgb))
    locs.append((x, y, x + w, y + h))

In [181]:
### DEBUGGING
# print(len(rois))
# print(len(locs))

The preprocessed region proposals are stacked into a single batch tensor of shape `(N, 1, 36, 36)` so that they can be fed into the CNN as a batch.

In [182]:
# Combine the per-region tensors into one batch along a new leading dimension.
stacked_tensor = torch.stack(rois, dim=0)

print(stacked_tensor.size())

# Inference only: disabling gradient tracking avoids building an autograd
# graph for the whole batch, which saves memory and time.
with torch.no_grad():
    output = face_detection_net(stacked_tensor)

torch.Size([14173, 1, 36, 36])


In [183]:
# Turn the network's raw scores into per-class probabilities (softmax over
# dim 1) and convert them to nested Python lists for the filtering step.
probs = torch.softmax(output, dim=1)
probs_list = probs.tolist()

# Accumulators for the detections that pass the confidence threshold.
labels = dict(valid_probs=[], boxes=[])

In [184]:
# Minimum class-1 probability a proposal needs in order to be kept, and the
# IoU above which nms drops the lower-scoring of two overlapping boxes.
MIN_FACE_PROB = 0.95
NMS_IOU_THRESHOLD = 0.1

# Start from empty lists so that re-running this cell does not append the
# same detections a second time.
labels = {'valid_probs': [],
          'boxes': []}

for class_probs, box in zip(probs_list, locs):
    # index 1 is presumably the "face" class -- TODO confirm against the
    # label order Net was trained with
    face_prob = class_probs[1]
    if face_prob >= MIN_FACE_PROB:
        labels['valid_probs'].append(face_prob)
        # store plain Python ints rather than the numpy scalars coming from
        # selective search
        labels['boxes'].append(tuple(int(v) for v in box))

# nms expects boxes shaped (N, 4); the explicit reshape keeps that true even
# when no proposal passed the threshold (N == 0).
num_boxes = len(labels['boxes'])
tensor_boxes = torch.tensor(labels['boxes'], dtype=torch.float32).reshape(num_boxes, 4)
tensor_probs = torch.tensor(labels['valid_probs'], dtype=torch.float32)

# Indices (into labels['boxes']) of the boxes kept by non-maximum suppression.
valid_box = ops.nms(tensor_boxes, tensor_probs, iou_threshold=NMS_IOU_THRESHOLD)
        

In [185]:
# Show every box that passed the probability threshold, followed by the
# indices of the boxes kept by non-maximum suppression.
print(labels['boxes'], valid_box, sep="\n")

[(505, 570, 668, 691), (814, 419, 1007, 634), (827, 420, 1037, 640), (825, 420, 956, 581), (875, 416, 1064, 699), (812, 420, 1017, 679), (109, 434, 224, 644), (220, 421, 321, 533), (847, 425, 1051, 532), (847, 413, 1051, 532), (202, 411, 321, 532), (745, 310, 1046, 604), (812, 405, 1047, 634), (59, 499, 209, 610), (847, 424, 1051, 532), (218, 411, 323, 536), (812, 245, 1082, 639), (56, 494, 209, 610), (816, 420, 1051, 572), (58, 495, 209, 605), (815, 413, 1051, 639), (816, 301, 1082, 639), (825, 416, 1064, 699), (862, 522, 1008, 639), (504, 570, 668, 691), (756, 405, 1047, 634), (220, 421, 323, 533), (503, 570, 667, 687), (697, 1171, 921, 1239), (814, 419, 1040, 634), (1241, 0, 1413, 799), (473, 522, 596, 587), (816, 420, 1036, 639), (202, 408, 327, 511), (475, 522, 594, 595), (214, 411, 323, 536), (736, 1088, 1008, 1189), (594, 1059, 1074, 1280), (814, 419, 1007, 639), (594, 1059, 1079, 1220), (783, 1080, 1011, 1189), (597, 984, 1032, 1275), (501, 565, 667, 681), (597, 984, 1032, 1269

In [186]:
 
# Box colour in BGR channel order (pure green) and line thickness in pixels
color = (0, 255, 0)
thickness = 2

# Draw on a copy of the exact image the proposals were computed on, instead
# of reading and resizing the file a second time; the copy keeps
# original_image itself unmodified.
img_color = original_image.copy()

# Draw a green, 2 px rectangle for every box kept by non-maximum suppression.
# tolist() yields plain Python ints, which are safe both as list indices and
# (after int()) as cv2 point coordinates.
for index in valid_box.tolist():
    (x1, y1, x2, y2) = (int(v) for v in labels['boxes'][index])
    cv2.rectangle(img_color, (x1, y1), (x2, y2), color, thickness)


cv2.imshow('image', img_color)

# add wait key. window waits until user presses a key
cv2.waitKey(0)
# and finally destroy/close all open windows
cv2.destroyAllWindows()

In [187]:
### DEBUGGING
# # loop over the region proposals in chunks (so we can better
# # visualize them)
# for i in range(0, len(rects), 100):
# 	# clone the original image, so we can draw on it
# 	output = image.copy()
# 	# loop over the current subset of region proposals
# 	for (x, y, w, h) in rects[i:i + 100]:
# 		# draw the region proposal bounding box on the image
# 		color = [random.randint(0, 255) for j in range(0, 3)]
# 		cv2.rectangle(output, (x, y), (x + w, y + h), color, 2)
# 	# show the output image
# 	cv2.imshow("Output", output)
# 	key = cv2.waitKey(0) & 0xFF
# 	# if the `q` key was pressed, break from the loop
# 	if key == ord("q"):
# 		break