# 4. Face Detection using Sliding Window

In [91]:
import torch
import torchvision.transforms as transforms
import torchvision.ops as ops
from net import Net
import cv2
import warnings
from sliding_window import sliding_window
from image_pyramid import image_pyramid
import torch.nn.functional as F
import imutils

# Install a blanket 'ignore' filter so no Python warnings are shown from here
# on (presumably to keep the notebook output uncluttered).
warnings.filterwarnings('ignore')

In [92]:
# Build the classifier and restore its trained weights.
face_detection_net = Net()

# Switch to evaluation mode before inference so that any train-time-only
# layers Net may contain (dropout, batch-norm, ...) behave deterministically.
face_detection_net.eval()

# map_location="cpu" lets a checkpoint that was saved from a GPU be loaded on
# a CPU-only machine; this notebook never moves tensors to another device.
face_detection_net.load_state_dict(
    torch.load("./saved_model.pth", map_location="cpu")
)

<All keys matched successfully>

In [93]:
# initialize variables used for the object detection procedure
# Tunable parameters for the sliding-window detection procedure.
MAX_WIDTH = 1200        # images wider than this are resized down to this width
PYR_SCALE = 1.5         # `scale` argument passed to image_pyramid
WINDOW_STEP = 16        # step argument passed to sliding_window
ROI_SIZE = (128, 128)   # window size for sliding_window; also the pyramid min_size
INPUT_SIZE = (36, 36)   # size every window is resized to by `transform`

In [94]:
# Preprocessing applied to each window crop before classification:
# convert to a PIL image, then to a tensor, then resize to INPUT_SIZE.
preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Resize(INPUT_SIZE),
]
transform = transforms.Compose(preprocessing_steps)

In [95]:
# Preprocessed window tensors and, in the same order, their bounding boxes
# expressed as (x1, y1, x2, y2) in the coordinates of the (resized) image.
rois = []
locs = []

#image_path = "./image_face_detection/0000bee39176697a.jpg"
image_path = './face_detection_images/detection_test2.jpg'
original_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of an AttributeError on `.shape`.
if original_image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# cap the working width; imutils.resize is given only the target width
if original_image.shape[1] > MAX_WIDTH:
    original_image = imutils.resize(original_image, width=MAX_WIDTH)

(H, W) = original_image.shape[:2]

pyramid = image_pyramid(original_image, scale=PYR_SCALE, min_size=ROI_SIZE)

for image in pyramid:
    # determine the scale factor between the *original* image
    # dimensions and the *current* layer of the pyramid
    scale = W / float(image.shape[1])

    # the window size mapped back to the original image depends only on the
    # pyramid layer, so compute it once per layer rather than once per window
    w = int(ROI_SIZE[0] * scale)
    h = int(ROI_SIZE[1] * scale)

    # for each layer of the image pyramid, loop over the sliding
    # window locations
    for (x, y, roiOrig) in sliding_window(image, WINDOW_STEP, ROI_SIZE):
        # scale the (x, y)-coordinates of the ROI with respect to the
        # *original* image dimensions
        x = int(x * scale)
        y = int(y * scale)

        # take the ROI and preprocess it so we can later classify the region
        roi_tensor_gray = transform(roiOrig)

        # update our list of ROIs and associated coordinates
        rois.append(roi_tensor_gray)
        locs.append((x, y, x + w, y + h))


In [None]:
# Classify every collected window in a single batch.
# torch.stack rejects an empty list with an opaque error, so report the real
# cause (no windows were produced) explicitly.
if not rois:
    raise ValueError("No sliding-window regions were collected; nothing to classify.")

stacked_tensor = torch.stack(rois, dim=0)

# Inference only: disable autograd so no computation graph is recorded for
# the whole batch of windows, which saves memory and time.
with torch.no_grad():
    output = face_detection_net(stacked_tensor)

In [97]:
# Normalise the network outputs into probabilities along dim 1, then convert
# them to nested Python lists for plain-Python filtering.
probs = output.softmax(dim=1)
probs_list = probs.tolist()

# Accumulator for the detections that pass the confidence threshold.
labels = dict(valid_probs=[], boxes=[])

In [None]:
# Minimum probability at index 1 for a window to be kept as a detection.
CONFIDENCE_THRESHOLD = 0.95
# Boxes overlapping a higher-scoring box by more than this IoU are discarded.
NMS_IOU_THRESHOLD = 0.1

# Pair every window's probability at index 1 with its box and keep only the
# confident ones.
confident = [
    (window_probs[1], box)
    for window_probs, box in zip(probs_list, locs)
    if window_probs[1] >= CONFIDENCE_THRESHOLD
]

# Assign (rather than append) so that re-running this cell does not
# accumulate duplicate detections in `labels`.
labels['valid_probs'] = [score for score, _ in confident]
labels['boxes'] = [box for _, box in confident]

# reshape(-1, 4) keeps the (N, 4) layout that ops.nms requires even when
# there are no detections at all (an empty list would otherwise give shape (0,)).
tensor_boxes = torch.tensor(labels['boxes'], dtype=torch.float32).reshape(-1, 4)
tensor_probs = torch.tensor(labels['valid_probs'], dtype=torch.float32)

# Indices (into labels['boxes']) of the boxes that survive non-maximum suppression.
valid_box = ops.nms(tensor_boxes, tensor_probs, iou_threshold=NMS_IOU_THRESHOLD)

In [99]:
# Debug output: the second thresholded box, then the indices kept by NMS,
# each on its own line.
print(labels['boxes'][1], valid_box, sep='\n')

(560, 192, 688, 320)
tensor([ 8, 20, 42, 25, 40, 45])


In [None]:
 # Blue color in BGR 
color = (255, 0, 0)

# Line thickness of 2 px
thickness = 2

img_color = cv2.imread(image_path)

# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of an AttributeError on `.shape`.
if img_color is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Apply the same width cap that was applied to the grayscale image the boxes
# were computed on, so the box coordinates line up with this colour copy.
if img_color.shape[1] > MAX_WIDTH:
    img_color = imutils.resize(img_color, width=MAX_WIDTH)

# Draw one rectangle (border `thickness` px wide) per box kept by NMS.
# tolist() turns the index tensor into plain Python ints for list indexing.
for index in valid_box.tolist():
    (x1, y1, x2, y2) = labels['boxes'][index]
    cv2.rectangle(img_color, (x1, y1), (x2, y2), color, thickness)

cv2.imshow('image', img_color)

# add wait key. window waits until user presses a key
cv2.waitKey(0)
# and finally destroy/close all open windows
cv2.destroyAllWindows()