In [22]:
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.ops as ops
from net import Net
import cv2
import warnings

#suppress warning
warnings.filterwarnings('ignore')


In [106]:
# Build the classifier and restore its trained weights from disk.
face_detection_net = Net()
# The notebook only runs inference: switch to evaluation mode so that any
# dropout / batch-norm layers Net may contain behave deterministically.
face_detection_net.eval()
# map_location="cpu" lets a checkpoint that was saved on a GPU machine load on a
# CPU-only one; load_state_dict copies the values into the existing parameters.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
face_detection_net.load_state_dict(torch.load("./saved_model.pth", map_location="cpu"))

<All keys matched successfully>

In [107]:
# Tunable settings for the sliding-window / image-pyramid detection below.
PYR_SCALE = 1.5         # passed as `scale` to image_pyramid
WINDOW_STEP = 16        # passed as `step` to sliding_window
ROI_SIZE = (128, 128)   # sliding-window size, also the pyramid's minSize
INPUT_SIZE = (36, 36)   # same 36x36 size the `transform` pipeline resizes ROIs to

In [26]:
def resize_image(image, max_size):
    """Rescale `image` so its shorter side equals `max_size`, keeping the aspect ratio.

    The previous version scaled the longer side by ``max_size / shorter_side``
    but left the shorter side at its original length, which distorted the image.

    Args:
        image: array whose first two dimensions are (height, width).
        max_size: target length, in pixels, of the shorter side.

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    height, width = image.shape[:2]
    target = int(max_size)
    if height > width:
        # Portrait: width is the shorter side.
        new_width = target
        new_height = int(height * target / width)
    else:
        # Landscape or square: height is the shorter side.
        new_height = target
        new_width = int(width * target / height)

    # cv2.resize takes the destination size as (width, height).
    return cv2.resize(image, (new_width, new_height))

In [27]:
def sliding_window(image, step, ws):
    """Yield every window of size `ws` that fits entirely inside `image`.

    Args:
        image: array whose first two dimensions are (height, width).
        step: stride, in pixels, between consecutive window origins.
        ws: window size as (width, height).

    Yields:
        (x, y, window) where (x, y) is the window's top-left corner and
        `window` is the corresponding slice of `image`.
    """
    win_w, win_h = ws
    # The "+ 1" makes the last fully contained origin inclusive; without it an
    # image exactly as large as the window would produce no windows at all.
    for y in range(0, image.shape[0] - win_h + 1, step):
        for x in range(0, image.shape[1] - win_w + 1, step):
            yield (x, y, image[y:y + win_h, x:x + win_w])

In [75]:
def image_pyramid(image, scale=1.5, minSize=ROI_SIZE):
    """Yield `image` followed by successively smaller copies of it.

    Each layer is the previous one shrunk by `scale`; generation stops before
    the first layer that would be narrower than ``minSize[0]`` or shorter than
    ``minSize[1]``. The original image is always yielded, whatever its size.

    Args:
        image: array whose first two dimensions are (height, width).
        scale: shrink factor between layers; must be greater than 1.
        minSize: smallest allowed layer as (width, height).

    Raises:
        ValueError: if `scale` is not greater than 1 (the layers would never
            shrink, so the generator would never finish). Because this is a
            generator, the error surfaces on the first iteration.
    """
    if scale <= 1:
        raise ValueError("scale must be greater than 1")

    # yield the original image
    yield image
    while True:
        # compute the dimensions of the next image in the pyramid
        new_width = int(image.shape[1] / scale)
        new_height = int(image.shape[0] / scale)
        # Decide before resizing: the resized image would have exactly these
        # dimensions, so this avoids one wasted resize and never asks
        # cv2.resize for an empty image.
        if new_height < minSize[1] or new_width < minSize[0]:
            break
        if new_width < 1 or new_height < 1:
            break
        image = cv2.resize(image, (new_width, new_height))
        # yield the next image in the pyramid
        yield image

In [99]:
# Preprocessing applied to every ROI before it is fed to the network:
# numpy array -> PIL image -> tensor -> resized to the network input size.
# The size comes from INPUT_SIZE instead of a second hard-coded (36, 36);
# INPUT_SIZE is square, so the (height, width) order Resize expects is moot.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Resize(INPUT_SIZE),
])


In [115]:
# Collect every sliding-window ROI over every pyramid layer, together with the
# box it corresponds to in the coordinates of the original image.
rois = []
locs = []

image_path = "./image_face_detection/0000bee39176697a.jpg"
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, which would otherwise surface below as an obscure AttributeError.
if img is None:
    raise FileNotFoundError("could not read image: " + image_path)

#img = resize_image(img, 720)

(H, W) = img.shape[:2]

pyramid = image_pyramid(img, scale=PYR_SCALE, minSize=ROI_SIZE)

for image in pyramid:
    # determine the scale factor between the *original* image
    # dimensions and the *current* layer of the pyramid
    scale = W / float(image.shape[1])

    # The window size in original-image pixels depends only on the layer,
    # so compute it once per layer rather than once per window.
    w = int(ROI_SIZE[0] * scale)
    h = int(ROI_SIZE[1] * scale)

    # for each layer of the image pyramid, loop over the sliding
    # window locations
    for (x, y, roiOrig) in sliding_window(image, WINDOW_STEP, ROI_SIZE):
        # scale the (x, y)-coordinates of the ROI with respect to the
        # *original* image dimensions
        x = int(x * scale)
        y = int(y * scale)

        # preprocess the ROI so we can later classify the region
        roi_tensor_gray = transform(roiOrig)

        # update our list of ROIs and associated (x1, y1, x2, y2) boxes
        rois.append(roi_tensor_gray)
        locs.append((x, y, x + w, y + h))


In [116]:
# Stack all ROI tensors into one batch and classify them in a single pass.
stacked_tensor = torch.stack(rois, dim=0)

print(stacked_tensor.size())

# Inference only: disabling autograd avoids building a graph (and keeping the
# activations alive) for every ROI in the batch.
with torch.no_grad():
    output = face_detection_net(stacked_tensor)

torch.Size([3277, 1, 36, 36])


In [117]:
# Convert the raw network outputs into per-class probabilities (softmax over
# dim 1) and then into nested Python lists for the thresholding step.
probs = torch.softmax(output, dim=1)
probs_list = probs.tolist()

# Accumulators for the detections that pass the probability threshold.
labels = dict(valid_probs=[], boxes=[])

In [131]:
# Keep only confident detections, then merge overlapping boxes with NMS.

# Reset the accumulators here so that re-running this cell starts from scratch
# instead of appending the same detections a second time.
labels = {'valid_probs': [],
          'boxes': []}

for box, class_probs in zip(locs, probs_list):
    # Column 1 of the softmax output is the score being thresholded
    # (presumably the "face" class - confirm against the training labels).
    score = class_probs[1]
    if score >= 0.95:
        labels['valid_probs'].append(score)
        labels['boxes'].append(box)

# nms needs boxes shaped (N, 4); reshape(-1, 4) keeps that layout even when no
# detection passed the threshold (N == 0), where a plain conversion gives (0,).
tensor_boxes = torch.tensor(labels['boxes'], dtype=torch.float32).reshape(-1, 4)
tensor_probs = torch.tensor(labels['valid_probs'], dtype=torch.float32)

# Indices (into labels['boxes']) of the boxes that survive suppression.
valid_box = ops.nms(tensor_boxes, tensor_probs, iou_threshold=0.1)
        

In [125]:
# Quick inspection: the second thresholded box, then the indices kept by NMS.
print(labels['boxes'][1], valid_box, sep='\n')

(640, 112, 768, 240)
tensor([ 9,  5, 21, 17, 22, 25])


In [132]:
 
# Reload the image in colour so the detections can be drawn on top of it.
img_color = cv2.imread(image_path)

# Outline style: blue (the tuple is in BGR order), 1 px wide.
color = (255, 0, 0)
thickness = 1

# Draw one rectangle per box that survived non-maximum suppression.
for index in valid_box:
    box = labels['boxes'][index]
    (x, y, z, t) = box
    cv2.rectangle(img_color, (x, y), (z, t), color, thickness)

# Show the result in a window, block until any key is pressed, then
# close every OpenCV window.
cv2.imshow('image', img_color)
cv2.waitKey(0)
cv2.destroyAllWindows()
    

In [15]:
# Scratch check: print the length of dimension 1 of a 2x3 tensor.
x = torch.tensor([[3, 4, 5], [2, 3, 4]])
print(x.shape[1])

3
