In [1]:
import os
import random

from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, PReLU, Flatten, Softmax
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import tensorflow.compat.v1 as tf
import numpy as np
import cv2

In [2]:
# Fully-convolutional network over 12x12 RGB patches with two 1x1-conv heads.
# Spatial size per stage (valid 3x3 convs, one 2x2 pool): 12 -> 10 -> 5 -> 3 -> 1,
# so each head emits a single 1x1 cell per input patch.
def _conv_prelu(tensor, filters, stage):
    """Apply stage `stage`'s 3x3 valid convolution followed by its PReLU."""
    tensor = Conv2D(
        filters,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='valid',
        name='PNet_CONV%d' % stage,
    )(tensor)
    # shared_axes=[1, 2] shares the learned slope across height and width
    return PReLU(shared_axes=[1, 2], name='PNet_PRELU%d' % stage)(tensor)


X = Input(shape=(12, 12, 3), name='PNet_Input')

L = _conv_prelu(X, 10, 1)
L = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='PNet_MAXPOOL1')(L)
L = _conv_prelu(L, 16, 2)
L = _conv_prelu(L, 32, 3)

# Head 1: one sigmoid score per patch. Head 2: four linear regression values per patch.
classifier = Conv2D(1, kernel_size=(1, 1), strides=(1, 1), activation='sigmoid', name='FACE_CLASSIFIER')(L)
regressor = Conv2D(4, kernel_size=(1, 1), strides=(1, 1), name='BB_REGRESSION')(L)

p_net = Model(inputs=X, outputs=[classifier, regressor], name='PNet')

In [3]:
# Per-output training objectives, keyed by the names of the model's two output layers.
losses = dict(
    FACE_CLASSIFIER="binary_crossentropy",
    BB_REGRESSION="mse",
)
# Relative contribution of each output's loss to the total objective.
lossWeights = dict(FACE_CLASSIFIER=1.0, BB_REGRESSION=0.5)

In [4]:
# Attach the per-output losses and weights, plus an Adam optimizer with its default settings.
# NOTE(review): 'accuracy' is requested as a plain list, so it is presumably reported for
# the BB_REGRESSION output as well, where it is unlikely to be meaningful — confirm.
compile_options = dict(
    optimizer=tf.keras.optimizers.Adam(),
    loss=losses,
    loss_weights=lossWeights,
    metrics=['accuracy'],
)
p_net.compile(**compile_options)

In [5]:
# Print the layer-by-layer structure (output shapes, parameter counts) of the model.
p_net.summary()

Model: "PNet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
PNet_Input (InputLayer)         [(None, 12, 12, 3)]  0                                            
__________________________________________________________________________________________________
PNet_CONV1 (Conv2D)             (None, 10, 10, 10)   280         PNet_Input[0][0]                 
__________________________________________________________________________________________________
PNet_PRELU1 (PReLU)             (None, 10, 10, 10)   10          PNet_CONV1[0][0]                 
__________________________________________________________________________________________________
PNet_MAXPOOL1 (MaxPooling2D)    (None, 5, 5, 10)     0           PNet_PRELU1[0][0]                
_______________________________________________________________________________________________

In [6]:
# Training configuration.
# Input patch shape (height, width, channels); not referenced by the visible cells.
IMAGE_DIMS = 12, 12, 3
# Presumably the mini-batch size — TODO confirm.
BS = 32
# Presumably the initial learning rate; the visible compile call does not use it — TODO confirm.
INIT_LR = 0.001
# Number of epochs passed to fit().
EPOCHS = 50

In [7]:
# Dataset layout: data/raw_<size>/{pos,neg,part}/, where each sub-folder holds its own
# '<size>.txt' annotation file.
IMAGE_SIZE = 12
image_path = os.path.join('data', 'raw_%s' % IMAGE_SIZE)

# Sample folders, one per annotation category.
image_path_pos = os.path.join(image_path, 'pos')
image_path_neg = os.path.join(image_path, 'neg')
image_path_par = os.path.join(image_path, 'part')

# The annotation file has the same name in every folder, so build it once.
_anno_file = '%s.txt' % IMAGE_SIZE
anno_path_pos = os.path.join(image_path_pos, _anno_file)
anno_path_neg = os.path.join(image_path_neg, _anno_file)
anno_path_par = os.path.join(image_path_par, _anno_file)

In [8]:
# Read the three annotation files. Each `with` block closes its handle as soon as
# the lines are in memory, so no file descriptors stay open in the kernel.
with open(anno_path_pos, 'r') as pos:
    images_pos = pos.readlines()
with open(anno_path_neg, 'r') as neg:
    images_neg = neg.readlines()
with open(anno_path_par, 'r') as par:
    images_par = par.readlines()

# One combined list of annotation lines (pos, then neg, then part).
images = images_pos + images_neg + images_par

In [9]:
# Seed Python's global RNG so the shuffle below is repeatable for the same input list.
random.seed(42)
# Shuffle the combined annotation lines in place.
random.shuffle(images)

In [10]:
# Accumulators for the loading loop: image arrays, class labels, bounding-box tuples.
img_, cls_, bbx_ = [], [], []

In [11]:
# Peek at the first ten shuffled annotation lines.
images[:10]

['./data/raw_12/part/2059.jpg -1 0.15 -0.06 -0.06 -0.18\n',
 './data/raw_12/part/1904.jpg -1 0.33 0.16 -0.09 0.06\n',
 './data/raw_12/neg/1845.jpg 0\n',
 './data/raw_12/neg/871.jpg 0\n',
 './data/raw_12/part/341.jpg -1 0.16 -0.16 -0.32 -0.13\n',
 './data/raw_12/part/1742.jpg -1 -0.01 -0.19 -0.24 -0.13\n',
 './data/raw_12/neg/2042.jpg 0\n',
 './data/raw_12/pos/30.jpg 1 0.11 -0.26 0.09 -0.03\n',
 './data/raw_12/neg/1086.jpg 0\n',
 './data/raw_12/part/1048.jpg -1 0.32 0.00 -0.12 -0.20\n']

In [12]:
# Start from empty accumulators so re-running this cell never appends duplicates.
img_, cls_, bbx_ = [], [], []

# NOTE(review): only the first 10 annotation lines are loaded — this looks like a
# smoke-test subset; confirm before training for real.
# NOTE(review): category -1 lines keep -1 as their class label, which is later used
# as the FACE_CLASSIFIER target — confirm this is intended for binary cross-entropy.
for image in images[:10]:
    # Split once on whitespace. This also drops the trailing newline without
    # eating a real character when the last line of a file has none.
    fields = image.split()
    if not fields:
        # blank line in an annotation file
        continue

    # Loop-local name: `image_path` holds the dataset root directory and must not be overwritten.
    img_file = fields[0]
    # load at the network input size and scale pixel values to [0, 1]
    img = img_to_array(load_img(img_file, target_size=(IMAGE_SIZE, IMAGE_SIZE))) / 255
    img_.append(img)

    image_cat = int(fields[1])
    cls_.append(image_cat)
    # Compare by value; `is not 0` tests object identity and only works by accident.
    if image_cat != 0:
        # non-zero categories carry four box values after the label
        x1, y1, x2, y2 = fields[2:]
        bbx_.append((float(x1), float(y1), float(x2), float(y2)))
    else:
        # category-0 lines have no box values; use an all-zero regression target
        bbx_.append((0.0, 0.0, 0.0, 0.0))

In [13]:
# Stack the per-image Python lists into float32 numpy arrays.
data = np.asarray(img_, dtype=np.float32)   # image tensors
bbx = np.asarray(bbx_, dtype=np.float32)    # one 4-tuple per image
cat = np.asarray(cls_, dtype=np.float32)    # one class label per image

In [14]:
# Insert two singleton axes after the sample axis so each target is 4-D:
# (N, 4) -> (N, 1, 1, 4) for the boxes and (N,) -> (N, 1, 1, 1) for the labels.
bbx = np.reshape(bbx, (len(bbx), 1, 1, -1))
cat = np.reshape(cat, (len(cat), 1, 1, -1))

In [16]:
# Train both heads jointly; targets are matched to the outputs by layer name.
# batch_size comes from the config constant BS so that editing it there takes effect
# (its current value, 32, equals the Keras default, so this run is unchanged).
H = p_net.fit(
    x=data,
    y={
        'FACE_CLASSIFIER': cat,
        'BB_REGRESSION': bbx,
    },
    batch_size=BS,
    epochs=EPOCHS,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
