In [1]:
import tensorflow 
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D,MaxPooling2D, LeakyReLU, PReLU , Reshape

In [2]:
def read_single_tfrecord(tfrecord_file, batch_size, net):
    # generate a input queue
    # each epoch shuffle
    filename_queue = tf.string_input_producer([tfrecord_file],shuffle=True)
    # read tfrecord
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    image_features = tf.parse_single_example(
        serialized_example,
        features={
            'image/encoded': tf.FixedLenFeature([], tf.string),#one image  one record
            'image/label': tf.FixedLenFeature([], tf.int64),
            'image/roi': tf.FixedLenFeature([4], tf.float32),
            'image/landmark': tf.FixedLenFeature([10],tf.float32)
        }
    )
    if net == 'PNet':
        image_size = 12
    elif net == 'RNet':
        image_size = 24
    else:
        image_size = 48
    image = tf.decode_raw(image_features['image/encoded'], tf.uint8)
    image = tf.reshape(image, [image_size, image_size, 3])
    image = (tf.cast(image, tf.float32)-127.5) / 128
    
    # image = tf.image.per_image_standardization(image)
    label = tf.cast(image_features['image/label'], tf.float32)
    roi = tf.cast(image_features['image/roi'],tf.float32)
    landmark = tf.cast(image_features['image/landmark'],tf.float32)
    image, label,roi,landmark = tf.train.batch(
        [image, label,roi,landmark],
        batch_size=batch_size,
        num_threads=2,
        capacity=1 * batch_size
    )
    label = tf.reshape(label, [batch_size])
    roi = tf.reshape(roi,[batch_size,4])
    landmark = tf.reshape(landmark,[batch_size,10])
    return image, label, roi,landmark

In [3]:
#Class PNet():
    #P-Net


    # Data Preprocessing
    # before passing your data into a neural network you normalize the data by Scaling it

    # scaling the data 
    #X = X/255.0

    #you can also use tf.keras.utils.normalize(X) 
#     x = layers.Dense(64, activation='relu')(inputs)
#     x = layers.Dense(64, activation='relu')(x)
#     predictions = layers.Dense(10, activation='softmax')(x)
    # Convoluted neural network

def p_net():

    model = Sequential() #there are 2 types of models but this is the most common
    model.add(Conv2D(10,(3, 3), strides=1, name='conv1',padding='SAME', input_shape =(12, 12, 3) ) )
    model.add(PReLU(name='prelu1'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2,2)))

    model.add(Conv2D(16,(3, 3), strides=1,name='conv2', input_shape =(5, 5, 10)))
    model.add(PReLU(name='prelu2'))

    model.add(Conv2D(32,(3,3),strides=1,name='conv3'))
    model.add(PReLU(name='prelu3'))

    model.add(Conv2D(2, (1, 1), activation='softmax',name='classifier1'))
    model.add(PReLU(name='prelu4'))

    model.add(Dense(4,name='bbox_regression'))

    model.add(Dense(10,name='landmark'))

    #model = tf.keras.Model(inputs=, outputs=)
    model.compile(optimizer=tf.optimizers.Adam(0.01), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

    #my_adam = adam(lr = 0.00001)




#R-Net

def r_net():
    model = Sequential() #there are 2 types of models but this is the most common
    model.add(Conv2D(28, (3, 3), strides=1, name='conv1', input_shape =(24, 24, 3)))
    model.add(PReLU(name='prelu1'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2,2)))

    model.add(Conv2D(48, (3, 3), strides=1, name='conv2'))
    model.add(PReLU(name='prelu2'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2,2)))

    model.add(Conv2D(64, (2, 2), strides=1, name='conv3'))
    model.add(PReLU(name='prelu3'))

    model.add(Dense(128))
    model.add(PReLU(name='prelu4'))

    model.add(Dense(2))
    model.add(Activation('softmax'))

    model.add(Dense(4))#THere is something being used input_layer_name='prelu4)

#O-Net

def o_net():
    model = Sequential() #there are 2 types of models but this is the most common
    model.add(Conv2D(32, (3, 3), strides=1, name='conv1'), input_shape =(48, 48, 3))
    model.add(PReLU(name='prelu1'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2,2)))

    model.add(Conv2D(64, (3, 3), strides=1, name='conv2'))
    model.add(PReLU(name='prelu2'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2,2)))

    model.add(Conv2D(64, (3, 3), strides=1, name='conv3'))
    model.add(PReLU(name='prelu3'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2)))

    model.add(Conv2D(128, (2, 2), strides=1, name='conv4'))
    model.add(PReLU(name='prelu4'))

    model.add(Dense(256))
    model.add(PReLU(name='prelu5'))

    model.add(Dense(2))
    model.add(Activation('softmax'))

    model.add(Dense(4))#THere is something being used input_layer_name='prelu5

    model.add(Dense(10))#THere is something being used input_layer_name='prelu5

In [None]:
# net = PNet
# batch_size = 32


# def read_single_tfrecord(tfrecord_file, batch_size, net):
#     # generate a input queue
#     # each epoch shuffle
#     filename_queue = tf.string_input_producer([tfrecord_file],shuffle=True)
#     # read tfrecord
#     reader = tf.TFRecordReader()
#     _, serialized_example = reader.read(filename_queue)
#     image_features = tf.parse_single_example(
#         serialized_example,
#         features={
#             'image/encoded': tf.FixedLenFeature([], tf.string),#one image  one record
#             'image/label': tf.FixedLenFeature([], tf.int64),
#             'image/roi': tf.FixedLenFeature([4], tf.float32),
#             'image/landmark': tf.FixedLenFeature([10],tf.float32)
#         }
#     )
#     if net == 'PNet':
#         image_size = 12
#     elif net == 'RNet':
#         image_size = 24
#     else:
#         image_size = 48
#     image = tf.decode_raw(image_features['image/encoded'], tf.uint8)
#     image = tf.reshape(image, [image_size, image_size, 3])
#     image = (tf.cast(image, tf.float32)-127.5) / 128
    
#     # image = tf.image.per_image_standardization(image)
#     label = tf.cast(image_features['image/label'], tf.float32)
#     roi = tf.cast(image_features['image/roi'],tf.float32)
#     landmark = tf.cast(image_features['image/landmark'],tf.float32)
#     image, label,roi,landmark = tf.train.batch(
#         [image, label,roi,landmark],
#         batch_size=batch_size,
#         num_threads=2,
#         capacity=1 * batch_size
#     )
#     label = tf.reshape(label, [batch_size])
#     roi = tf.reshape(roi,[batch_size,4])
#     landmark = tf.reshape(landmark,[batch_size,10])
#     return image, label, roi,landmark

In [19]:
model = p_net()

In [13]:
x.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv2D)               (None, 12, 12, 10)        280       
_________________________________________________________________
prelu1 (PReLU)               (None, 12, 12, 10)        1440      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 10)          0         
_________________________________________________________________
conv2 (Conv2D)               (None, 3, 3, 16)          1456      
_________________________________________________________________
prelu2 (PReLU)               (None, 3, 3, 16)          144       
_________________________________________________________________
conv3 (Conv2D)               (None, 1, 1, 32)          4640      
_________________________________________________________________
prelu3 (PReLU)               (None, 1, 1, 32)         

In [None]:
#Write your own reader function.
image_features = tf.parse_single_example(
       serialized_example,
       features={
           'image/encoded': tf.FixedLenFeature([], tf.string),#one image  one record
           'image/label': tf.FixedLenFeature([], tf.int64),
           'image/roi': tf.FixedLenFeature([4], tf.float32),
           'image/landmark': tf.FixedLenFeature([10],tf.float32)
       }

In [5]:
def read_record(example):
    image_features = tf.io.parse_single_example(
       example,
       features={
           'image/encoded': tf.io.FixedLenFeature([], tf.string),#one image  one record
           'image/label': tf.io.FixedLenFeature([], tf.int64),
           'image/roi': tf.io.FixedLenFeature([4], tf.float32),
           'image/landmark': tf.io.FixedLenFeature([10],tf.float32)
       }
    )
    image = tf.io.decode_raw(image_features['image/encoded'], tf.uint8)
    image = tf.reshape(image, [12, 12, 3])
    image = (tf.cast(image, tf.float32)-127.5) / 128
    
    # image = tf.image.per_image_standardization(image)
    label = tf.cast(image_features['image/label'], tf.float32)
    roi = tf.cast(image_features['image/roi'],tf.float32)
    landmark = tf.cast(image_features['image/landmark'],tf.float32)
    return image, (label, roi,landmark)
        

    
# image, label,roi,landmark = tf.train.batch(
#     [image, label,roi,landmark],
#     batch_size=batch_size,
#     num_threads=2,
#     capacity=1 * batch_size
# )
# label = tf.reshape(label, [batch_size])
# roi = tf.reshape(roi,[batch_size,4])
# landmark = tf.reshape(landmark,[batch_size,10])

In [6]:
BATCH_SIZE = 384

buffer_size=10 * BATCH_SIZE

def get_batch(filenames):
    option_no_order = tf.data.Options()
    option_no_order.experimental_deterministic = False
    
    
    dataset = tf.data.Dataset.list_files(filenames)
    dataset = dataset.with_options(option_no_order)
    
    dataset = dataset.interleave(tf.data.TFRecordDataset)
    dataset = dataset.map(read_record)
    
    dataset = dataset.repeat()
    dataset = dataset.shuffle(2048)
    
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
    
    dataset = dataset.prefetch(buffer_size=buffer_size)
    
    return dataset

In [7]:
training_filename = "train_PNet_landmark.tfrecord_shuffle"
validation_filename = "train_PNet_landmark.tfrecord_shuffle"
def get_training_datest():
    return get_batch(training_filename)

def get_validation_dataset():
    return get_batch(validation_filename)

In [8]:
def p_net():

        inputs = tf.keras.Input(shape=(12,12,3))

        model = tf.keras.layers.Conv2D(10,(3, 3), strides=1, name='conv1',padding='SAME')(inputs)
        model = tf.keras.layers.PReLU(name='prelu1')(model)
        model = tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2,2))(model)

        model = tf.keras.layers.Conv2D(16,(3, 3), strides=1,name='conv2')(model)
        model = tf.keras.layers.PReLU(name='prelu2')(model)

        model = tf.keras.layers.Conv2D(32,(3,3),strides=1,name='conv3')(model)
        model = tf.keras.layers.PReLU(name='prelu3')(model)

        classifier1 = tf.keras.layers.Conv2D(2, (1, 1), activation='softmax',name='classifier1')(model)
        #I am not sure if this line of code is impoertant
        classifier1 = tf.keras.layers.Reshape((2,))(classifier1)   # this layer has to be deleted in order to enalbe arbitraty shape input

        bbox_regress = Conv2D(4, (1, 1),name='bbox1')(model)
        #I am not sure if this line of code is impoertant
        bbox_regress = tf.keras.layers.Reshape((4,))(bbox_regress)

        #THis last code is not usually included in most mtcnn replicas
        landmark = Conv2D(10, (1, 1),name='landmark1')(model)
        landmark =  tf.keras.layers.Reshape((10,))(landmark)

        model = tf.keras.Model(inputs=inputs, outputs=[classifier1,bbox_regress,landmark])
        model.compile(optimizer=tf.optimizers.Adam(0.01), loss='categorical_crossentropy', metrics=['accuracy'])

        return model

In [9]:
modelsss = p_net()

In [10]:
modelsss.fit(get_training_datest(), steps_per_epoch=100, epochs=10, validation_data=get_validation_dataset(), validation_steps=50)

Train for 100 steps, validate for 50 steps
Epoch 1/10
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x12d9d5b00>

In [49]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 12, 12, 3)]  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 12, 12, 10)   280         input_4[0][0]                    
__________________________________________________________________________________________________
prelu1 (PReLU)                  (None, 12, 12, 10)   1440        conv1[0][0]                      
__________________________________________________________________________________________________
max_pooling2d_6 (MaxPooling2D)  (None, 5, 5, 10)     0           prelu1[0][0]                     
____________________________________________________________________________________________