In [15]:
import cv2
import time
import tensorflow
import detect_face
import numpy as np

# Bind `tf` to the TF1-style API whatever the installed major version is:
# 1.x is used directly, anything newer goes through `tensorflow.compat.v1`
# with v2 behaviour switched off.
if not tensorflow.__version__.startswith('1.'):
    import tensorflow.compat.v1 as tf
    tf.disable_v2_behavior()
else:
    import tensorflow as tf
print("Tensorflow version: ", tf.__version__)


def video_init(is_2_write=False,save_path=None):
    """Open capture device 0 and, optionally, a video writer for recording.

    Args:
        is_2_write: When truthy, a ``cv2.VideoWriter`` is created as well.
        save_path: Output file for the writer; ``'demo.avi'`` is used when
            this is ``None``. Only consulted when ``is_2_write`` is truthy.

    Returns:
        A tuple ``(cap, height, width, writer)``: the ``cv2.VideoCapture``
        object, the frame height and width exactly as reported by
        ``cap.get`` and the ``cv2.VideoWriter`` (``None`` when recording
        was not requested).
    """
    writer = None
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # default 480
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # default 640

    # To request another resolution, call
    # cap.set(cv2.CAP_PROP_FRAME_WIDTH, ...) / cap.set(cv2.CAP_PROP_FRAME_HEIGHT, ...)
    # here and use those values instead.

    # Notes on FourCC
    # ref: https://docs.opencv.org/master/dd/d43/tutorial_py_video_display.html
    # FourCC is a 4-byte code used to specify the video codec. The list of
    # available codes can be found at fourcc.org. It is platform dependent:
    #   Fedora : DIVX, XVID, MJPG, X264, WMV1, WMV2 (XVID is preferable; MJPG
    #            gives large files, X264 gives very small files)
    #   Windows: DIVX (more to be tested and added)
    #   OSX    : MJPG (.mp4), DIVX (.avi), X264 (.mkv)
    # The code is passed as cv2.VideoWriter_fourcc('M','J','P','G') or
    # cv2.VideoWriter_fourcc(*'MJPG') for MJPG.

    # Truthiness test rather than `is True`, so values such as 1 or a NumPy
    # bool also enable recording instead of being silently ignored.
    if is_2_write:
        fourcc = cv2.VideoWriter_fourcc(*'divx')
        if save_path is None:
            save_path = 'demo.avi'
        # The writer is opened at 30 FPS; it wants an integer (width, height)
        # pair while cap.get() reports floats.
        writer = cv2.VideoWriter(save_path, fourcc, 30, (int(width), int(height)))

    return cap, height, width, writer

def _select_face_indices(det, img_size, detect_multiple_faces):
    """Pick which detections to keep and return their row indices in ``det``.

    Args:
        det: ``(n, 4)`` array of boxes; columns 0 and 2 are x coordinates,
            columns 1 and 3 are y coordinates.
        img_size: ``(height, width)`` of the frame as a NumPy array.
        detect_multiple_faces: Keep every detection when truthy; otherwise
            keep only the single box that scores best on "large and close
            to the frame centre".

    Returns:
        A list of integer row indices into ``det``.
    """
    nrof_faces = det.shape[0]
    if detect_multiple_faces or nrof_faces <= 1:
        return list(range(nrof_faces))

    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
    img_center = img_size / 2
    offsets = np.vstack([
        (det[:, 0] + det[:, 2]) / 2 - img_center[1],
        (det[:, 1] + det[:, 3]) / 2 - img_center[0],
    ])
    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
    # Subtracting twice the squared distance puts some extra weight on centring.
    index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
    return [int(index)]


def face_detection_MTCNN(detect_multiple_faces=False,
                         save_dir='./Dataset/Allne_0726/', max_images=10):
    """Run MTCNN face detection on the webcam stream and save sample frames.

    Frames are read from the capture returned by ``video_init``. For every
    selected face the current full frame is written to
    ``<save_dir>/<n>.jpg`` (``n`` counts up from 1), the five facial
    landmarks are drawn on the frame, and the annotated frame is shown in a
    window together with an FPS read-out. The loop ends when 'q' is pressed,
    when ``max_images`` frames have been saved, or when a frame cannot be
    read.

    Args:
        detect_multiple_faces: When truthy every detected face is used;
            otherwise only the largest, most central face of each frame.
        save_dir: Directory the frames are written to. It is created if it
            does not exist.
        max_images: Number of saved frames after which capture stops.
    """
    import os  # local import: only this function needs it

    #----var
    frame_count = 0
    FPS = "Initialing"
    no_face_str = "No faces detected"

    #----video streaming init
    cap, _, _, writer = video_init(is_2_write=False)

    sess = None
    try:
        #----MTCNN init
        minsize = 20  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps's threshold
        factor = 0.709  # scale factor
        with tf.Graph().as_default():
            config = tf.ConfigProto(
                log_device_placement=True,
                # Fall back to a supported device automatically when the
                # requested one cannot be found.
                allow_soft_placement=True,
            )
            # Limit TensorFlow to a tenth of the GPU memory.
            config.gpu_options.per_process_gpu_memory_fraction = 0.1
            sess = tf.Session(config=config)
            with sess.as_default():
                pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

        # cv2.imwrite does not raise when the target directory is missing,
        # so make sure it exists before the first frame is saved.
        os.makedirs(save_dir, exist_ok=True)

        count = 0
        while cap.isOpened():
            #----get image
            ret, img = cap.read()
            if not ret:
                print("get image failed")
                break

            #----image processing
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            print("image shape:",img_rgb.shape)

            #----face detection
            t_1 = time.time()
            bounding_boxes, points = detect_face.detect_face(
                img_rgb, minsize, pnet, rnet, onet, threshold, factor)
            d_t = time.time() - t_1
            print("Time of face detection: ",d_t)

            #----bounding boxes processing
            nrof_faces = bounding_boxes.shape[0]
            if nrof_faces > 0:
                # One row per face after the transpose: five x values
                # followed by five y values.
                points = np.transpose(np.array(points), [1, 0]).astype(np.int16)
                img_size = np.asarray(img.shape)[0:2]
                selected = _select_face_indices(
                    bounding_boxes[:, 0:4], img_size, detect_multiple_faces)

                for face_idx in selected:
                    # Never save more than max_images, even when one frame
                    # contains several faces.
                    if count < max_images:
                        count += 1
                        file_name_path = os.path.join(save_dir, str(count) + '.jpg')
                        if not cv2.imwrite(file_name_path, img):
                            print("Failed to write image:", file_name_path)

                    #----draw 5 point on the face
                    # Index by the original detection index so the landmarks
                    # belong to the face that was actually selected.
                    facial_points = points[face_idx]
                    for j in range(5):
                        # cv2.circle(image, centre, radius, colour, thickness, line type)
                        centre = (int(facial_points[j]), int(facial_points[j + 5]))
                        cv2.circle(img, centre, 2, (0, 0, 255), -1, 1)

            #----no faces detected
            else:
                cv2.putText(img, no_face_str, (10, 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)

            #----FPS count (refreshed every 20 frames)
            if frame_count == 0:
                t_start = time.time()
            frame_count += 1
            if frame_count >= 20:
                FPS = "FPS=%.1f" % (frame_count / (time.time() - t_start))
                frame_count = 0

            # cv2.putText(image, text, origin, font, scale, colour, thickness)
            cv2.putText(img, FPS, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)

            #----image display
            cv2.imshow("臉擺好來!!!", img)

            #----image writing
            if writer is not None:
                writer.write(img)

            #----'q' key pressed?
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            # `>=` rather than `==`: the counter must not be able to step
            # over the limit and keep the loop running forever.
            if count >= max_images:
                break
    finally:
        #----release, also when an exception interrupts the loop
        cap.release()
        if writer is not None:
            writer.release()
        cv2.destroyAllWindows()
        if sess is not None:
            sess.close()


# Script entry point: start the webcam demo in single-face mode.
if __name__ == '__main__':
    face_detection_MTCNN(detect_multiple_faces=False)


Tensorflow version:  1.15.0
Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce RTX 2080, pci bus id: 0000:02:00.0, compute capability: 7.5

image shape: (480, 640, 3)
Time of face detection:  0.2461414337158203
image shape: (480, 640, 3)
Time of face detection:  0.054025888442993164
image shape: (480, 640, 3)
Time of face detection:  0.05666923522949219
image shape: (480, 640, 3)
Time of face detection:  0.051720619201660156
image shape: (480, 640, 3)
Time of face detection:  0.05036139488220215
image shape: (480, 640, 3)
Time of face detection:  0.05180859565734863
image shape: (480, 640, 3)
Time of face detection:  0.05468392372131348
image shape: (480, 640, 3)
Time of face detection:  0.05063033103942871
image shape: (480, 640, 3)
Time of face detection:  0.05108308792114258
image shape: (480, 640, 3)
Time of face detection:  0.05122089385986328
image shape: (480, 640, 3)
Time of face detection:  0.06228375434875488
image shape: (480, 640, 3)
Ti

Time of face detection:  0.049158334732055664
image shape: (480, 640, 3)
Time of face detection:  0.04742741584777832
image shape: (480, 640, 3)
Time of face detection:  0.04868006706237793
image shape: (480, 640, 3)
Time of face detection:  0.0487062931060791
image shape: (480, 640, 3)
Time of face detection:  0.04807567596435547
image shape: (480, 640, 3)
Time of face detection:  0.048531293869018555
image shape: (480, 640, 3)
Time of face detection:  0.047734975814819336
image shape: (480, 640, 3)
Time of face detection:  0.047437191009521484
image shape: (480, 640, 3)
Time of face detection:  0.04748964309692383
image shape: (480, 640, 3)
Time of face detection:  0.046952247619628906
image shape: (480, 640, 3)
Time of face detection:  0.04808616638183594
image shape: (480, 640, 3)
Time of face detection:  0.04646730422973633
image shape: (480, 640, 3)
Time of face detection:  0.04836893081665039
image shape: (480, 640, 3)
Time of face detection:  0.04734206199645996
image shape: (4

Time of face detection:  0.05065011978149414
image shape: (480, 640, 3)
Time of face detection:  0.05202460289001465
image shape: (480, 640, 3)
Time of face detection:  0.04825735092163086
image shape: (480, 640, 3)
Time of face detection:  0.04754304885864258
image shape: (480, 640, 3)
Time of face detection:  0.052510976791381836
image shape: (480, 640, 3)
Time of face detection:  0.05367398262023926
image shape: (480, 640, 3)
Time of face detection:  0.06685733795166016
image shape: (480, 640, 3)
Time of face detection:  0.050049543380737305
image shape: (480, 640, 3)
Time of face detection:  0.0486292839050293
image shape: (480, 640, 3)
Time of face detection:  0.050704240798950195
image shape: (480, 640, 3)
Time of face detection:  0.04938244819641113
image shape: (480, 640, 3)
Time of face detection:  0.04763340950012207
image shape: (480, 640, 3)
Time of face detection:  0.04599928855895996
image shape: (480, 640, 3)
Time of face detection:  0.04642510414123535
image shape: (480

image shape: (480, 640, 3)
Time of face detection:  0.054308176040649414
image shape: (480, 640, 3)
Time of face detection:  0.05602216720581055
image shape: (480, 640, 3)
Time of face detection:  0.052469491958618164
image shape: (480, 640, 3)
Time of face detection:  0.05273580551147461
image shape: (480, 640, 3)
Time of face detection:  0.054967403411865234
image shape: (480, 640, 3)
Time of face detection:  0.06177020072937012
image shape: (480, 640, 3)
Time of face detection:  0.0657510757446289
image shape: (480, 640, 3)
Time of face detection:  0.06187629699707031
image shape: (480, 640, 3)
Time of face detection:  0.05930900573730469
image shape: (480, 640, 3)
Time of face detection:  0.055036067962646484
image shape: (480, 640, 3)
Time of face detection:  0.058162689208984375
image shape: (480, 640, 3)
Time of face detection:  0.05786538124084473
image shape: (480, 640, 3)
Time of face detection:  0.053411006927490234
image shape: (480, 640, 3)
Time of face detection:  0.06119

Time of face detection:  0.04977297782897949
image shape: (480, 640, 3)
Time of face detection:  0.058914899826049805
image shape: (480, 640, 3)
Time of face detection:  0.04685330390930176
image shape: (480, 640, 3)
Time of face detection:  0.04939436912536621
image shape: (480, 640, 3)
Time of face detection:  0.04845857620239258
image shape: (480, 640, 3)
Time of face detection:  0.049199819564819336
image shape: (480, 640, 3)
Time of face detection:  0.04644370079040527
image shape: (480, 640, 3)
Time of face detection:  0.07111811637878418
image shape: (480, 640, 3)
Time of face detection:  0.04596877098083496
image shape: (480, 640, 3)
Time of face detection:  0.04691195487976074
image shape: (480, 640, 3)
Time of face detection:  0.05441761016845703
image shape: (480, 640, 3)
Time of face detection:  0.05727076530456543
image shape: (480, 640, 3)
Time of face detection:  0.06078338623046875
image shape: (480, 640, 3)
Time of face detection:  0.05322575569152832
image shape: (480