In [1]:
# Zhang - Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks
# model: https://drive.google.com/open?id=0BxINLo5jshCRUWZsZlJBbmNDdDA

from __future__ import print_function

# coding: utf-8
import mxnet as mx
from mtcnn_detector import MtcnnDetector
import cv2
import os
import shutil
import sys
import xml.etree.ElementTree

import multiprocessing

from IPython.display import clear_output
import matplotlib.pyplot as plt

In [2]:
detector = MtcnnDetector(model_folder='model', ctx=mx.gpu(0), num_worker = 4, accurate_landmark = False)

save_faces = 1
save_aligned_face = 0
save_original_faces = 1
show_result = 1
select_most_frequent_face = 1
delete_unfrequent_face = 1

t = 50

  **kwargs)


In [3]:
def get_file_title(file_name):
    return os.path.splitext(file_name)[0]


def cluster_face(x, y, w, h, data, t):
    if select_most_frequent_face == 0:
        return 0, 0, 0, 0, 0
    if len(data) == 0:
        min_location = 0
        min_size = 0
        min_dif = 0
        final_g_indx = 0
    else:
        # for each group
        for g_indx in range(0, len(data)):
            # calculate absolute different between the last face in each group and
            # the current face 
            # # 1) location
            # abs_location = abs(data[g_indx][0][0] - x) + abs(data[g_indx][0][1] - y)
            # # 2) size
            # abs_size     = abs(data[g_indx][0][2] - w) + abs(data[g_indx][0][3] - h)
            # 1) location
            abs_location = abs(data[g_indx][len(data[g_indx]) - 1][0] - x) + abs(data[g_indx][len(data[g_indx]) - 1][1] - y)
            # 2) size
            abs_size = abs(data[g_indx][len(data[g_indx]) - 1][2] - w) + abs(data[g_indx][len(data[g_indx]) - 1][3] - h)
            # compare
            if g_indx == 0:
                min_location = abs_location
                min_size = abs_size
                min_dif = 1.0*(min_location + min_size)/2
                final_g_indx = 0
            else:
                # if abs_location < min_location and abs_size < min_size:
                if 1.0*(abs_location + abs_size)/2 < min_dif:
                    min_location = abs_location
                    min_size = abs_size
                    min_dif = 1.0*(min_location + min_size)/2
                    final_g_indx = g_indx
    
    # decision
    # if min_location <= t1 and min_size <= t2:
    if min_dif < t:
        # print str(final_g_indx) + '_' + str(min_location) + '_' + str(min_size)
        return final_g_indx, min_location, min_size, final_g_indx, min_dif
    else:
        # print str(len(data)) + '_' + str(min_location) + '_' + str(min_size)
        return len(data), min_location, min_size, final_g_indx, min_dif


def detect_single_face_in_image(img, faces, frm_indx, DIR, file):
    # run detector
    results = detector.detect_face(img)

    draw = img.copy()

    if results is not None:
        total_boxes = results[0]
        points = results[1]

        group_indx_for_chips = []
        for i, b in enumerate(total_boxes):
            # naive face clustering based on location and size
            face_indx, location_dif, size_dif, second_face_indx, dif = cluster_face(int(b[0]),
                                                                                    int(b[1]), abs(
                    int(b[2]) - int(b[0])), abs(int(b[3]) - int(b[1])), faces, t)  # t1, t2)

            group_indx_for_chips.append(face_indx)

            # add group in case there is another face group
            if face_indx + 1 > len(faces):
                faces.append([])

            # add item in the group
            faces[face_indx].append([])

            # add x, y, w, and h data
            faces[face_indx][len(faces[face_indx]) - 1].append(int(b[0]))
            faces[face_indx][len(faces[face_indx]) - 1].append(int(b[1]))
            faces[face_indx][len(faces[face_indx]) - 1].append(abs(int(b[2]) - int(b[0])))
            faces[face_indx][len(faces[face_indx]) - 1].append(abs(int(b[3]) - int(b[1])))

            face = draw[int(b[1]):int(b[3]), int(b[0]):int(b[2])]

            if save_faces == 1 and save_original_faces == 1:
                cv2.imwrite(DIR + '/' + file + '_unaligned_frame_' + str(frm_indx) + '_g' + str(
                    face_indx) + '_sg' + str(second_face_indx) + '_' + str(dif) + '_' + str(
                    location_dif) + '_' + str(size_dif) + '_original_face_' + str(i) + '.jpg', face)
            cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 255, 0), 3)
            # if save_faces == 1 and save_original_faces == 1:
            #     cv2.imwrite(DIR + '/unaligned_frame_' + file, face)
            # cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 255, 0))

        # extract aligned face chips
        chips = detector.extract_image_chips(img, points, 144, 0.37)
        for i, chip in enumerate(chips):
            if save_faces == 1 and save_aligned_face == 1:
                cv2.imwrite(DIR + '/' + file + '_aligned_frame_' + str(frm_indx) + '_g' + str(
                    group_indx_for_chips[i]) + '_sg' + str(second_face_indx) + '_' + str(
                    dif) + '_' + str(location_dif) + '_' + str(size_dif) + '_chip_' + str(i) + '.jpg',
                            chip)

        if show_result == 1:
            cv2.imshow("detection result", draw)
            # cv2.imwrite('/home/duong/Downloads/EmotiW/AFEW_6_2016/Train/Faces/seminar.17.01.03/' + video_path + '/' + file + '.images/' + file + '_frame_' + str(frm_indx) + '.jpg', draw)
            cv2.waitKey(1)

    return faces, draw

def delete_unfrequent_faces(faces):
    # the most frequent face
    n_of_faces_per_group = []
    for i in range(0, len(faces)):
        n_of_faces_per_group.append(len(faces[i]))
    print(n_of_faces_per_group)

    # best group
    for i in range(0, len(faces)):
        if len(faces[i]) == max(n_of_faces_per_group):
            best_group = i
            break
    print(best_group)

    if delete_unfrequent_face == 1:
        # remove unfrequent faces
        for exported_file in os.listdir(DIR):
            if '_g' + str(best_group) + '_' not in exported_file:
                os.remove(DIR + '/' + exported_file)

In [6]:
def main():
    list_of_dirs = ['Angry',
                    'Disgust',
                    'Fear',
                    'Happy',
                    'Neutral',
                    'Sad',
                    'Surprise']
    # list_of_dirs = ['Surprise']

    dir_indx = 0
    for video_path in list_of_dirs:
        # PATH = '/mnt/DATA/nhduong/MotionRecognition/data/AFEW/AFEW_6_2016/Train/Data/' + video_path
        PATH = 'E:/EmotiW2017/Val_AFEW/' + video_path
        vid_indx = 0
        for file in os.listdir(PATH):
            if file.endswith('.mp4'):
                file_name = PATH + '/' + file

                DIR = file_name + '.images'
                
                if save_faces == 1:
                    # Create folder to contains faces
                    if os.path.exists(DIR):
                        shutil.rmtree(DIR)
                    os.makedirs(DIR)

                # # Create folder to contains faces (seminar)
                # if os.path.exists('/home/duong/Downloads/EmotiW/AFEW_6_2016/Train/Faces/seminar.17.01.03/' + video_path + '/' + file + '.images'):
                #     shutil.rmtree('/home/duong/Downloads/EmotiW/AFEW_6_2016/Train/Faces/seminar.17.01.03/' + video_path + '/' + file + '.images')
                # os.makedirs('/home/duong/Downloads/EmotiW/AFEW_6_2016/Train/Faces/seminar.17.01.03/' + video_path + '/' + file + '.images')

                # OpenCV reads video
                vid = cv2.VideoCapture(file_name)
                
                faces = []
                
                frm_indx = 0
                suc = True

                while suc:
                    suc, img = vid.read()
                    
                    # Display the frame until new frame is available
                    clear_output(wait=True)
                    
                    print('(' + str(dir_indx + 1) + '/7) processing files in ' + video_path)
                    print(' >>> frame %d from %s...' % (frm_indx, file))
                    print('')

                    # run detector
                    results = detector.detect_face(img)

                    if results is not None:
                        total_boxes = results[0]
                        points = results[1]
                        # print total_boxes
                        # print points
                        
                        group_indx_for_chips = []
                        draw = img.copy()
                        for i, b in enumerate(total_boxes):
                            # naive face clustering based on location and size
                            face_indx, location_dif, size_dif, second_face_indx, dif = cluster_face(int(b[0]), int(b[1]), abs(int(b[2]) - int(b[0])), abs(int(b[3]) - int(b[1])), faces, t)# t1, t2)

                            group_indx_for_chips.append(face_indx)
                            
                            # add group in case there is another face group
                            if face_indx + 1 > len(faces):
                                faces.append([])
                            
                            # add item in the group
                            faces[face_indx].append([])
                            
                            # add x, y, w, and h data
                            faces[face_indx][len(faces[face_indx]) - 1].append(int(b[0]))
                            faces[face_indx][len(faces[face_indx]) - 1].append(int(b[1]))
                            faces[face_indx][len(faces[face_indx]) - 1].append(abs(int(b[2]) - int(b[0])))
                            faces[face_indx][len(faces[face_indx]) - 1].append(abs(int(b[3]) - int(b[1])))
                            
                            face = draw[int(b[1]):int(b[3]), int(b[0]):int(b[2])]
                            if save_faces == 1 and save_original_faces == 1:
                                cv2.imwrite(DIR + '/' + file + '_unaligned_frame_'+str(frm_indx)+'_g'+str(face_indx)+'_sg'+str(second_face_indx)+'_'+str(dif)+'_'+str(location_dif)+'_'+str(size_dif)+'_original_face_'+str(i)+'.jpg', face)
                            cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 255, 0), 3)
                        
                        # extract aligned face chips
                        chips = detector.extract_image_chips(img, points, 144, 0.37)
                        for i, chip in enumerate(chips):
                            if save_faces == 1 and save_aligned_face == 1:
                                cv2.imwrite(DIR + '/' + file + '_aligned_frame_'+str(frm_indx)+'_g'+str(group_indx_for_chips[i])+'_sg'+str(second_face_indx)+'_'+str(dif)+'_'+str(location_dif)+'_'+str(size_dif)+'_chip_'+str(i)+'.jpg', chip)

                        # for p in points:
                        #     for i in range(5):
                        #         cv2.circle(draw, (p[i], p[i + 5]), 1, (0, 0, 255), 2)

                        if show_result == 1:
                            cv2.imshow("detection result", draw)
                            # cv2.imwrite('/home/duong/Downloads/EmotiW/AFEW_6_2016/Train/Faces/seminar.17.01.03/' + video_path + '/' + file + '.images/' + file + '_frame_' + str(frm_indx) + '.jpg', draw)
                            k = cv2.waitKey(1)
                            if k == 27:
                                cv2.destroyAllWindows()
                                sys.exit(0)
                            
#                             # Turn off the axis
#                             plt.axis('off')
#                             # Display the frame
#                             plt.suptitle('face detection')
#                             plt.imshow(draw)
#                             plt.show()

                    frm_indx += 1
                    
                # the most frequent face
                n_of_faces_per_group = []
                for i in range(0, len(faces)):
                    n_of_faces_per_group.append(len(faces[i]))
                print(n_of_faces_per_group)
                
                # best group
                for i in range(0, len(faces)):
                    if len(faces[i]) == max(n_of_faces_per_group):
                        best_group = i
                        break                
                print(best_group)
                
                if delete_unfrequent_face == 1:
                    # remove unfrequent faces
                    for exported_file in os.listdir(DIR):
                        if '_g' + str(best_group) + '_' not in exported_file:
                            os.remove(DIR + '/' + exported_file)
                
                vid_indx += 1
            
#            if vid_indx > 0:
#                break
#        break                    
        dir_indx += 1
    cv2.destroyAllWindows()

In [7]:
if __name__ == '__main__':
    multiprocessing.freeze_support()
    main()

(7/7) processing files in Surprise
 >>> frame 53 from 015646720.avi.mp4...

[53]
0
