# **Detect face(s) of people in images and then Extract and Save just faces as image using MTCNN (Multi-task Cascaded Convolutional Neural Networks) and OpenCV**
Extracting just the faces can be helpful when you are creating a dataset of faces from many images to pass as input to an ML pipeline.



## Step 1: Mount your Google Drive
- Execute the cell below and, when prompted, click the URL and log in to the Google account that is linked to your Drive. When prompted, select 'Allow' and then copy the authorization code.
- Paste the authorization code in the box.
- Create a folder to keep source and target files and set that as the BASE_PATH

In [2]:
# Mount Google Drive at /content/drive/ (Colab asks for authorization when it is not mounted yet).
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [4]:
import os

# Root working folder on Drive. This is my path; change it to match your own Drive folder.
# Keep the trailing slashes: the target path of each output file is built by replacing
# SRC_PHOTOS_PATH with TRG_FACES_PATH inside the source file path.
BASE_PATH = '/content/drive/My Drive/01_Aqila/'
SRC_PHOTOS_PATH = BASE_PATH + 'source_images/'
TRG_FACES_PATH = BASE_PATH + 'target_faces/'

# makedirs also creates BASE_PATH when it is missing and does not fail if the folder already exists.
os.makedirs(SRC_PHOTOS_PATH, exist_ok=True)
os.makedirs(TRG_FACES_PATH, exist_ok=True)
      


## Step 2: Install MTCNN and other dependencies

In [5]:
!pip install mtcnn

Collecting mtcnn
[?25l  Downloading https://files.pythonhosted.org/packages/67/43/abee91792797c609c1bf30f1112117f7a87a713ebaa6ec5201d5555a73ef/mtcnn-0.1.0-py3-none-any.whl (2.3MB)
[K     |████████████████████████████████| 2.3MB 3.4MB/s 
Installing collected packages: mtcnn
Successfully installed mtcnn-0.1.0


In [6]:
from mtcnn.mtcnn import MTCNN
import cv2
from google.colab.patches import cv2_imshow

## Step 3: Function that will detect, extract and save faces as individual image files

In [10]:
def detect_extract_save_faces(files, resize=False, extract=True, mark_faces=False,
                              min_confidence=0.94, face_size=(80, 80)):
    """Detect faces in each image and save the face crops and/or an annotated copy.

    Output paths are derived from each source path by replacing SRC_PHOTOS_PATH
    with TRG_FACES_PATH, so the source folder tree is mirrored under the target
    folder. Files that OpenCV cannot read as an image are reported and skipped.

    Args:
        files: iterable of image file paths.
        resize: if true, halve the image width and height before detection
            (large images take longer to process).
        extract: if true, save every face whose confidence is above
            min_confidence as '<source name>_<index>.jpg', where index is the
            position of the face in the detector output.
        mark_faces: if true, save '<source name>_face_detected.jpg' with a red
            rectangle around every detection (regardless of confidence).
        min_confidence: detections at or below this confidence are not extracted.
        face_size: (width, height) in pixels of the saved face crops.
    """
    # Created lazily and reused for all files: building the detector once is
    # enough, and nothing is built at all when there are no files to process.
    face_detector = None

    for filename in files:
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread signals an unreadable or non-image file by returning None.
            print('# file name:', filename, ': could not be read as an image - ignored!')
            continue

        if resize:
            # Resize the image by half (never below one pixel per side).
            half_width = max(1, img.shape[1] // 2)
            half_height = max(1, img.shape[0] // 2)
            img = cv2.resize(img, (half_width, half_height), interpolation=cv2.INTER_AREA)

        # Convert the image to RGB from the BGR format which OpenCV uses as default.
        rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if face_detector is None:
            face_detector = MTCNN()
        face_locations = face_detector.detect_faces(rgb_image)
        print('# file name:', filename, '# of faces detected:', len(face_locations))
        print(face_locations)

        if extract:
            img_height, img_width = img.shape[:2]
            for index, face_location in enumerate(face_locations):
                if face_location['confidence'] <= min_confidence:
                    continue

                x, y, width, height = face_location['box']
                # The box can start outside the image (negative x or y). Clamp it
                # to the image bounds; taking abs() would move the crop window,
                # and a negative slice start would wrap around.
                x1, y1 = max(0, x), max(0, y)
                x2, y2 = min(img_width, x + width), min(img_height, y + height)
                if x2 <= x1 or y2 <= y1:
                    continue  # nothing of the box lies inside the image

                face = cv2.resize(img[y1:y2, x1:x2], face_size, interpolation=cv2.INTER_AREA)

                # Keep the source file name and append the index of the face.
                face_file_path = _target_path(filename, '_' + str(index) + '.jpg')
                print('face_path:', face_file_path)
                cv2.imwrite(face_file_path, face)

        if mark_faces:
            # Drawn after the crops are taken so the rectangles never end up in a face image.
            for face_location in face_locations:
                x, y, width, height = face_location['box']
                cv2.rectangle(img, (x, y), (x + width, y + height), (0, 0, 255), 2)

            detect_file_path = _target_path(filename, '_face_detected.jpg')
            print('detect_path:', detect_file_path)
            cv2.imwrite(detect_file_path, img)


def _target_path(filename, suffix):
    """Return the output path for filename with suffix, creating its folder if needed."""
    stem = os.path.splitext(filename.replace(SRC_PHOTOS_PATH, TRG_FACES_PATH))[0]
    target = stem + suffix
    parent = os.path.dirname(target)
    # cv2.imwrite will not create the target directory. makedirs builds nested
    # sub-folders in one go and tolerates folders that already exist.
    if parent:
        os.makedirs(parent, exist_ok=True)
    return target

      



Function to get the list of all source files - root and files in sub-folders 

In [11]:
def getListOfFiles(dirName):
    """Return the paths of all non-empty, non-hidden files in dirName and its sub-folders.

    Entries whose name starts with '.' (files or folders) and zero-byte files
    are skipped, and a line is printed for each skipped entry. Entries are
    visited in sorted name order so the result is deterministic.

    Args:
        dirName: path of the directory to scan.

    Returns:
        List of file paths, each built by joining dirName with the entry names.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)

        if entry.startswith('.'):
            print(fullPath, ': Hidden entry - ignored!')
            continue

        if os.path.isdir(fullPath):
            # The size check is only meaningful for files: the size reported for
            # a directory depends on the filesystem.
            allFiles.extend(getListOfFiles(fullPath))
        elif os.path.getsize(fullPath) > 0:
            allFiles.append(fullPath)
        else:
            print(fullPath, ': Zero byte file - ignored!')

    return allFiles



## Get list of files from source image folder and extract faces.

In [12]:
# Collect every usable file under the source folder (sub-folders included), then
# save the face crops together with a copy of each image that has the detections outlined.
listOfFiles = getListOfFiles(SRC_PHOTOS_PATH)
detect_extract_save_faces(listOfFiles, resize=False, extract=True, mark_faces=True)

/content/drive/My Drive/01_Aqila/source_images/.ipynb_checkpoints : Zero byte file - ignored!
/content/drive/My Drive/01_Aqila/source_images/Amir/.DS_Store : Zero byte file - ignored!
# file name: /content/drive/My Drive/01_Aqila/source_images/IMG_3156.jpeg # of faces detected: 6
[{'box': [685, 773, 72, 109], 'confidence': 0.8601534962654114, 'keypoints': {'left_eye': (726, 811), 'right_eye': (752, 810), 'nose': (749, 834), 'mouth_left': (725, 855), 'mouth_right': (744, 857)}}, {'box': [243, 1211, 43, 56], 'confidence': 0.846126139163971, 'keypoints': {'left_eye': (256, 1232), 'right_eye': (272, 1227), 'nose': (266, 1241), 'mouth_left': (262, 1255), 'mouth_right': (274, 1252)}}, {'box': [1975, 1621, 110, 129], 'confidence': 0.8216217160224915, 'keypoints': {'left_eye': (2012, 1690), 'right_eye': (2043, 1661), 'nose': (2037, 1697), 'mouth_left': (2041, 1730), 'mouth_right': (2067, 1706)}}, {'box': [761, 1268, 47, 55], 'confidence': 0.7677661776542664, 'keypoints': {'left_eye': (772, 129