<a href="https://colab.research.google.com/github/syeds-git/face-cropping-pipeline/blob/main/FaceCroppingPipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Face cropping pipeline

This pipeline scans a folder of images, extracts every face found in each image, and saves each face as a separate file in the `cropped` folder.

Required:

1. A zip file containing images of people, arranged in one folder per person. An image may contain multiple faces.
2. The zip is uploaded to Google Drive.

# Step 1
Load and extract the data set

In [None]:
# Mount Google Drive at /content/gdrive so the dataset archive stored there can be read
from google.colab import drive
drive.mount('/content/gdrive')

# Unzip the training data from Drive into the local dataset/ folder
!unzip /content/gdrive/MyDrive/dataset.zip -d dataset

# Unzip test data if available
# !unzip test.zip -d dataset

# Remove the cropped/ output folder left over from previous runs
!rm -rf cropped/

# Step 2

Collect the path of every person folder in the data set into a list, and recreate an empty `cropped/` output folder

In [8]:
import os
import shutil

# Input folder (holds one sub-folder per person) and output folder for the face crops
path_to_data = "dataset/"
path_to_cr_data = "cropped/"

# Gather the path of every sub-directory found directly under the input folder
img_dirs = [entry.path for entry in os.scandir(path_to_data) if entry.is_dir()]

# add test directory to the array
# img_dirs = ["test/"]

# Start from an empty output folder: drop any existing one, then create it anew
if os.path.exists(path_to_cr_data):
    shutil.rmtree(path_to_cr_data)
os.mkdir(path_to_cr_data)

# Step 3

Create helper functions

In [3]:
import cv2

# Clamp a value at zero
def checkLessThanZero(param):
  """Return 0 for a negative ``param``; return any other value unchanged."""
  return 0 if param < 0 else param

# Crops every detected face out of an image
def get_cropped_faces(image_path, result_list):
    """Crop every detected face out of the image stored at ``image_path``.

    Args:
        image_path: Path of the image file; it is loaded with ``cv2.imread``.
        result_list: Sequence of detections, each a mapping whose ``'box'``
            entry unpacks to ``(x, y, width, height)``.

    Returns:
        A list holding one cropped pixel array per usable detection. The list
        is empty when the image cannot be read, and detections whose box has
        no area inside the image are left out.
    """
    print('Scanned: ' + image_path)
    img = cv2.imread(image_path)
    rois = []

    print(len(result_list))

    # cv2.imread reports an unreadable file by returning None rather than
    # raising, so bail out here instead of failing on the slice below.
    if img is None:
        print('Could not read: ' + image_path)
        return rois

    for result in result_list:
        x1, y1, width, height = result['box']

        # Derive the far corner from the raw origin *before* clamping: a box
        # that starts off-image must be truncated at the border, not shifted
        # right/down by the amount it overshoots.
        x2, y2 = x1 + width, y1 + height
        x1, y1 = checkLessThanZero(x1), checkLessThanZero(y1)
        # A negative end index would be read as "count from the far edge".
        x2, y2 = checkLessThanZero(x2), checkLessThanZero(y2)

        roi_color = img[y1:y2, x1:x2]
        # A box with no area inside the image yields a crop without pixels.
        if roi_color.size == 0:
            continue
        rois.append(roi_color)

    return rois

# Step 4

Extract and save detected faces in the images using MTCNN, PIL and OpenCV

In [5]:
# Install the mtcnn package that provides the face detector used in this step
pip install mtcnn

Collecting mtcnn
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 5.1 MB/s 
Installing collected packages: mtcnn
Successfully installed mtcnn-0.1.1


In [6]:
import mtcnn
# Print the package's reported version as a quick check that the import works
print(mtcnn.__version__)

0.1.0


In [None]:
# extract and save each detected face in a photograph
from matplotlib import pyplot
from matplotlib.patches import Rectangle
from matplotlib.patches import Circle
from mtcnn.mtcnn import MTCNN
from PIL import Image

# Output folders created during the run (one is added the first time a face is saved for a person)
cropped_image_dirs = []
# Maps each person (source folder name) to the paths of the face crops saved for them
person_file_names_dict = {}
cv2.destroyAllWindows()

# create the detector, using default weights
detector = MTCNN()

def converToJpg(pngFile):
  """Save an RGB JPEG copy of an image next to the original file.

  Args:
    pngFile: Path of the source image.

  Returns:
    Path of the written copy: the source path with '.jpg' appended.
  """
  jpgFile = pngFile + '.jpg'
  # The context manager closes the source file handle once the copy is written,
  # instead of leaving one open handle behind per converted image.
  with Image.open(pngFile) as im:
    im.convert('RGB').save(jpgFile)
  return jpgFile

# Walk every person folder, detect the faces in each image and save one crop per face.
for img_dir in img_dirs:
    count = 1
    # normpath drops a trailing slash, so a path such as "test/" still yields
    # the folder name rather than an empty string.
    person_name = os.path.basename(os.path.normpath(img_dir))

    # This will be the name of the folder
    print(person_name)

    person_file_names_dict[person_name] = []

    # Snapshot the listing before processing: converting a PNG writes a new
    # file into this folder, and whether a live scandir iterator reports files
    # added during iteration is unspecified. Sorting keeps the numbering of
    # the saved crops stable between runs.
    with os.scandir(img_dir) as scan:
        image_paths = sorted(entry.path for entry in scan if entry.is_file())
    listed_paths = set(image_paths)

    for filename in image_paths:
        lowered = filename.lower()

        # Match on the real extension, case-insensitively, rather than on a
        # substring that may occur anywhere in the path.
        if not lowered.endswith(('.png', '.jpg', '.jpeg')):
            continue

        # "<name>.png.jpg" sitting next to "<name>.png" is a conversion left
        # behind by an earlier run. The PNG is processed (and converted again)
        # in its own right, so handling the copy too would duplicate its faces.
        if lowered.endswith('.png.jpg') and filename[:-len('.jpg')] in listed_paths:
            continue

        print('Scanning: ' + filename)
        if lowered.endswith('.png'):
            print('Convert to jpg: ' + filename)
            filename = converToJpg(filename)
            print('Converted to jpg: ' + filename)

        # load image from file
        pixels = pyplot.imread(filename)

        # detect faces in the image
        faces = detector.detect_faces(pixels)

        # get cropped faces
        cropped_images = get_cropped_faces(filename, faces)

        # Nothing to save for this image (covers both None and an empty list)
        if not cropped_images:
            continue

        # Create the person's output folder the first time a face is found
        cropped_folder = os.path.join(path_to_cr_data, person_name)
        if not os.path.exists(cropped_folder):
            os.makedirs(cropped_folder)
            cropped_image_dirs.append(cropped_folder)
            print("Generating cropped images in folder: ", cropped_folder)

        for roi_color in cropped_images:
            # Crops are numbered consecutively per person: <person><n>.png
            cropped_file_name = person_name + str(count) + ".png"
            cropped_file_path = os.path.join(cropped_folder, cropped_file_name)
            print('Saving: ' + cropped_file_path)

            cv2.imwrite(cropped_file_path, roi_color)
            person_file_names_dict[person_name].append(cropped_file_path)
            count += 1

# Step 5

Zip the cropped faces and copy the archive to Google Drive

In [None]:
# Bundle the contents of the cropped/ folder into a zip archive named "faces"
import shutil
shutil.make_archive(base_name='faces', format='zip', root_dir='cropped')

In [None]:
# Copy the faces.zip archive to the mounted Google Drive folder
!cp faces.zip /content/gdrive/MyDrive/faces.zip