In [4]:
from imutils import paths
import numpy as np
import imutils
import cv2
import os

In [5]:
# Cache of loaded classifiers keyed by cascade path, so the XML model is
# parsed once instead of on every call (this function is called per image).
_FACE_DETECTORS = {}


def _get_face_detector(cascadePath):
  # return the cached classifier for cascadePath, loading and validating
  # it on first use
  detector = _FACE_DETECTORS.get(cascadePath)
  if detector is None:
    detector = cv2.CascadeClassifier(cascadePath)

    # CascadeClassifier does not raise on a missing/invalid file; it
    # returns an empty classifier that only fails later inside
    # detectMultiScale, so fail here with an actionable message instead
    if detector.empty():
      raise IOError(
        "Unable to load Haar cascade from '{}'".format(cascadePath))

    _FACE_DETECTORS[cascadePath] = detector

  return detector


def face_detection(image, cascadePath="haarcascade_frontalface_default.xml"):
  """Detect frontal faces in an image with a Haar cascade classifier.

  Args:
    image: the input image; it is converted to grayscale with
      cv2.COLOR_BGR2GRAY, so it is expected to be in BGR channel order.
    cascadePath: path to the Haar cascade XML file. Defaults to
      "haarcascade_frontalface_default.xml" resolved against the current
      working directory.

  Returns:
    The result of detectMultiScale: a sequence of (x, y, w, h) bounding
    boxes, empty when no face is found.

  Raises:
    IOError: if the cascade file cannot be loaded.
  """
  detector = _get_face_detector(cascadePath)

  # Haar cascades operate on single-channel images
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  rects = detector.detectMultiScale(gray, scaleFactor=1.05,
    minNeighbors=10, minSize=(30, 30),
    flags=cv2.CASCADE_SCALE_IMAGE)

  return rects

In [6]:
def load_face_dataset(inputPath, minSamples = 15, faceSize = (47 , 62)):
  """Load a face dataset laid out as one sub-directory per person.

  The label of every image is the name of the directory that directly
  contains it. Labels with fewer than minSamples image files are skipped.
  Every face found by face_detection in the remaining images is cropped,
  resized to faceSize and converted to grayscale.

  Args:
    inputPath: root directory that is searched for images.
    minSamples: minimum number of image files a label needs in order to
      be included. The count is of image files, not of detected faces.
    faceSize: (width, height) passed to cv2.resize for every face crop.

  Returns:
    A tuple (faces, labels) of NumPy arrays; labels[i] is the label of
    faces[i]. Both are empty when nothing qualifies.
  """
  # get all the image paths in the dataset folder structure and derive
  # the label (i.e. groundtruth) of each one from its parent directory
  imagePaths = list(paths.list_images(inputPath))
  names = [p.split(os.path.sep)[-2] for p in imagePaths]

  # count the images per label once, as a dict for constant-time lookup
  (uniqueNames , counts) = np.unique(names , return_counts = True)
  countByName = dict(zip(uniqueNames.tolist() , counts.tolist()))

  faces = []
  labels = []

  # loop over all of the image paths together with their labels
  for (imagePath , name) in zip(imagePaths , names):
    # skip under-represented labels before paying for the image decode
    if countByName[name] < minSamples:
      continue

    # cv2.imread returns None (it does not raise) for unreadable files;
    # skip them instead of crashing inside face_detection
    image = cv2.imread(imagePath)
    if image is None:
      continue

    # perform face detection
    boxes = face_detection(image)

    # loop over the bounding boxes
    for (x , y , w , h) in boxes:
      try:
        # extract the face ROI, resize it and convert
        # it into grayscale format
        faceROI = image[y:y+h , x:x+w]
        faceROI = cv2.resize(faceROI , faceSize)
        faceROI = cv2.cvtColor(faceROI , cv2.COLOR_BGR2GRAY)
      except cv2.error:
        # best effort: drop boxes OpenCV cannot process (e.g. an empty
        # crop) but let unrelated errors and interrupts propagate
        continue

      # update the faces and labels list
      faces.append(faceROI)
      labels.append(name)

  # convert the faces and labels lists into Numpy array
  faces = np.array(faces)
  labels = np.array(labels)

  return (faces , labels)