In [1]:
# imports

import numpy as np
import cv2
import os

In [2]:
# use an OpenCV cascade to find all the faces in an image
# input:  image (already read in with cv2)
#         cascade file
# output: sequence of (x, y, width, height) boxes, one per detected face

CASCADE = "haarcascade_frontalface_default.xml"

def detect_faces(image, cascade_path=CASCADE):
    """Find the faces in an image with an OpenCV Haar cascade.

    Args:
        image: image already loaded with cv2 (e.g. via ``cv2.imread``).
            A colour (BGR) image is converted to grayscale first; an image
            that is already 2-D (single channel) is used as-is.
        cascade_path: path to the cascade XML file.

    Returns:
        Whatever ``detectMultiScale`` returns: a sequence of
        ``(x, y, width, height)`` boxes, one per detected face.

    Raises:
        ValueError: if ``image`` is None, which is what ``cv2.imread``
            hands back when it cannot read a file.
        IOError: if the cascade file could not be loaded.
    """
    # fail early with a readable message instead of an OpenCV assertion
    if image is None:
        raise ValueError("detect_faces() got no image (was the file readable?)")

    # read in the cascade; a bad path yields an *empty* classifier, not an error
    face_cascade = cv2.CascadeClassifier(cascade_path)
    if face_cascade.empty():
        raise IOError("could not load cascade file: {}".format(cascade_path))

    # the cascade works on grayscale pixels; skip the conversion when the
    # image is already single-channel, since BGR2GRAY would reject it
    if image.ndim == 2:
        grayscale_image = image
    else:
        grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # detect the faces with adjusted params
    faces = face_cascade.detectMultiScale(
            grayscale_image,
            scaleFactor=1.1,
            minNeighbors=5)

    # one (x, y, width, height) box per detected face
    return faces

In [3]:
# find the faces in a single image, and crop the photo down around them
# input:  image path
# output: None
#         saves the cropped images to the "faces" directory

OUTPUT = "all_pictures/faces/{}_{}.jpg"

def get_faces_in_image(image_path, output_format=OUTPUT):
    """Crop every detected face out of one image and save each as its own file.

    Args:
        image_path: path to the image, e.g. "all_pictures/covers/vvv_ii_yyyy.jpg".
        output_format: format string taking (image name, face index) and
            producing the path each cropped face is written to.

    Returns:
        True if at least one face was found, False otherwise (including
        when the file could not be read as an image).
    """
    # name of the image without directory or extension,
    # e.g. "all_pictures/covers/vvv_ii_yyyy.jpg" -> "vvv_ii_yyyy";
    # works for any directory depth, unlike indexing a split on "/"
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    image = cv2.imread(image_path)
    print(image_name)

    # cv2.imread returns None instead of raising for unreadable files
    # (e.g. stray non-image files in the directory): nothing to crop
    if image is None:
        return False

    # get the boxes of the faces in the image
    faces = detect_faces(image)

    # save each face as its own image
    face_number = 0
    for (x, y, width, height) in faces:

        # openCV treats each image as a numpy array, so you can "slice" the pixels
        cropped = image[y : y + height, x : x + width]

        # the face index in the file name records how many faces a picture has
        file_name = output_format.format(image_name, face_number)

        # make sure the target directory exists; cv2.imwrite does not create it
        output_dir = os.path.dirname(file_name)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        cv2.imwrite(file_name, cropped)
        face_number += 1

    # were there any faces found?
    return face_number != 0

In [4]:
# find all the faces in all the covers
# input:  directory to search through
# output: None

DIR = "all_pictures/covers"

def get_all_faces(directory=DIR):
    """Run face extraction on every file in a directory.

    Args:
        directory: directory whose entries are passed, one by one, to
            ``get_faces_in_image``.

    Returns:
        None. The per-image work is done by ``get_faces_in_image``.
    """
    # loop through all cover images; sorted so runs are reproducible,
    # since os.listdir gives no ordering guarantee
    for image in sorted(os.listdir(directory)):

        # image paths from os.listdir don't include the name of the directory
        # so it needs to be added in -- using the directory that was actually
        # listed, not the module-level default
        image_path = "{}/{}".format(directory, image)

        # save the cropped faces of each image
        get_faces_in_image(image_path)
In [5]:
# run the extraction over the default covers directory (DIR);
# each processed image name is printed as it is handled
get_all_faces()

190_01_2000
190_02_2000
190_03_2000
190_04_2000
190_05_2000
190_06_2000
190_07_2000
190_08_2000
190_09_2000
190_10_2000
190_11_2000
190_12_2000
191_01_2001
191_02_2001
191_03_2001
191_04_2001
191_05_2001
191_06_2001
191_07_2001
191_08_2001
191_09_2001
191_10_2001
191_11_2001
191_12_2001
192_01_2002
192_02_2002
192_03_2002
192_04_2002
192_05_2002
192_06_2002
192_07_2002
192_08_2002
192_09_2002
192_10_2002
192_11_2002
192_12_2002
193_01_2003
193_02_2003
193_03_2003
193_04_2003
193_05_2003
193_06_2003
193_07_2003
193_08_2003
193_09_2003
193_10_2003
193_11_2003
193_12_2003
194_01_2004
194_02_2004
194_03_2004
194_04_2004
194_05_2004
194_06_2004
194_07_2004
194_08_2004
194_09_2004
194_10_2004
194_11_2004
194_12_2004
195_01_2005
195_02_2005
195_03_2005
195_04_2005
195_05_2005
195_06_2005
195_07_2005
195_08_2005
195_09_2005
195_10_2005
195_11_2005
195_12_2005
196_01_2006
196_02_2006
196_03_2006
196_04_2006
196_05_2006
196_06_2006
196_07_2006
196_08_2006
196_09_2006
196_10_2006
196_11_2006
196_