In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from tqdm import tqdm
import threading
import pickle

In [10]:
# Input dataset root and output folder for cropped faces.
# Both keep a trailing '/' because paths are built by string concatenation.
DATA_FOLDER = 'data/wiki_crop/'
FACES_FOLDER = 'faces/'

# Haar cascade classifiers, loaded from XML files expected in the working directory.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml')
eyes_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

# CascadeClassifier does not raise when the XML file cannot be read; it just
# stays empty and detectMultiScale fails later. Fail here with a clear message.
for _cascade_file, _cascade in (
    ('haarcascade_frontalface_alt2.xml', face_cascade),
    ('haarcascade_eye.xml', eyes_cascade),
):
    if _cascade.empty():
        raise IOError('Could not load Haar cascade file: ' + _cascade_file)

In [11]:
def get_image_paths(id, data_folder=None):
    """Return the paths of all ``.jpg`` files in one sub-folder of the dataset.

    Parameters
    ----------
    id : str
        Name of the sub-folder (the notebook passes zero-padded numbers such
        as ``'07'``). The name shadows the ``id`` builtin but is kept so
        existing callers keep working.
    data_folder : str, optional
        Root folder, including the trailing ``'/'``. Defaults to the
        module-level ``DATA_FOLDER``.

    Returns
    -------
    list of str
        ``data_folder + id + '/' + filename`` for every entry ending in
        ``.jpg``, sorted by filename.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` if the sub-folder cannot be listed.
    """
    if data_folder is None:
        data_folder = DATA_FOLDER
    folder = data_folder + id
    # os.listdir returns entries in arbitrary order. Sorting keeps the list
    # reproducible between runs, which matters because later cells pair
    # image_paths[i] with the i-th image from the images.pkl cache.
    # NOTE: delete an images.pkl created under a different ordering.
    return [
        folder + '/' + filename
        for filename in sorted(os.listdir(folder))
        if filename.endswith('.jpg')
    ]

# Collect the .jpg paths of the sub-folders '00' .. '99' and flatten them
# into a single 1-D NumPy array of path strings.
image_paths = np.concatenate(
    [get_image_paths('{:02d}'.format(folder_number)) for folder_number in range(100)]
)

In [19]:
# Load every image once and cache the decoded arrays in a pickle file so that
# later runs skip the slow decoding step.
#
# NOTE: the cache is used as-is whenever it exists and is not checked against
# image_paths. Delete images.pkl whenever the dataset or the path list changes,
# otherwise images[i] may no longer correspond to image_paths[i].
# To experiment on a subset, delete the cache and load e.g. image_paths[:500].
IMAGES_CACHE = 'images.pkl'

if os.path.exists(IMAGES_CACHE):
    # SECURITY: pickle.load can execute arbitrary code. Only load a cache file
    # that this notebook created itself.
    with open(IMAGES_CACHE, 'rb') as cache_file:
        images = pickle.load(cache_file)
else:
    images = [plt.imread(image_path) for image_path in tqdm(image_paths)]
    # 'with' guarantees the file is flushed and closed even if dumping fails.
    with open(IMAGES_CACHE, 'wb') as cache_file:
        pickle.dump(images, cache_file)

In [21]:
def detect_face(frame, max_eye_tilt_deg=5):
    """Detect faces in ``frame``, keeping them only if exactly two level eyes are found.

    Parameters
    ----------
    frame : numpy.ndarray
        Image to analyse: either a 2-D grayscale array or a 3-D colour array.
        Colour frames are treated as RGB, the channel order produced by
        ``plt.imread`` (used to load the images in this notebook).
        NOTE(review): ``cv2.equalizeHist`` expects 8-bit data; confirm for
        images that are not loaded from JPEG files.
    max_eye_tilt_deg : float, optional
        Largest allowed angle, in degrees, between the horizontal and the line
        joining the two eye centres. Defaults to 5.

    Returns
    -------
    list or numpy.ndarray
        The ``(x, y, w, h)`` rectangles returned by the face cascade, or an
        empty list when no face is found, when the eye cascade does not find
        exactly two eyes in the frame, or when the eye line is tilted by more
        than ``max_eye_tilt_deg``.

    Notes
    -----
    Eyes are searched in the whole frame, not inside each face rectangle, so
    the eye test accepts or rejects all faces of a frame together.
    """
    # The frame is RGB (see above), so use the RGB conversion code.
    # COLOR_BGR2GRAY would swap the red and blue luminance weights.
    if len(frame.shape) == 3:
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    else:
        frame_gray = frame

    # equalizeHist returns a new array, so the caller's frame is never modified.
    frame_gray = cv2.equalizeHist(frame_gray)

    # Detect faces first and skip the eye cascade when there is no face.
    faces = face_cascade.detectMultiScale(frame_gray)
    if len(faces) == 0:
        return []

    eyes = eyes_cascade.detectMultiScale(frame_gray)
    if len(eyes) != 2:
        return []

    # Sort the eye centres left-to-right so dx >= 0. Otherwise a level pair
    # listed right-eye-first gives an angle near +-180 degrees and is rejected.
    eye_centers = sorted((ex + ew // 2, ey + eh // 2) for (ex, ey, ew, eh) in eyes)
    (left_x, left_y), (right_x, right_y) = eye_centers

    angle = np.arctan2(right_y - left_y, right_x - left_x) * 180.0 / np.pi
    if abs(angle) > max_eye_tilt_deg:
        return []

    return faces

# Run the detector on every loaded image; image_faces[k] holds the result for images[k].
image_faces = [detect_face(picture) for picture in tqdm(images)]

100%|██████████| 500/500 [00:10<00:00, 46.94it/s]


In [22]:
# Cut every detected face rectangle out of its source image.
# Images with no accepted faces contribute nothing, since their entry in
# image_faces is empty.
cropped_faces = [
    picture[top:top + height, left:left + width]
    for index, picture in enumerate(tqdm(images))
    for (left, top, width, height) in image_faces[index]
]

100%|██████████| 500/500 [00:00<00:00, 501111.59it/s]


In [23]:
# Write every cropped face to FACES_FOLDER, named after the image it came from.

# cv2.imwrite returns False instead of raising when the target folder is missing.
os.makedirs(FACES_FOLDER, exist_ok=True)

# cropped_faces is a flat list, so its index is NOT the index of the source
# image. Rebuild the (source path, face number, faces in that image) triple for
# each crop by walking image_faces in the order used to fill cropped_faces.
# Assumes images[k] was loaded from image_paths[k].
face_sources = [
    (image_paths[image_index], face_number, len(faces))
    for image_index, faces in enumerate(image_faces)
    for face_number in range(len(faces))
]
assert len(face_sources) == len(cropped_faces), \
    'cropped_faces is out of sync with image_faces; re-run the cropping cell'

for face, (source_path, face_number, faces_in_image) in zip(tqdm(cropped_faces), face_sources):
    stem, extension = os.path.splitext(str(source_path).split('/')[-1])
    # Keep the original file name for a single face; add a suffix when one
    # image yields several faces so they do not overwrite each other.
    if faces_in_image > 1:
        stem = '{}_{}'.format(stem, face_number)

    # Colour crops are RGB and OpenCV writes BGR; grayscale crops are written as-is.
    if face.ndim == 3:
        face_to_write = cv2.cvtColor(face, cv2.COLOR_RGB2BGR)
    else:
        face_to_write = face

    target_path = FACES_FOLDER + stem + extension
    if not cv2.imwrite(target_path, face_to_write):
        raise IOError('Could not write face image: ' + target_path)

100%|██████████| 22/22 [00:00<00:00, 680.13it/s]
