In [1]:
import pandas as pd
import numpy as mp
import matplotlib.pyplot as plt
import cv2
import os

In [2]:
source_root = "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/cv2/data/"
face_cascade0 = cv2.CascadeClassifier(source_root + "haarcascade_frontalface_default.xml")
face_cascade1 = cv2.CascadeClassifier(source_root + "haarcascade_frontalface_alt.xml")
face_cascade2 = cv2.CascadeClassifier(source_root + "haarcascade_frontalface_alt2.xml")

In [3]:
def detect_face(im_path, face_cascade=face_cascade1, face_choice=0, adj_factor=0.2, resize=False):
    im = cv2.imread(im_path)
    
    parent_path = os.path.dirname(im_path)
    root_path = os.path.dirname(parent_path)
    folder_name = parent_path[parent_path.rfind("/")+1:]
    im_name = im_path[im_path.rfind("/")+1:]
    new_folder_name = folder_name + "_cropped"
    new_path = os.path.join(root_path, new_folder_name)
    
    if im is None:
        print(f"read error for {im_name}")
        return im_name

    minisize = (im.shape[1], im.shape[0])
    miniframe = cv2.resize(im, minisize)
    faces = face_cascade.detectMultiScale(miniframe)
    
    if len(faces) == 0:
        print(f"no face found in {im_name}")
        return im_name

    
    if new_folder_name not in os.listdir(root_path or None):
        os.mkdir(new_folder_name)
    

    if len(faces) > 1:
        if "check" not in os.listdir(new_path):
            os.mkdir(os.path.join(new_path, "check"))
        new_path = os.path.join(new_path, "check")
        
    face = faces[faces[:, 2].argsort()][-1-face_choice]

    x, y, w, h = [ v for v in face ]

    x_adj = int(adj_factor * w)
    if x - x_adj < 0 or x + w + x_adj > im.shape[1]:
        x_adj = min(x, im.shape[1] - x - w)
    y_adj = int(adj_factor * h)
    if y - y_adj < 0 or y + h + y_adj > im.shape[0]:
        y_adj = min(y, im.shape[0] - y - h)
    adj = min(x_adj, y_adj)
    x -= adj
    y -= adj
    w += adj * 2
    h += adj * 2
    
    cv2.rectangle(im, (x,y), (x+w,y+h), (255,255,255))
    sub_face = im[y:y+h, x:x+w]
    if not resize:
        cv2.imwrite(os.path.join(new_path, im_name), sub_face)
    else:
        resized_im = cv2.resize(sub_face, (224, 224))
        cv2.imwrite(os.path.join(new_path, im_name), resized_im)

    

#     plt.imshow(sub_face)

In [13]:
ims = ["../data/img_raw/Images/" + nm for nm in os.listdir("Images")]

miss_detection = []

for im in ims:
    missed = detect_face(im, face_cascade=face_cascade2, adj_factor=0)
    if missed:
        miss_detection.append(missed)



no face found in AF1248.jpg
no face found in CF427.jpg
no face found in CM42.jpg
no face found in AM1640.jpg
no face found in AF1183.jpg
no face found in CF276.jpg
no face found in AF1180.jpg
no face found in CF113.jpg


In [14]:
ims = ["../data/img_raw/Images/" + nm for nm in miss_detection]
miss_detection = []

for im in ims:
    missed = detect_face(im, face_cascade=face_cascade1, adj_factor=0)
    if missed:
        miss_detection.append(missed)

no face found in AF1248.jpg
no face found in CF427.jpg
no face found in CM42.jpg
no face found in AM1640.jpg
no face found in AF1183.jpg
no face found in AF1180.jpg
no face found in CF113.jpg


In [15]:
ims = ["../data/img_raw/Images/" + nm for nm in miss_detection]
miss_detection = []

for im in ims:
    missed = detect_face(im, face_cascade=face_cascade0, adj_factor=0)
    if missed:
        miss_detection.append(missed)

no face found in AF1248.jpg
no face found in AM1640.jpg
no face found in AF1183.jpg
no face found in AF1180.jpg
no face found in CF113.jpg


In [16]:
detect_face("../data/img_raw/Images/CF691.jpg", face_cascade=face_cascade2, face_choice=1)

In [6]:
im_path = "../data/img_raw/face_data-2"
im_names = os.listdir(im_path)
im_names = list(filter(lambda x: x.endswith(".jpg") or x.endswith(".jpeg") or x.endswith(".png") or x.endswith(".webp"), im_names))

miss_detection = []

for im_name in im_names:
    im_name = os.path.join(im_path, im_name)
    missed = detect_face(im_name, face_cascade=face_cascade2, adj_factor=0, resize=False)
    if missed:
        miss_detection.append(missed)

In [43]:
miss_detection_2 = []

for im_name in miss_detection:
    im_name = os.path.join(im_path, im_name)
    missed = detect_face(im_name, face_cascade=face_cascade1, adj_factor=0)
    if missed:
        miss_detection_2.append(missed)

no face found in 48.jpg
no face found in 259.jpg
no face found in 39.jpeg
no face found in 95.jpg


In [44]:
miss_detection_3 = []

for im_name in miss_detection_2:
    im_name = os.path.join(im_path, im_name)
    missed = detect_face(im_name, face_cascade=face_cascade0, adj_factor=0)
    if missed:
        miss_detection_3.append(missed)

no face found in 39.jpeg


In [45]:
multiple_face_miss_detected = os.listdir("../data/img_raw/face_data_cropped/check")
for im_name in multiple_face_miss_detected:
    im_name = os.path.join(im_path, im_name)
    missed = detect_face(im_name, face_cascade=face_cascade2, adj_factor=0, face_choice=1)

read error for .DS_Store


In [52]:
cropped_im_names = os.listdir("../data/img_raw/face_data_cropped")
cropped_im_names = list(filter(lambda x: x.endswith(".jpg") or x.endswith(".jpeg") or x.endswith(".png") or x.endswith(".webp"), cropped_im_names))
too_small_ims = []
face_sizes = []
for im_name in cropped_im_names:
    im_path = os.path.join("../data/img_raw/face_data_cropped", im_name)
    im = cv2.imread(im_path)
    if im.shape[0] < 224:
        too_small_ims.append(im_name)
        face_sizes.append(im.shape[0])
    

In [53]:
pd.DataFrame({"images": too_small_ims, "face_size": face_sizes}).to_csv("too_small_images.csv")