In [0]:
# Uncomment this when running on Google Colab
!apt-get -qq install -y libsm6 libxext6
!pip install -q -U opencv-python

 Using OpenCV Haar Cascade to detect faces

 # Notes

 Dataset: https://susanqq.github.io/UTKFace/

 The labels of each face image are embedded in the file name, formatted as [age]_[gender]_[race]_[date&time].jpg
 - [age] is an integer from 0 to 116, indicating the age
 - [gender] is either 0 (male) or 1 (female)
 - [race] is an integer from 0 to 4, denoting White, Black, Asian, Indian, and Others (like Hispanic, Latino, Middle Eastern).
 - [date&time] is in the format of yyyymmddHHMMSSFFF, showing the date and time an image was collected to UTKFace

In [0]:
import cv2
import matplotlib.pyplot as plt
import time
import os
get_ipython().run_line_magic('matplotlib', 'inline')

# Change working directory from the workspace root to the ipynb file location.
# Best-effort: if the 'src' sub-directory cannot be entered, stay in the
# current directory. Only filesystem errors are ignored, so interrupts and
# programming errors still surface.
try:
	os.chdir(os.path.join(os.getcwd(), 'src'))
	print(os.getcwd())
except OSError:
	pass



In [0]:
# Mount Google Drive at /content/drive; the input paths defined in the
# "Base variables" cell all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Base variables
# Haar cascade XML file loaded into the classifier below
training_file_path = '/content/drive/My Drive/Documentos/UFABC/TG/haarcascade_frontalface_default.xml'
# Compressed dataset archive (tar.gz) stored on the mounted Drive
faces_dataset_path_zip = '/content/drive/My Drive/Documentos/UFABC/TG/UTKFace_complete.tar.gz'
# Directory that later cells list to find the extracted images
faces_dataset_path = '/content/UTKFace_complete'
# Detector shared by the helper functions defined further down
haar_face_cascade = cv2.CascadeClassifier(training_file_path)
# Single example image; NOTE: cv2.imread returns None instead of raising
# when the file cannot be read, so example_img may be None
example_img_path = '/content/drive/My Drive/Documentos/UFABC/TG/test1.jpg'
example_img = cv2.imread(example_img_path)



In [0]:
# Unzip dataset
import tarfile

# Extract into the parent of faces_dataset_path explicitly instead of into
# whatever the current working directory happens to be (an earlier cell may
# call os.chdir). Assumes the archive's top-level directory is the last
# component of faces_dataset_path, which is what the check cell relies on.
# The context manager closes the archive even if extraction fails.
with tarfile.open(faces_dataset_path_zip, "r:gz") as tar:
    tar.extractall(path=os.path.dirname(faces_dataset_path))

In [0]:
# Check unzip: print how many entries the extracted dataset directory holds
extracted_entries = os.listdir(faces_dataset_path)
print(len(extracted_entries))
!ls /content/UTKFace_complete | wc -l

24108
24108


In [0]:
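# Fix file names that are missing a label field so that every name splits into
# [age]_[gender]_[race]_[date&time].jpg (the missing field is filled with 0).
# NOTE(review): the third rename produces gender=3, outside the documented 0/1
# range -- confirm the intended labels for that image.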
!mv /content/UTKFace_complete/39_1_20170116174525125.jpg /content/UTKFace_complete/39_1_0_20170116174525125.jpg
!mv /content/UTKFace_complete/53__0_20170116184028385.jpg /content/UTKFace_complete/53_0_0_20170116184028385.jpg
!mv /content/UTKFace_complete/61_3_20170109150557335.jpg /content/UTKFace_complete/61_3_0_20170109150557335.jpg

In [0]:
# Base functions

def detect_faces(f_cascade, img, scaleFactor = 1.05, minNeighbors = 3, minSize = (30, 30)):
    """Run a cascade detector on a colour image and return its detections.

    The previous body ignored ``scaleFactor`` and always passed 1.05 to the
    detector. The argument is now honoured, and its default is 1.05 so that
    existing calls keep producing exactly the same detections.

    Args:
        f_cascade: detector exposing ``detectMultiScale``
            (a ``cv2.CascadeClassifier`` in this notebook).
        img: BGR image as returned by ``cv2.imread``; it is not modified.
        scaleFactor: image-pyramid scale step forwarded to the detector.
        minNeighbors: neighbour threshold forwarded to the detector.
        minSize: smallest (width, height) window forwarded to the detector.

    Returns:
        Whatever ``detectMultiScale`` returns: a sequence of (x, y, w, h) boxes.
    """
    # The detector works on grayscale input. cvtColor allocates a new array,
    # so no defensive copy of the caller's image is needed.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Multi-scale search: faces may appear at different sizes in the image.
    return f_cascade.detectMultiScale(
        gray,
        scaleFactor=scaleFactor,
        minNeighbors=minNeighbors,
        minSize=minSize,
    )

# go over list of faces and draw them as rectangles on original colored img
def print_faces(img, faces):
    """Show ``img`` in a new matplotlib figure with a green box around each face.

    Args:
        img: BGR image (as returned by ``cv2.imread``); it is not modified.
        faces: iterable of (x, y, w, h) boxes; may be empty.

    Returns:
        None. The image is displayed as a side effect.
    """
    # matplotlib expects RGB; cvtColor returns a new array, so the rectangles
    # below are drawn on a private copy.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    for (x, y, w, h) in faces:
        cv2.rectangle(img_rgb, (x, y), (x+w, y+h), (0, 255, 0), 2)
    # Exactly one figure per call. Creating it inside the loop produced an
    # empty figure for every extra face and none at all when `faces` was empty.
    plt.figure()
    plt.imshow(img_rgb)
    return

def load_element_image(element):
    """Read the file at element["path"] and store the result under element["image"].

    The same ``element`` mapping is returned so calls can be chained.
    """
    image_path = element["path"]
    loaded = cv2.imread(image_path)
    element["image"] = loaded
    return element

def detect_faces_from_element(element):
    """Detect faces in element["image"] with the shared Haar cascade.

    Stores the detections under "faces" and their number under "face_count",
    then returns the same ``element`` mapping.
    """
    source_image = element["image"]
    element["faces"] = detect_faces(haar_face_cascade, source_image)
    stored_faces = element["faces"]
    element["face_count"] = len(stored_faces)
    return element
  
def load_and_detect_faces(element):
    """Load the image at element["path"], detect faces, and record the result.

    Unlike keeping the decoded image on the record, this only stores the
    detection result, so memory use stays flat over a large list.

    Stores:
        element["faces"]: list of [x, y, w, h] lists of plain ints, so the
            record stays JSON-serialisable and the boxes can be drawn later.
        element["face_count"]: number of detected faces.

    Returns:
        The same ``element`` mapping (callers that ignore the return value
        keep working).

    Raises:
        OSError: if the image file cannot be read.
    """
    image = cv2.imread(element["path"])
    if image is None:
        # cv2.imread reports an unreadable/missing file by returning None;
        # fail with the offending path instead of an opaque cvtColor error.
        raise OSError("Could not read image: " + str(element["path"]))
    detections = detect_faces(haar_face_cascade, image)
    # Convert the detector output (numpy values) to built-in ints.
    element["faces"] = [[int(value) for value in box] for box in detections]
    element["face_count"] = len(element["faces"])
    return element

def filter_age(img_list, range_start, range_end):
    """Keep, in place, only entries whose "age" lies in [range_start, range_end].

    Both bounds are inclusive. The caller's list object is mutated and
    nothing is returned.
    """
    survivors = []
    for entry in img_list:
        age = entry["age"]
        if range_start <= age and age <= range_end:
            survivors.append(entry)
    # Slice assignment replaces the contents without rebinding the name, so
    # every reference to the list sees the filtered result.
    img_list[:] = survivors


In [0]:
# Create a list of images
# Each record holds the file name, its full path and the labels parsed from
# the [age]_[gender]_[race]_[date&time].jpg naming scheme.
img_name_list = sorted(os.listdir(faces_dataset_path))
#img_name_list = img_name_list[:10]  # uncomment to try the pipeline on a small subset
img_list = []
start_time = time.time()
for img_name in img_name_list:
  try:
    img_name_split = img_name.split("_")
    img_list.append({
        "name": img_name,
        "path": os.path.join(faces_dataset_path, img_name),
        "age": int(img_name_split[0]),
        "gender": int(img_name_split[1]),
        "race": int(img_name_split[2])
    })
  except (ValueError, IndexError):
    # ValueError: a label field is not an integer; IndexError: a field is
    # missing. Report the offending file name and skip it. Anything else
    # (e.g. an interrupt) is no longer swallowed.
    print(img_name)
print(time.time() - start_time)


0.1614546775817871


In [0]:
# Filter list
# Keep only images labelled with an age from 18 to 60 (both inclusive);
# filter_age edits img_list in place.
filter_age(img_list, 18, 60)
# Number of remaining images; later cells use it for the ETA and percentages.
count_imgs = len(img_list)
print(count_imgs)


17130


In [0]:
# Load and detect faces from the list

# Runs load_and_detect_faces on every record while rewriting a single
# progress line ("\r") with the number of images started and a linear
# estimate of the remaining seconds; prints the total elapsed time at the end.
start_time = time.time()
counter = 0
for counter, img in enumerate(img_list, start=1):
    elapsed = time.time() - start_time
    remaining = count_imgs - counter
    eta = elapsed*remaining/counter
    print("\r" + str(counter) + " eta = " + str(eta), end = ' ')
    load_and_detect_faces(img)

print(" ")
print(time.time() - start_time)



17130 eta = 0.0  
3634.049448490143


In [0]:
# Tally the images by number of detected faces (0, 1, or 2 and more) and
# report each bucket as an absolute count and a percentage of count_imgs.
tally = [0, 0, 0]
for img in img_list:
    faces_found = img["face_count"]
    bucket = 0 if faces_found == 0 else (1 if faces_found == 1 else 2)
    tally[bucket] += 1
count_0, count_1, count_2 = tally

for total, label in (
    (count_0, " images with 0 faces ("),
    (count_1, " images with 1 faces ("),
    (count_2, " images with 2 or more faces ("),
):
    print(str(total) + label + str(total*100/count_imgs) + "%)")

283 images with 0 faces (1.6520723876240513%)
8789 images with 1 faces (51.30764740221833%)
8058 images with 2 or more faces (47.04028021015762%)


In [0]:
# Show n faces

for img in img_list[:10]:
  if img["face_count"] > 0:
    image = cv2.imread(img["path"])
    # A record may carry only "face_count" (no stored boxes); indexing
    # img["faces"] directly would then raise KeyError, so detect again here.
    faces = img["faces"] if "faces" in img else detect_faces(haar_face_cascade, image)
    print_faces(image, faces)

In [0]:
# Export results

import json

# Build JSON-safe copies of the records. The helper functions in this
# notebook can attach a decoded "image" (dropped here: large and not
# serialisable) and a "faces" array of numpy values (converted to nested lists
# of plain ints). Records holding only names, labels and counts are copied
# unchanged.
img_list_clean = []
for img in img_list:
  img_copy = {key: value for key, value in img.items() if key != "image"}
  if "faces" in img_copy:
    img_copy["faces"] = [[int(value) for value in box] for box in img_copy["faces"]]
  img_list_clean.append(img_copy)

result = {"raw_result": img_list_clean}
with open('result_faces_scaleFactor_105_minNeighbors_3_minSize_30_30.json', 'w') as fp:
    json.dump(result, fp)

In [0]:
# Move the exported JSON to Drive. The destination is quoted because the path
# contains a space ("My Drive"); unquoted, the shell splits it into two arguments.
!mv /content/result_faces_scaleFactor_105_minNeighbors_3_minSize_30_30.json "/content/drive/My Drive/Documentos/UFABC/TG/result_faces_scaleFactor_105_minNeighbors_3_minSize_30_30.json"