### Download face images from google search

Python script for downloading images from Google and saving the faces detected in those images.

#### How does it work?
1. Download images from Google by searching for `keywords` (uses [google_images_download](https://github.com/hardikvasa/google-images-download))
2. Detect frontal and profile faces in the images (uses [opencv-python](https://github.com/skvark/opencv-python))
3. Crop and save the face images

In [1]:
from google_images_download import google_images_download as gid

def download_images(keywords, limit, output_dir):
    """Run a google_images_download search and store the results on disk.

    Args:
        keywords: Search keywords handed to the downloader unchanged.
        limit: Value passed as the downloader's ``limit`` argument.
        output_dir: Value passed as the downloader's ``output_directory``
            argument.
    """
    # Build the argument mapping first so the call itself stays short.
    arguments = {
        'keywords': keywords,
        'limit': limit,
        'output_directory': output_dir,
    }
    gid.googleimagesdownload().download(arguments)

In [2]:
from os import listdir
from os.path import exists, isfile, join
import cv2 as cv
import numpy as np

# Haar cascade classifiers, created once at module level and reused by
# detect_and_save_faces(). The XML paths are relative, so they are resolved
# against the current working directory.
# NOTE(review): OpenCV is believed not to raise here when the XML file is
# missing (the classifier is just empty) -- confirm the files exist.
frontalface_cascade = cv.CascadeClassifier('data/haarcascades/haarcascade_frontalface_default.xml')
profileface_cascade = cv.CascadeClassifier('data/haarcascades/haarcascade_profileface.xml')


def save_faces(img, faces, output_dir, file_id):
    """Crop every face rectangle out of ``img`` and write it to ``output_dir``.

    Each crop is saved as ``<file_id>_<index>.jpeg``, where ``index`` is the
    position of the rectangle in ``faces``. The output directory is created
    (including parents) when it does not exist yet, even if ``faces`` is empty.

    Args:
        img: Image array that is sliced as ``img[y:y+h, x:x+w]``.
        faces: Iterable of ``(x, y, w, h)`` rectangles.
        output_dir: Directory the cropped images are written to.
        file_id: Prefix used for the output file names.
    """
    # The file only imports ``listdir`` from ``os`` and helpers from
    # ``os.path``; the ``os`` module itself is never bound, so the original
    # ``os.makedirs`` call raised NameError. Import it locally.
    import os

    # exist_ok avoids the check-then-create race of ``if not exists(...)``.
    os.makedirs(output_dir, exist_ok=True)

    for i, (x, y, w, h) in enumerate(faces):
        face_img = img[y:y + h, x:x + w]
        output_file_path = join(output_dir, '{}_{}.jpeg'.format(file_id, i))
        cv.imwrite(output_file_path, face_img)
    

def detect_and_save_faces(images_dir, faces_dir):
    """Detect frontal and profile faces in every file of ``images_dir``.

    Each regular file directly inside ``images_dir`` is read with OpenCV,
    converted to grayscale and run through both module-level Haar cascades.
    Detected faces are cropped and saved via ``save_faces`` into
    ``<faces_dir>/frontal`` and ``<faces_dir>/profile``.

    Files that OpenCV cannot decode are skipped with a message instead of
    aborting the whole run.

    Args:
        images_dir: Directory containing the source images.
        faces_dir: Root directory for the cropped face images.
    """
    file_names = [f for f in listdir(images_dir) if isfile(join(images_dir, f))]

    for file_name in file_names:
        # Everything before the first dot is used as the output-file prefix.
        file_id = file_name.split('.')[0]
        image_path = join(images_dir, file_name)

        # cv.imread returns None (it does not raise) when the file is not a
        # readable image; cvtColor would then fail and stop the whole batch.
        img = cv.imread(image_path)
        if img is None:
            print('Skipping unreadable image: {}'.format(image_path))
            continue

        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

        frontal_faces = frontalface_cascade.detectMultiScale(gray, 1.3, 5)
        save_faces(img, frontal_faces, join(faces_dir, 'frontal'), file_id)

        profile_faces = profileface_cascade.detectMultiScale(gray, 1.3, 5)
        save_faces(img, profile_faces, join(faces_dir, 'profile'), file_id)

In [3]:
# Search term, number of images to request, and output locations.
keyword = 'george clooney'
num_search_images = 10
images_dir = './output/images'
faces_dir = './output/faces'

# Download the search results, then extract faces from them.
# Presumably the downloader stores images in a sub-directory named after the
# keyword, which is why the keyword is appended to both paths -- verify
# against google_images_download's behaviour.
download_images(keyword, num_search_images, images_dir)
detect_and_save_faces(join(images_dir, keyword), join(faces_dir, keyword))


Item no.: 1 --> Item name = george clooney
Evaluating...
Starting Download...
Completed Image ====> 1. 220px-George_Clooney_2016.jpg
Completed Image ====> 2. MV5BMjEyMTEyOTQ0MV5BMl5BanBnXkFtZTcwNzU3NTMzNw@@._V1_.jpg
Completed Image ====> 3. 416x416.jpg
Completed Image ====> 4. 220px-George_Clooney-4_The_Men_Who_Stare_at_Goats_TIFF09_%28cropped%29.jpg
Completed Image ====> 5. _102457094_pa-clooney.jpg
Completed Image ====> 6. george-clooney-net-worth-tequila.jpg
Completed Image ====> 7. George_Clooney-300x300.jpg
Completed Image ====> 8. george-clooney-5-2000.jpg
Completed Image ====> 9. 3a
Completed Image ====> 10. 18

Errors: 0

